forked from phoenix/litellm-mirror

Merge branch 'main' into litellm_parallel_requests

commit e6963217ba
79 changed files with 3913 additions and 180 deletions

@@ -8,7 +8,7 @@
     <img src="https://railway.app/button.svg" alt="Deploy on Railway">
     </a>
 </p>
-<p align="center">Call all LLM APIs using the OpenAI format [Bedrock, Huggingface, VertexAI, TogetherAI, Azure, OpenAI, etc.]
+<p align="center">Call all LLM APIs using the OpenAI format [Bedrock, Huggingface, VertexAI, TogetherAI, Azure, OpenAI, Groq etc.]
 <br>
 </p>
 <h4 align="center"><a href="https://docs.litellm.ai/docs/simple_proxy" target="_blank">OpenAI Proxy Server</a> | <a href="https://docs.litellm.ai/docs/hosted" target="_blank"> Hosted Proxy (Preview)</a> | <a href="https://docs.litellm.ai/docs/enterprise"target="_blank">Enterprise Tier</a></h4>

@@ -9,13 +9,11 @@ services:
     #########################################
     ## Uncomment these lines to start proxy with a config.yaml file ##
     # volumes:
-    #  - ./proxy_server_config.yaml:/app/config.yaml
-    # command: [ "--config", "./config.yaml", "--port", "4000"]
     ###############################################
     ports:
       - "4000:4000" # Map the container port to the host, change the host port if necessary
     environment:
-      DATABASE_URL: "postgresql://postgres:example@db:5432/postgres"
+      DATABASE_URL: "postgresql://llmproxy:dbpassword9090@db:5432/litellm"
       STORE_MODEL_IN_DB: "True" # allows adding models to proxy via UI
     env_file:
       - .env # Load local .env file

@@ -25,11 +23,31 @@ services:
     image: postgres
     restart: always
     environment:
-      POSTGRES_PASSWORD: example
+      POSTGRES_DB: litellm
+      POSTGRES_USER: llmproxy
+      POSTGRES_PASSWORD: dbpassword9090
     healthcheck:
-      test: ["CMD-SHELL", "pg_isready"]
+      test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
       interval: 1s
       timeout: 5s
       retries: 10
 
+  prometheus:
+    image: prom/prometheus
+    volumes:
+      - prometheus_data:/prometheus
+      - ./prometheus.yml:/etc/prometheus/prometheus.yml
+    ports:
+      - "9090:9090"
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yml'
+      - '--storage.tsdb.path=/prometheus'
+      - '--storage.tsdb.retention.time=15d'
+    restart: always
+
-# ...rest of your docker-compose config if any
+volumes:
+  prometheus_data:
+    driver: local
+
+
+# ...rest of your docker-compose config if any

docs/my-website/docs/observability/arize_integration.md (new file, 72 lines)

import Image from '@theme/IdealImage';

# 🔥 Arize AI - Logging LLM Input/Output

AI Observability and Evaluation Platform

:::tip

This is community maintained. Please make an issue if you run into a bug:
https://github.com/BerriAI/litellm

:::

## Pre-Requisites
Make an account on [Arize AI](https://app.arize.com/auth/login)

## Quick Start
Use just 2 lines of code to instantly log your responses **across all providers** with Arize

```python
litellm.callbacks = ["arize"]
```
```python
import litellm
import os

os.environ["ARIZE_SPACE_KEY"] = ""
os.environ["ARIZE_API_KEY"] = "" # defaults to litellm-completion

# LLM API Keys
os.environ['OPENAI_API_KEY']=""

# set arize as a callback, litellm will send the data to arize
litellm.callbacks = ["arize"]

# openai call
response = litellm.completion(
  model="gpt-3.5-turbo",
  messages=[
    {"role": "user", "content": "Hi 👋 - i'm openai"}
  ]
)
```

### Using with LiteLLM Proxy

```yaml
model_list:
  - model_name: gpt-4
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

litellm_settings:
  callbacks: ["arize"]

environment_variables:
    ARIZE_SPACE_KEY: "d0*****"
    ARIZE_API_KEY: "141a****"
```

## Support & Talk to Founders

- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
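
The proxy section above stops at the config; a minimal sketch of a test request against it, assuming the proxy from that config is running locally on port 4000 with a virtual key `sk-1234` (both are assumptions, not part of the file above):

```python
import openai

# point the standard OpenAI client at the local LiteLLM proxy
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# a successful call through the proxy should then appear as a trace in Arize
response = client.chat.completions.create(
    model="gpt-4",  # the model_name alias from the config above
    messages=[{"role": "user", "content": "Hi 👋 - test request for the arize callback"}],
)
print(response)
```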
docs/my-website/docs/observability/braintrust.md (new file, 147 lines)

import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# ⚡️ Braintrust - Evals + Logging

[Braintrust](https://www.braintrust.dev/) manages everything from evaluations, logging, and a prompt playground to data management for AI products.

## Quick Start

```python
# pip install litellm
import litellm
import os

# set env
os.environ["BRAINTRUST_API_KEY"] = ""
os.environ['OPENAI_API_KEY']=""

# set braintrust as a callback, litellm will send the data to braintrust
litellm.callbacks = ["braintrust"]

# openai call
response = litellm.completion(
  model="gpt-3.5-turbo",
  messages=[
    {"role": "user", "content": "Hi 👋 - i'm openai"}
  ]
)
```

## OpenAI Proxy Usage

1. Add keys to env
```env
BRAINTRUST_API_KEY=""
```

2. Add braintrust to callbacks
```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
      api_key: os.environ/OPENAI_API_KEY

litellm_settings:
  callbacks: ["braintrust"]
```

3. Test it!

```bash
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
    "model": "groq-llama3",
    "messages": [
        { "role": "system", "content": "Use your tools smartly"},
        { "role": "user", "content": "What time is it now? Use your tool"}
    ]
}'
```

## Advanced - pass Project ID

<Tabs>
<TabItem value="sdk" label="SDK">

```python
response = litellm.completion(
  model="gpt-3.5-turbo",
  messages=[
    {"role": "user", "content": "Hi 👋 - i'm openai"}
  ],
  metadata={
    "project_id": "my-special-project"
  }
)
```

</TabItem>
<TabItem value="proxy" label="PROXY">

**Curl**

```bash
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
    "model": "groq-llama3",
    "messages": [
        { "role": "system", "content": "Use your tools smartly"},
        { "role": "user", "content": "What time is it now? Use your tool"}
    ],
    "metadata": {
        "project_id": "my-special-project"
    }
}'
```

**OpenAI SDK**

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages = [
        {
            "role": "user",
            "content": "this is a test request, write a short poem"
        }
    ],
    extra_body={ # pass in any provider-specific param, if not supported by openai, https://docs.litellm.ai/docs/completion/input#provider-specific-params
        "metadata": { # 👈 use for logging additional params (e.g. to langfuse)
            "project_id": "my-special-project"
        }
    }
)

print(response)
```

For more examples, [**Click Here**](../proxy/user_keys.md#chatcompletions)

</TabItem>
</Tabs>

## Full API Spec

Here's everything you can pass in metadata for a braintrust request:

`braintrust_*` - any metadata field starting with `braintrust_` will be passed as metadata to the logging request

`project_id` - set the project id for a braintrust call. Default is `litellm`.
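
A sketch combining the two metadata fields above, on the assumption (stated by the spec above) that `braintrust_`-prefixed keys are forwarded as-is; the key name `braintrust_experiment_note` is illustrative, not a documented field:

```python
import litellm

litellm.callbacks = ["braintrust"]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi 👋"}],
    metadata={
        "project_id": "my-special-project",        # braintrust project to log under
        "braintrust_experiment_note": "baseline",  # hypothetical braintrust_* field, forwarded as metadata
    },
)
```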
@@ -1,4 +1,4 @@
-# 🧠 Helicone - OSS LLM Observability Platform
+# 🧊 Helicone - OSS LLM Observability Platform
 
 :::tip
 

@@ -1,6 +1,6 @@
 import Image from '@theme/IdealImage';
 
-# Langsmith - Logging LLM Input/Output
+# 🦜 Langsmith - Logging LLM Input/Output
 
 
 :::tip

@@ -56,7 +56,7 @@ response = litellm.completion(
 ```
 
 ## Advanced
-### Set Custom Project & Run names
+### Set Langsmith fields - Custom Project, Run names, tags
 
 ```python
 import litellm

@@ -77,6 +77,7 @@ response = litellm.completion(
     metadata={
         "run_name": "litellmRUN", # langsmith run name
         "project_name": "litellm-completion", # langsmith project name
+        "tags": ["model1", "prod-2"] # tags to log on langsmith
     }
 )
 print(response)

@@ -1,10 +1,16 @@
 import Image from '@theme/IdealImage';
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
 
 # Raw Request/Response Logging
 
+
+## Logging
 See the raw request/response sent by LiteLLM in your logging provider (OTEL/Langfuse/etc.).
 
-**on SDK**
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
 ```python
 # pip install langfuse
 import litellm

@@ -34,13 +40,85 @@ response = litellm.completion(
 )
 ```
 
-**on Proxy**
+</TabItem>
+<TabItem value="proxy" label="PROXY">
+
+
 ```yaml
 litellm_settings:
   log_raw_request_response: True
 ```
 
+
+</TabItem>
+</Tabs>
+
 **Expected Log**
 
 <Image img={require('../../img/raw_request_log.png')}/>
+
+
+## Return Raw Response Headers
+
+Return raw response headers from llm provider.
+
+Currently only supported for openai.
+
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
+```python
+import litellm
+import os
+
+litellm.return_response_headers = True
+
+## set ENV variables
+os.environ["OPENAI_API_KEY"] = "your-api-key"
+
+response = litellm.completion(
+    model="gpt-3.5-turbo",
+    messages=[{ "content": "Hello, how are you?","role": "user"}]
+)
+
+print(response._hidden_params)
+```
+
+</TabItem>
+<TabItem value="proxy" label="PROXY">
+
+1. Setup config.yaml
+
+```yaml
+model_list:
+  - model_name: gpt-3.5-turbo
+    litellm_params:
+      model: gpt-3.5-turbo
+      api_key: os.environ/GROQ_API_KEY
+
+litellm_settings:
+  return_response_headers: true
+```
+
+2. Test it!
+
+```bash
+curl -X POST 'http://0.0.0.0:4000/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-1234' \
+-d '{
+    "model": "gpt-3.5-turbo",
+    "messages": [
+        { "role": "system", "content": "Use your tools smartly"},
+        { "role": "user", "content": "What time is it now? Use your tool"}
+    ]
+}'
+```
+</TabItem>
+</Tabs>
+
+**Expected Response**
+
+<Image img={require('../../img/raw_response_headers.png')}/>

docs/my-website/docs/oidc.md (new file, 223 lines)

# OpenID Connect (OIDC)
LiteLLM supports using OpenID Connect (OIDC) for authentication to upstream services. This allows you to avoid storing sensitive credentials in your configuration files.

## OIDC Identity Provider (IdP)

LiteLLM supports the following OIDC identity providers:

| Provider | Config Name | Custom Audiences |
| -------------------------| ------------ | ---------------- |
| Google Cloud Run | `google` | Yes |
| CircleCI v1 | `circleci` | No |
| CircleCI v2 | `circleci_v2`| No |
| GitHub Actions | `github` | Yes |
| Azure Kubernetes Service | `azure` | No |

If you would like to use a different OIDC provider, please open an issue on GitHub.

## OIDC Connect Relying Party (RP)

LiteLLM supports the following OIDC relying parties / clients:

- Amazon Bedrock
- Azure OpenAI
- _(Coming soon) Google Cloud Vertex AI_

### Configuring OIDC

Wherever a secret key can be used, OIDC can be used in-place. The general format is:

```
oidc/config_name_here/audience_here
```

For providers that do not use the `audience` parameter, you can (and should) omit it:

```
oidc/config_name_here/
```

## Examples

### Google Cloud Run -> Amazon Bedrock

```yaml
model_list:
  - model_name: claude-3-haiku-20240307
    litellm_params:
      model: bedrock/anthropic.claude-3-haiku-20240307-v1:0
      aws_region_name: us-west-2
      aws_session_name: "litellm"
      aws_role_name: "arn:aws:iam::YOUR_THING_HERE:role/litellm-google-demo"
      aws_web_identity_token: "oidc/google/https://example.com"
```

### CircleCI v2 -> Amazon Bedrock

```yaml
model_list:
  - model_name: command-r
    litellm_params:
      model: bedrock/cohere.command-r-v1:0
      aws_region_name: us-west-2
      aws_session_name: "my-test-session"
      aws_role_name: "arn:aws:iam::335785316107:role/litellm-github-unit-tests-circleci"
      aws_web_identity_token: "oidc/circleci_v2/"
```

#### Amazon IAM Role Configuration for CircleCI v2 -> Bedrock

The configuration below is only an example. You should adjust the permissions and trust relationship to match your specific use case.

Permissions:

```json
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "VisualEditor0",
            "Effect": "Allow",
            "Action": [
                "bedrock:InvokeModel",
                "bedrock:InvokeModelWithResponseStream"
            ],
            "Resource": [
                "arn:aws:bedrock:*::foundation-model/anthropic.claude-3-haiku-20240307-v1:0",
                "arn:aws:bedrock:*::foundation-model/cohere.command-r-v1:0"
            ]
        }
    ]
}
```

See https://docs.aws.amazon.com/bedrock/latest/userguide/security_iam_id-based-policy-examples.html for more examples.

Trust Relationship:

```json
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {
                "Federated": "arn:aws:iam::335785316107:oidc-provider/oidc.circleci.com/org/c5a99188-154f-4f69-8da2-b442b1bf78dd"
            },
            "Action": "sts:AssumeRoleWithWebIdentity",
            "Condition": {
                "StringEquals": {
                    "oidc.circleci.com/org/c5a99188-154f-4f69-8da2-b442b1bf78dd:aud": "c5a99188-154f-4f69-8da2-b442b1bf78dd"
                },
                "ForAnyValue:StringLike": {
                    "oidc.circleci.com/org/c5a99188-154f-4f69-8da2-b442b1bf78dd:sub": [
                        "org/c5a99188-154f-4f69-8da2-b442b1bf78dd/project/*/user/*/vcs-origin/github.com/BerriAI/litellm/vcs-ref/refs/heads/main",
                        "org/c5a99188-154f-4f69-8da2-b442b1bf78dd/project/*/user/*/vcs-origin/github.com/BerriAI/litellm/vcs-ref/refs/heads/litellm_*"
                    ]
                }
            }
        }
    ]
}
```

This trust relationship restricts CircleCI to only assume the role on the main branch and branches that start with `litellm_`.

For CircleCI (v1 and v2), you also need to add your organization's OIDC provider in your AWS IAM settings. See https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create_for-idp_oidc.html for more information.

:::tip

You should _never_ need to create an IAM user. If you did, you're not using OIDC correctly. You should only be creating a role with permissions and a trust relationship to your OIDC provider.

:::

### Google Cloud Run -> Azure OpenAI

```yaml
model_list:
  - model_name: gpt-4o-2024-05-13
    litellm_params:
      model: azure/gpt-4o-2024-05-13
      azure_ad_token: "oidc/google/https://example.com"
      api_version: "2024-06-01"
      api_base: "https://demo-here.openai.azure.com"
    model_info:
      base_model: azure/gpt-4o-2024-05-13
```

For Azure OpenAI, you need to define `AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, and optionally `AZURE_AUTHORITY_HOST` in your environment.

```bash
export AZURE_CLIENT_ID="91a43c21-cf21-4f34-9085-331015ea4f91" # Azure AD Application (Client) ID
export AZURE_TENANT_ID="f3b1cf79-eba8-40c3-8120-cb26aca169c2" # Will be the same across all of your Azure AD applications
export AZURE_AUTHORITY_HOST="https://login.microsoftonline.com" # 👈 Optional, defaults to "https://login.microsoftonline.com"
```

:::tip

You can find `AZURE_CLIENT_ID` by visiting `https://login.microsoftonline.com/YOUR_DOMAIN_HERE/v2.0/.well-known/openid-configuration` and looking for the UUID in the `issuer` field.

:::

:::tip

Don't set `AZURE_AUTHORITY_HOST` in your environment unless you need to override the default value. This way, if the default value changes in the future, you won't need to update your environment.

:::

:::tip

By default, Azure AD applications use the audience `api://AzureADTokenExchange`. We recommend setting the audience to something more specific to your application.

:::

#### Azure AD Application Configuration

Unfortunately, Azure is a bit more complicated to set up than other OIDC relying parties like AWS. Basically, you have to:

1. Create an Azure application.
2. Add a federated credential for the OIDC IdP you're using (e.g. Google Cloud Run).
3. Add the Azure application to the resource group that contains the Azure OpenAI resource(s).
4. Give the Azure application the necessary role to access the Azure OpenAI resource(s).

The custom role below is the recommended minimum permissions for the Azure application to access Azure OpenAI resources. You should adjust the permissions to match your specific use case.

```json
{
    "id": "/subscriptions/24ebb700-ec2f-417f-afad-78fe15dcc91f/providers/Microsoft.Authorization/roleDefinitions/baf42808-99ff-466d-b9da-f95bb0422c5f",
    "properties": {
        "roleName": "invoke-only",
        "description": "",
        "assignableScopes": [
            "/subscriptions/24ebb700-ec2f-417f-afad-78fe15dcc91f/resourceGroups/your-openai-group-name"
        ],
        "permissions": [
            {
                "actions": [],
                "notActions": [],
                "dataActions": [
                    "Microsoft.CognitiveServices/accounts/OpenAI/deployments/audio/action",
                    "Microsoft.CognitiveServices/accounts/OpenAI/deployments/search/action",
                    "Microsoft.CognitiveServices/accounts/OpenAI/deployments/completions/action",
                    "Microsoft.CognitiveServices/accounts/OpenAI/deployments/chat/completions/action",
                    "Microsoft.CognitiveServices/accounts/OpenAI/deployments/extensions/chat/completions/action",
                    "Microsoft.CognitiveServices/accounts/OpenAI/deployments/embeddings/action",
                    "Microsoft.CognitiveServices/accounts/OpenAI/images/generations/action"
                ],
                "notDataActions": []
            }
        ]
    }
}
```

_Note: Your UUIDs will be different._

Please contact us for paid enterprise support if you need help setting up Azure AD applications.
@@ -56,7 +56,7 @@ for chunk in response:
 print(chunk["choices"][0]["delta"]["content"])  # same as openai format
 ```
 
-## OpenAI Proxy Usage
+## Usage with LiteLLM Proxy
 
 Here's how to call Anthropic with the LiteLLM Proxy Server
 

@@ -69,14 +69,6 @@ export ANTHROPIC_API_KEY="your-api-key"
 ### 2. Start the proxy
 
 <Tabs>
-<TabItem value="cli" label="cli">
-
-```bash
-$ litellm --model claude-3-opus-20240229
-
-# Server running on http://0.0.0.0:4000
-```
-</TabItem>
 <TabItem value="config" label="config.yaml">
 
 ```yaml

@@ -91,6 +83,14 @@ model_list:
 litellm --config /path/to/config.yaml
 ```
 </TabItem>
+<TabItem value="cli" label="cli">
+
+```bash
+$ litellm --model claude-3-opus-20240229
+
+# Server running on http://0.0.0.0:4000
+```
+</TabItem>
 </Tabs>
 
 ### 3. Test it

docs/my-website/docs/providers/friendliai.md (new file, 60 lines)

# FriendliAI
https://suite.friendli.ai/

**We support ALL FriendliAI models, just set `friendliai/` as a prefix when sending completion requests**

## API Key
```python
# env variable
os.environ['FRIENDLI_TOKEN']
os.environ['FRIENDLI_API_BASE'] # Optional. Set this when using dedicated endpoint.
```

## Sample Usage
```python
from litellm import completion
import os

os.environ['FRIENDLI_TOKEN'] = ""
response = completion(
    model="friendliai/mixtral-8x7b-instruct-v0-1",
    messages=[
       {"role": "user", "content": "hello from litellm"}
   ],
)
print(response)
```

## Sample Usage - Streaming
```python
from litellm import completion
import os

os.environ['FRIENDLI_TOKEN'] = ""
response = completion(
    model="friendliai/mixtral-8x7b-instruct-v0-1",
    messages=[
       {"role": "user", "content": "hello from litellm"}
   ],
    stream=True
)

for chunk in response:
    print(chunk)
```

## Supported Models
### Serverless Endpoints
We support ALL FriendliAI models, just set `friendliai/` as a prefix when sending completion requests

| Model Name | Function Call |
|--------------------------|-----------------------------------------------------------------------|
| mixtral-8x7b-instruct | `completion(model="friendliai/mixtral-8x7b-instruct-v0-1", messages)` |
| meta-llama-3-8b-instruct | `completion(model="friendliai/meta-llama-3-8b-instruct", messages)` |
| meta-llama-3-70b-instruct | `completion(model="friendliai/meta-llama-3-70b-instruct", messages)` |

### Dedicated Endpoints
```
model="friendliai/$ENDPOINT_ID:$ADAPTER_ROUTE"
```
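
A usage sketch for the dedicated-endpoint format above; the endpoint ID, adapter route, and base URL below are placeholders rather than real values:

```python
from litellm import completion
import os

os.environ["FRIENDLI_TOKEN"] = ""     # token for your dedicated endpoint
os.environ["FRIENDLI_API_BASE"] = ""  # base URL of your dedicated endpoint (placeholder)

response = completion(
    # "$ENDPOINT_ID:$ADAPTER_ROUTE" filled in with placeholder values
    model="friendliai/YOUR_ENDPOINT_ID:YOUR_ADAPTER_ROUTE",
    messages=[{"role": "user", "content": "hello from litellm"}],
)
print(response)
```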
@@ -1,3 +1,6 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
 # Groq
 https://groq.com/
 

@@ -20,7 +23,7 @@ import os
 
 os.environ['GROQ_API_KEY'] = ""
 response = completion(
-    model="groq/llama2-70b-4096",
+    model="groq/llama3-8b-8192",
     messages=[
        {"role": "user", "content": "hello from litellm"}
    ],

@@ -35,7 +38,7 @@ import os
 
 os.environ['GROQ_API_KEY'] = ""
 response = completion(
-    model="groq/llama2-70b-4096",
+    model="groq/llama3-8b-8192",
     messages=[
        {"role": "user", "content": "hello from litellm"}
    ],

@@ -47,6 +50,101 @@ for chunk in response:
 ```
 
 
+
+## Usage with LiteLLM Proxy
+
+### 1. Set Groq Models on config.yaml
+
+```yaml
+model_list:
+  - model_name: groq-llama3-8b-8192 # Model Alias to use for requests
+    litellm_params:
+      model: groq/llama3-8b-8192
+      api_key: "os.environ/GROQ_API_KEY" # ensure you have `GROQ_API_KEY` in your .env
+```
+
+### 2. Start Proxy
+
+```
+litellm --config config.yaml
+```
+
+### 3. Test it
+
+Make request to litellm proxy
+
+<Tabs>
+<TabItem value="Curl" label="Curl Request">
+
+```shell
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+--header 'Content-Type: application/json' \
+--data ' {
+    "model": "groq-llama3-8b-8192",
+    "messages": [
+        {
+        "role": "user",
+        "content": "what llm are you"
+        }
+    ]
+}
+'
+```
+</TabItem>
+<TabItem value="openai" label="OpenAI v1.0.0+">
+
+```python
+import openai
+client = openai.OpenAI(
+    api_key="anything",
+    base_url="http://0.0.0.0:4000"
+)
+
+response = client.chat.completions.create(model="groq-llama3-8b-8192", messages = [
+    {
+        "role": "user",
+        "content": "this is a test request, write a short poem"
+    }
+])
+
+print(response)
+```
+</TabItem>
+<TabItem value="langchain" label="Langchain">
+
+```python
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts.chat import (
+    ChatPromptTemplate,
+    HumanMessagePromptTemplate,
+    SystemMessagePromptTemplate,
+)
+from langchain.schema import HumanMessage, SystemMessage
+
+chat = ChatOpenAI(
+    openai_api_base="http://0.0.0.0:4000", # set openai_api_base to the LiteLLM Proxy
+    model = "groq-llama3-8b-8192",
+    temperature=0.1
+)
+
+messages = [
+    SystemMessage(
+        content="You are a helpful assistant that im using to make a test request to."
+    ),
+    HumanMessage(
+        content="test from litellm. tell me why it's amazing in 1 sentence"
+    ),
+]
+response = chat(messages)
+
+print(response)
+```
+</TabItem>
+</Tabs>
+
+
+
 ## Supported Models - ALL Groq Models Supported!
 We support ALL Groq models, just set `groq/` as a prefix when sending completion requests
 

@@ -114,7 +212,7 @@ tools = [
     }
 ]
 response = litellm.completion(
-    model="groq/llama2-70b-4096",
+    model="groq/llama3-8b-8192",
     messages=messages,
     tools=tools,
     tool_choice="auto",  # auto is default, but we'll be explicit

@@ -154,7 +252,7 @@ if tool_calls:
         )  # extend conversation with function response
     print(f"messages: {messages}")
     second_response = litellm.completion(
-        model="groq/llama2-70b-4096", messages=messages
+        model="groq/llama3-8b-8192", messages=messages
     )  # get a new response from the model where it can see the function response
     print("second response\n", second_response)
 ```

@@ -749,6 +749,85 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
 </TabItem>
 </Tabs>
 
+
+## Llama 3 API
+
+| Model Name | Function Call |
+|------------------|--------------------------------------|
+| meta/llama3-405b-instruct-maas | `completion('vertex_ai/meta/llama3-405b-instruct-maas', messages)` |
+
+### Usage
+
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
+```python
+from litellm import completion
+import os
+
+os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ""
+
+model = "meta/llama3-405b-instruct-maas"
+
+vertex_ai_project = "your-vertex-project" # can also set this as os.environ["VERTEXAI_PROJECT"]
+vertex_ai_location = "your-vertex-location" # can also set this as os.environ["VERTEXAI_LOCATION"]
+
+response = completion(
+    model="vertex_ai/" + model,
+    messages=[{"role": "user", "content": "hi"}],
+    temperature=0.7,
+    vertex_ai_project=vertex_ai_project,
+    vertex_ai_location=vertex_ai_location,
+)
+print("\nModel Response", response)
+```
+</TabItem>
+<TabItem value="proxy" label="Proxy">
+
+**1. Add to config**
+
+```yaml
+model_list:
+  - model_name: anthropic-llama
+    litellm_params:
+      model: vertex_ai/meta/llama3-405b-instruct-maas
+      vertex_ai_project: "my-test-project"
+      vertex_ai_location: "us-east-1"
+  - model_name: anthropic-llama
+    litellm_params:
+      model: vertex_ai/meta/llama3-405b-instruct-maas
+      vertex_ai_project: "my-test-project"
+      vertex_ai_location: "us-west-1"
+```
+
+**2. Start proxy**
+
+```bash
+litellm --config /path/to/config.yaml
+
+# RUNNING at http://0.0.0.0:4000
+```
+
+**3. Test it!**
+
+```bash
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+--header 'Authorization: Bearer sk-1234' \
+--header 'Content-Type: application/json' \
+--data '{
+    "model": "anthropic-llama", # 👈 the 'model_name' in config
+    "messages": [
+        {
+        "role": "user",
+        "content": "what llm are you"
+        }
+    ],
+}'
+```
+
+</TabItem>
+</Tabs>
+
 ## Model Garden
 | Model Name | Function Call |
 |------------------|--------------------------------------|

@@ -119,8 +119,8 @@ All Possible Alert Types
 
 ```python
 AlertType = Literal[
-    "llm_exceptions",
-    "llm_too_slow",
+    "llm_exceptions", # LLM API Exceptions
+    "llm_too_slow", # LLM Responses slower than alerting_threshold
     "llm_requests_hanging",
     "budget_alerts",
     "db_exceptions",

@@ -133,6 +133,61 @@ AlertType = Literal[
 
 ```
 
+## Advanced - set specific slack channels per alert type
+
+Use this if you want to set specific channels per alert type
+
+**This allows you to do the following**
+```
+llm_exceptions -> go to slack channel #llm-exceptions
+spend_reports -> go to slack channel #llm-spend-reports
+```
+
+Set `alert_to_webhook_url` on your config.yaml
+
+```yaml
+model_list:
+  - model_name: gpt-4
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+
+general_settings:
+  master_key: sk-1234
+  alerting: ["slack"]
+  alerting_threshold: 0.0001 # (Seconds) set an artificially low threshold for testing alerting
+  alert_to_webhook_url: {
+    "llm_exceptions": "https://hooks.slack.com/services/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",
+    "llm_too_slow": "https://hooks.slack.com/services/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",
+    "llm_requests_hanging": "https://hooks.slack.com/services/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",
+    "budget_alerts": "https://hooks.slack.com/services/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",
+    "db_exceptions": "https://hooks.slack.com/services/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",
+    "daily_reports": "https://hooks.slack.com/services/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",
+    "spend_reports": "https://hooks.slack.com/services/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",
+    "cooldown_deployment": "https://hooks.slack.com/services/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",
+    "new_model_added": "https://hooks.slack.com/services/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",
+    "outage_alerts": "https://hooks.slack.com/services/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",
+  }
+
+litellm_settings:
+  success_callback: ["langfuse"]
+```
+
+Test it - send a valid llm request - expect to see a `llm_too_slow` alert in its own slack channel
+
+```shell
+curl -i http://localhost:4000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-1234" \
+  -d '{
+    "model": "gpt-4",
+    "messages": [
+      {"role": "user", "content": "Hello, Claude gm!"}
+    ]
+}'
+```
+
+
 ## Advanced - Using MS Teams Webhooks
 

@@ -266,6 +266,54 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
 }'
 ```
 
+## Disable team from turning on/off guardrails
+
+
+### 1. Disable team from modifying guardrails
+
+```bash
+curl -X POST 'http://0.0.0.0:4000/team/update' \
+-H 'Authorization: Bearer sk-1234' \
+-H 'Content-Type: application/json' \
+-d '{
+    "team_id": "4198d93c-d375-4c83-8d5a-71e7c5473e50",
+    "metadata": {"guardrails": {"modify_guardrails": false}}
+}'
+```
+
+### 2. Try to disable guardrails for a call
+
+```bash
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+--header 'Content-Type: application/json' \
+--header 'Authorization: Bearer $LITELLM_VIRTUAL_KEY' \
+--data '{
+    "model": "gpt-3.5-turbo",
+    "messages": [
+        {
+            "role": "user",
+            "content": "Think of 10 random colors."
+        }
+    ],
+    "metadata": {"guardrails": {"hide_secrets": false}}
+}'
+```
+
+### 3. Get 403 Error
+
+```
+{
+    "error": {
+        "message": {
+            "error": "Your team does not have permission to modify guardrails."
+        },
+        "type": "auth_error",
+        "param": "None",
+        "code": 403
+    }
+}
+```
+
 Expect to NOT see `+1 412-612-9992` in your server logs on your callback.
 
 :::info
 

@@ -48,6 +48,20 @@ A number of these headers could be useful for troubleshooting, but the
 `x-litellm-call-id` is the one that is most useful for tracking a request across
 components in your system, including in logging tools.
 
+## Redacting UserAPIKeyInfo
+
+Redact information about the user api key (hashed token, user_id, team id, etc.) from logs.
+
+Currently supported for Langfuse, OpenTelemetry, Logfire, ArizeAI logging.
+
+```yaml
+litellm_settings:
+  callbacks: ["langfuse"]
+  redact_user_api_key_info: true
+```
+
+Removes any field with `user_api_key_*` from metadata.
+
 ## Logging Proxy Input/Output - Langfuse
 
 We will use the `--config` to set `litellm.success_callback = ["langfuse"]` this will log all successfull LLM calls to langfuse. Make sure to set `LANGFUSE_PUBLIC_KEY` and `LANGFUSE_SECRET_KEY` in your environment

@@ -202,6 +216,9 @@ print(response)
 
 ### Team based Logging to Langfuse
 
+[👉 Tutorial - Allow each team to use their own Langfuse Project / custom callbacks](team_logging)
+<!--
+
 **Example:**
 
 This config would send langfuse logs to 2 different langfuse projects, based on the team id

@@ -228,7 +245,7 @@ curl -X POST 'http://0.0.0.0:4000/key/generate' \
     -d '{"team_id": "ishaans-secret-project"}'
 ```
 
-All requests made with these keys will log data to their team-specific logging.
+All requests made with these keys will log data to their team-specific logging. -->
 
 ### Redacting Messages, Response Content from Langfuse Logging
 

@@ -1106,6 +1123,52 @@ environment_variables:
 ```
 
+2. Start Proxy
+
+```
+litellm --config /path/to/config.yaml
+```
+
+3. Test it!
+
+```bash
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+--header 'Content-Type: application/json' \
+--data ' {
+    "model": "fake-openai-endpoint",
+    "messages": [
+        {
+            "role": "user",
+            "content": "Hello, Claude gm!"
+        }
+    ],
+}
+'
+```
+Expect to see your log on Langfuse
+<Image img={require('../../img/langsmith_new.png')} />
+
+
+## Logging LLM IO to Arize AI
+
+1. Set `success_callback: ["arize"]` on litellm config.yaml
+
+```yaml
+model_list:
+  - model_name: gpt-4
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+
+litellm_settings:
+  callbacks: ["arize"]
+
+environment_variables:
+    ARIZE_SPACE_KEY: "d0*****"
+    ARIZE_API_KEY: "141a****"
+```
+
 2. Start Proxy
 
 ```

@@ -70,3 +70,42 @@ curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
     "user": "usha"
 }'
 ```
+
+## Team Based Logging
+
+[👉 Tutorial - Allow each team to use their own Langfuse Project / custom callbacks](team_logging.md)
+
+
+<!--
+## Logging / Caching
+
+Turn on/off logging and caching for a specific team id.
+
+**Example:**
+
+This config would send langfuse logs to 2 different langfuse projects, based on the team id
+
+```yaml
+litellm_settings:
+  default_team_settings:
+    - team_id: my-secret-project
+      success_callback: ["langfuse"]
+      langfuse_public_key: os.environ/LANGFUSE_PUB_KEY_1 # Project 1
+      langfuse_secret: os.environ/LANGFUSE_PRIVATE_KEY_1 # Project 1
+    - team_id: ishaans-secret-project
+      success_callback: ["langfuse"]
+      langfuse_public_key: os.environ/LANGFUSE_PUB_KEY_2 # Project 2
+      langfuse_secret: os.environ/LANGFUSE_SECRET_2 # Project 2
+```
+
+Now, when you [generate keys](./virtual_keys.md) for this team-id
+
+```bash
+curl -X POST 'http://0.0.0.0:4000/key/generate' \
+-H 'Authorization: Bearer sk-1234' \
+-H 'Content-Type: application/json' \
+-d '{"team_id": "ishaans-secret-project"}'
+```
+
+All requests made with these keys will log data to their team-specific logging. -->

docs/my-website/docs/proxy/team_logging.md (new file, 144 lines)

import Image from '@theme/IdealImage';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# 👥📊 Team Based Logging

Allow each team to use their own Langfuse Project / custom callbacks

**This allows you to do the following**
```
Team 1 -> Logs to Langfuse Project 1
Team 2 -> Logs to Langfuse Project 2
Team 3 -> Disabled Logging (for GDPR compliance)
```

## Set Callbacks Per Team

### 1. Set callback for team

We make a request to `POST /team/{team_id}/callback` to add a callback for the team:

```shell
curl -X POST 'http://localhost:4000/team/dbe2f686-a686-4896-864a-4c3924458709/callback' \
        -H 'Content-Type: application/json' \
        -H 'Authorization: Bearer sk-1234' \
        -d '{
        "callback_name": "langfuse",
        "callback_type": "success",
        "callback_vars": {
            "langfuse_public_key": "pk",
            "langfuse_secret_key": "sk_",
            "langfuse_host": "https://cloud.langfuse.com"
        }
}'
```

#### Supported Values

| Field | Supported Values | Notes |
|-------|------------------|-------|
| `callback_name` | `"langfuse"` | Currently only supports "langfuse" |
| `callback_type` | `"success"`, `"failure"`, `"success_and_failure"` | |
| `callback_vars` | | dict of callback settings |
| `langfuse_public_key` | string | Required |
| `langfuse_secret_key` | string | Required |
| `langfuse_host` | string | Optional (defaults to https://cloud.langfuse.com) |

### 2. Create key for team

All keys created for team `dbe2f686-a686-4896-864a-4c3924458709` will log to langfuse project specified on [Step 1. Set callback for team](#1-set-callback-for-team)

```shell
curl --location 'http://0.0.0.0:4000/key/generate' \
    --header 'Authorization: Bearer sk-1234' \
    --header 'Content-Type: application/json' \
    --data '{
        "team_id": "dbe2f686-a686-4896-864a-4c3924458709"
}'
```

### 3. Make `/chat/completion` request for team

```shell
curl -i http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-KbUuE0WNptC0jXapyMmLBA" \
  -d '{
    "model": "gpt-4",
    "messages": [
      {"role": "user", "content": "Hello, Claude gm!"}
    ]
}'
```

Expect this to be logged on the langfuse project specified on [Step 1. Set callback for team](#1-set-callback-for-team)

## Disable Logging for a Team

To disable logging for a specific team, you can use the following endpoint:

`POST /team/{team_id}/disable_logging`

This endpoint removes all success and failure callbacks for the specified team, effectively disabling logging.

### Step 1. Disable logging for team

```shell
curl -X POST 'http://localhost:4000/team/YOUR_TEAM_ID/disable_logging' \
        -H 'Authorization: Bearer YOUR_API_KEY'
```
Replace YOUR_TEAM_ID with the actual team ID.

**Response**
A successful request will return a response similar to this:
```json
{
    "status": "success",
    "message": "Logging disabled for team YOUR_TEAM_ID",
    "data": {
        "team_id": "YOUR_TEAM_ID",
        "success_callbacks": [],
        "failure_callbacks": []
    }
}
```

### Step 2. Test it - `/chat/completions`

Use a key generated for team = `team_id` - you should see no logs on your configured success callback (eg. Langfuse)

```shell
curl -i http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-KbUuE0WNptC0jXapyMmLBA" \
  -d '{
    "model": "gpt-4",
    "messages": [
      {"role": "user", "content": "Hello, Claude gm!"}
    ]
}'
```

### Debugging / Troubleshooting

- Check active callbacks for team using `GET /team/{team_id}/callback`

Use this to check what success/failure callbacks are active for team=`team_id`

```shell
curl -X GET 'http://localhost:4000/team/dbe2f686-a686-4896-864a-4c3924458709/callback' \
        -H 'Authorization: Bearer sk-1234'
```

## Team Logging Endpoints

- [`POST /team/{team_id}/callback` Add a success/failure callback to a team](https://litellm-api.up.railway.app/#/team%20management/add_team_callbacks_team__team_id__callback_post)
- [`GET /team/{team_id}/callback` - Get the success/failure callbacks and variables for a team](https://litellm-api.up.railway.app/#/team%20management/get_team_callbacks_team__team_id__callback_get)
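
An OpenAI-SDK equivalent of the `/chat/completions` test above, assuming the proxy runs locally on port 4000 and the key is one generated for the team in step 2:

```python
import openai

# team-scoped virtual key from step 2; requests made with it inherit the team's callbacks
client = openai.OpenAI(
    api_key="sk-KbUuE0WNptC0jXapyMmLBA",
    base_url="http://localhost:4000",
)

response = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Hello, Claude gm!"}],
)
print(response)
```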
docs/my-website/img/raw_response_headers.png (new binary file, 117 KiB, not shown)

@@ -44,19 +44,20 @@ const sidebars = {
         "proxy/cost_tracking",
         "proxy/self_serve",
         "proxy/virtual_keys",
-        "proxy/tag_routing",
-        "proxy/users",
-        "proxy/team_budgets",
-        "proxy/customers",
-        "proxy/billing",
-        "proxy/guardrails",
-        "proxy/token_auth",
-        "proxy/alerting",
         {
           type: "category",
           label: "🪢 Logging",
           items: ["proxy/logging", "proxy/streaming_logging"],
         },
+        "proxy/team_logging",
+        "proxy/guardrails",
+        "proxy/tag_routing",
+        "proxy/users",
+        "proxy/team_budgets",
+        "proxy/customers",
+        "proxy/billing",
+        "proxy/token_auth",
+        "proxy/alerting",
         "proxy/ui",
         "proxy/prometheus",
         "proxy/pass_through",

@@ -157,6 +158,7 @@ const sidebars = {
         "providers/triton-inference-server",
         "providers/ollama",
         "providers/perplexity",
+        "providers/friendliai",
         "providers/groq",
         "providers/deepseek",
         "providers/fireworks_ai",

@@ -183,7 +185,14 @@ const sidebars = {
     "scheduler",
     "set_keys",
     "budget_manager",
-    "secret",
+    {
+      type: "category",
+      label: "Secret Manager",
+      items: [
+        "secret",
+        "oidc"
+      ]
+    },
     "completion/token_usage",
     "load_test",
     {

@@ -192,17 +201,19 @@ const sidebars = {
       items: [
         "observability/langfuse_integration",
         "observability/logfire_integration",
+        "observability/langsmith_integration",
+        "observability/arize_integration",
         "debugging/local_debugging",
         "observability/raw_request_response",
         "observability/custom_callback",
         "observability/scrub_data",
-        "observability/helicone_integration",
+        "observability/braintrust",
         "observability/sentry",
         "observability/lago",
+        "observability/helicone_integration",
         "observability/openmeter",
         "observability/promptlayer_integration",
         "observability/wandb_integration",
-        "observability/langsmith_integration",
         "observability/slack_integration",
         "observability/traceloop_integration",
         "observability/athina_integration",

@@ -4,7 +4,7 @@ import warnings
 warnings.filterwarnings("ignore", message=".*conflict with protected namespace.*")
 ### INIT VARIABLES ###
 import threading, requests, os
-from typing import Callable, List, Optional, Dict, Union, Any, Literal
+from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.caching import Cache
 from litellm._logging import (
@@ -38,8 +38,18 @@ success_callback: List[Union[str, Callable]] = []
 failure_callback: List[Union[str, Callable]] = []
 service_callback: List[Union[str, Callable]] = []
 _custom_logger_compatible_callbacks_literal = Literal[
-    "lago", "openmeter", "logfire", "dynamic_rate_limiter", "langsmith", "galileo"
+    "lago",
+    "openmeter",
+    "logfire",
+    "dynamic_rate_limiter",
+    "langsmith",
+    "galileo",
+    "braintrust",
+    "arize",
 ]
+_known_custom_logger_compatible_callbacks: List = list(
+    get_args(_custom_logger_compatible_callbacks_literal)
+)
 callbacks: List[Union[Callable, _custom_logger_compatible_callbacks_literal]] = []
 _langfuse_default_tags: Optional[
     List[
@@ -67,6 +77,7 @@ post_call_rules: List[Callable] = []
 turn_off_message_logging: Optional[bool] = False
 log_raw_request_response: bool = False
 redact_messages_in_exceptions: Optional[bool] = False
+redact_user_api_key_info: Optional[bool] = False
 store_audit_logs = False  # Enterprise feature, allow users to see audit logs
 ## end of callbacks #############
@@ -346,6 +357,7 @@ vertex_text_models: List = []
 vertex_code_text_models: List = []
 vertex_embedding_models: List = []
 vertex_anthropic_models: List = []
+vertex_llama3_models: List = []
 ai21_models: List = []
 nlp_cloud_models: List = []
 aleph_alpha_models: List = []
@@ -388,6 +400,9 @@ for key, value in model_cost.items():
     elif value.get("litellm_provider") == "vertex_ai-anthropic_models":
         key = key.replace("vertex_ai/", "")
         vertex_anthropic_models.append(key)
+    elif value.get("litellm_provider") == "vertex_ai-llama_models":
+        key = key.replace("vertex_ai/", "")
+        vertex_llama3_models.append(key)
     elif value.get("litellm_provider") == "ai21":
         ai21_models.append(key)
     elif value.get("litellm_provider") == "nlp_cloud":
@@ -817,6 +832,7 @@ from .llms.petals import PetalsConfig
 from .llms.vertex_httpx import VertexGeminiConfig, GoogleAIStudioGeminiConfig
 from .llms.vertex_ai import VertexAIConfig, VertexAITextEmbeddingConfig
 from .llms.vertex_ai_anthropic import VertexAIAnthropicConfig
+from .llms.vertex_ai_llama import VertexAILlama3Config
 from .llms.sagemaker import SagemakerConfig
 from .llms.ollama import OllamaConfig
 from .llms.ollama_chat import OllamaChatConfig
@@ -872,6 +888,7 @@ from .exceptions import (
     APIError,
     Timeout,
     APIConnectionError,
+    UnsupportedParamsError,
     APIResponseValidationError,
     UnprocessableEntityError,
     InternalServerError,
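The new `_known_custom_logger_compatible_callbacks` list is derived from the `Literal` type via `typing.get_args`. A minimal, self-contained sketch of that pattern, using a shortened set of callback names purely for illustration:

```python
from typing import Literal, get_args

# illustration only: a subset of the callback names from the real Literal
_callbacks_literal = Literal["lago", "openmeter", "langsmith", "braintrust", "arize"]

# get_args() returns the Literal's allowed values as a tuple; list() makes it easy to iterate/validate
_known_callbacks = list(get_args(_callbacks_literal))
print(_known_callbacks)  # ['lago', 'openmeter', 'langsmith', 'braintrust', 'arize']
```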
@@ -682,11 +682,39 @@ class JSONSchemaValidationError(APIError):
         )


+class UnsupportedParamsError(BadRequestError):
+    def __init__(
+        self,
+        message,
+        llm_provider: Optional[str] = None,
+        model: Optional[str] = None,
+        status_code: int = 400,
+        response: Optional[httpx.Response] = None,
+        litellm_debug_info: Optional[str] = None,
+        max_retries: Optional[int] = None,
+        num_retries: Optional[int] = None,
+    ):
+        self.status_code = 400
+        self.message = "litellm.UnsupportedParamsError: {}".format(message)
+        self.model = model
+        self.llm_provider = llm_provider
+        self.litellm_debug_info = litellm_debug_info
+        response = response or httpx.Response(
+            status_code=self.status_code,
+            request=httpx.Request(
+                method="GET", url="https://litellm.ai"
+            ),  # mock request object
+        )
+        self.max_retries = max_retries
+        self.num_retries = num_retries
+
+
 LITELLM_EXCEPTION_TYPES = [
     AuthenticationError,
     NotFoundError,
     BadRequestError,
     UnprocessableEntityError,
+    UnsupportedParamsError,
     Timeout,
     PermissionDeniedError,
     RateLimitError,
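A small usage sketch for the new exception class; the message, model, and provider names below are made-up placeholders:

```python
from litellm.exceptions import UnsupportedParamsError

try:
    # hypothetical: a provider integration rejecting an unsupported request parameter
    raise UnsupportedParamsError(
        message="`response_format` is not supported by this provider",
        model="example-model",
        llm_provider="example-provider",
    )
except UnsupportedParamsError as e:
    # status_code is always 400; the message is prefixed with "litellm.UnsupportedParamsError:"
    print(e.status_code, e.message)
```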
litellm/integrations/_types/open_inference.py (new file, 286 lines)
@@ -0,0 +1,286 @@
from enum import Enum


class SpanAttributes:
    OUTPUT_VALUE = "output.value"
    OUTPUT_MIME_TYPE = "output.mime_type"
    """
    The type of output.value. If unspecified, the type is plain text by default.
    If type is JSON, the value is a string representing a JSON object.
    """
    INPUT_VALUE = "input.value"
    INPUT_MIME_TYPE = "input.mime_type"
    """
    The type of input.value. If unspecified, the type is plain text by default.
    If type is JSON, the value is a string representing a JSON object.
    """

    EMBEDDING_EMBEDDINGS = "embedding.embeddings"
    """
    A list of objects containing embedding data, including the vector and represented piece of text.
    """
    EMBEDDING_MODEL_NAME = "embedding.model_name"
    """
    The name of the embedding model.
    """

    LLM_FUNCTION_CALL = "llm.function_call"
    """
    For models and APIs that support function calling. Records attributes such as the function
    name and arguments to the called function.
    """
    LLM_INVOCATION_PARAMETERS = "llm.invocation_parameters"
    """
    Invocation parameters passed to the LLM or API, such as the model name, temperature, etc.
    """
    LLM_INPUT_MESSAGES = "llm.input_messages"
    """
    Messages provided to a chat API.
    """
    LLM_OUTPUT_MESSAGES = "llm.output_messages"
    """
    Messages received from a chat API.
    """
    LLM_MODEL_NAME = "llm.model_name"
    """
    The name of the model being used.
    """
    LLM_PROMPTS = "llm.prompts"
    """
    Prompts provided to a completions API.
    """
    LLM_PROMPT_TEMPLATE = "llm.prompt_template.template"
    """
    The prompt template as a Python f-string.
    """
    LLM_PROMPT_TEMPLATE_VARIABLES = "llm.prompt_template.variables"
    """
    A list of input variables to the prompt template.
    """
    LLM_PROMPT_TEMPLATE_VERSION = "llm.prompt_template.version"
    """
    The version of the prompt template being used.
    """
    LLM_TOKEN_COUNT_PROMPT = "llm.token_count.prompt"
    """
    Number of tokens in the prompt.
    """
    LLM_TOKEN_COUNT_COMPLETION = "llm.token_count.completion"
    """
    Number of tokens in the completion.
    """
    LLM_TOKEN_COUNT_TOTAL = "llm.token_count.total"
    """
    Total number of tokens, including both prompt and completion.
    """

    TOOL_NAME = "tool.name"
    """
    Name of the tool being used.
    """
    TOOL_DESCRIPTION = "tool.description"
    """
    Description of the tool's purpose, typically used to select the tool.
    """
    TOOL_PARAMETERS = "tool.parameters"
    """
    Parameters of the tool represented a dictionary JSON string, e.g.
    see https://platform.openai.com/docs/guides/gpt/function-calling
    """

    RETRIEVAL_DOCUMENTS = "retrieval.documents"

    METADATA = "metadata"
    """
    Metadata attributes are used to store user-defined key-value pairs.
    For example, LangChain uses metadata to store user-defined attributes for a chain.
    """

    TAG_TAGS = "tag.tags"
    """
    Custom categorical tags for the span.
    """

    OPENINFERENCE_SPAN_KIND = "openinference.span.kind"

    SESSION_ID = "session.id"
    """
    The id of the session
    """
    USER_ID = "user.id"
    """
    The id of the user
    """


class MessageAttributes:
    """
    Attributes for a message sent to or from an LLM
    """

    MESSAGE_ROLE = "message.role"
    """
    The role of the message, such as "user", "agent", "function".
    """
    MESSAGE_CONTENT = "message.content"
    """
    The content of the message to or from the llm, must be a string.
    """
    MESSAGE_CONTENTS = "message.contents"
    """
    The message contents to the llm, it is an array of
    `message_content` prefixed attributes.
    """
    MESSAGE_NAME = "message.name"
    """
    The name of the message, often used to identify the function
    that was used to generate the message.
    """
    MESSAGE_TOOL_CALLS = "message.tool_calls"
    """
    The tool calls generated by the model, such as function calls.
    """
    MESSAGE_FUNCTION_CALL_NAME = "message.function_call_name"
    """
    The function name that is a part of the message list.
    This is populated for role 'function' or 'agent' as a mechanism to identify
    the function that was called during the execution of a tool.
    """
    MESSAGE_FUNCTION_CALL_ARGUMENTS_JSON = "message.function_call_arguments_json"
    """
    The JSON string representing the arguments passed to the function
    during a function call.
    """


class MessageContentAttributes:
    """
    Attributes for the contents of user messages sent to an LLM.
    """

    MESSAGE_CONTENT_TYPE = "message_content.type"
    """
    The type of the content, such as "text" or "image".
    """
    MESSAGE_CONTENT_TEXT = "message_content.text"
    """
    The text content of the message, if the type is "text".
    """
    MESSAGE_CONTENT_IMAGE = "message_content.image"
    """
    The image content of the message, if the type is "image".
    An image can be made available to the model by passing a link to
    the image or by passing the base64 encoded image directly in the
    request.
    """


class ImageAttributes:
    """
    Attributes for images
    """

    IMAGE_URL = "image.url"
    """
    An http or base64 image url
    """


class DocumentAttributes:
    """
    Attributes for a document.
    """

    DOCUMENT_ID = "document.id"
    """
    The id of the document.
    """
    DOCUMENT_SCORE = "document.score"
    """
    The score of the document
    """
    DOCUMENT_CONTENT = "document.content"
    """
    The content of the document.
    """
    DOCUMENT_METADATA = "document.metadata"
    """
    The metadata of the document represented as a dictionary
    JSON string, e.g. `"{ 'title': 'foo' }"`
    """


class RerankerAttributes:
    """
    Attributes for a reranker
    """

    RERANKER_INPUT_DOCUMENTS = "reranker.input_documents"
    """
    List of documents as input to the reranker
    """
    RERANKER_OUTPUT_DOCUMENTS = "reranker.output_documents"
    """
    List of documents as output from the reranker
    """
    RERANKER_QUERY = "reranker.query"
    """
    Query string for the reranker
    """
    RERANKER_MODEL_NAME = "reranker.model_name"
    """
    Model name of the reranker
    """
    RERANKER_TOP_K = "reranker.top_k"
    """
    Top K parameter of the reranker
    """


class EmbeddingAttributes:
    """
    Attributes for an embedding
    """

    EMBEDDING_TEXT = "embedding.text"
    """
    The text represented by the embedding.
    """
    EMBEDDING_VECTOR = "embedding.vector"
    """
    The embedding vector.
    """


class ToolCallAttributes:
    """
    Attributes for a tool call
    """

    TOOL_CALL_FUNCTION_NAME = "tool_call.function.name"
    """
    The name of function that is being called during a tool call.
    """
    TOOL_CALL_FUNCTION_ARGUMENTS_JSON = "tool_call.function.arguments"
    """
    The JSON string representing the arguments passed to the function
    during a tool call.
    """


class OpenInferenceSpanKindValues(Enum):
    TOOL = "TOOL"
    CHAIN = "CHAIN"
    LLM = "LLM"
    RETRIEVER = "RETRIEVER"
    EMBEDDING = "EMBEDDING"
    AGENT = "AGENT"
    RERANKER = "RERANKER"
    UNKNOWN = "UNKNOWN"
    GUARDRAIL = "GUARDRAIL"
    EVALUATOR = "EVALUATOR"


class OpenInferenceMimeTypeValues(Enum):
    TEXT = "text/plain"
    JSON = "application/json"
litellm/integrations/arize_ai.py (new file, 114 lines)
@@ -0,0 +1,114 @@
"""
arize AI is OTEL compatible

this file has Arize ai specific helper functions
"""

from typing import TYPE_CHECKING, Any, Optional, Union

if TYPE_CHECKING:
    from opentelemetry.trace import Span as _Span

    Span = _Span
else:
    Span = Any


def set_arize_ai_attributes(span: Span, kwargs, response_obj):
    from litellm.integrations._types.open_inference import (
        MessageAttributes,
        MessageContentAttributes,
        OpenInferenceSpanKindValues,
        SpanAttributes,
    )

    optional_params = kwargs.get("optional_params", {})
    litellm_params = kwargs.get("litellm_params", {}) or {}

    #############################################
    ############ LLM CALL METADATA ##############
    #############################################
    # commented out for now - looks like Arize AI could not log this
    # metadata = litellm_params.get("metadata", {}) or {}
    # span.set_attribute(SpanAttributes.METADATA, str(metadata))

    #############################################
    ########## LLM Request Attributes ###########
    #############################################

    # The name of the LLM a request is being made to
    if kwargs.get("model"):
        span.set_attribute(SpanAttributes.LLM_MODEL_NAME, kwargs.get("model"))

    span.set_attribute(
        SpanAttributes.OPENINFERENCE_SPAN_KIND, OpenInferenceSpanKindValues.LLM.value
    )
    messages = kwargs.get("messages")

    # for /chat/completions
    # https://docs.arize.com/arize/large-language-models/tracing/semantic-conventions
    if messages:
        span.set_attribute(
            SpanAttributes.INPUT_VALUE,
            messages[-1].get("content", ""),  # get the last message for input
        )

        # LLM_INPUT_MESSAGES shows up under `input_messages` tab on the span page
        for idx, msg in enumerate(messages):
            # Set the role per message
            span.set_attribute(
                f"{SpanAttributes.LLM_INPUT_MESSAGES}.{idx}.{MessageAttributes.MESSAGE_ROLE}",
                msg["role"],
            )
            # Set the content per message
            span.set_attribute(
                f"{SpanAttributes.LLM_INPUT_MESSAGES}.{idx}.{MessageAttributes.MESSAGE_CONTENT}",
                msg.get("content", ""),
            )

    # The Generative AI Provider: Azure, OpenAI, etc.
    span.set_attribute(SpanAttributes.LLM_INVOCATION_PARAMETERS, str(optional_params))

    if optional_params.get("user"):
        span.set_attribute(SpanAttributes.USER_ID, optional_params.get("user"))

    #############################################
    ########## LLM Response Attributes ##########
    # https://docs.arize.com/arize/large-language-models/tracing/semantic-conventions
    #############################################
    for choice in response_obj.get("choices"):
        response_message = choice.get("message", {})
        span.set_attribute(
            SpanAttributes.OUTPUT_VALUE, response_message.get("content", "")
        )

        # This shows up under `output_messages` tab on the span page
        # This code assumes a single response
        span.set_attribute(
            f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.0.{MessageAttributes.MESSAGE_ROLE}",
            response_message["role"],
        )
        span.set_attribute(
            f"{SpanAttributes.LLM_OUTPUT_MESSAGES}.0.{MessageAttributes.MESSAGE_CONTENT}",
            response_message.get("content", ""),
        )

    usage = response_obj.get("usage")
    if usage:
        span.set_attribute(
            SpanAttributes.LLM_TOKEN_COUNT_TOTAL,
            usage.get("total_tokens"),
        )

        # The number of tokens used in the LLM response (completion).
        span.set_attribute(
            SpanAttributes.LLM_TOKEN_COUNT_COMPLETION,
            usage.get("completion_tokens"),
        )

        # The number of tokens used in the LLM prompt.
        span.set_attribute(
            SpanAttributes.LLM_TOKEN_COUNT_PROMPT,
            usage.get("prompt_tokens"),
        )
    pass
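Going by the callback registration added in this commit (the "arize" entry in the callbacks Literal plus the ARIZE_SPACE_KEY / ARIZE_API_KEY checks), enabling Arize tracing from the SDK would look roughly like the sketch below; the key values and model name are placeholders:

```python
import os

import litellm

# placeholders: real Arize credentials are required for spans to actually export
os.environ["ARIZE_SPACE_KEY"] = "my-arize-space-key"
os.environ["ARIZE_API_KEY"] = "my-arize-api-key"

# routes spans through the shared OpenTelemetry logger with callback_name="arize"
litellm.callbacks = ["arize"]

response = litellm.completion(
    model="gpt-3.5-turbo",  # any supported model
    messages=[{"role": "user", "content": "Hello, Arize!"}],
)
print(response.choices[0].message.content)
```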
litellm/integrations/braintrust_logging.py (new file, 369 lines)
@@ -0,0 +1,369 @@
# What is this?
## Log success + failure events to Braintrust

import copy
import json
import os
import threading
import traceback
import uuid
from typing import Literal, Optional

import dotenv
import httpx

import litellm
from litellm import verbose_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.utils import get_formatted_prompt

global_braintrust_http_handler = AsyncHTTPHandler()
global_braintrust_sync_http_handler = HTTPHandler()
API_BASE = "https://api.braintrustdata.com/v1"


def get_utc_datetime():
    import datetime as dt
    from datetime import datetime

    if hasattr(dt, "UTC"):
        return datetime.now(dt.UTC)  # type: ignore
    else:
        return datetime.utcnow()  # type: ignore


class BraintrustLogger(CustomLogger):
    def __init__(
        self, api_key: Optional[str] = None, api_base: Optional[str] = None
    ) -> None:
        super().__init__()
        self.validate_environment(api_key=api_key)
        self.api_base = api_base or API_BASE
        self.default_project_id = None
        self.api_key: str = api_key or os.getenv("BRAINTRUST_API_KEY")  # type: ignore
        self.headers = {
            "Authorization": "Bearer " + self.api_key,
            "Content-Type": "application/json",
        }

    def validate_environment(self, api_key: Optional[str]):
        """
        Expects
        BRAINTRUST_API_KEY

        in the environment
        """
        missing_keys = []
        if api_key is None and os.getenv("BRAINTRUST_API_KEY", None) is None:
            missing_keys.append("BRAINTRUST_API_KEY")

        if len(missing_keys) > 0:
            raise Exception("Missing keys={} in environment.".format(missing_keys))

    @staticmethod
    def add_metadata_from_header(litellm_params: dict, metadata: dict) -> dict:
        """
        Adds metadata from proxy request headers to Langfuse logging if keys start with "langfuse_"
        and overwrites litellm_params.metadata if already included.

        For example if you want to append your trace to an existing `trace_id` via header, send
        `headers: { ..., langfuse_existing_trace_id: your-existing-trace-id }` via proxy request.
        """
        if litellm_params is None:
            return metadata

        if litellm_params.get("proxy_server_request") is None:
            return metadata

        if metadata is None:
            metadata = {}

        proxy_headers = (
            litellm_params.get("proxy_server_request", {}).get("headers", {}) or {}
        )

        for metadata_param_key in proxy_headers:
            if metadata_param_key.startswith("braintrust"):
                trace_param_key = metadata_param_key.replace("braintrust", "", 1)
                if trace_param_key in metadata:
                    verbose_logger.warning(
                        f"Overwriting Braintrust `{trace_param_key}` from request header"
                    )
                else:
                    verbose_logger.debug(
                        f"Found Braintrust `{trace_param_key}` in request header"
                    )
                metadata[trace_param_key] = proxy_headers.get(metadata_param_key)

        return metadata

    async def create_default_project_and_experiment(self):
        project = await global_braintrust_http_handler.post(
            f"{self.api_base}/project", headers=self.headers, json={"name": "litellm"}
        )

        project_dict = project.json()

        self.default_project_id = project_dict["id"]

    def create_sync_default_project_and_experiment(self):
        project = global_braintrust_sync_http_handler.post(
            f"{self.api_base}/project", headers=self.headers, json={"name": "litellm"}
        )

        project_dict = project.json()

        self.default_project_id = project_dict["id"]

    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        verbose_logger.debug("REACHES BRAINTRUST SUCCESS")
        try:
            litellm_call_id = kwargs.get("litellm_call_id")
            project_id = kwargs.get("project_id", None)
            if project_id is None:
                if self.default_project_id is None:
                    self.create_sync_default_project_and_experiment()
                project_id = self.default_project_id

            prompt = {"messages": kwargs.get("messages")}

            if response_obj is not None and (
                kwargs.get("call_type", None) == "embedding"
                or isinstance(response_obj, litellm.EmbeddingResponse)
            ):
                input = prompt
                output = None
            elif response_obj is not None and isinstance(
                response_obj, litellm.ModelResponse
            ):
                input = prompt
                output = response_obj["choices"][0]["message"].json()
            elif response_obj is not None and isinstance(
                response_obj, litellm.TextCompletionResponse
            ):
                input = prompt
                output = response_obj.choices[0].text
            elif response_obj is not None and isinstance(
                response_obj, litellm.ImageResponse
            ):
                input = prompt
                output = response_obj["data"]

            litellm_params = kwargs.get("litellm_params", {})
            metadata = (
                litellm_params.get("metadata", {}) or {}
            )  # if litellm_params['metadata'] == None
            metadata = self.add_metadata_from_header(litellm_params, metadata)
            clean_metadata = {}
            try:
                metadata = copy.deepcopy(
                    metadata
                )  # Avoid modifying the original metadata
            except:
                new_metadata = {}
                for key, value in metadata.items():
                    if (
                        isinstance(value, list)
                        or isinstance(value, dict)
                        or isinstance(value, str)
                        or isinstance(value, int)
                        or isinstance(value, float)
                    ):
                        new_metadata[key] = copy.deepcopy(value)
                metadata = new_metadata

            tags = []
            if isinstance(metadata, dict):
                for key, value in metadata.items():
                    # generate langfuse tags - Default Tags sent to Langfuse from LiteLLM Proxy
                    if (
                        litellm._langfuse_default_tags is not None
                        and isinstance(litellm._langfuse_default_tags, list)
                        and key in litellm._langfuse_default_tags
                    ):
                        tags.append(f"{key}:{value}")

                    # clean litellm metadata before logging
                    if key in [
                        "headers",
                        "endpoint",
                        "caching_groups",
                        "previous_models",
                    ]:
                        continue
                    else:
                        clean_metadata[key] = value

            cost = kwargs.get("response_cost", None)
            if cost is not None:
                clean_metadata["litellm_response_cost"] = cost

            metrics: Optional[dict] = None
            if (
                response_obj is not None
                and hasattr(response_obj, "usage")
                and isinstance(response_obj.usage, litellm.Usage)
            ):
                generation_id = litellm.utils.get_logging_id(start_time, response_obj)
                metrics = {
                    "prompt_tokens": response_obj.usage.prompt_tokens,
                    "completion_tokens": response_obj.usage.completion_tokens,
                    "total_tokens": response_obj.usage.total_tokens,
                    "total_cost": cost,
                }

            request_data = {
                "id": litellm_call_id,
                "input": prompt,
                "output": output,
                "metadata": clean_metadata,
                "tags": tags,
            }
            if metrics is not None:
                request_data["metrics"] = metrics

            try:
                global_braintrust_sync_http_handler.post(
                    url=f"{self.api_base}/project_logs/{project_id}/insert",
                    json={"events": [request_data]},
                    headers=self.headers,
                )
            except httpx.HTTPStatusError as e:
                raise Exception(e.response.text)
        except Exception as e:
            verbose_logger.error(
                "Error logging to braintrust - Exception received - {}\n{}".format(
                    str(e), traceback.format_exc()
                )
            )
            raise e

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        verbose_logger.debug("REACHES BRAINTRUST SUCCESS")
        try:
            litellm_call_id = kwargs.get("litellm_call_id")
            project_id = kwargs.get("project_id", None)
            if project_id is None:
                if self.default_project_id is None:
                    await self.create_default_project_and_experiment()
                project_id = self.default_project_id

            prompt = {"messages": kwargs.get("messages")}

            if response_obj is not None and (
                kwargs.get("call_type", None) == "embedding"
                or isinstance(response_obj, litellm.EmbeddingResponse)
            ):
                input = prompt
                output = None
            elif response_obj is not None and isinstance(
                response_obj, litellm.ModelResponse
            ):
                input = prompt
                output = response_obj["choices"][0]["message"].json()
            elif response_obj is not None and isinstance(
                response_obj, litellm.TextCompletionResponse
            ):
                input = prompt
                output = response_obj.choices[0].text
            elif response_obj is not None and isinstance(
                response_obj, litellm.ImageResponse
            ):
                input = prompt
                output = response_obj["data"]

            litellm_params = kwargs.get("litellm_params", {})
            metadata = (
                litellm_params.get("metadata", {}) or {}
            )  # if litellm_params['metadata'] == None
            metadata = self.add_metadata_from_header(litellm_params, metadata)
            clean_metadata = {}
            try:
                metadata = copy.deepcopy(
                    metadata
                )  # Avoid modifying the original metadata
            except:
                new_metadata = {}
                for key, value in metadata.items():
                    if (
                        isinstance(value, list)
                        or isinstance(value, dict)
                        or isinstance(value, str)
                        or isinstance(value, int)
                        or isinstance(value, float)
                    ):
                        new_metadata[key] = copy.deepcopy(value)
                metadata = new_metadata

            tags = []
            if isinstance(metadata, dict):
                for key, value in metadata.items():
                    # generate langfuse tags - Default Tags sent to Langfuse from LiteLLM Proxy
                    if (
                        litellm._langfuse_default_tags is not None
                        and isinstance(litellm._langfuse_default_tags, list)
                        and key in litellm._langfuse_default_tags
                    ):
                        tags.append(f"{key}:{value}")

                    # clean litellm metadata before logging
                    if key in [
                        "headers",
                        "endpoint",
                        "caching_groups",
                        "previous_models",
                    ]:
                        continue
                    else:
                        clean_metadata[key] = value

            cost = kwargs.get("response_cost", None)
            if cost is not None:
                clean_metadata["litellm_response_cost"] = cost

            metrics: Optional[dict] = None
            if (
                response_obj is not None
                and hasattr(response_obj, "usage")
                and isinstance(response_obj.usage, litellm.Usage)
            ):
                generation_id = litellm.utils.get_logging_id(start_time, response_obj)
                metrics = {
                    "prompt_tokens": response_obj.usage.prompt_tokens,
                    "completion_tokens": response_obj.usage.completion_tokens,
                    "total_tokens": response_obj.usage.total_tokens,
                    "total_cost": cost,
                }

            request_data = {
                "id": litellm_call_id,
                "input": prompt,
                "output": output,
                "metadata": clean_metadata,
                "tags": tags,
            }

            if metrics is not None:
                request_data["metrics"] = metrics

            try:
                await global_braintrust_http_handler.post(
                    url=f"{self.api_base}/project_logs/{project_id}/insert",
                    json={"events": [request_data]},
                    headers=self.headers,
                )
            except httpx.HTTPStatusError as e:
                raise Exception(e.response.text)
        except Exception as e:
            verbose_logger.error(
                "Error logging to braintrust - Exception received - {}\n{}".format(
                    str(e), traceback.format_exc()
                )
            )
            raise e

    def log_failure_event(self, kwargs, response_obj, start_time, end_time):
        return super().log_failure_event(kwargs, response_obj, start_time, end_time)
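Based on the "braintrust" callback entry and the BRAINTRUST_API_KEY check added here, a minimal SDK-side sketch for sending logs to Braintrust (key and model are placeholders):

```python
import os

import litellm

os.environ["BRAINTRUST_API_KEY"] = "my-braintrust-api-key"  # placeholder

# registers BraintrustLogger for success/failure events
litellm.callbacks = ["braintrust"]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "What is LiteLLM?"}],
)
```

When no project id is supplied, the logger lazily creates (or reuses) a default Braintrust project named `litellm`, per `create_default_project_and_experiment` above; proxy request headers prefixed with `braintrust` are merged into the logged metadata by `add_metadata_from_header`.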
@@ -8,6 +8,7 @@ from packaging.version import Version
 
 import litellm
 from litellm._logging import verbose_logger
+from litellm.litellm_core_utils.redact_messages import redact_user_api_key_info
 
 
 class LangFuseLogger:
@@ -382,6 +383,8 @@ class LangFuseLogger:
         mask_input = clean_metadata.pop("mask_input", False)
         mask_output = clean_metadata.pop("mask_output", False)
 
+        clean_metadata = redact_user_api_key_info(metadata=clean_metadata)
+
         if trace_name is None and existing_trace_id is None:
             # just log `litellm-{call_type}` as the trace name
             ## DO NOT SET TRACE_NAME if trace-id set. this can lead to overwriting of past traces.
@@ -79,6 +79,7 @@ class LangsmithLogger(CustomLogger):
         project_name = metadata.get("project_name", self.langsmith_project)
         run_name = metadata.get("run_name", self.langsmith_default_run_name)
         run_id = metadata.get("id", None)
+        tags = metadata.get("tags", []) or []
         verbose_logger.debug(
             f"Langsmith Logging - project_name: {project_name}, run_name {run_name}"
         )
@@ -122,6 +123,7 @@ class LangsmithLogger(CustomLogger):
             "session_name": project_name,
             "start_time": start_time,
             "end_time": end_time,
+            "tags": tags,
         }
 
         if run_id:
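The Langsmith logger now forwards `tags` taken from request metadata. A hedged sketch of how a caller might supply them; the exact plumbing of the `metadata` argument can vary by entrypoint, and the values below are made up:

```python
import litellm

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    metadata={
        "run_name": "checkout-flow-test",   # hypothetical run name
        "tags": ["prod", "team-payments"],  # picked up via metadata.get("tags", []) above
    },
)
```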
@@ -1,17 +1,21 @@
 #### What this does ####
 # On success + failure, log events to Logfire
 
-import dotenv, os
+import os
+
+import dotenv
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 import uuid
-from litellm._logging import print_verbose, verbose_logger
 
 from enum import Enum
 from typing import Any, Dict, NamedTuple
 
 from typing_extensions import LiteralString
 
+from litellm._logging import print_verbose, verbose_logger
+from litellm.litellm_core_utils.redact_messages import redact_user_api_key_info
+
 
 class SpanConfig(NamedTuple):
     message_template: LiteralString
@@ -135,6 +139,8 @@ class LogfireLogger:
             else:
                 clean_metadata[key] = value
 
+        clean_metadata = redact_user_api_key_info(metadata=clean_metadata)
+
         # Build the initial payload
         payload = {
             "id": id,
@@ -2,11 +2,12 @@ import os
 from dataclasses import dataclass
 from datetime import datetime
 from functools import wraps
-from typing import TYPE_CHECKING, Any, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, Optional, Union
 
 import litellm
 from litellm._logging import verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
+from litellm.litellm_core_utils.redact_messages import redact_user_api_key_info
 from litellm.types.services import ServiceLoggerPayload
 
 if TYPE_CHECKING:
@@ -27,9 +28,10 @@ else:
 
 
 LITELLM_TRACER_NAME = os.getenv("OTEL_TRACER_NAME", "litellm")
-LITELLM_RESOURCE = {
+LITELLM_RESOURCE: Dict[Any, Any] = {
     "service.name": os.getenv("OTEL_SERVICE_NAME", "litellm"),
     "deployment.environment": os.getenv("OTEL_ENVIRONMENT_NAME", "production"),
+    "model_id": os.getenv("OTEL_SERVICE_NAME", "litellm"),
 }
 RAW_REQUEST_SPAN_NAME = "raw_gen_ai_request"
 LITELLM_REQUEST_SPAN_NAME = "litellm_request"
@@ -68,7 +70,9 @@ class OpenTelemetryConfig:
 
 
 class OpenTelemetry(CustomLogger):
-    def __init__(self, config=OpenTelemetryConfig.from_env()):
+    def __init__(
+        self, config=OpenTelemetryConfig.from_env(), callback_name: Optional[str] = None
+    ):
         from opentelemetry import trace
         from opentelemetry.sdk.resources import Resource
         from opentelemetry.sdk.trace import TracerProvider
@@ -79,6 +83,7 @@ class OpenTelemetry(CustomLogger):
         self.OTEL_HEADERS = self.config.headers
         provider = TracerProvider(resource=Resource(attributes=LITELLM_RESOURCE))
         provider.add_span_processor(self._get_span_processor())
+        self.callback_name = callback_name
 
         trace.set_tracer_provider(provider)
         self.tracer = trace.get_tracer(LITELLM_TRACER_NAME)
@@ -120,8 +125,8 @@ class OpenTelemetry(CustomLogger):
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode
 
-        _start_time_ns = start_time
-        _end_time_ns = end_time
+        _start_time_ns = 0
+        _end_time_ns = 0
 
         if isinstance(start_time, float):
             _start_time_ns = int(int(start_time) * 1e9)
@@ -159,8 +164,8 @@ class OpenTelemetry(CustomLogger):
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode
 
-        _start_time_ns = start_time
-        _end_time_ns = end_time
+        _start_time_ns = 0
+        _end_time_ns = 0
 
         if isinstance(start_time, float):
             _start_time_ns = int(int(start_time) * 1e9)
@@ -294,6 +299,11 @@ class OpenTelemetry(CustomLogger):
         return isinstance(value, (str, bool, int, float))
 
     def set_attributes(self, span: Span, kwargs, response_obj):
+        if self.callback_name == "arize":
+            from litellm.integrations.arize_ai import set_arize_ai_attributes
+
+            set_arize_ai_attributes(span, kwargs, response_obj)
+            return
         from litellm.proxy._types import SpanAttributes
 
         optional_params = kwargs.get("optional_params", {})
@@ -306,7 +316,9 @@ class OpenTelemetry(CustomLogger):
         #############################################
         metadata = litellm_params.get("metadata", {}) or {}
 
-        for key, value in metadata.items():
+        clean_metadata = redact_user_api_key_info(metadata=metadata)
+
+        for key, value in clean_metadata.items():
             if self.is_primitive(value):
                 span.set_attribute("metadata.{}".format(key), value)
 
@@ -612,8 +624,8 @@ class OpenTelemetry(CustomLogger):
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode
 
-        _start_time_ns = logging_payload.start_time
-        _end_time_ns = logging_payload.end_time
+        _start_time_ns = 0
+        _end_time_ns = 0
 
         start_time = logging_payload.start_time
         end_time = logging_payload.end_time
@@ -658,8 +670,8 @@ class OpenTelemetry(CustomLogger):
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode
 
-        _start_time_ns = logging_payload.start_time
-        _end_time_ns = logging_payload.end_time
+        _start_time_ns = 0
+        _end_time_ns = 0
 
         start_time = logging_payload.start_time
         end_time = logging_payload.end_time
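The new `callback_name` parameter is what lets the shared OpenTelemetry logger switch to Arize-specific span attributes. Constructing that variant directly, mirroring the initialization code added in `litellm_logging.py` below (an OTLP-capable environment is assumed):

```python
from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig

# same exporter/endpoint used by the "arize" branch of _init_custom_logger_compatible_class
otel_config = OpenTelemetryConfig(
    exporter="otlp_grpc",
    endpoint="https://otlp.arize.com/v1",
)
arize_otel_logger = OpenTelemetry(config=otel_config, callback_name="arize")
```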
@@ -53,6 +53,7 @@ from litellm.utils import (
 from ..integrations.aispend import AISpendLogger
 from ..integrations.athina import AthinaLogger
 from ..integrations.berrispend import BerriSpendLogger
+from ..integrations.braintrust_logging import BraintrustLogger
 from ..integrations.clickhouse import ClickhouseLogger
 from ..integrations.custom_logger import CustomLogger
 from ..integrations.datadog import DataDogLogger
@@ -1945,7 +1946,14 @@ def _init_custom_logger_compatible_class(
         _openmeter_logger = OpenMeterLogger()
         _in_memory_loggers.append(_openmeter_logger)
         return _openmeter_logger  # type: ignore
+    elif logging_integration == "braintrust":
+        for callback in _in_memory_loggers:
+            if isinstance(callback, BraintrustLogger):
+                return callback  # type: ignore
+
+        braintrust_logger = BraintrustLogger()
+        _in_memory_loggers.append(braintrust_logger)
+        return braintrust_logger  # type: ignore
     elif logging_integration == "langsmith":
         for callback in _in_memory_loggers:
             if isinstance(callback, LangsmithLogger):
@@ -1954,6 +1962,43 @@ def _init_custom_logger_compatible_class(
         _langsmith_logger = LangsmithLogger()
         _in_memory_loggers.append(_langsmith_logger)
         return _langsmith_logger  # type: ignore
+    elif logging_integration == "arize":
+        if "ARIZE_SPACE_KEY" not in os.environ:
+            raise ValueError("ARIZE_SPACE_KEY not found in environment variables")
+        if "ARIZE_API_KEY" not in os.environ:
+            raise ValueError("ARIZE_API_KEY not found in environment variables")
+        from litellm.integrations.opentelemetry import (
+            OpenTelemetry,
+            OpenTelemetryConfig,
+        )
+
+        otel_config = OpenTelemetryConfig(
+            exporter="otlp_grpc",
+            endpoint="https://otlp.arize.com/v1",
+        )
+        os.environ["OTEL_EXPORTER_OTLP_TRACES_HEADERS"] = (
+            f"space_key={os.getenv('ARIZE_SPACE_KEY')},api_key={os.getenv('ARIZE_API_KEY')}"
+        )
+        for callback in _in_memory_loggers:
+            if (
+                isinstance(callback, OpenTelemetry)
+                and callback.callback_name == "arize"
+            ):
+                return callback  # type: ignore
+        _otel_logger = OpenTelemetry(config=otel_config, callback_name="arize")
+        _in_memory_loggers.append(_otel_logger)
+        return _otel_logger  # type: ignore
+
+    elif logging_integration == "otel":
+        from litellm.integrations.opentelemetry import OpenTelemetry
+
+        for callback in _in_memory_loggers:
+            if isinstance(callback, OpenTelemetry):
+                return callback  # type: ignore
+
+        otel_logger = OpenTelemetry()
+        _in_memory_loggers.append(otel_logger)
+        return otel_logger  # type: ignore
+
     elif logging_integration == "galileo":
         for callback in _in_memory_loggers:
@@ -2019,6 +2064,10 @@ def get_custom_logger_compatible_class(
         for callback in _in_memory_loggers:
             if isinstance(callback, OpenMeterLogger):
                 return callback
+    elif logging_integration == "braintrust":
+        for callback in _in_memory_loggers:
+            if isinstance(callback, BraintrustLogger):
+                return callback
     elif logging_integration == "galileo":
         for callback in _in_memory_loggers:
             if isinstance(callback, GalileoObserve):
@@ -2027,6 +2076,25 @@ def get_custom_logger_compatible_class(
         for callback in _in_memory_loggers:
             if isinstance(callback, LangsmithLogger):
                 return callback
+    elif logging_integration == "otel":
+        from litellm.integrations.opentelemetry import OpenTelemetry
+
+        for callback in _in_memory_loggers:
+            if isinstance(callback, OpenTelemetry):
+                return callback
+    elif logging_integration == "arize":
+        from litellm.integrations.opentelemetry import OpenTelemetry
+
+        if "ARIZE_SPACE_KEY" not in os.environ:
+            raise ValueError("ARIZE_SPACE_KEY not found in environment variables")
+        if "ARIZE_API_KEY" not in os.environ:
+            raise ValueError("ARIZE_API_KEY not found in environment variables")
+        for callback in _in_memory_loggers:
+            if (
+                isinstance(callback, OpenTelemetry)
+                and callback.callback_name == "arize"
+            ):
+                return callback
     elif logging_integration == "logfire":
         if "LOGFIRE_TOKEN" not in os.environ:
             raise ValueError("LOGFIRE_TOKEN not found in environment variables")
@@ -87,3 +87,33 @@ def redact_message_input_output_from_logging(
 
     # by default return result
     return result
+
+
+def redact_user_api_key_info(metadata: dict) -> dict:
+    """
+    removes any user_api_key_info before passing to logging object, if flag set
+
+    Usage:
+
+    SDK
+    ```python
+    litellm.redact_user_api_key_info = True
+    ```
+
+    PROXY:
+    ```yaml
+    litellm_settings:
+        redact_user_api_key_info: true
+    ```
+    """
+    if litellm.redact_user_api_key_info is not True:
+        return metadata
+
+    new_metadata = {}
+    for k, v in metadata.items():
+        if isinstance(k, str) and k.startswith("user_api_key"):
+            pass
+        else:
+            new_metadata[k] = v
+
+    return new_metadata
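A quick sketch of the new helper's behaviour. The metadata keys below are examples, but any key beginning with `user_api_key` is dropped once the flag is enabled:

```python
import litellm
from litellm.litellm_core_utils.redact_messages import redact_user_api_key_info

litellm.redact_user_api_key_info = True

metadata = {
    "user_api_key_hash": "88dc28...",      # example values
    "user_api_key_alias": "team-a-key",
    "requester_ip_address": "10.0.0.12",
}
print(redact_user_api_key_info(metadata=metadata))
# -> {'requester_ip_address': '10.0.0.12'}
```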
@@ -385,6 +385,11 @@ class AnthropicConfig:
         if "user_id" in anthropic_message_request["metadata"]:
             new_kwargs["user"] = anthropic_message_request["metadata"]["user_id"]
 
+        # Pass litellm proxy specific metadata
+        if "litellm_metadata" in anthropic_message_request:
+            # metadata will be passed to litellm.acompletion(), it's a litellm_param
+            new_kwargs["metadata"] = anthropic_message_request.pop("litellm_metadata")
+
         ## CONVERT TOOL CHOICE
         if "tool_choice" in anthropic_message_request:
             new_kwargs["tool_choice"] = self.translate_anthropic_tool_choice_to_openai(
@@ -775,8 +780,17 @@ class AnthropicChatCompletion(BaseLLM):
         system_prompt = ""
         for idx, message in enumerate(messages):
             if message["role"] == "system":
-                system_prompt += message["content"]
-                system_prompt_indices.append(idx)
+                valid_content: bool = False
+                if isinstance(message["content"], str):
+                    system_prompt += message["content"]
+                    valid_content = True
+                elif isinstance(message["content"], list):
+                    for content in message["content"]:
+                        system_prompt += content.get("text", "")
+                        valid_content = True
+
+                if valid_content:
+                    system_prompt_indices.append(idx)
         if len(system_prompt_indices) > 0:
             for idx in reversed(system_prompt_indices):
                 messages.pop(idx)
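The updated system-prompt handling accepts both string content and a list of content blocks. A standalone sketch of the same extraction logic, using a hypothetical message list:

```python
def extract_system_prompt(messages: list) -> str:
    # mirrors the updated handling: str content and list-of-text-blocks are both accepted
    system_prompt = ""
    for message in messages:
        if message["role"] != "system":
            continue
        content = message["content"]
        if isinstance(content, str):
            system_prompt += content
        elif isinstance(content, list):
            for block in content:
                system_prompt += block.get("text", "")
    return system_prompt


messages = [
    {"role": "system", "content": [{"type": "text", "text": "You are terse."}]},
    {"role": "user", "content": "Hello"},
]
print(extract_system_prompt(messages))  # "You are terse."
```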
@@ -76,6 +76,8 @@ BEDROCK_CONVERSE_MODELS = [
     "anthropic.claude-v1",
     "anthropic.claude-instant-v1",
     "ai21.jamba-instruct-v1:0",
+    "meta.llama3-1-8b-instruct-v1:0",
+    "meta.llama3-1-70b-instruct-v1:0",
 ]
 
 
@@ -1729,7 +1731,7 @@ class BedrockConverseLLM(BaseLLM):
         headers={},
         client: Optional[AsyncHTTPHandler] = None,
     ) -> Union[ModelResponse, CustomStreamWrapper]:
-        if client is None:
+        if client is None or not isinstance(client, AsyncHTTPHandler):
             _params = {}
             if timeout is not None:
                 if isinstance(timeout, float) or isinstance(timeout, int):
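With `meta.llama3-1-8b-instruct-v1:0` and `meta.llama3-1-70b-instruct-v1:0` added to `BEDROCK_CONVERSE_MODELS`, calling them should follow the usual `bedrock/<model-id>` convention; AWS credentials and region are assumed to already be configured:

```python
import litellm

response = litellm.completion(
    model="bedrock/meta.llama3-1-8b-instruct-v1:0",  # routed through the Bedrock Converse API
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(response.choices[0].message.content)
```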
@@ -968,7 +968,7 @@ class OpenAIChatCompletion(BaseLLM):
         except openai.UnprocessableEntityError as e:
             ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
             if litellm.drop_params is True or drop_params is True:
-                if e.body is not None and e.body.get("detail"):  # type: ignore
+                if e.body is not None and isinstance(e.body, dict) and e.body.get("detail"):  # type: ignore
                     detail = e.body.get("detail")  # type: ignore
                     invalid_params: List[str] = []
                     if (
@@ -1100,7 +1100,7 @@ class OpenAIChatCompletion(BaseLLM):
         except openai.UnprocessableEntityError as e:
             ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
             if litellm.drop_params is True or drop_params is True:
-                if e.body is not None and e.body.get("detail"):  # type: ignore
+                if e.body is not None and isinstance(e.body, dict) and e.body.get("detail"):  # type: ignore
                     detail = e.body.get("detail")  # type: ignore
                     invalid_params: List[str] = []
                     if (
@@ -1231,7 +1231,7 @@ class OpenAIChatCompletion(BaseLLM):
         except openai.UnprocessableEntityError as e:
             ## check if body contains unprocessable params - related issue https://github.com/BerriAI/litellm/issues/4800
             if litellm.drop_params is True or drop_params is True:
-                if e.body is not None and e.body.get("detail"):  # type: ignore
+                if e.body is not None and isinstance(e.body, dict) and e.body.get("detail"):  # type: ignore
                     detail = e.body.get("detail")  # type: ignore
                     invalid_params: List[str] = []
                     if (
@@ -1,23 +1,31 @@
-import copy
 import json
 import os
 import time
-import types
 from enum import Enum
-from typing import Callable, List, Optional
+from typing import Any, Callable, Dict, List, Optional, Sequence, Union

 import httpx  # type: ignore
 import requests  # type: ignore

 import litellm
-from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
+from litellm.utils import (
+    Choices,
+    CustomStreamWrapper,
+    Delta,
+    EmbeddingResponse,
+    Message,
+    ModelResponse,
+    Usage,
+    map_finish_reason,
+)

 from .base import BaseLLM
 from .prompt_templates.factory import custom_prompt, prompt_factory


 class TritonError(Exception):
-    def __init__(self, status_code, message):
+    def __init__(self, status_code: int, message: str) -> None:
         self.status_code = status_code
         self.message = message
         self.request = httpx.Request(
@@ -41,8 +49,7 @@ class TritonChatCompletion(BaseLLM):
         api_base: str,
         logging_obj=None,
         api_key: Optional[str] = None,
-    ):
-
+    ) -> EmbeddingResponse:
         async_handler = AsyncHTTPHandler(
             timeout=httpx.Timeout(timeout=600.0, connect=5.0)
         )

@@ -79,10 +86,10 @@ class TritonChatCompletion(BaseLLM):

         return model_response

-    def embedding(
+    async def embedding(
         self,
         model: str,
-        input: list,
+        input: List[str],
         timeout: float,
         api_base: str,
         model_response: litellm.utils.EmbeddingResponse,

@@ -90,8 +97,8 @@ class TritonChatCompletion(BaseLLM):
         logging_obj=None,
         optional_params=None,
         client=None,
-        aembedding=None,
-    ):
+        aembedding: bool = False,
+    ) -> EmbeddingResponse:
         data_for_triton = {
             "inputs": [
                 {

@@ -103,8 +110,6 @@ class TritonChatCompletion(BaseLLM):
             ]
         }

-        ## LOGGING
-
         curl_string = f"curl {api_base} -X POST -H 'Content-Type: application/json' -d '{data_for_triton}'"

         logging_obj.pre_call(

@@ -116,8 +121,8 @@ class TritonChatCompletion(BaseLLM):
             },
         )

-        if aembedding == True:
-            response = self.aembedding(
+        if aembedding:
+            response = await self.aembedding(
                 data=data_for_triton,
                 model_response=model_response,
                 logging_obj=logging_obj,
@@ -130,6 +135,198 @@ class TritonChatCompletion(BaseLLM):
                 "Only async embedding supported for triton, please use litellm.aembedding() for now"
             )

+    def completion(
+        self,
+        model: str,
+        messages: List[dict],
+        timeout: float,
+        api_base: str,
+        model_response: ModelResponse,
+        api_key: Optional[str] = None,
+        logging_obj=None,
+        optional_params=None,
+        client=None,
+        stream: Optional[bool] = False,
+        acompletion: bool = False,
+    ) -> ModelResponse:
+        type_of_model = ""
+        optional_params.pop("stream", False)
+        if api_base.endswith("generate"):  ### This is a trtllm model
+            text_input = messages[0]["content"]
+            data_for_triton: Dict[str, Any] = {
+                "text_input": prompt_factory(model=model, messages=messages),
+                "parameters": {
+                    "max_tokens": int(optional_params.get("max_tokens", 2000)),
+                    "bad_words": [""],
+                    "stop_words": [""],
+                },
+                "stream": bool(stream),
+            }
+            data_for_triton["parameters"].update(optional_params)
+            type_of_model = "trtllm"
+
+        elif api_base.endswith(
+            "infer"
+        ):  ### This is an infer model with a custom model on triton
+            text_input = messages[0]["content"]
+            data_for_triton = {
+                "inputs": [
+                    {
+                        "name": "text_input",
+                        "shape": [1],
+                        "datatype": "BYTES",
+                        "data": [text_input],
+                    }
+                ]
+            }
+
+            for k, v in optional_params.items():
+                if not (k == "stream" or k == "max_retries"):
+                    datatype = "INT32" if isinstance(v, int) else "BYTES"
+                    datatype = "FP32" if isinstance(v, float) else datatype
+                    data_for_triton["inputs"].append(
+                        {"name": k, "shape": [1], "datatype": datatype, "data": [v]}
+                    )
+
+            if "max_tokens" not in optional_params:
+                data_for_triton["inputs"].append(
+                    {
+                        "name": "max_tokens",
+                        "shape": [1],
+                        "datatype": "INT32",
+                        "data": [20],
+                    }
+                )
+
+            type_of_model = "infer"
+        else:  ## Unknown model type passthrough
+            data_for_triton = {
+                "inputs": [
+                    {
+                        "name": "text_input",
+                        "shape": [1],
+                        "datatype": "BYTES",
+                        "data": [messages[0]["content"]],
+                    }
+                ]
+            }
+
+        if logging_obj:
+            logging_obj.pre_call(
+                input=messages,
+                api_key=api_key,
+                additional_args={
+                    "complete_input_dict": optional_params,
+                    "api_base": api_base,
+                    "http_client": client,
+                },
+            )
+
+        headers = {"Content-Type": "application/json"}
+        json_data_for_triton: str = json.dumps(data_for_triton)
+
+        if acompletion:
+            return self.acompletion(  # type: ignore
+                model,
+                json_data_for_triton,
+                headers=headers,
+                logging_obj=logging_obj,
+                api_base=api_base,
+                stream=stream,
+                model_response=model_response,
+                type_of_model=type_of_model,
+            )
+        else:
+            handler = HTTPHandler()
+            if stream:
+                return self._handle_stream(
+                    handler, api_base, data_for_triton, model, logging_obj
+                )
+            else:
+                response = handler.post(url=api_base, data=data_for_triton, headers=headers)
+                return self._handle_response(
+                    response, model_response, logging_obj, type_of_model=type_of_model
+                )
+
+    async def acompletion(
+        self,
+        model: str,
+        data_for_triton,
+        api_base,
+        stream,
+        logging_obj,
+        headers,
+        model_response,
+        type_of_model,
+    ) -> ModelResponse:
+        handler = AsyncHTTPHandler()
+        if stream:
+            return self._ahandle_stream(
+                handler, api_base, data_for_triton, model, logging_obj
+            )
+        else:
+            response = await handler.post(
+                url=api_base, data=data_for_triton, headers=headers
+            )
+
+            return self._handle_response(
+                response, model_response, logging_obj, type_of_model=type_of_model
+            )
+
+    def _handle_stream(self, handler, api_base, data_for_triton, model, logging_obj):
+        response = handler.post(
+            url=api_base + "_stream", data=data_for_triton, stream=True
+        )
+        streamwrapper = litellm.CustomStreamWrapper(
+            response.iter_lines(),
+            model=model,
+            custom_llm_provider="triton",
+            logging_obj=logging_obj,
+        )
+        for chunk in streamwrapper:
+            yield (chunk)
+
+    async def _ahandle_stream(
+        self, handler, api_base, data_for_triton, model, logging_obj
+    ):
+        response = await handler.post(
+            url=api_base + "_stream", data=data_for_triton, stream=True
+        )
+        streamwrapper = litellm.CustomStreamWrapper(
+            response.aiter_lines(),
+            model=model,
+            custom_llm_provider="triton",
+            logging_obj=logging_obj,
+        )
+        async for chunk in streamwrapper:
+            yield (chunk)
+
+    def _handle_response(self, response, model_response, logging_obj, type_of_model):
+        if logging_obj:
+            logging_obj.post_call(original_response=response)
+
+        if response.status_code != 200:
+            raise TritonError(status_code=response.status_code, message=response.text)
+
+        _json_response = response.json()
+        model_response.model = _json_response.get("model_name", "None")
+        if type_of_model == "trtllm":
+            model_response.choices = [
+                Choices(index=0, message=Message(content=_json_response["text_output"]))
+            ]
+        elif type_of_model == "infer":
+            model_response.choices = [
+                Choices(
+                    index=0,
+                    message=Message(content=_json_response["outputs"][0]["data"]),
+                )
+            ]
+        else:
+            model_response.choices = [
+                Choices(index=0, message=Message(content=_json_response["outputs"]))
+            ]
+        return model_response
+
     @staticmethod
     def split_embedding_by_shape(
         data: List[float], shape: List[int]
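For context, a rough sketch of how this new Triton completion path is meant to be exercised from the SDK. The model name and Triton endpoint URL are hypothetical, and the `triton/` prefix is assumed to resolve to the provider branch added in `main.py` later in this commit.

```python
import litellm

# Hypothetical Triton server exposing a TensorRT-LLM "generate" endpoint.
response = litellm.completion(
    model="triton/llama-3-8b",
    messages=[{"role": "user", "content": "Say hello"}],
    api_base="http://localhost:8000/v2/models/llama-3-8b/generate",
)
print(response.choices[0].message.content)
```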
litellm/llms/vertex_ai_llama.py (new file, 203 lines)
@@ -0,0 +1,203 @@
# What is this?
## Handler for calling llama 3.1 API on Vertex AI
import copy
import json
import os
import time
import types
import uuid
from enum import Enum
from typing import Any, Callable, List, Optional, Tuple, Union

import httpx  # type: ignore
import requests  # type: ignore

import litellm
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.types.llms.anthropic import (
    AnthropicMessagesTool,
    AnthropicMessagesToolChoice,
)
from litellm.types.llms.openai import (
    ChatCompletionToolParam,
    ChatCompletionToolParamFunctionChunk,
)
from litellm.types.utils import ResponseFormatChunk
from litellm.utils import CustomStreamWrapper, ModelResponse, Usage

from .base import BaseLLM
from .prompt_templates.factory import (
    construct_tool_use_system_prompt,
    contains_tag,
    custom_prompt,
    extract_between_tags,
    parse_xml_params,
    prompt_factory,
    response_schema_prompt,
)


class VertexAIError(Exception):
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        self.request = httpx.Request(
            method="POST", url=" https://cloud.google.com/vertex-ai/"
        )
        self.response = httpx.Response(status_code=status_code, request=self.request)
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs


class VertexAILlama3Config:
    """
    Reference:https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/llama#streaming

    The class `VertexAILlama3Config` provides configuration for the VertexAI's Llama API interface. Below are the parameters:

    - `max_tokens` Required (integer) max tokens,

    Note: Please make sure to modify the default parameters as required for your use case.
    """

    max_tokens: Optional[int] = None

    def __init__(
        self,
        max_tokens: Optional[int] = None,
    ) -> None:
        locals_ = locals()
        for key, value in locals_.items():
            if key == "max_tokens" and value is None:
                value = self.max_tokens
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return {
            k: v
            for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(
                v,
                (
                    types.FunctionType,
                    types.BuiltinFunctionType,
                    classmethod,
                    staticmethod,
                ),
            )
            and v is not None
        }

    def get_supported_openai_params(self):
        return [
            "max_tokens",
            "stream",
        ]

    def map_openai_params(self, non_default_params: dict, optional_params: dict):
        for param, value in non_default_params.items():
            if param == "max_tokens":
                optional_params["max_tokens"] = value

        return optional_params


class VertexAILlama3(BaseLLM):
    def __init__(self) -> None:
        pass

    def create_vertex_llama3_url(
        self, vertex_location: str, vertex_project: str
    ) -> str:
        return f"https://{vertex_location}-aiplatform.googleapis.com/v1beta1/projects/{vertex_project}/locations/{vertex_location}/endpoints/openapi"

    def completion(
        self,
        model: str,
        messages: list,
        model_response: ModelResponse,
        print_verbose: Callable,
        encoding,
        logging_obj,
        optional_params: dict,
        custom_prompt_dict: dict,
        headers: Optional[dict],
        timeout: Union[float, httpx.Timeout],
        vertex_project=None,
        vertex_location=None,
        vertex_credentials=None,
        litellm_params=None,
        logger_fn=None,
        acompletion: bool = False,
        client=None,
    ):
        try:
            import vertexai
            from google.cloud import aiplatform

            from litellm.llms.openai import OpenAIChatCompletion
            from litellm.llms.vertex_httpx import VertexLLM
        except Exception:

            raise VertexAIError(
                status_code=400,
                message="""vertexai import failed please run `pip install -U "google-cloud-aiplatform>=1.38"`""",
            )

        if not (
            hasattr(vertexai, "preview") or hasattr(vertexai.preview, "language_models")
        ):
            raise VertexAIError(
                status_code=400,
                message="""Upgrade vertex ai. Run `pip install "google-cloud-aiplatform>=1.38"`""",
            )
        try:

            vertex_httpx_logic = VertexLLM()

            access_token, project_id = vertex_httpx_logic._ensure_access_token(
                credentials=vertex_credentials, project_id=vertex_project
            )

            openai_chat_completions = OpenAIChatCompletion()

            ## Load Config
            # config = litellm.VertexAILlama3.get_config()
            # for k, v in config.items():
            #     if k not in optional_params:
            #         optional_params[k] = v

            ## CONSTRUCT API BASE
            stream: bool = optional_params.get("stream", False) or False

            optional_params["stream"] = stream

            api_base = self.create_vertex_llama3_url(
                vertex_location=vertex_location or "us-central1",
                vertex_project=vertex_project or project_id,
            )

            return openai_chat_completions.completion(
                model=model,
                messages=messages,
                api_base=api_base,
                api_key=access_token,
                custom_prompt_dict=custom_prompt_dict,
                model_response=model_response,
                print_verbose=print_verbose,
                logging_obj=logging_obj,
                optional_params=optional_params,
                acompletion=acompletion,
                litellm_params=litellm_params,
                logger_fn=logger_fn,
                client=client,
                timeout=timeout,
            )

        except Exception as e:
            raise VertexAIError(status_code=500, message=str(e))
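A hedged usage sketch for the handler above. The project id is a placeholder, and the `meta/` model prefix is what routes a Vertex AI request into `VertexAILlama3.completion()` (see the `main.py` hunk further down).

```python
import litellm

litellm.vertex_project = "my-gcp-project"   # placeholder
litellm.vertex_location = "us-central1"

response = litellm.completion(
    model="vertex_ai/meta/llama3-405b-instruct-maas",
    messages=[{"role": "user", "content": "Hello from Vertex AI"}],
)
```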
@@ -1033,7 +1033,7 @@ class VertexLLM(BaseLLM):
                 model=model, custom_llm_provider=_custom_llm_provider
             )
         except Exception as e:
-            verbose_logger.error(
+            verbose_logger.warning(
                 "Unable to identify if system message supported. Defaulting to 'False'. Received error message - {}\nAdd it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json".format(
                     str(e)
                 )
@@ -1189,7 +1189,7 @@ class VertexLLM(BaseLLM):
             response.raise_for_status()
         except httpx.HTTPStatusError as err:
             error_code = err.response.status_code
-            raise VertexAIError(status_code=error_code, message=response.text)
+            raise VertexAIError(status_code=error_code, message=err.response.text)
         except httpx.TimeoutException:
             raise VertexAIError(status_code=408, message="Timeout error occurred.")
@@ -120,6 +120,7 @@ from .llms.prompt_templates.factory import (
 )
 from .llms.text_completion_codestral import CodestralTextCompletion
 from .llms.triton import TritonChatCompletion
+from .llms.vertex_ai_llama import VertexAILlama3
 from .llms.vertex_httpx import VertexLLM
 from .llms.watsonx import IBMWatsonXAI
 from .types.llms.openai import HttpxBinaryResponseContent

@@ -156,6 +157,7 @@ triton_chat_completions = TritonChatCompletion()
 bedrock_chat_completion = BedrockLLM()
 bedrock_converse_chat_completion = BedrockConverseLLM()
 vertex_chat_completion = VertexLLM()
+vertex_llama_chat_completion = VertexAILlama3()
 watsonxai = IBMWatsonXAI()
 ####### COMPLETION ENDPOINTS ################

@@ -375,6 +377,7 @@ async def acompletion(
         or custom_llm_provider == "predibase"
         or custom_llm_provider == "bedrock"
         or custom_llm_provider == "databricks"
+        or custom_llm_provider == "triton"
         or custom_llm_provider == "clarifai"
         or custom_llm_provider == "watsonx"
         or custom_llm_provider in litellm.openai_compatible_providers
@@ -1491,6 +1494,10 @@ def completion(
             or get_secret("ANTHROPIC_BASE_URL")
             or "https://api.anthropic.com/v1/complete"
         )
+
+        if api_base is not None and not api_base.endswith("/v1/complete"):
+            api_base += "/v1/complete"
+
         response = anthropic_text_completions.completion(
             model=model,
             messages=messages,

@@ -1517,6 +1524,10 @@ def completion(
             or get_secret("ANTHROPIC_BASE_URL")
             or "https://api.anthropic.com/v1/messages"
         )
+
+        if api_base is not None and not api_base.endswith("/v1/messages"):
+            api_base += "/v1/messages"
+
         response = anthropic_chat_completions.completion(
             model=model,
             messages=messages,
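In effect, a caller can now point `api_base` at a bare Anthropic-compatible host and the right suffix is appended. A minimal sketch, with a hypothetical gateway URL:

```python
import litellm

response = litellm.completion(
    model="claude-3-5-sonnet-20240620",
    messages=[{"role": "user", "content": "ping"}],
    api_base="https://my-anthropic-gateway.example.com",  # "/v1/messages" is appended if missing
)
```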
@@ -2055,7 +2066,26 @@ def completion(
                 timeout=timeout,
                 client=client,
             )
+        elif model.startswith("meta/"):
+            model_response = vertex_llama_chat_completion.completion(
+                model=model,
+                messages=messages,
+                model_response=model_response,
+                print_verbose=print_verbose,
+                optional_params=new_params,
+                litellm_params=litellm_params,
+                logger_fn=logger_fn,
+                encoding=encoding,
+                vertex_location=vertex_ai_location,
+                vertex_project=vertex_ai_project,
+                vertex_credentials=vertex_credentials,
+                logging_obj=logging,
+                acompletion=acompletion,
+                headers=headers,
+                custom_prompt_dict=custom_prompt_dict,
+                timeout=timeout,
+                client=client,
+            )
         else:
             model_response = vertex_ai.completion(
                 model=model,
@@ -2469,6 +2499,25 @@ def completion(
                 return generator

             response = generator
+
+        elif custom_llm_provider == "triton":
+            api_base = litellm.api_base or api_base
+            model_response = triton_chat_completions.completion(
+                api_base=api_base,
+                timeout=timeout,  # type: ignore
+                model=model,
+                messages=messages,
+                model_response=model_response,
+                optional_params=optional_params,
+                logging_obj=logging,
+                stream=stream,
+                acompletion=acompletion,
+            )
+
+            ## RESPONSE OBJECT
+            response = model_response
+            return response
+
         elif custom_llm_provider == "cloudflare":
             api_key = (
                 api_key
@@ -760,6 +760,36 @@
         "litellm_provider": "azure_ai",
         "mode": "chat"
     },
+    "azure_ai/Meta-Llama-31-8B-Instruct": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.0000003,
+        "output_cost_per_token": 0.00000061,
+        "litellm_provider": "azure_ai",
+        "mode": "chat",
+        "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-8b-instruct-offer?tab=PlansAndPrice"
+    },
+    "azure_ai/Meta-Llama-31-70B-Instruct": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000268,
+        "output_cost_per_token": 0.00000354,
+        "litellm_provider": "azure_ai",
+        "mode": "chat",
+        "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-70b-instruct-offer?tab=PlansAndPrice"
+    },
+    "azure_ai/Meta-Llama-31-405B-Instruct": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000533,
+        "output_cost_per_token": 0.000016,
+        "litellm_provider": "azure_ai",
+        "mode": "chat",
+        "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice"
+    },
     "babbage-002": {
         "max_tokens": 16384,
         "max_input_tokens": 16384,
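As a quick sanity check of these rates: 1,000 prompt tokens plus 500 completion tokens on the 70B entry cost 1000 × 0.00000268 + 500 × 0.00000354 ≈ $0.00445. A sketch using litellm's cost helper (treat the exact signature as an assumption; it may vary between versions):

```python
import litellm

prompt_cost, completion_cost = litellm.cost_per_token(
    model="azure_ai/Meta-Llama-31-70B-Instruct",
    prompt_tokens=1000,
    completion_tokens=500,
)
print(prompt_cost + completion_cost)  # expected ~0.00445
```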
@@ -1948,6 +1978,16 @@
         "supports_function_calling": true,
         "supports_vision": true
     },
+    "vertex_ai/meta/llama3-405b-instruct-maas": {
+        "max_tokens": 32000,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 32000,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "vertex_ai-llama_models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models"
+    },
     "vertex_ai/imagegeneration@006": {
         "cost_per_image": 0.020,
         "litellm_provider": "vertex_ai-image-models",
@@ -3633,6 +3673,24 @@
         "litellm_provider": "bedrock",
         "mode": "chat"
     },
+    "meta.llama3-1-8b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 2048,
+        "input_cost_per_token": 0.0000004,
+        "output_cost_per_token": 0.0000006,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "meta.llama3-1-70b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 2048,
+        "input_cost_per_token": 0.00000265,
+        "output_cost_per_token": 0.0000035,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
     "512-x-512/50-steps/stability.stable-diffusion-xl-v0": {
         "max_tokens": 77,
         "max_input_tokens": 77,
@@ -1,5 +1,8 @@
 model_list:
-  - model_name: groq-llama3
+  - model_name: "*" # all requests where model not in your config go to this deployment
     litellm_params:
-      model: groq/llama3-groq-70b-8192-tool-use-preview
-      api_key: os.environ/GROQ_API_KEY
+      model: "openai/*" # passes our validation check that a real provider is given
+      api_key: ""
+
+general_settings:
+  completion_model: "gpt-3.5-turbo"
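With the wildcard entry above, any model name not listed in the config is forwarded to the `openai/*` deployment. A hedged sketch against a locally running proxy; the port and master key are assumptions taken from the default docker setup:

```python
import openai

client = openai.OpenAI(base_url="http://localhost:4000", api_key="sk-1234")
response = client.chat.completions.create(
    model="gpt-4o-mini",  # not in the config; matched by the "*" deployment
    messages=[{"role": "user", "content": "hi"}],
)
```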
@@ -228,6 +228,10 @@ class LiteLLMRoutes(enum.Enum):
         "/utils/token_counter",
     ]

+    anthropic_routes: List = [
+        "/v1/messages",
+    ]
+
     info_routes: List = [
         "/key/info",
         "/team/info",
@@ -880,6 +884,26 @@ class BlockTeamRequest(LiteLLMBase):
     team_id: str  # required


+class AddTeamCallback(LiteLLMBase):
+    callback_name: str
+    callback_type: Literal["success", "failure", "success_and_failure"]
+    # for now - only supported for langfuse
+    callback_vars: Dict[
+        Literal["langfuse_public_key", "langfuse_secret_key", "langfuse_host"], str
+    ]
+
+
+class TeamCallbackMetadata(LiteLLMBase):
+    success_callback: Optional[List[str]] = []
+    failure_callback: Optional[List[str]] = []
+    # for now - only supported for langfuse
+    callback_vars: Optional[
+        Dict[
+            Literal["langfuse_public_key", "langfuse_secret_key", "langfuse_host"], str
+        ]
+    ] = {}
+
+
 class LiteLLM_TeamTable(TeamBase):
     spend: Optional[float] = None
     max_parallel_requests: Optional[int] = None
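Roughly, the request body for the new `/team/{team_id}/callback` endpoint (defined later in this commit) validates against `AddTeamCallback`. A sketch with placeholder keys:

```python
from litellm.proxy._types import AddTeamCallback

payload = AddTeamCallback(
    callback_name="langfuse",
    callback_type="success",
    callback_vars={
        "langfuse_public_key": "pk-lf-...",  # placeholder
        "langfuse_secret_key": "sk-lf-...",  # placeholder
        "langfuse_host": "https://cloud.langfuse.com",
    },
)
```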
@@ -1232,6 +1256,7 @@ class LiteLLM_VerificationTokenView(LiteLLM_VerificationToken):
     soft_budget: Optional[float] = None
     team_model_aliases: Optional[Dict] = None
     team_member_spend: Optional[float] = None
+    team_metadata: Optional[Dict] = None

     # End User Params
     end_user_id: Optional[str] = None
@@ -1677,3 +1702,5 @@ class ProxyErrorTypes(str, enum.Enum):
     budget_exceeded = "budget_exceeded"
     expired_key = "expired_key"
     auth_error = "auth_error"
+    internal_server_error = "internal_server_error"
+    bad_request_error = "bad_request_error"
@@ -24,7 +24,7 @@ from litellm.proxy._types import (
     LitellmUserRoles,
     UserAPIKeyAuth,
 )
-from litellm.proxy.auth.auth_utils import is_openai_route
+from litellm.proxy.auth.auth_utils import is_llm_api_route
 from litellm.proxy.utils import PrismaClient, ProxyLogging, log_to_opentelemetry
 from litellm.types.services import ServiceLoggerPayload, ServiceTypes
@@ -57,6 +57,7 @@ def common_checks(
     4. If end_user (either via JWT or 'user' passed to /chat/completions, /embeddings endpoint) is in budget
     5. [OPTIONAL] If 'enforce_end_user' enabled - did developer pass in 'user' param for openai endpoints
     6. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget
+    7. [OPTIONAL] If guardrails modified - is request allowed to change this
     """
     _model = request_body.get("model", None)
     if team_object is not None and team_object.blocked is True:
@@ -106,7 +107,7 @@ def common_checks(
         general_settings.get("enforce_user_param", None) is not None
         and general_settings["enforce_user_param"] == True
     ):
-        if is_openai_route(route=route) and "user" not in request_body:
+        if is_llm_api_route(route=route) and "user" not in request_body:
             raise Exception(
                 f"'user' param not passed in. 'enforce_user_param'={general_settings['enforce_user_param']}"
             )

@@ -122,7 +123,7 @@ def common_checks(
                 + CommonProxyErrors.not_premium_user.value
             )

-        if is_openai_route(route=route):
+        if is_llm_api_route(route=route):
             # loop through each enforced param
             # example enforced_params ['user', 'metadata', 'metadata.generation_name']
             for enforced_param in general_settings["enforced_params"]:

@@ -150,7 +151,7 @@ def common_checks(
         and global_proxy_spend is not None
         # only run global budget checks for OpenAI routes
         # Reason - the Admin UI should continue working if the proxy crosses its global budget
-        and is_openai_route(route=route)
+        and is_llm_api_route(route=route)
         and route != "/v1/models"
         and route != "/models"
     ):
@@ -158,6 +159,22 @@ def common_checks(
         raise litellm.BudgetExceededError(
             current_cost=global_proxy_spend, max_budget=litellm.max_budget
         )
+
+    _request_metadata: dict = request_body.get("metadata", {}) or {}
+    if _request_metadata.get("guardrails"):
+        # check if team allowed to modify guardrails
+        from litellm.proxy.guardrails.guardrail_helpers import can_modify_guardrails
+
+        can_modify: bool = can_modify_guardrails(team_object)
+        if can_modify is False:
+            from fastapi import HTTPException
+
+            raise HTTPException(
+                status_code=403,
+                detail={
+                    "error": "Your team does not have permission to modify guardrails."
+                },
+            )
     return True
@@ -46,7 +46,7 @@ def route_in_additonal_public_routes(current_route: str):
     return False


-def is_openai_route(route: str) -> bool:
+def is_llm_api_route(route: str) -> bool:
     """
     Helper to check if provided route is an OpenAI route

@@ -59,6 +59,9 @@ def is_openai_route(route: str) -> bool:
     if route in LiteLLMRoutes.openai_routes.value:
         return True

+    if route in LiteLLMRoutes.anthropic_routes.value:
+        return True
+
     # fuzzy match routes like "/v1/threads/thread_49EIN5QF32s4mH20M7GFKdlZ"
     # Check for routes with placeholders
     for openai_route in LiteLLMRoutes.openai_routes.value:
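A small sketch of the behavioral change, assuming `/chat/completions` is already listed in `openai_routes`:

```python
from litellm.proxy.auth.auth_utils import is_llm_api_route

assert is_llm_api_route(route="/chat/completions") is True  # unchanged
assert is_llm_api_route(route="/v1/messages") is True       # newly covered via anthropic_routes
```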
@@ -57,7 +57,7 @@ from litellm.proxy.auth.auth_checks import (
     log_to_opentelemetry,
 )
 from litellm.proxy.auth.auth_utils import (
-    is_openai_route,
+    is_llm_api_route,
     route_in_additonal_public_routes,
 )
 from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
@@ -924,6 +924,7 @@ async def user_api_key_auth(
                 rpm_limit=valid_token.team_rpm_limit,
                 blocked=valid_token.team_blocked,
                 models=valid_token.team_models,
+                metadata=valid_token.team_metadata,
             )

             user_api_key_cache.set_cache(

@@ -994,9 +995,9 @@ async def user_api_key_auth(
             _user_role = _get_user_role(user_id_information=user_id_information)

             if not _is_user_proxy_admin(user_id_information):  # if non-admin
-                if is_openai_route(route=route):
+                if is_llm_api_route(route=route):
                     pass
-                elif is_openai_route(route=request["route"].name):
+                elif is_llm_api_route(route=request["route"].name):
                     pass
                 elif (
                     route in LiteLLMRoutes.info_routes.value

@@ -1049,7 +1050,7 @@ async def user_api_key_auth(

                     pass
                 elif _user_role == LitellmUserRoles.PROXY_ADMIN_VIEW_ONLY.value:
-                    if is_openai_route(route=route):
+                    if is_llm_api_route(route=route):
                         raise HTTPException(
                             status_code=status.HTTP_403_FORBIDDEN,
                             detail=f"user not allowed to access this OpenAI routes, role= {_user_role}",
@@ -23,11 +23,11 @@ def initialize_callbacks_on_proxy(
     )
     if isinstance(value, list):
         imported_list: List[Any] = []
-        known_compatible_callbacks = list(
-            get_args(litellm._custom_logger_compatible_callbacks_literal)
-        )
         for callback in value:  # ["presidio", <my-custom-callback>]
-            if isinstance(callback, str) and callback in known_compatible_callbacks:
+            if (
+                isinstance(callback, str)
+                and callback in litellm._known_custom_logger_compatible_callbacks
+            ):
                 imported_list.append(callback)
             elif isinstance(callback, str) and callback == "otel":
                 from litellm.integrations.opentelemetry import OpenTelemetry
@@ -1,9 +1,26 @@
+from typing import Dict
+
 import litellm
 from litellm._logging import verbose_proxy_logger
-from litellm.proxy.proxy_server import UserAPIKeyAuth
+from litellm.proxy.proxy_server import LiteLLM_TeamTable, UserAPIKeyAuth
 from litellm.types.guardrails import *


+def can_modify_guardrails(team_obj: Optional[LiteLLM_TeamTable]) -> bool:
+    if team_obj is None:
+        return True
+
+    team_metadata = team_obj.metadata or {}
+
+    if team_metadata.get("guardrails", None) is not None and isinstance(
+        team_metadata.get("guardrails"), Dict
+    ):
+        if team_metadata.get("guardrails", {}).get("modify_guardrails", None) is False:
+            return False
+
+    return True
+
+
 async def should_proceed_based_on_metadata(data: dict, guardrail_name: str) -> bool:
     """
     checks if this guardrail should be applied to this call
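The team-level switch this helper reads lives under `metadata["guardrails"]`. A rough illustration; the pydantic model may require additional fields in practice, so treat the constructor call as a sketch:

```python
from litellm.proxy._types import LiteLLM_TeamTable
from litellm.proxy.guardrails.guardrail_helpers import can_modify_guardrails

team = LiteLLM_TeamTable(
    team_id="team-1",  # placeholder
    metadata={"guardrails": {"modify_guardrails": False}},
)
assert can_modify_guardrails(team) is False
assert can_modify_guardrails(None) is True  # no team context -> allowed
```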
@@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Any, Dict, Optional
 from fastapi import Request

 from litellm._logging import verbose_logger, verbose_proxy_logger
-from litellm.proxy._types import CommonProxyErrors, UserAPIKeyAuth
+from litellm.proxy._types import CommonProxyErrors, TeamCallbackMetadata, UserAPIKeyAuth
 from litellm.types.utils import SupportedCacheControls

 if TYPE_CHECKING:
@@ -39,6 +39,9 @@ def _get_metadata_variable_name(request: Request) -> str:
     """
     if "thread" in request.url.path or "assistant" in request.url.path:
         return "litellm_metadata"
+    if "/v1/messages" in request.url.path:
+        # anthropic API has a field called metadata
+        return "litellm_metadata"
     else:
         return "metadata"
@@ -207,6 +210,32 @@ async def add_litellm_data_to_request(
         **data,
     }  # add the team-specific configs to the completion call

+    # Team Callbacks controls
+    if user_api_key_dict.team_metadata is not None:
+        team_metadata = user_api_key_dict.team_metadata
+        if "callback_settings" in team_metadata:
+            callback_settings = team_metadata.get("callback_settings", None) or {}
+            callback_settings_obj = TeamCallbackMetadata(**callback_settings)
+            verbose_proxy_logger.debug(
+                "Team callback settings activated: %s", callback_settings_obj
+            )
+            """
+            callback_settings = {
+              {
+                'callback_vars': {'langfuse_public_key': 'pk', 'langfuse_secret_key': 'sk_'},
+                'failure_callback': [],
+                'success_callback': ['langfuse', 'langfuse']
+              }
+            }
+            """
+            data["success_callback"] = callback_settings_obj.success_callback
+            data["failure_callback"] = callback_settings_obj.failure_callback
+
+            if callback_settings_obj.callback_vars is not None:
+                # unpack callback_vars in data
+                for k, v in callback_settings_obj.callback_vars.items():
+                    data[k] = v
+
     return data
@@ -333,6 +333,13 @@ async def update_key_fn(
         expires = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
         non_default_values["expires"] = expires

+    if "budget_duration" in non_default_values:
+        duration_s = _duration_in_seconds(
+            duration=non_default_values["budget_duration"]
+        )
+        key_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
+        non_default_values["budget_reset_at"] = key_reset_at
+
     response = await prisma_client.update_data(
         token=key, data={**non_default_values, "token": key}
     )
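Conceptually the new branch just computes `budget_reset_at = now + budget_duration`. A standalone sketch that does not use the proxy's internal `_duration_in_seconds` helper; the toy parser is an assumption about the accepted duration format:

```python
from datetime import datetime, timedelta, timezone

def duration_to_seconds(duration: str) -> int:
    # toy parser for "30s" / "30m" / "30h" / "30d" style strings
    units = {"s": 1, "m": 60, "h": 3600, "d": 86400}
    return int(duration[:-1]) * units[duration[-1]]

budget_reset_at = datetime.now(timezone.utc) + timedelta(
    seconds=duration_to_seconds("30d")
)
```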
364
litellm/proxy/management_endpoints/team_callback_endpoints.py
Normal file
364
litellm/proxy/management_endpoints/team_callback_endpoints.py
Normal file
|
@ -0,0 +1,364 @@
|
||||||
|
"""
|
||||||
|
Endpoints to control callbacks per team
|
||||||
|
|
||||||
|
Use this when each team should control its own callbacks
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import copy
|
||||||
|
import json
|
||||||
|
import traceback
|
||||||
|
import uuid
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
import fastapi
|
||||||
|
from fastapi import APIRouter, Depends, Header, HTTPException, Request, status
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
from litellm._logging import verbose_proxy_logger
|
||||||
|
from litellm.proxy._types import (
|
||||||
|
AddTeamCallback,
|
||||||
|
LiteLLM_TeamTable,
|
||||||
|
ProxyErrorTypes,
|
||||||
|
ProxyException,
|
||||||
|
TeamCallbackMetadata,
|
||||||
|
UserAPIKeyAuth,
|
||||||
|
)
|
||||||
|
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||||
|
from litellm.proxy.management_helpers.utils import (
|
||||||
|
add_new_member,
|
||||||
|
management_endpoint_wrapper,
|
||||||
|
)
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
@router.post(
|
||||||
|
"/team/{team_id:path}/callback",
|
||||||
|
tags=["team management"],
|
||||||
|
dependencies=[Depends(user_api_key_auth)],
|
||||||
|
)
|
||||||
|
@management_endpoint_wrapper
|
||||||
|
async def add_team_callbacks(
|
||||||
|
data: AddTeamCallback,
|
||||||
|
http_request: Request,
|
||||||
|
team_id: str,
|
||||||
|
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
|
||||||
|
litellm_changed_by: Optional[str] = Header(
|
||||||
|
None,
|
||||||
|
description="The litellm-changed-by header enables tracking of actions performed by authorized users on behalf of other users, providing an audit trail for accountability",
|
||||||
|
),
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
Add a success/failure callback to a team
|
||||||
|
|
||||||
|
Use this if if you want different teams to have different success/failure callbacks
|
||||||
|
|
||||||
|
Example curl:
|
||||||
|
```
|
||||||
|
curl -X POST 'http:/localhost:4000/team/dbe2f686-a686-4896-864a-4c3924458709/callback' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer sk-1234' \
|
||||||
|
-d '{
|
||||||
|
"callback_name": "langfuse",
|
||||||
|
"callback_type": "success",
|
||||||
|
"callback_vars": {"langfuse_public_key": "pk-lf-xxxx1", "langfuse_secret_key": "sk-xxxxx"}
|
||||||
|
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
This means for the team where team_id = dbe2f686-a686-4896-864a-4c3924458709, all LLM calls will be logged to langfuse using the public key pk-lf-xxxx1 and the secret key sk-xxxxx
|
||||||
|
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from litellm.proxy.proxy_server import (
|
||||||
|
_duration_in_seconds,
|
||||||
|
create_audit_log_for_update,
|
||||||
|
litellm_proxy_admin_name,
|
||||||
|
prisma_client,
|
||||||
|
)
|
||||||
|
|
||||||
|
if prisma_client is None:
|
||||||
|
raise HTTPException(status_code=500, detail={"error": "No db connected"})
|
||||||
|
|
||||||
|
# Check if team_id exists already
|
||||||
|
_existing_team = await prisma_client.get_data(
|
||||||
|
team_id=team_id, table_name="team", query_type="find_unique"
|
||||||
|
)
|
||||||
|
if _existing_team is None:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail={
|
||||||
|
"error": f"Team id = {team_id} does not exist. Please use a different team id."
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# store team callback settings in metadata
|
||||||
|
team_metadata = _existing_team.metadata
|
||||||
|
team_callback_settings = team_metadata.get("callback_settings", {})
|
||||||
|
# expect callback settings to be
|
||||||
|
team_callback_settings_obj = TeamCallbackMetadata(**team_callback_settings)
|
||||||
|
if data.callback_type == "success":
|
||||||
|
if team_callback_settings_obj.success_callback is None:
|
||||||
|
team_callback_settings_obj.success_callback = []
|
||||||
|
|
||||||
|
if data.callback_name in team_callback_settings_obj.success_callback:
|
||||||
|
raise ProxyException(
|
||||||
|
message=f"callback_name = {data.callback_name} already exists in failure_callback, for team_id = {team_id}. \n Existing failure_callback = {team_callback_settings_obj.success_callback}",
|
||||||
|
code=status.HTTP_400_BAD_REQUEST,
|
||||||
|
type=ProxyErrorTypes.bad_request_error,
|
||||||
|
param="callback_name",
|
||||||
|
)
|
||||||
|
|
||||||
|
team_callback_settings_obj.success_callback.append(data.callback_name)
|
||||||
|
elif data.callback_type == "failure":
|
||||||
|
if team_callback_settings_obj.failure_callback is None:
|
||||||
|
team_callback_settings_obj.failure_callback = []
|
||||||
|
|
||||||
|
if data.callback_name in team_callback_settings_obj.failure_callback:
|
||||||
|
raise ProxyException(
|
||||||
|
message=f"callback_name = {data.callback_name} already exists in failure_callback, for team_id = {team_id}. \n Existing failure_callback = {team_callback_settings_obj.failure_callback}",
|
||||||
|
code=status.HTTP_400_BAD_REQUEST,
|
||||||
|
type=ProxyErrorTypes.bad_request_error,
|
||||||
|
param="callback_name",
|
||||||
|
)
|
||||||
|
team_callback_settings_obj.failure_callback.append(data.callback_name)
|
||||||
|
elif data.callback_type == "success_and_failure":
|
||||||
|
if team_callback_settings_obj.success_callback is None:
|
||||||
|
team_callback_settings_obj.success_callback = []
|
||||||
|
if team_callback_settings_obj.failure_callback is None:
|
||||||
|
team_callback_settings_obj.failure_callback = []
|
||||||
|
if data.callback_name in team_callback_settings_obj.success_callback:
|
||||||
|
raise ProxyException(
|
||||||
|
message=f"callback_name = {data.callback_name} already exists in success_callback, for team_id = {team_id}. \n Existing success_callback = {team_callback_settings_obj.success_callback}",
|
||||||
|
code=status.HTTP_400_BAD_REQUEST,
|
||||||
|
type=ProxyErrorTypes.bad_request_error,
|
||||||
|
param="callback_name",
|
||||||
|
)
|
||||||
|
|
||||||
|
if data.callback_name in team_callback_settings_obj.failure_callback:
|
||||||
|
raise ProxyException(
|
||||||
|
message=f"callback_name = {data.callback_name} already exists in failure_callback, for team_id = {team_id}. \n Existing failure_callback = {team_callback_settings_obj.failure_callback}",
|
||||||
|
code=status.HTTP_400_BAD_REQUEST,
|
||||||
|
type=ProxyErrorTypes.bad_request_error,
|
||||||
|
param="callback_name",
|
||||||
|
)
|
||||||
|
|
||||||
|
team_callback_settings_obj.success_callback.append(data.callback_name)
|
||||||
|
team_callback_settings_obj.failure_callback.append(data.callback_name)
|
||||||
|
for var, value in data.callback_vars.items():
|
||||||
|
if team_callback_settings_obj.callback_vars is None:
|
||||||
|
team_callback_settings_obj.callback_vars = {}
|
||||||
|
team_callback_settings_obj.callback_vars[var] = value
|
||||||
|
|
||||||
|
team_callback_settings_obj_dict = team_callback_settings_obj.model_dump()
|
||||||
|
|
||||||
|
team_metadata["callback_settings"] = team_callback_settings_obj_dict
|
||||||
|
team_metadata_json = json.dumps(team_metadata) # update team_metadata
|
||||||
|
|
||||||
|
new_team_row = await prisma_client.db.litellm_teamtable.update(
|
||||||
|
where={"team_id": team_id}, data={"metadata": team_metadata_json} # type: ignore
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"status": "success",
|
||||||
|
"data": new_team_row,
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
verbose_proxy_logger.error(
|
||||||
|
"litellm.proxy.proxy_server.add_team_callbacks(): Exception occured - {}".format(
|
||||||
|
str(e)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
verbose_proxy_logger.debug(traceback.format_exc())
|
||||||
|
if isinstance(e, HTTPException):
|
||||||
|
raise ProxyException(
|
||||||
|
message=getattr(e, "detail", f"Internal Server Error({str(e)})"),
|
||||||
|
type=ProxyErrorTypes.internal_server_error.value,
|
||||||
|
param=getattr(e, "param", "None"),
|
||||||
|
code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR),
|
||||||
|
)
|
||||||
|
elif isinstance(e, ProxyException):
|
||||||
|
raise e
|
||||||
|
raise ProxyException(
|
||||||
|
message="Internal Server Error, " + str(e),
|
||||||
|
type=ProxyErrorTypes.internal_server_error.value,
|
||||||
|
param=getattr(e, "param", "None"),
|
||||||
|
code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.post(
    "/team/{team_id}/disable_logging",
    tags=["team management"],
    dependencies=[Depends(user_api_key_auth)],
)
@management_endpoint_wrapper
async def disable_team_logging(
    http_request: Request,
    team_id: str,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    try:
        from litellm.proxy.proxy_server import prisma_client

        if prisma_client is None:
            raise HTTPException(status_code=500, detail={"error": "No db connected"})

        # Check if team exists
        _existing_team = await prisma_client.get_data(
            team_id=team_id, table_name="team", query_type="find_unique"
        )
        if _existing_team is None:
            raise HTTPException(
                status_code=404,
                detail={"error": f"Team id = {team_id} does not exist."},
            )

        # Update team metadata to disable logging
        team_metadata = _existing_team.metadata
        team_callback_settings = team_metadata.get("callback_settings", {})
        team_callback_settings_obj = TeamCallbackMetadata(**team_callback_settings)

        # Reset callbacks
        team_callback_settings_obj.success_callback = []
        team_callback_settings_obj.failure_callback = []

        # Update metadata
        team_metadata["callback_settings"] = team_callback_settings_obj.model_dump()
        team_metadata_json = json.dumps(team_metadata)

        # Update team in database
        updated_team = await prisma_client.db.litellm_teamtable.update(
            where={"team_id": team_id}, data={"metadata": team_metadata_json}  # type: ignore
        )

        if updated_team is None:
            raise HTTPException(
                status_code=404,
                detail={
                    "error": f"Team id = {team_id} does not exist. Error updating team logging"
                },
            )

        return {
            "status": "success",
            "message": f"Logging disabled for team {team_id}",
            "data": {
                "team_id": updated_team.team_id,
                "success_callbacks": [],
                "failure_callbacks": [],
            },
        }

    except Exception as e:
        verbose_proxy_logger.error(
            f"litellm.proxy.proxy_server.disable_team_logging(): Exception occurred - {str(e)}"
        )
        verbose_proxy_logger.debug(traceback.format_exc())
        if isinstance(e, HTTPException):
            raise ProxyException(
                message=getattr(e, "detail", f"Internal Server Error({str(e)})"),
                type=ProxyErrorTypes.internal_server_error.value,
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR),
            )
        elif isinstance(e, ProxyException):
            raise e
        raise ProxyException(
            message="Internal Server Error, " + str(e),
            type=ProxyErrorTypes.internal_server_error.value,
            param=getattr(e, "param", "None"),
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
@router.get(
    "/team/{team_id:path}/callback",
    tags=["team management"],
    dependencies=[Depends(user_api_key_auth)],
)
@management_endpoint_wrapper
async def get_team_callbacks(
    http_request: Request,
    team_id: str,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    Get the success/failure callbacks and variables for a team

    Example curl:
    ```
    curl -X GET 'http://localhost:4000/team/dbe2f686-a686-4896-864a-4c3924458709/callback' \
        -H 'Authorization: Bearer sk-1234'
    ```

    This will return the callback settings for the team with id dbe2f686-a686-4896-864a-4c3924458709

    Returns {
        "status": "success",
        "data": {
            "team_id": team_id,
            "success_callbacks": team_callback_settings_obj.success_callback,
            "failure_callbacks": team_callback_settings_obj.failure_callback,
            "callback_vars": team_callback_settings_obj.callback_vars,
        },
    }
    """
    try:
        from litellm.proxy.proxy_server import prisma_client

        if prisma_client is None:
            raise HTTPException(status_code=500, detail={"error": "No db connected"})

        # Check if team_id exists
        _existing_team = await prisma_client.get_data(
            team_id=team_id, table_name="team", query_type="find_unique"
        )
        if _existing_team is None:
            raise HTTPException(
                status_code=404,
                detail={"error": f"Team id = {team_id} does not exist."},
            )

        # Retrieve team callback settings from metadata
        team_metadata = _existing_team.metadata
        team_callback_settings = team_metadata.get("callback_settings", {})

        # Convert to TeamCallbackMetadata object for consistent structure
        team_callback_settings_obj = TeamCallbackMetadata(**team_callback_settings)

        return {
            "status": "success",
            "data": {
                "team_id": team_id,
                "success_callbacks": team_callback_settings_obj.success_callback,
                "failure_callbacks": team_callback_settings_obj.failure_callback,
                "callback_vars": team_callback_settings_obj.callback_vars,
            },
        }

    except Exception as e:
        verbose_proxy_logger.error(
            "litellm.proxy.proxy_server.get_team_callbacks(): Exception occurred - {}".format(
                str(e)
            )
        )
        verbose_proxy_logger.debug(traceback.format_exc())
        if isinstance(e, HTTPException):
            raise ProxyException(
                message=getattr(e, "detail", f"Internal Server Error({str(e)})"),
                type=ProxyErrorTypes.internal_server_error.value,
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", status.HTTP_500_INTERNAL_SERVER_ERROR),
            )
        elif isinstance(e, ProxyException):
            raise e
        raise ProxyException(
            message="Internal Server Error, " + str(e),
            type=ProxyErrorTypes.internal_server_error.value,
            param=getattr(e, "param", "None"),
            code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        )
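Taken together, these endpoints let a proxy admin attach, inspect, and disable team-scoped logging callbacks. The sketch below is a minimal client-side walkthrough, assuming the proxy is reachable on http://localhost:4000, that `sk-1234` is a valid admin key, and that the add-callback route is a POST on the same `/team/{team_id}/callback` path; the team id, callback name, and callback variables are placeholders.

```python
# Hypothetical client-side walkthrough of the team callback endpoints above.
# Base URL, API key, team id, and callback variables are placeholders.
import requests

BASE_URL = "http://localhost:4000"
HEADERS = {"Authorization": "Bearer sk-1234"}
TEAM_ID = "dbe2f686-a686-4896-864a-4c3924458709"

# 1. Attach a success/failure callback to the team (assumed POST route).
add_resp = requests.post(
    f"{BASE_URL}/team/{TEAM_ID}/callback",
    headers=HEADERS,
    json={
        "callback_name": "langfuse",
        "callback_type": "success_and_failure",
        "callback_vars": {
            "langfuse_public_key": "pk-placeholder",
            "langfuse_secret_key": "sk-placeholder",
        },
    },
)
print(add_resp.json())  # {"status": "success", "data": {...updated team row...}}

# 2. Read back the team's callback settings.
get_resp = requests.get(f"{BASE_URL}/team/{TEAM_ID}/callback", headers=HEADERS)
print(get_resp.json()["data"])  # success_callbacks / failure_callbacks / callback_vars

# 3. Disable logging for the team (resets both callback lists).
disable_resp = requests.post(
    f"{BASE_URL}/team/{TEAM_ID}/disable_logging", headers=HEADERS
)
print(disable_resp.json()["message"])  # "Logging disabled for team <team_id>"
```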
@@ -363,6 +363,7 @@ async def update_team(
         # set the budget_reset_at in DB
         updated_kv["budget_reset_at"] = reset_at

+    updated_kv = prisma_client.jsonify_object(data=updated_kv)
     team_row: Optional[
         LiteLLM_TeamTable
     ] = await prisma_client.db.litellm_teamtable.update(
@@ -1,10 +1,21 @@
 model_list:
+  - model_name: gpt-4
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
   - model_name: fireworks-llama-v3-70b-instruct
     litellm_params:
       model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct
-      api_key: "os.environ/FIREWORKS_AI_API_KEY"
+      api_key: "os.environ/FIREWORKS"

-router_settings:
-  enable_tag_filtering: True # 👈 Key Change
 general_settings:
   master_key: sk-1234
+  alerting: ["slack"]
+  alerting_threshold: 0.0001
+  alert_to_webhook_url: {
+    "llm_too_slow": "https://hooks.slack.com/services/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",
+    "llm_requests_hanging": "https://hooks.slack.com/services/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",
+  }
+
+litellm_settings:
+  success_callback: ["langfuse"]
@@ -170,6 +170,9 @@ from litellm.proxy.management_endpoints.key_management_endpoints import (
 from litellm.proxy.management_endpoints.key_management_endpoints import (
     router as key_management_router,
 )
+from litellm.proxy.management_endpoints.team_callback_endpoints import (
+    router as team_callback_router,
+)
 from litellm.proxy.management_endpoints.team_endpoints import router as team_router
 from litellm.proxy.openai_files_endpoints.files_endpoints import (
     router as openai_files_router,
@@ -654,7 +657,11 @@ async def _PROXY_track_cost_callback(
     global prisma_client, custom_db_client
     try:
         # check if it has collected an entire stream response
-        verbose_proxy_logger.debug("Proxy: In track_cost_callback for: %s", kwargs)
+        verbose_proxy_logger.debug(
+            "Proxy: In track_cost_callback for: kwargs=%s and completion_response: %s",
+            kwargs,
+            completion_response,
+        )
         verbose_proxy_logger.debug(
             f"kwargs stream: {kwargs.get('stream', None)} + complete streaming response: {kwargs.get('complete_streaming_response', None)}"
         )
@@ -1620,6 +1627,7 @@ class ProxyConfig:
                 alerting=general_settings.get("alerting", None),
                 alerting_threshold=general_settings.get("alerting_threshold", 600),
                 alert_types=general_settings.get("alert_types", None),
+                alert_to_webhook_url=general_settings.get("alert_to_webhook_url", None),
                 alerting_args=general_settings.get("alerting_args", None),
                 redis_cache=redis_usage_cache,
             )
@@ -2905,6 +2913,7 @@ async def chat_completion(
             fastest_response_batch_completion = hidden_params.get(
                 "fastest_response_batch_completion", None
             )
+            additional_headers: dict = hidden_params.get("additional_headers", {}) or {}

             # Post Call Processing
             if llm_router is not None:
@@ -2927,6 +2936,7 @@ async def chat_completion(
                     response_cost=response_cost,
                     model_region=getattr(user_api_key_dict, "allowed_model_region", ""),
                     fastest_response_batch_completion=fastest_response_batch_completion,
+                    **additional_headers,
                 )
                 selected_data_generator = select_data_generator(
                     response=response,
@@ -2944,8 +2954,10 @@ async def chat_completion(
             user_api_key_dict=user_api_key_dict, response=response
         )

-        hidden_params = getattr(response, "_hidden_params", {}) or {}
-        additional_headers: dict = hidden_params.get("additional_headers", {}) or {}
+        hidden_params = (
+            getattr(response, "_hidden_params", {}) or {}
+        )  # get any updated response headers
+        additional_headers = hidden_params.get("additional_headers", {}) or {}

         fastapi_response.headers.update(
             get_custom_headers(
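These chat_completion changes forward any provider `additional_headers` (for example the upstream rate-limit counters) onto the proxy's own HTTP response. A rough way to observe that from a client, assuming a locally running proxy, a placeholder key and model name, and assuming the headers are surfaced with the `llm_provider-` prefix used in the tests further down in this diff:

```python
# Rough check that upstream rate-limit headers are echoed back by the proxy.
# URL, key, and model name are placeholders; the header prefix is an assumption
# based on the `llm_provider-x-ratelimit-*` names asserted in the tests below.
import requests

resp = requests.post(
    "http://localhost:4000/chat/completions",
    headers={"Authorization": "Bearer sk-1234"},
    json={
        "model": "gpt-4",
        "messages": [{"role": "user", "content": "ping"}],
    },
)
print(resp.status_code)
for name, value in resp.headers.items():
    if name.lower().startswith("llm_provider-"):
        print(name, "=", value)  # e.g. llm_provider-x-ratelimit-remaining-requests
```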
@@ -9457,3 +9469,4 @@ app.include_router(analytics_router)
 app.include_router(debugging_endpoints_router)
 app.include_router(ui_crud_endpoints_router)
 app.include_router(openai_files_router)
+app.include_router(team_callback_router)
@@ -183,12 +183,12 @@ model LiteLLM_SpendLogs {
   model                String   @default("")
   model_id             String?  @default("") // the model id stored in proxy model db
   model_group          String?  @default("") // public model_name / model_group
-  api_base             String   @default("")
-  user                 String   @default("")
-  metadata             Json     @default("{}")
-  cache_hit            String   @default("")
-  cache_key            String   @default("")
-  request_tags         Json     @default("[]")
+  api_base             String?  @default("")
+  user                 String?  @default("")
+  metadata             Json?    @default("{}")
+  cache_hit            String?  @default("")
+  cache_key            String?  @default("")
+  request_tags         Json?    @default("[]")
   team_id              String?
   end_user             String?
   requester_ip_address String?
@@ -257,4 +257,4 @@ model LiteLLM_AuditLog {
   object_id      String   // id of the object being audited. This can be the key id, team id, user id, model id
   before_value   Json?    // value of the row
   updated_values Json?    // value of the row after change
 }
litellm/proxy/tests/test_anthropic_sdk.py (new file, 22 lines)
@@ -0,0 +1,22 @@
+import os
+
+from anthropic import Anthropic
+
+client = Anthropic(
+    # This is the default and can be omitted
+    base_url="http://localhost:4000",
+    # this is a litellm proxy key :) - not a real anthropic key
+    api_key="sk-s4xN1IiLTCytwtZFJaYQrA",
+)
+
+message = client.messages.create(
+    max_tokens=1024,
+    messages=[
+        {
+            "role": "user",
+            "content": "Hello, Claude",
+        }
+    ],
+    model="claude-3-opus-20240229",
+)
+print(message.content)
|
@ -25,7 +25,7 @@ from typing_extensions import overload
|
||||||
import litellm
|
import litellm
|
||||||
import litellm.litellm_core_utils
|
import litellm.litellm_core_utils
|
||||||
import litellm.litellm_core_utils.litellm_logging
|
import litellm.litellm_core_utils.litellm_logging
|
||||||
from litellm import EmbeddingResponse, ImageResponse, ModelResponse
|
from litellm import EmbeddingResponse, ImageResponse, ModelResponse, get_litellm_params
|
||||||
from litellm._logging import verbose_proxy_logger
|
from litellm._logging import verbose_proxy_logger
|
||||||
from litellm._service_logger import ServiceLogging, ServiceTypes
|
from litellm._service_logger import ServiceLogging, ServiceTypes
|
||||||
from litellm.caching import DualCache, RedisCache
|
from litellm.caching import DualCache, RedisCache
|
||||||
|
@ -50,7 +50,7 @@ from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
|
||||||
from litellm.proxy.hooks.parallel_request_limiter import (
|
from litellm.proxy.hooks.parallel_request_limiter import (
|
||||||
_PROXY_MaxParallelRequestsHandler,
|
_PROXY_MaxParallelRequestsHandler,
|
||||||
)
|
)
|
||||||
from litellm.types.utils import CallTypes
|
from litellm.types.utils import CallTypes, LoggedLiteLLMParams
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from opentelemetry.trace import Span as _Span
|
from opentelemetry.trace import Span as _Span
|
||||||
|
@ -188,6 +188,7 @@ class ProxyLogging:
|
||||||
"new_model_added",
|
"new_model_added",
|
||||||
"outage_alerts",
|
"outage_alerts",
|
||||||
]
|
]
|
||||||
|
self.alert_to_webhook_url: Optional[dict] = None
|
||||||
self.slack_alerting_instance: SlackAlerting = SlackAlerting(
|
self.slack_alerting_instance: SlackAlerting = SlackAlerting(
|
||||||
alerting_threshold=self.alerting_threshold,
|
alerting_threshold=self.alerting_threshold,
|
||||||
alerting=self.alerting,
|
alerting=self.alerting,
|
||||||
|
@ -202,6 +203,7 @@ class ProxyLogging:
|
||||||
redis_cache: Optional[RedisCache] = None,
|
redis_cache: Optional[RedisCache] = None,
|
||||||
alert_types: Optional[List[AlertType]] = None,
|
alert_types: Optional[List[AlertType]] = None,
|
||||||
alerting_args: Optional[dict] = None,
|
alerting_args: Optional[dict] = None,
|
||||||
|
alert_to_webhook_url: Optional[dict] = None,
|
||||||
):
|
):
|
||||||
updated_slack_alerting: bool = False
|
updated_slack_alerting: bool = False
|
||||||
if alerting is not None:
|
if alerting is not None:
|
||||||
|
@ -213,6 +215,9 @@ class ProxyLogging:
|
||||||
if alert_types is not None:
|
if alert_types is not None:
|
||||||
self.alert_types = alert_types
|
self.alert_types = alert_types
|
||||||
updated_slack_alerting = True
|
updated_slack_alerting = True
|
||||||
|
if alert_to_webhook_url is not None:
|
||||||
|
self.alert_to_webhook_url = alert_to_webhook_url
|
||||||
|
updated_slack_alerting = True
|
||||||
|
|
||||||
if updated_slack_alerting is True:
|
if updated_slack_alerting is True:
|
||||||
self.slack_alerting_instance.update_values(
|
self.slack_alerting_instance.update_values(
|
||||||
|
@ -220,6 +225,7 @@ class ProxyLogging:
|
||||||
alerting_threshold=self.alerting_threshold,
|
alerting_threshold=self.alerting_threshold,
|
||||||
alert_types=self.alert_types,
|
alert_types=self.alert_types,
|
||||||
alerting_args=alerting_args,
|
alerting_args=alerting_args,
|
||||||
|
alert_to_webhook_url=self.alert_to_webhook_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
if (
|
if (
|
||||||
|
@@ -602,14 +608,20 @@ class ProxyLogging:
         if litellm_logging_obj is not None:
             ## UPDATE LOGGING INPUT
             _optional_params = {}
+            _litellm_params = {}
+
+            litellm_param_keys = LoggedLiteLLMParams.__annotations__.keys()
             for k, v in request_data.items():
-                if k != "model" and k != "user" and k != "litellm_params":
+                if k in litellm_param_keys:
+                    _litellm_params[k] = v
+                elif k != "model" and k != "user":
                     _optional_params[k] = v

             litellm_logging_obj.update_environment_variables(
                 model=request_data.get("model", ""),
                 user=request_data.get("user", ""),
                 optional_params=_optional_params,
-                litellm_params=request_data.get("litellm_params", {}),
+                litellm_params=_litellm_params,
             )

             input: Union[list, str, dict] = ""
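The updated hook logic above splits the raw request payload into litellm-level params (any key that appears in `LoggedLiteLLMParams`, defined later in this diff) and provider-facing optional params, instead of lumping everything except `model`/`user` together. A standalone sketch of that filtering, with a made-up request dict purely for illustration:

```python
# Standalone sketch of the key-splitting done in ProxyLogging above.
# The request_data dict here is illustrative, not a real proxy payload,
# and this trimmed TypedDict only copies a few fields from the full definition.
from typing import Optional, TypedDict


class LoggedLiteLLMParams(TypedDict, total=False):
    api_base: Optional[str]
    metadata: Optional[dict]
    proxy_server_request: Optional[dict]
    preset_cache_key: Optional[str]


request_data = {
    "model": "gpt-3.5-turbo",
    "user": "user-123",
    "temperature": 0.2,             # provider-facing optional param
    "metadata": {"team_id": "t1"},  # litellm-level param
    "api_base": "https://example.invalid/v1",
}

litellm_param_keys = LoggedLiteLLMParams.__annotations__.keys()

_litellm_params = {}
_optional_params = {}
for k, v in request_data.items():
    if k in litellm_param_keys:
        _litellm_params[k] = v
    elif k != "model" and k != "user":
        _optional_params[k] = v

print(_litellm_params)   # {'metadata': {'team_id': 't1'}, 'api_base': '...'}
print(_optional_params)  # {'temperature': 0.2}
```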
|
@ -832,6 +844,30 @@ class PrismaClient:
|
||||||
|
|
||||||
If the view doesn't exist, one will be created.
|
If the view doesn't exist, one will be created.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# Check to see if all of the necessary views exist and if they do, simply return
|
||||||
|
# This is more efficient because it lets us check for all views in one
|
||||||
|
# query instead of multiple queries.
|
||||||
|
try:
|
||||||
|
ret = await self.db.query_raw(
|
||||||
|
"""
|
||||||
|
SELECT SUM(1) FROM pg_views
|
||||||
|
WHERE schemaname = 'public' AND viewname IN (
|
||||||
|
'LiteLLM_VerificationTokenView',
|
||||||
|
'MonthlyGlobalSpend',
|
||||||
|
'Last30dKeysBySpend',
|
||||||
|
'Last30dModelsBySpend',
|
||||||
|
'MonthlyGlobalSpendPerKey',
|
||||||
|
'Last30dTopEndUsersSpend'
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
if ret[0]['sum'] == 6:
|
||||||
|
print("All necessary views exist!") # noqa
|
||||||
|
return
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Try to select one row from the view
|
# Try to select one row from the view
|
||||||
await self.db.query_raw(
|
await self.db.query_raw(
|
||||||
|
@ -1313,8 +1349,10 @@ class PrismaClient:
|
||||||
t.tpm_limit AS team_tpm_limit,
|
t.tpm_limit AS team_tpm_limit,
|
||||||
t.rpm_limit AS team_rpm_limit,
|
t.rpm_limit AS team_rpm_limit,
|
||||||
t.models AS team_models,
|
t.models AS team_models,
|
||||||
|
t.metadata AS team_metadata,
|
||||||
t.blocked AS team_blocked,
|
t.blocked AS team_blocked,
|
||||||
t.team_alias AS team_alias,
|
t.team_alias AS team_alias,
|
||||||
|
t.metadata AS team_metadata,
|
||||||
tm.spend AS team_member_spend,
|
tm.spend AS team_member_spend,
|
||||||
m.aliases as team_model_aliases
|
m.aliases as team_model_aliases
|
||||||
FROM "LiteLLM_VerificationToken" AS v
|
FROM "LiteLLM_VerificationToken" AS v
|
||||||
|
|
|
@ -895,6 +895,52 @@ async def test_gemini_pro_function_calling_httpx(model, sync_mode):
|
||||||
pytest.fail("An unexpected exception occurred - {}".format(str(e)))
|
pytest.fail("An unexpected exception occurred - {}".format(str(e)))
|
||||||
|
|
||||||
|
|
||||||
|
from litellm.tests.test_completion import response_format_tests
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"model", ["vertex_ai/meta/llama3-405b-instruct-maas"]
|
||||||
|
) # "vertex_ai",
|
||||||
|
@pytest.mark.parametrize("sync_mode", [True, False]) # "vertex_ai",
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_llama_3_httpx(model, sync_mode):
|
||||||
|
try:
|
||||||
|
load_vertex_ai_credentials()
|
||||||
|
litellm.set_verbose = True
|
||||||
|
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": "Your name is Litellm Bot, you are a helpful assistant",
|
||||||
|
},
|
||||||
|
# User asks for their name and weather in San Francisco
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Hello, what is your name and can you tell me the weather?",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
data = {
|
||||||
|
"model": model,
|
||||||
|
"messages": messages,
|
||||||
|
}
|
||||||
|
if sync_mode:
|
||||||
|
response = litellm.completion(**data)
|
||||||
|
else:
|
||||||
|
response = await litellm.acompletion(**data)
|
||||||
|
|
||||||
|
response_format_tests(response=response)
|
||||||
|
|
||||||
|
print(f"response: {response}")
|
||||||
|
except litellm.RateLimitError as e:
|
||||||
|
pass
|
||||||
|
except Exception as e:
|
||||||
|
if "429 Quota exceeded" in str(e):
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
pytest.fail("An unexpected exception occurred - {}".format(str(e)))
|
||||||
|
|
||||||
|
|
||||||
def vertex_httpx_mock_reject_prompt_post(*args, **kwargs):
|
def vertex_httpx_mock_reject_prompt_post(*args, **kwargs):
|
||||||
mock_response = MagicMock()
|
mock_response = MagicMock()
|
||||||
mock_response.status_code = 200
|
mock_response.status_code = 200
|
||||||
|
|
|
@ -48,6 +48,42 @@ def test_anthropic_completion_input_translation():
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_anthropic_completion_input_translation_with_metadata():
|
||||||
|
"""
|
||||||
|
Tests that cost tracking works as expected with LiteLLM Proxy
|
||||||
|
|
||||||
|
LiteLLM Proxy will insert litellm_metadata for anthropic endpoints to track user_api_key and user_api_key_team_id
|
||||||
|
|
||||||
|
This test ensures that the `litellm_metadata` is not present in the translated input
|
||||||
|
It ensures that `litellm.acompletion()` will receieve metadata which is a litellm specific param
|
||||||
|
"""
|
||||||
|
data = {
|
||||||
|
"model": "gpt-3.5-turbo",
|
||||||
|
"messages": [{"role": "user", "content": "Hey, how's it going?"}],
|
||||||
|
"litellm_metadata": {
|
||||||
|
"user_api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
|
||||||
|
"user_api_key_alias": None,
|
||||||
|
"user_api_end_user_max_budget": None,
|
||||||
|
"litellm_api_version": "1.40.19",
|
||||||
|
"global_max_parallel_requests": None,
|
||||||
|
"user_api_key_user_id": "default_user_id",
|
||||||
|
"user_api_key_org_id": None,
|
||||||
|
"user_api_key_team_id": None,
|
||||||
|
"user_api_key_team_alias": None,
|
||||||
|
"user_api_key_team_max_budget": None,
|
||||||
|
"user_api_key_team_spend": None,
|
||||||
|
"user_api_key_spend": 0.0,
|
||||||
|
"user_api_key_max_budget": None,
|
||||||
|
"user_api_key_metadata": {},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
translated_input = anthropic_adapter.translate_completion_input_params(kwargs=data)
|
||||||
|
|
||||||
|
assert "litellm_metadata" not in translated_input
|
||||||
|
assert "metadata" in translated_input
|
||||||
|
assert translated_input["metadata"] == data["litellm_metadata"]
|
||||||
|
|
||||||
|
|
||||||
def test_anthropic_completion_e2e():
|
def test_anthropic_completion_e2e():
|
||||||
litellm.set_verbose = True
|
litellm.set_verbose = True
|
||||||
|
|
||||||
|
|
29
litellm/tests/test_arize_ai.py
Normal file
29
litellm/tests/test_arize_ai.py
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
from litellm._logging import verbose_logger
|
||||||
|
from litellm.integrations.opentelemetry import OpenTelemetry, OpenTelemetryConfig
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
import logging
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio()
|
||||||
|
async def test_async_otel_callback():
|
||||||
|
litellm.set_verbose = True
|
||||||
|
litellm.success_callback = ["arize"]
|
||||||
|
|
||||||
|
await litellm.acompletion(
|
||||||
|
model="gpt-3.5-turbo",
|
||||||
|
messages=[{"role": "user", "content": "hi test from local arize"}],
|
||||||
|
mock_response="hello",
|
||||||
|
temperature=0.1,
|
||||||
|
user="OTEL_USER",
|
||||||
|
)
|
|
@ -2,18 +2,19 @@
|
||||||
# This tests chaos monkeys - if random parts of the system are broken / things aren't sent correctly - what happens.
|
# This tests chaos monkeys - if random parts of the system are broken / things aren't sent correctly - what happens.
|
||||||
# Expect to add more edge cases to this over time.
|
# Expect to add more edge cases to this over time.
|
||||||
|
|
||||||
import sys, os
|
import os
|
||||||
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
sys.path.insert(
|
sys.path.insert(
|
||||||
0, os.path.abspath("../..")
|
0, os.path.abspath("../..")
|
||||||
) # Adds the parent directory to the system path
|
) # Adds the parent directory to the system path
|
||||||
import litellm
|
import litellm
|
||||||
from litellm import embedding, completion
|
from litellm import completion, embedding
|
||||||
from litellm.utils import Message
|
from litellm.utils import Message
|
||||||
|
|
||||||
|
|
||||||
# litellm.set_verbose = True
|
# litellm.set_verbose = True
|
||||||
user_message = "Hello, how are you?"
|
user_message = "Hello, how are you?"
|
||||||
messages = [{"content": user_message, "role": "user"}]
|
messages = [{"content": user_message, "role": "user"}]
|
||||||
|
@ -74,6 +75,8 @@ def test_completion_invalid_param_cohere():
|
||||||
response = completion(model="command-nightly", messages=messages, seed=12)
|
response = completion(model="command-nightly", messages=messages, seed=12)
|
||||||
pytest.fail(f"This should have failed cohere does not support `seed` parameter")
|
pytest.fail(f"This should have failed cohere does not support `seed` parameter")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
assert isinstance(e, litellm.UnsupportedParamsError)
|
||||||
|
print("got an exception=", str(e))
|
||||||
if " cohere does not support parameters: {'seed': 12}" in str(e):
|
if " cohere does not support parameters: {'seed': 12}" in str(e):
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
|
|
53
litellm/tests/test_braintrust.py
Normal file
53
litellm/tests/test_braintrust.py
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
# What is this?
|
||||||
|
## This tests the braintrust integration
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import traceback
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from fastapi import Request
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
import os
|
||||||
|
|
||||||
|
sys.path.insert(
|
||||||
|
0, os.path.abspath("../..")
|
||||||
|
) # Adds the parent directory to the system path
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
from litellm.llms.custom_httpx.http_handler import HTTPHandler
|
||||||
|
|
||||||
|
|
||||||
|
def test_braintrust_logging():
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
http_client = HTTPHandler()
|
||||||
|
|
||||||
|
setattr(
|
||||||
|
litellm.integrations.braintrust_logging,
|
||||||
|
"global_braintrust_sync_http_handler",
|
||||||
|
http_client,
|
||||||
|
)
|
||||||
|
|
||||||
|
with patch.object(http_client, "post", new=MagicMock()) as mock_client:
|
||||||
|
|
||||||
|
# set braintrust as a callback, litellm will send the data to braintrust
|
||||||
|
litellm.callbacks = ["braintrust"]
|
||||||
|
|
||||||
|
# openai call
|
||||||
|
response = litellm.completion(
|
||||||
|
model="gpt-3.5-turbo",
|
||||||
|
messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
|
||||||
|
)
|
||||||
|
|
||||||
|
mock_client.assert_called()
|
|
@ -346,7 +346,7 @@ def test_completion_claude_3_empty_response():
|
||||||
messages = [
|
messages = [
|
||||||
{
|
{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
"content": "You are 2twNLGfqk4GMOn3ffp4p.",
|
"content": [{"type": "text", "text": "You are 2twNLGfqk4GMOn3ffp4p."}],
|
||||||
},
|
},
|
||||||
{"role": "user", "content": "Hi gm!", "name": "ishaan"},
|
{"role": "user", "content": "Hi gm!", "name": "ishaan"},
|
||||||
{"role": "assistant", "content": "Good morning! How are you doing today?"},
|
{"role": "assistant", "content": "Good morning! How are you doing today?"},
|
||||||
|
@ -1364,6 +1364,12 @@ def test_completion_openai_response_headers():
|
||||||
print("response_headers=", response._response_headers)
|
print("response_headers=", response._response_headers)
|
||||||
assert response._response_headers is not None
|
assert response._response_headers is not None
|
||||||
assert "x-ratelimit-remaining-tokens" in response._response_headers
|
assert "x-ratelimit-remaining-tokens" in response._response_headers
|
||||||
|
assert isinstance(
|
||||||
|
response._hidden_params["additional_headers"][
|
||||||
|
"llm_provider-x-ratelimit-remaining-requests"
|
||||||
|
],
|
||||||
|
str,
|
||||||
|
)
|
||||||
|
|
||||||
# /chat/completion - with streaming
|
# /chat/completion - with streaming
|
||||||
|
|
||||||
|
@ -1376,6 +1382,12 @@ def test_completion_openai_response_headers():
|
||||||
print("streaming response_headers=", response_headers)
|
print("streaming response_headers=", response_headers)
|
||||||
assert response_headers is not None
|
assert response_headers is not None
|
||||||
assert "x-ratelimit-remaining-tokens" in response_headers
|
assert "x-ratelimit-remaining-tokens" in response_headers
|
||||||
|
assert isinstance(
|
||||||
|
response._hidden_params["additional_headers"][
|
||||||
|
"llm_provider-x-ratelimit-remaining-requests"
|
||||||
|
],
|
||||||
|
str,
|
||||||
|
)
|
||||||
|
|
||||||
for chunk in streaming_response:
|
for chunk in streaming_response:
|
||||||
print("chunk=", chunk)
|
print("chunk=", chunk)
|
||||||
|
@ -1390,6 +1402,12 @@ def test_completion_openai_response_headers():
|
||||||
print("embedding_response_headers=", embedding_response_headers)
|
print("embedding_response_headers=", embedding_response_headers)
|
||||||
assert embedding_response_headers is not None
|
assert embedding_response_headers is not None
|
||||||
assert "x-ratelimit-remaining-tokens" in embedding_response_headers
|
assert "x-ratelimit-remaining-tokens" in embedding_response_headers
|
||||||
|
assert isinstance(
|
||||||
|
response._hidden_params["additional_headers"][
|
||||||
|
"llm_provider-x-ratelimit-remaining-requests"
|
||||||
|
],
|
||||||
|
str,
|
||||||
|
)
|
||||||
|
|
||||||
litellm.return_response_headers = False
|
litellm.return_response_headers = False
|
||||||
|
|
||||||
|
@ -2542,6 +2560,71 @@ def test_completion_anyscale_with_functions():
|
||||||
# test_completion_anyscale_with_functions()
|
# test_completion_anyscale_with_functions()
|
||||||
|
|
||||||
|
|
||||||
|
def test_completion_azure_extra_headers():
|
||||||
|
# this tests if we can pass api_key to completion, when it's not in the env.
|
||||||
|
# DO NOT REMOVE THIS TEST. No MATTER WHAT Happens!
|
||||||
|
# If you want to remove it, speak to Ishaan!
|
||||||
|
# Ishaan will be very disappointed if this test is removed -> this is a standard way to pass api_key + the router + proxy use this
|
||||||
|
from httpx import Client
|
||||||
|
from openai import AzureOpenAI
|
||||||
|
|
||||||
|
from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
|
||||||
|
|
||||||
|
http_client = Client()
|
||||||
|
|
||||||
|
with patch.object(http_client, "send", new=MagicMock()) as mock_client:
|
||||||
|
litellm.client_session = http_client
|
||||||
|
try:
|
||||||
|
response = completion(
|
||||||
|
model="azure/chatgpt-v-2",
|
||||||
|
messages=messages,
|
||||||
|
api_base=os.getenv("AZURE_API_BASE"),
|
||||||
|
api_version="2023-07-01-preview",
|
||||||
|
api_key=os.getenv("AZURE_API_KEY"),
|
||||||
|
extra_headers={
|
||||||
|
"Authorization": "my-bad-key",
|
||||||
|
"Ocp-Apim-Subscription-Key": "hello-world-testing",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
print(response)
|
||||||
|
pytest.fail("Expected this to fail")
|
||||||
|
except Exception as e:
|
||||||
|
pass
|
||||||
|
|
||||||
|
mock_client.assert_called()
|
||||||
|
|
||||||
|
print(f"mock_client.call_args: {mock_client.call_args}")
|
||||||
|
request = mock_client.call_args[0][0]
|
||||||
|
print(request.method) # This will print 'POST'
|
||||||
|
print(request.url) # This will print the full URL
|
||||||
|
print(request.headers) # This will print the full URL
|
||||||
|
auth_header = request.headers.get("Authorization")
|
||||||
|
apim_key = request.headers.get("Ocp-Apim-Subscription-Key")
|
||||||
|
print(auth_header)
|
||||||
|
assert auth_header == "my-bad-key"
|
||||||
|
assert apim_key == "hello-world-testing"
|
||||||
|
|
||||||
|
|
||||||
|
def test_completion_azure_ad_token():
|
||||||
|
# this tests if we can pass api_key to completion, when it's not in the env.
|
||||||
|
# DO NOT REMOVE THIS TEST. No MATTER WHAT Happens!
|
||||||
|
# If you want to remove it, speak to Ishaan!
|
||||||
|
# Ishaan will be very disappointed if this test is removed -> this is a standard way to pass api_key + the router + proxy use this
|
||||||
|
from httpx import Client
|
||||||
|
from openai import AzureOpenAI
|
||||||
|
|
||||||
|
from litellm import completion
|
||||||
|
from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="azure/chatgpt-v-2",
|
||||||
|
messages=messages,
|
||||||
|
# api_key="my-fake-ad-token",
|
||||||
|
azure_ad_token=os.getenv("AZURE_API_KEY"),
|
||||||
|
)
|
||||||
|
print(response)
|
||||||
|
|
||||||
|
|
||||||
def test_completion_azure_key_completion_arg():
|
def test_completion_azure_key_completion_arg():
|
||||||
# this tests if we can pass api_key to completion, when it's not in the env.
|
# this tests if we can pass api_key to completion, when it's not in the env.
|
||||||
# DO NOT REMOVE THIS TEST. No MATTER WHAT Happens!
|
# DO NOT REMOVE THIS TEST. No MATTER WHAT Happens!
|
||||||
|
|
|
@ -881,6 +881,7 @@ def test_completion_azure_ai():
|
||||||
@pytest.mark.parametrize("sync_mode", [True, False])
|
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_completion_cost_hidden_params(sync_mode):
|
async def test_completion_cost_hidden_params(sync_mode):
|
||||||
|
litellm.return_response_headers = True
|
||||||
if sync_mode:
|
if sync_mode:
|
||||||
response = litellm.completion(
|
response = litellm.completion(
|
||||||
model="gpt-3.5-turbo",
|
model="gpt-3.5-turbo",
|
||||||
|
|
|
@ -235,6 +235,7 @@ class CompletionCustomHandler(
|
||||||
|
|
||||||
assert isinstance(kwargs["optional_params"], dict)
|
assert isinstance(kwargs["optional_params"], dict)
|
||||||
assert isinstance(kwargs["litellm_params"], dict)
|
assert isinstance(kwargs["litellm_params"], dict)
|
||||||
|
assert isinstance(kwargs["litellm_params"]["metadata"], Optional[dict])
|
||||||
assert isinstance(kwargs["start_time"], (datetime, type(None)))
|
assert isinstance(kwargs["start_time"], (datetime, type(None)))
|
||||||
assert isinstance(kwargs["stream"], bool)
|
assert isinstance(kwargs["stream"], bool)
|
||||||
assert isinstance(kwargs["user"], (str, type(None)))
|
assert isinstance(kwargs["user"], (str, type(None)))
|
||||||
|
|
|
@ -197,6 +197,29 @@ def test_openai_azure_embedding():
|
||||||
pytest.fail(f"Error occurred: {e}")
|
pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(
|
||||||
|
os.environ.get("CIRCLE_OIDC_TOKEN") is None,
|
||||||
|
reason="Cannot run without being in CircleCI Runner",
|
||||||
|
)
|
||||||
|
def test_openai_azure_embedding_with_oidc_and_cf():
|
||||||
|
# TODO: Switch to our own Azure account, currently using ai.moda's account
|
||||||
|
os.environ["AZURE_TENANT_ID"] = "17c0a27a-1246-4aa1-a3b6-d294e80e783c"
|
||||||
|
os.environ["AZURE_CLIENT_ID"] = "4faf5422-b2bd-45e8-a6d7-46543a38acd0"
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = embedding(
|
||||||
|
model="azure/text-embedding-ada-002",
|
||||||
|
input=["Hello"],
|
||||||
|
azure_ad_token="oidc/circleci/",
|
||||||
|
api_base="https://eastus2-litellm.openai.azure.com/",
|
||||||
|
api_version="2024-06-01",
|
||||||
|
)
|
||||||
|
print(response)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
|
|
||||||
def test_openai_azure_embedding_optional_arg(mocker):
|
def test_openai_azure_embedding_optional_arg(mocker):
|
||||||
mocked_create_embeddings = mocker.patch.object(
|
mocked_create_embeddings = mocker.patch.object(
|
||||||
openai.resources.embeddings.Embeddings,
|
openai.resources.embeddings.Embeddings,
|
||||||
|
@ -650,3 +673,17 @@ async def test_databricks_embeddings(sync_mode):
|
||||||
# print(response)
|
# print(response)
|
||||||
|
|
||||||
# local_proxy_embeddings()
|
# local_proxy_embeddings()
|
||||||
|
|
||||||
|
|
||||||
|
def test_embedding_azure_ad_token():
|
||||||
|
# this tests if we can pass api_key to completion, when it's not in the env.
|
||||||
|
# DO NOT REMOVE THIS TEST. No MATTER WHAT Happens!
|
||||||
|
# If you want to remove it, speak to Ishaan!
|
||||||
|
# Ishaan will be very disappointed if this test is removed -> this is a standard way to pass api_key + the router + proxy use this
|
||||||
|
|
||||||
|
response = embedding(
|
||||||
|
model="azure/azure-embedding-model",
|
||||||
|
input=["good morning from litellm"],
|
||||||
|
azure_ad_token=os.getenv("AZURE_API_KEY"),
|
||||||
|
)
|
||||||
|
print(response)
|
||||||
|
|
|
@ -64,6 +64,30 @@ async def test_content_policy_exception_azure():
|
||||||
pytest.fail(f"An exception occurred - {str(e)}")
|
pytest.fail(f"An exception occurred - {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_content_policy_exception_openai():
|
||||||
|
try:
|
||||||
|
# this is ony a test - we needed some way to invoke the exception :(
|
||||||
|
litellm.set_verbose = True
|
||||||
|
response = await litellm.acompletion(
|
||||||
|
model="gpt-3.5-turbo-0613",
|
||||||
|
stream=True,
|
||||||
|
messages=[
|
||||||
|
{"role": "user", "content": "Gimme the lyrics to Don't Stop Me Now"}
|
||||||
|
],
|
||||||
|
)
|
||||||
|
async for chunk in response:
|
||||||
|
print(chunk)
|
||||||
|
except litellm.ContentPolicyViolationError as e:
|
||||||
|
print("caught a content policy violation error! Passed")
|
||||||
|
print("exception", e)
|
||||||
|
assert e.llm_provider == "openai"
|
||||||
|
pass
|
||||||
|
except Exception as e:
|
||||||
|
print()
|
||||||
|
pytest.fail(f"An exception occurred - {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
# Test 1: Context Window Errors
|
# Test 1: Context Window Errors
|
||||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
@pytest.mark.parametrize("model", exception_models)
|
@pytest.mark.parametrize("model", exception_models)
|
||||||
|
|
|
@ -36,6 +36,7 @@ async def test_async_langsmith_logging():
|
||||||
temperature=0.2,
|
temperature=0.2,
|
||||||
metadata={
|
metadata={
|
||||||
"id": run_id,
|
"id": run_id,
|
||||||
|
"tags": ["tag1", "tag2"],
|
||||||
"user_api_key": "6eb81e014497d89f3cc1aa9da7c2b37bda6b7fea68e4b710d33d94201e68970c",
|
"user_api_key": "6eb81e014497d89f3cc1aa9da7c2b37bda6b7fea68e4b710d33d94201e68970c",
|
||||||
"user_api_key_alias": "ishaans-langmsith-key",
|
"user_api_key_alias": "ishaans-langmsith-key",
|
||||||
"user_api_end_user_max_budget": None,
|
"user_api_end_user_max_budget": None,
|
||||||
|
|
|
@ -128,6 +128,19 @@ def test_azure_ai_mistral_optional_params():
|
||||||
assert "user" not in optional_params
|
assert "user" not in optional_params
|
||||||
|
|
||||||
|
|
||||||
|
def test_vertex_ai_llama_3_optional_params():
|
||||||
|
litellm.vertex_llama3_models = ["meta/llama3-405b-instruct-maas"]
|
||||||
|
litellm.drop_params = True
|
||||||
|
optional_params = get_optional_params(
|
||||||
|
model="meta/llama3-405b-instruct-maas",
|
||||||
|
user="John",
|
||||||
|
custom_llm_provider="vertex_ai",
|
||||||
|
max_tokens=10,
|
||||||
|
temperature=0.2,
|
||||||
|
)
|
||||||
|
assert "user" not in optional_params
|
||||||
|
|
||||||
|
|
||||||
def test_azure_gpt_optional_params_gpt_vision():
|
def test_azure_gpt_optional_params_gpt_vision():
|
||||||
# for OpenAI, Azure all extra params need to get passed as extra_body to OpenAI python. We assert we actually set extra_body here
|
# for OpenAI, Azure all extra params need to get passed as extra_body to OpenAI python. We assert we actually set extra_body here
|
||||||
optional_params = litellm.utils.get_optional_params(
|
optional_params = litellm.utils.get_optional_params(
|
||||||
|
|
|
@ -212,7 +212,7 @@ def test_convert_url_to_img():
|
||||||
[
|
[
|
||||||
("data:image/jpeg;base64,1234", "image/jpeg"),
|
("data:image/jpeg;base64,1234", "image/jpeg"),
|
||||||
("data:application/pdf;base64,1234", "application/pdf"),
|
("data:application/pdf;base64,1234", "application/pdf"),
|
||||||
("data:image\/jpeg;base64,1234", "image/jpeg"),
|
(r"data:image\/jpeg;base64,1234", "image/jpeg"),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_base64_image_input(url, expected_media_type):
|
def test_base64_image_input(url, expected_media_type):
|
||||||
|
|
|
@ -19,7 +19,7 @@ import pytest
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
from litellm.proxy._types import LiteLLMRoutes
|
from litellm.proxy._types import LiteLLMRoutes
|
||||||
from litellm.proxy.auth.auth_utils import is_openai_route
|
from litellm.proxy.auth.auth_utils import is_llm_api_route
|
||||||
from litellm.proxy.proxy_server import app
|
from litellm.proxy.proxy_server import app
|
||||||
|
|
||||||
# Configure logging
|
# Configure logging
|
||||||
|
@ -77,8 +77,8 @@ def test_routes_on_litellm_proxy():
|
||||||
("/v1/non_existent_endpoint", False),
|
("/v1/non_existent_endpoint", False),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_is_openai_route(route: str, expected: bool):
|
def test_is_llm_api_route(route: str, expected: bool):
|
||||||
assert is_openai_route(route) == expected
|
assert is_llm_api_route(route) == expected
|
||||||
|
|
||||||
|
|
||||||
# Test case for routes that are similar but should return False
|
# Test case for routes that are similar but should return False
|
||||||
|
@ -91,5 +91,10 @@ def test_is_openai_route(route: str, expected: bool):
|
||||||
"/engines/model/invalid/completions",
|
"/engines/model/invalid/completions",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_is_openai_route_similar_but_false(route: str):
|
def test_is_llm_api_route_similar_but_false(route: str):
|
||||||
assert is_openai_route(route) == False
|
assert is_llm_api_route(route) == False
|
||||||
|
|
||||||
|
|
||||||
|
def test_anthropic_api_routes():
|
||||||
|
# allow non proxy admins to call anthropic api routes
|
||||||
|
assert is_llm_api_route(route="/v1/messages") is True
|
||||||
|
|
|
@ -173,6 +173,63 @@ def test_chat_completion(mock_acompletion, client_no_auth):
|
||||||
pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
|
pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
@mock_patch_acompletion()
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_team_disable_guardrails(mock_acompletion, client_no_auth):
|
||||||
|
"""
|
||||||
|
If team not allowed to turn on/off guardrails
|
||||||
|
|
||||||
|
Raise 403 forbidden error, if request is made by team on `/key/generate` or `/chat/completions`.
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
|
||||||
|
from fastapi import HTTPException, Request
|
||||||
|
from starlette.datastructures import URL
|
||||||
|
|
||||||
|
from litellm.proxy._types import LiteLLM_TeamTable, ProxyException, UserAPIKeyAuth
|
||||||
|
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||||
|
from litellm.proxy.proxy_server import hash_token, user_api_key_cache
|
||||||
|
|
||||||
|
_team_id = "1234"
|
||||||
|
user_key = "sk-12345678"
|
||||||
|
|
||||||
|
valid_token = UserAPIKeyAuth(
|
||||||
|
team_id=_team_id,
|
||||||
|
team_blocked=True,
|
||||||
|
token=hash_token(user_key),
|
||||||
|
last_refreshed_at=time.time(),
|
||||||
|
)
|
||||||
|
await asyncio.sleep(1)
|
||||||
|
team_obj = LiteLLM_TeamTable(
|
||||||
|
team_id=_team_id,
|
||||||
|
blocked=False,
|
||||||
|
last_refreshed_at=time.time(),
|
||||||
|
metadata={"guardrails": {"modify_guardrails": False}},
|
||||||
|
)
|
||||||
|
user_api_key_cache.set_cache(key=hash_token(user_key), value=valid_token)
|
||||||
|
user_api_key_cache.set_cache(key="team_id:{}".format(_team_id), value=team_obj)
|
||||||
|
|
||||||
|
setattr(litellm.proxy.proxy_server, "user_api_key_cache", user_api_key_cache)
|
||||||
|
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
|
||||||
|
setattr(litellm.proxy.proxy_server, "prisma_client", "hello-world")
|
||||||
|
|
||||||
|
request = Request(scope={"type": "http"})
|
||||||
|
request._url = URL(url="/chat/completions")
|
||||||
|
|
||||||
|
body = {"metadata": {"guardrails": {"hide_secrets": False}}}
|
||||||
|
json_bytes = json.dumps(body).encode("utf-8")
|
||||||
|
|
||||||
|
request._body = json_bytes
|
||||||
|
|
||||||
|
try:
|
||||||
|
await user_api_key_auth(request=request, api_key="Bearer " + user_key)
|
||||||
|
pytest.fail("Expected to raise 403 forbidden error.")
|
||||||
|
except ProxyException as e:
|
||||||
|
assert e.code == 403
|
||||||
|
|
||||||
|
|
||||||
from litellm.tests.test_custom_callback_input import CompletionCustomHandler
|
from litellm.tests.test_custom_callback_input import CompletionCustomHandler
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,8 @@ sys.path.insert(
|
||||||
import pytest
|
import pytest
|
||||||
from litellm import get_secret
|
from litellm import get_secret
|
||||||
from litellm.proxy.secret_managers.aws_secret_manager import load_aws_secret_manager
|
from litellm.proxy.secret_managers.aws_secret_manager import load_aws_secret_manager
|
||||||
|
from litellm.llms.azure import get_azure_ad_token_from_oidc
|
||||||
|
from litellm.llms.bedrock_httpx import BedrockLLM
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skip(reason="AWS Suspended Account")
|
@pytest.mark.skip(reason="AWS Suspended Account")
|
||||||
|
@ -60,7 +62,7 @@ def test_oidc_github():
|
||||||
)
|
)
|
||||||
def test_oidc_circleci():
|
def test_oidc_circleci():
|
||||||
secret_val = get_secret(
|
secret_val = get_secret(
|
||||||
"oidc/circleci/https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-text-express-v1/invoke"
|
"oidc/circleci/"
|
||||||
)
|
)
|
||||||
|
|
||||||
print(f"secret_val: {redact_oidc_signature(secret_val)}")
|
print(f"secret_val: {redact_oidc_signature(secret_val)}")
|
||||||
|
@ -76,3 +78,38 @@ def test_oidc_circleci_v2():
|
||||||
)
|
)
|
||||||
|
|
||||||
print(f"secret_val: {redact_oidc_signature(secret_val)}")
|
print(f"secret_val: {redact_oidc_signature(secret_val)}")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(
|
||||||
|
os.environ.get("CIRCLE_OIDC_TOKEN") is None,
|
||||||
|
reason="Cannot run without being in CircleCI Runner",
|
||||||
|
)
|
||||||
|
def test_oidc_circleci_with_azure():
|
||||||
|
# TODO: Switch to our own Azure account, currently using ai.moda's account
|
||||||
|
os.environ["AZURE_TENANT_ID"] = "17c0a27a-1246-4aa1-a3b6-d294e80e783c"
|
||||||
|
os.environ["AZURE_CLIENT_ID"] = "4faf5422-b2bd-45e8-a6d7-46543a38acd0"
|
||||||
|
azure_ad_token = get_azure_ad_token_from_oidc("oidc/circleci/")
|
||||||
|
|
||||||
|
print(f"secret_val: {redact_oidc_signature(azure_ad_token)}")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(
|
||||||
|
os.environ.get("CIRCLE_OIDC_TOKEN") is None,
|
||||||
|
reason="Cannot run without being in CircleCI Runner",
|
||||||
|
)
|
||||||
|
def test_oidc_circle_v1_with_amazon():
|
||||||
|
# The purpose of this test is to get logs using the older v1 of the CircleCI OIDC token
|
||||||
|
|
||||||
|
# TODO: This is using ai.moda's IAM role, we should use LiteLLM's IAM role eventually
|
||||||
|
aws_role_name = (
|
||||||
|
"arn:aws:iam::335785316107:role/litellm-github-unit-tests-circleci-v1-assume-only"
|
||||||
|
)
|
||||||
|
aws_web_identity_token = "oidc/circleci/"
|
||||||
|
|
||||||
|
bllm = BedrockLLM()
|
||||||
|
creds = bllm.get_credentials(
|
||||||
|
aws_region_name="ca-west-1",
|
||||||
|
aws_web_identity_token=aws_web_identity_token,
|
||||||
|
aws_role_name=aws_role_name,
|
||||||
|
aws_session_name="assume-v1-session",
|
||||||
|
)
|
||||||
|
|
|
@ -1988,25 +1988,30 @@ async def test_hf_completion_tgi_stream():
|
||||||
|
|
||||||
# test on openai completion call
|
# test on openai completion call
|
||||||
def test_openai_chat_completion_call():
|
def test_openai_chat_completion_call():
|
||||||
try:
|
litellm.set_verbose = False
|
||||||
litellm.set_verbose = False
|
litellm.return_response_headers = True
|
||||||
print(f"making openai chat completion call")
|
print(f"making openai chat completion call")
|
||||||
response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
|
response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
|
||||||
complete_response = ""
|
assert isinstance(
|
||||||
start_time = time.time()
|
response._hidden_params["additional_headers"][
|
||||||
for idx, chunk in enumerate(response):
|
"llm_provider-x-ratelimit-remaining-requests"
|
||||||
chunk, finished = streaming_format_tests(idx, chunk)
|
],
|
||||||
print(f"outside chunk: {chunk}")
|
str,
|
||||||
if finished:
|
)
|
||||||
break
|
|
||||||
complete_response += chunk
|
print(f"response._hidden_params: {response._hidden_params}")
|
||||||
# print(f'complete_chunk: {complete_response}')
|
complete_response = ""
|
||||||
if complete_response.strip() == "":
|
start_time = time.time()
|
||||||
raise Exception("Empty response received")
|
for idx, chunk in enumerate(response):
|
||||||
print(f"complete response: {complete_response}")
|
chunk, finished = streaming_format_tests(idx, chunk)
|
||||||
except:
|
print(f"outside chunk: {chunk}")
|
||||||
print(f"error occurred: {traceback.format_exc()}")
|
if finished:
|
||||||
pass
|
break
|
||||||
|
complete_response += chunk
|
||||||
|
# print(f'complete_chunk: {complete_response}')
|
||||||
|
if complete_response.strip() == "":
|
||||||
|
raise Exception("Empty response received")
|
||||||
|
print(f"complete response: {complete_response}")
|
||||||
|
|
||||||
|
|
||||||
# test_openai_chat_completion_call()
|
# test_openai_chat_completion_call()
|
||||||
|
|
|
@@ -1,4 +1,4 @@
-from typing import Iterable, List, Optional, Union
+from typing import Any, Dict, Iterable, List, Optional, Union

 from pydantic import BaseModel, validator
 from typing_extensions import Literal, Required, TypedDict
@@ -113,6 +113,9 @@ class AnthropicMessagesRequest(TypedDict, total=False):
     top_k: int
     top_p: float

+    # litellm param - used for tracking litellm proxy metadata in the request
+    litellm_metadata: dict
+

 class ContentTextBlockDelta(TypedDict):
     """
@@ -436,6 +436,7 @@ class ChatCompletionRequest(TypedDict, total=False):
     function_call: Union[str, dict]
     functions: List
     user: str
+    metadata: dict  # litellm specific param


 class ChatCompletionDeltaChunk(TypedDict, total=False):
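The new `litellm_metadata` field lets the proxy attach its own tracking metadata to Anthropic `/v1/messages` requests without colliding with Anthropic's own `metadata` parameter; the adapter is then expected to fold it back into litellm's `metadata` kwarg before calling `litellm.acompletion()` (see `test_anthropic_completion_input_translation_with_metadata` earlier in this diff). A rough sketch of that renaming step, independent of the real adapter code:

```python
# Rough sketch of folding litellm_metadata back into litellm's `metadata` kwarg
# during input translation. This mirrors the assertions in
# test_anthropic_completion_input_translation_with_metadata, not the real
# adapter implementation.
def translate_anthropic_kwargs(kwargs: dict) -> dict:
    translated = dict(kwargs)
    litellm_metadata = translated.pop("litellm_metadata", None)
    if litellm_metadata is not None and "metadata" not in translated:
        translated["metadata"] = litellm_metadata
    return translated


data = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hey, how's it going?"}],
    "litellm_metadata": {"user_api_key": "hashed-key", "user_api_key_team_id": None},
}
translated = translate_anthropic_kwargs(data)
assert "litellm_metadata" not in translated
assert translated["metadata"] == data["litellm_metadata"]
```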
|
@@ -1029,3 +1029,22 @@ class GenericImageParsingChunk(TypedDict):
 class ResponseFormatChunk(TypedDict, total=False):
     type: Required[Literal["json_object", "text"]]
     response_schema: dict
+
+
+class LoggedLiteLLMParams(TypedDict, total=False):
+    force_timeout: Optional[float]
+    custom_llm_provider: Optional[str]
+    api_base: Optional[str]
+    litellm_call_id: Optional[str]
+    model_alias_map: Optional[dict]
+    metadata: Optional[dict]
+    model_info: Optional[dict]
+    proxy_server_request: Optional[dict]
+    acompletion: Optional[bool]
+    preset_cache_key: Optional[str]
+    no_log: Optional[bool]
+    input_cost_per_second: Optional[float]
+    input_cost_per_token: Optional[float]
+    output_cost_per_token: Optional[float]
+    output_cost_per_second: Optional[float]
+    cooldown_time: Optional[float]
|
litellm/utils.py (111 changed lines)
@@ -129,6 +129,7 @@ from .exceptions import (
     ServiceUnavailableError,
     Timeout,
     UnprocessableEntityError,
+    UnsupportedParamsError,
 )
 from .proxy._types import KeyManagementSystem
 from .types.llms.openai import (
@@ -158,6 +159,7 @@ from typing import (
     Tuple,
     Union,
     cast,
+    get_args,
 )

 from .caching import Cache
@@ -224,17 +226,6 @@ last_fetched_at_keys = None
 # }


-class UnsupportedParamsError(Exception):
-    def __init__(self, status_code, message):
-        self.status_code = status_code
-        self.message = message
-        self.request = httpx.Request(method="POST", url=" https://openai.api.com/v1/")
-        self.response = httpx.Response(status_code=status_code, request=self.request)
-        super().__init__(
-            self.message
-        )  # Call the base class constructor with the parameters it needs
-
-
 ############################################################
 def print_verbose(
     print_statement,
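With `UnsupportedParamsError` now imported from `.exceptions` instead of being defined inline in `utils.py`, callers can catch it as `litellm.UnsupportedParamsError`, which is exactly what the updated cohere test earlier in this diff asserts. A small usage sketch, assuming a configured cohere key and that `command-nightly` still rejects the `seed` parameter:

```python
# Minimal sketch: catching the relocated UnsupportedParamsError.
# Assumes a configured cohere key and that `seed` is still unsupported there,
# mirroring test_completion_invalid_param_cohere earlier in this diff.
import litellm

try:
    litellm.completion(
        model="command-nightly",
        messages=[{"role": "user", "content": "Hello, how are you?"}],
        seed=12,  # not supported by cohere -> should raise
    )
except litellm.UnsupportedParamsError as e:
    print("unsupported params:", e)
```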
|
@@ -405,7 +396,6 @@ def function_setup(
     # Pop the async items from input_callback in reverse order to avoid index issues
     for index in reversed(removed_async_items):
         litellm.input_callback.pop(index)
-
     if len(litellm.success_callback) > 0:
         removed_async_items = []
         for index, callback in enumerate(litellm.success_callback):  # type: ignore

@@ -417,9 +407,9 @@ def function_setup(
                 # we only support async dynamo db logging for acompletion/aembedding since that's used on proxy
                 litellm._async_success_callback.append(callback)
                 removed_async_items.append(index)
-            elif callback == "langsmith":
+            elif callback in litellm._known_custom_logger_compatible_callbacks:
                 callback_class = litellm.litellm_core_utils.litellm_logging._init_custom_logger_compatible_class(  # type: ignore
-                    callback, internal_usage_cache=None, llm_router=None
+                    callback, internal_usage_cache=None, llm_router=None  # type: ignore
                 )

                 # don't double add a callback
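function_setup now matches callback names against litellm._known_custom_logger_compatible_callbacks instead of the single hard-coded "langsmith" string, so every logger-compatible integration is initialized through the same path. A hedged usage sketch (the model name is illustrative and mock_response keeps it offline):

    import litellm

    # "langsmith" was the only name handled before; it is one of the
    # logger-compatible callbacks covered by the new membership check.
    litellm.success_callback = ["langsmith"]

    response = litellm.completion(
        model="gpt-3.5-turbo",                            # illustrative model
        messages=[{"role": "user", "content": "hi"}],
        mock_response="hello",                            # no real API call is made
    )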
@@ -3088,6 +3078,15 @@ def get_optional_params(
            non_default_params=non_default_params,
            optional_params=optional_params,
        )
+    elif custom_llm_provider == "vertex_ai" and model in litellm.vertex_llama3_models:
+        supported_params = get_supported_openai_params(
+            model=model, custom_llm_provider=custom_llm_provider
+        )
+        _check_valid_arg(supported_params=supported_params)
+        optional_params = litellm.VertexAILlama3Config().map_openai_params(
+            non_default_params=non_default_params,
+            optional_params=optional_params,
+        )
    elif custom_llm_provider == "sagemaker":
        ## check if unsupported param passed in
        supported_params = get_supported_openai_params(
@@ -4189,6 +4188,9 @@ def get_supported_openai_params(
        return litellm.GoogleAIStudioGeminiConfig().get_supported_openai_params()
    elif custom_llm_provider == "vertex_ai":
        if request_type == "chat_completion":
+            if model.startswith("meta/"):
+                return litellm.VertexAILlama3Config().get_supported_openai_params()
+
            return litellm.VertexAIConfig().get_supported_openai_params()
        elif request_type == "embeddings":
            return litellm.VertexAITextEmbeddingConfig().get_supported_openai_params()
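Together with the get_optional_params branch above, OpenAI-style params for Vertex AI Llama 3 models ("meta/..." on vertex_ai) are now resolved through VertexAILlama3Config. A hedged sketch of querying the supported params; the exact list returned depends on that config class:

    from litellm.utils import get_supported_openai_params

    params = get_supported_openai_params(
        model="meta/llama3-405b-instruct-maas",  # the "meta/" prefix triggers the new branch
        custom_llm_provider="vertex_ai",
        request_type="chat_completion",
    )
    print(params)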
@@ -4484,7 +4486,11 @@ def get_llm_provider(
                or get_secret("TOGETHER_AI_TOKEN")
            )
        elif custom_llm_provider == "friendliai":
-            api_base = "https://inference.friendli.ai/v1"
+            api_base = (
+                api_base
+                or get_secret("FRIENDLI_API_BASE")
+                or "https://inference.friendli.ai/v1"
+            )
            dynamic_api_key = (
                api_key
                or get_secret("FRIENDLIAI_API_KEY")
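The FriendliAI api_base is no longer pinned to the public endpoint: an explicitly passed api_base wins, then the FRIENDLI_API_BASE secret/env var, then the default URL. A hedged sketch of the resolution order (the base URL and model name below are placeholders):

    import os
    from litellm.utils import get_llm_provider

    os.environ["FRIENDLI_API_BASE"] = "https://example.internal/friendli/v1"  # placeholder
    os.environ["FRIENDLIAI_API_KEY"] = "flp_..."                              # placeholder

    model, provider, api_key, api_base = get_llm_provider(
        model="friendliai/meta-llama-3-8b-instruct"  # placeholder model name
    )
    print(api_base)  # resolves to FRIENDLI_API_BASE instead of the hard-coded default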
@@ -5678,6 +5684,14 @@ def convert_to_model_response_object(
    _response_headers: Optional[dict] = None,
 ):
    received_args = locals()
+    if _response_headers is not None:
+        llm_response_headers = {
+            "{}-{}".format("llm_provider", k): v for k, v in _response_headers.items()
+        }
+        if hidden_params is not None:
+            hidden_params["additional_headers"] = llm_response_headers
+        else:
+            hidden_params = {"additional_headers": llm_response_headers}
    ### CHECK IF ERROR IN RESPONSE ### - openrouter returns these in the dictionary
    if (
        response_object is not None
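Provider response headers are now surfaced to callers by prefixing each key with llm_provider- and attaching the result to hidden_params["additional_headers"]. A standalone illustration of that mapping (the sample headers are made up):

    _response_headers = {
        "x-ratelimit-remaining-requests": "99",   # made-up sample header
        "x-request-id": "req_123",                # made-up sample header
    }
    llm_response_headers = {
        "{}-{}".format("llm_provider", k): v for k, v in _response_headers.items()
    }
    hidden_params = {"additional_headers": llm_response_headers}
    # {'additional_headers': {'llm_provider-x-ratelimit-remaining-requests': '99',
    #                         'llm_provider-x-request-id': 'req_123'}}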
@@ -5744,10 +5758,12 @@ def convert_to_model_response_object(
            model_response_object.usage.total_tokens = response_object["usage"].get("total_tokens", 0)  # type: ignore

        if "created" in response_object:
-            model_response_object.created = response_object["created"]
+            model_response_object.created = response_object["created"] or int(
+                time.time()
+            )

        if "id" in response_object:
-            model_response_object.id = response_object["id"]
+            model_response_object.id = response_object["id"] or str(uuid.uuid4())

        if "system_fingerprint" in response_object:
            model_response_object.system_fingerprint = response_object[
@@ -8312,8 +8328,13 @@ class CustomStreamWrapper:
            or {}
        )
        self._hidden_params = {
-            "model_id": (_model_info.get("id", None))
+            "model_id": (_model_info.get("id", None)),
        }  # returned as x-litellm-model-id response header in proxy
+        if _response_headers is not None:
+            self._hidden_params["additional_headers"] = {
+                "{}-{}".format("llm_provider", k): v
+                for k, v in _response_headers.items()
+            }
        self._response_headers = _response_headers
        self.response_id = None
        self.logging_loop = None
@@ -8808,11 +8829,14 @@ class CustomStreamWrapper:
                    str_line.choices[0].content_filter_result
                )
            else:
-                error_message = "Azure Response={}".format(
-                    str(dict(str_line))
+                error_message = "{} Response={}".format(
+                    self.custom_llm_provider, str(dict(str_line))
                )
-                raise litellm.AzureOpenAIError(
-                    status_code=400, message=error_message
+                raise litellm.ContentPolicyViolationError(
+                    message=error_message,
+                    llm_provider=self.custom_llm_provider,
+                    model=self.model,
                )

            # checking for logprobs
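Streaming content-filter failures are now raised as litellm.ContentPolicyViolationError, carrying the provider name and model, instead of an Azure-specific error. A hedged sketch of catching it; the deployment name is a placeholder and this assumes the provider actually triggers its content filter:

    import litellm

    try:
        for chunk in litellm.completion(
            model="azure/my-deployment",                       # placeholder deployment
            messages=[{"role": "user", "content": "..."}],
            stream=True,
        ):
            pass
    except litellm.ContentPolicyViolationError as e:
        print(f"blocked by {e.llm_provider}: {e.message}")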
@@ -9094,6 +9118,42 @@ class CustomStreamWrapper:
        except Exception as e:
            raise e

+    def handle_triton_stream(self, chunk):
+        try:
+            if isinstance(chunk, dict):
+                parsed_response = chunk
+            elif isinstance(chunk, (str, bytes)):
+                if isinstance(chunk, bytes):
+                    chunk = chunk.decode("utf-8")
+                if "text_output" in chunk:
+                    response = chunk.replace("data: ", "").strip()
+                    parsed_response = json.loads(response)
+                else:
+                    return {
+                        "text": "",
+                        "is_finished": False,
+                        "prompt_tokens": 0,
+                        "completion_tokens": 0,
+                    }
+            else:
+                print_verbose(f"chunk: {chunk} (Type: {type(chunk)})")
+                raise ValueError(
+                    f"Unable to parse response. Original response: {chunk}"
+                )
+            text = parsed_response.get("text_output", "")
+            finish_reason = parsed_response.get("stop_reason")
+            is_finished = parsed_response.get("is_finished", False)
+            return {
+                "text": text,
+                "is_finished": is_finished,
+                "finish_reason": finish_reason,
+                "prompt_tokens": parsed_response.get("input_token_count", 0),
+                "completion_tokens": parsed_response.get("generated_token_count", 0),
+            }
+            return {"text": "", "is_finished": False}
+        except Exception as e:
+            raise e
+
    def handle_clarifai_completion_chunk(self, chunk):
        try:
            if isinstance(chunk, dict):
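For reference, handle_triton_stream expects Triton-style SSE lines whose JSON payload carries text_output, is_finished, and stop_reason. A standalone illustration of the same parsing on a made-up chunk:

    import json

    chunk = 'data: {"text_output": "Hello", "is_finished": false, "stop_reason": null}'
    if "text_output" in chunk:
        parsed = json.loads(chunk.replace("data: ", "").strip())
        piece = {
            "text": parsed.get("text_output", ""),
            "is_finished": parsed.get("is_finished", False),
            "finish_reason": parsed.get("stop_reason"),
        }
        print(piece)  # {'text': 'Hello', 'is_finished': False, 'finish_reason': None}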
@@ -9513,6 +9573,12 @@ class CustomStreamWrapper:
                completion_obj["content"] = response_obj["text"]
                if response_obj["is_finished"]:
                    self.received_finish_reason = response_obj["finish_reason"]
+            elif self.custom_llm_provider == "triton":
+                response_obj = self.handle_triton_stream(chunk)
+                completion_obj["content"] = response_obj["text"]
+                print_verbose(f"completion obj content: {completion_obj['content']}")
+                if response_obj["is_finished"]:
+                    self.received_finish_reason = response_obj["finish_reason"]
            elif self.custom_llm_provider == "text-completion-openai":
                response_obj = self.handle_openai_text_completion_chunk(chunk)
                completion_obj["content"] = response_obj["text"]
@@ -10068,6 +10134,7 @@ class CustomStreamWrapper:
                or self.custom_llm_provider == "predibase"
                or self.custom_llm_provider == "databricks"
                or self.custom_llm_provider == "bedrock"
+                or self.custom_llm_provider == "triton"
                or self.custom_llm_provider == "watsonx"
                or self.custom_llm_provider in litellm.openai_compatible_endpoints
            ):
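Adding "triton" to this provider list routes Triton streams through the generic chunk path above. A hedged end-to-end sketch; the model identifier and api_base are placeholders, not verified values:

    import litellm

    for chunk in litellm.completion(
        model="triton/llama-3-8b",                                        # placeholder
        api_base="http://localhost:8000/v2/models/llama-3-8b/generate",  # placeholder
        messages=[{"role": "user", "content": "Say hi"}],
        stream=True,
    ):
        print(chunk.choices[0].delta.content or "", end="")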
@@ -760,6 +760,36 @@
        "litellm_provider": "azure_ai",
        "mode": "chat"
    },
+    "azure_ai/Meta-Llama-31-8B-Instruct": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.0000003,
+        "output_cost_per_token": 0.00000061,
+        "litellm_provider": "azure_ai",
+        "mode": "chat",
+        "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-8b-instruct-offer?tab=PlansAndPrice"
+    },
+    "azure_ai/Meta-Llama-31-70B-Instruct": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000268,
+        "output_cost_per_token": 0.00000354,
+        "litellm_provider": "azure_ai",
+        "mode": "chat",
+        "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-70b-instruct-offer?tab=PlansAndPrice"
+    },
+    "azure_ai/Meta-Llama-31-405B-Instruct": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.00000533,
+        "output_cost_per_token": 0.000016,
+        "litellm_provider": "azure_ai",
+        "mode": "chat",
+        "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice"
+    },
    "babbage-002": {
        "max_tokens": 16384,
        "max_input_tokens": 16384,
@@ -1948,6 +1978,16 @@
        "supports_function_calling": true,
        "supports_vision": true
    },
+    "vertex_ai/meta/llama3-405b-instruct-maas": {
+        "max_tokens": 32000,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 32000,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "vertex_ai-llama_models",
+        "mode": "chat",
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models"
+    },
    "vertex_ai/imagegeneration@006": {
        "cost_per_image": 0.020,
        "litellm_provider": "vertex_ai-image-models",
@@ -3633,6 +3673,24 @@
        "litellm_provider": "bedrock",
        "mode": "chat"
    },
+    "meta.llama3-1-8b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 2048,
+        "input_cost_per_token": 0.0000004,
+        "output_cost_per_token": 0.0000006,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
+    "meta.llama3-1-70b-instruct-v1:0": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 2048,
+        "input_cost_per_token": 0.00000265,
+        "output_cost_per_token": 0.0000035,
+        "litellm_provider": "bedrock",
+        "mode": "chat"
+    },
    "512-x-512/50-steps/stability.stable-diffusion-xl-v0": {
        "max_tokens": 77,
        "max_input_tokens": 77,
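With the pricing entries above in the model map, litellm's cost helpers can price Bedrock Llama 3.1 usage. A hedged sketch (token counts are illustrative, and it assumes a litellm build that already ships these entries):

    import litellm

    prompt_cost, completion_cost = litellm.cost_per_token(
        model="meta.llama3-1-8b-instruct-v1:0",
        prompt_tokens=1000,
        completion_tokens=500,
    )
    # expected: 1000 * 0.0000004 and 500 * 0.0000006 from the entry above
    print(prompt_cost, completion_cost)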
prometheus.yml (new file, 7 lines)
@@ -0,0 +1,7 @@
+global:
+  scrape_interval: 15s
+
+scrape_configs:
+  - job_name: 'litellm'
+    static_configs:
+      - targets: ['litellm:4000'] # Assuming Litellm exposes metrics at port 4000
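This scrape config assumes the proxy serves Prometheus-format metrics on port 4000. A hedged way to check the target locally from Python; the /metrics path and the litellm_ metric prefix are assumptions, not taken from this diff:

    import urllib.request

    with urllib.request.urlopen("http://localhost:4000/metrics", timeout=5) as resp:
        body = resp.read().decode("utf-8")

    # Prometheus exposition format: one "metric_name{labels} value" line per sample
    print([line for line in body.splitlines() if line.startswith("litellm_")][:5])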
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.41.26"
+version = "1.42.0"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"

@@ -91,7 +91,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"

 [tool.commitizen]
-version = "1.41.26"
+version = "1.42.0"
 version_files = [
     "pyproject.toml:^version"
 ]
@@ -172,7 +172,7 @@ model LiteLLM_Config {
 model LiteLLM_SpendLogs {
   request_id String @id
   call_type String
-  api_key String @default ("")
+  api_key String @default ("") // Hashed API Token. Not the actual Virtual Key. Equivalent to 'token' column in LiteLLM_VerificationToken
   spend Float @default(0.0)
   total_tokens Int @default(0)
   prompt_tokens Int @default(0)

@@ -183,12 +183,12 @@ model LiteLLM_SpendLogs {
   model String @default("")
   model_id String? @default("") // the model id stored in proxy model db
   model_group String? @default("") // public model_name / model_group
-  api_base String @default("")
-  user String @default("")
-  metadata Json @default("{}")
-  cache_hit String @default("")
-  cache_key String @default("")
-  request_tags Json @default("[]")
+  api_base String? @default("")
+  user String? @default("")
+  metadata Json? @default("{}")
+  cache_hit String? @default("")
+  cache_key String? @default("")
+  request_tags Json? @default("[]")
   team_id String?
   end_user String?
   requester_ip_address String?