Merge branch 'main' into litellm_contributor_prs_03_24_2025_p1

@@ -9,7 +9,11 @@ commands:
       - run:
           name: "Configure Google DNS"
           command: |
-            echo "nameserver 8.8.8.8" | sudo tee /etc/resolv.conf
+            # Backup original resolv.conf
+            sudo cp /etc/resolv.conf /etc/resolv.conf.backup
+            # Add both local and Google DNS servers
+            echo "nameserver 127.0.0.11" | sudo tee /etc/resolv.conf
+            echo "nameserver 8.8.8.8" | sudo tee -a /etc/resolv.conf
             echo "nameserver 8.8.4.4" | sudo tee -a /etc/resolv.conf

 jobs:
@@ -243,6 +247,12 @@ jobs:
     steps:
       - checkout
      - setup_google_dns
+      - run:
+          name: DNS lookup for Redis host
+          command: |
+            sudo apt-get update
+            sudo apt-get install -y dnsutils
+            dig redis-19899.c239.us-east-1-2.ec2.redns.redis-cloud.com +short
       - run:
           name: Show git commit hash
           command: |
@@ -414,7 +424,7 @@ jobs:
           command: |
             pwd
             ls
-            python -m pytest tests/local_testing tests/router_unit_tests --cov=litellm --cov-report=xml -vv -k "router" -x -s -v --junitxml=test-results/junit.xml --durations=5
+            python -m pytest tests/local_testing tests/router_unit_tests --cov=litellm --cov-report=xml -vv -k "router" -x -v --junitxml=test-results/junit.xml --durations=5
           no_output_timeout: 120m
       - run:
           name: Rename the coverage files
@@ -490,6 +500,12 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - run:
+          name: Install PostgreSQL
+          command: |
+            sudo apt-get update
+            sudo apt-get install postgresql postgresql-contrib
+            echo 'export PATH=/usr/lib/postgresql/*/bin:$PATH' >> $BASH_ENV
       - setup_google_dns
       - run:
           name: Show git commit hash
@@ -545,6 +561,7 @@ jobs:
             pip install "diskcache==5.6.1"
             pip install "Pillow==10.3.0"
             pip install "jsonschema==4.22.0"
+            pip install "pytest-postgresql==7.0.1"
       - save_cache:
           paths:
             - ./venv
@@ -593,6 +610,8 @@ jobs:
           name: Install Dependencies
           command: |
             python -m pip install --upgrade pip
+            pip install wheel
+            pip install --upgrade pip wheel setuptools
             python -m pip install -r requirements.txt
             pip install "pytest==7.3.1"
             pip install "respx==0.21.1"
@@ -684,7 +703,7 @@ jobs:
           command: |
             pwd
             ls
-            python -m pytest -vv tests/llm_translation --cov=litellm --cov-report=xml -x -s -v --junitxml=test-results/junit.xml --durations=5
+            python -m pytest -vv tests/llm_translation --cov=litellm --cov-report=xml -x -v --junitxml=test-results/junit.xml --durations=5
           no_output_timeout: 120m
       - run:
           name: Rename the coverage files
@@ -1108,6 +1127,7 @@ jobs:
           name: Install Dependencies
           command: |
             python -m pip install --upgrade pip
+            python -m pip install wheel setuptools
             python -m pip install -r requirements.txt
             pip install "pytest==7.3.1"
             pip install "pytest-retry==1.6.3"
@@ -1433,7 +1453,7 @@ jobs:
           command: |
             pwd
             ls
-            python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/llm_responses_api_testing --ignore=tests/mcp_tests --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests
+            python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/spend_tracking_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/llm_responses_api_testing --ignore=tests/mcp_tests --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests
           no_output_timeout: 120m

       # Store test results
@@ -1726,6 +1746,96 @@ jobs:
       # Store test results
       - store_test_results:
           path: test-results
+  proxy_spend_accuracy_tests:
+    machine:
+      image: ubuntu-2204:2023.10.1
+    resource_class: xlarge
+    working_directory: ~/project
+    steps:
+      - checkout
+      - setup_google_dns
+      - run:
+          name: Install Docker CLI (In case it's not already installed)
+          command: |
+            sudo apt-get update
+            sudo apt-get install -y docker-ce docker-ce-cli containerd.io
+      - run:
+          name: Install Python 3.9
+          command: |
+            curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
+            bash miniconda.sh -b -p $HOME/miniconda
+            export PATH="$HOME/miniconda/bin:$PATH"
+            conda init bash
+            source ~/.bashrc
+            conda create -n myenv python=3.9 -y
+            conda activate myenv
+            python --version
+      - run:
+          name: Install Dependencies
+          command: |
+            pip install "pytest==7.3.1"
+            pip install "pytest-asyncio==0.21.1"
+            pip install aiohttp
+            python -m pip install --upgrade pip
+            python -m pip install -r requirements.txt
+      - run:
+          name: Build Docker image
+          command: docker build -t my-app:latest -f ./docker/Dockerfile.database .
+      - run:
+          name: Run Docker container
+          # intentionally give bad redis credentials here
+          # the OTEL test - should get this as a trace
+          command: |
+            docker run -d \
+              -p 4000:4000 \
+              -e DATABASE_URL=$PROXY_DATABASE_URL \
+              -e REDIS_HOST=$REDIS_HOST \
+              -e REDIS_PASSWORD=$REDIS_PASSWORD \
+              -e REDIS_PORT=$REDIS_PORT \
+              -e LITELLM_MASTER_KEY="sk-1234" \
+              -e OPENAI_API_KEY=$OPENAI_API_KEY \
+              -e LITELLM_LICENSE=$LITELLM_LICENSE \
+              -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
+              -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
+              -e USE_DDTRACE=True \
+              -e DD_API_KEY=$DD_API_KEY \
+              -e DD_SITE=$DD_SITE \
+              -e AWS_REGION_NAME=$AWS_REGION_NAME \
+              --name my-app \
+              -v $(pwd)/litellm/proxy/example_config_yaml/spend_tracking_config.yaml:/app/config.yaml \
+              my-app:latest \
+              --config /app/config.yaml \
+              --port 4000 \
+              --detailed_debug \
+      - run:
+          name: Install curl and dockerize
+          command: |
+            sudo apt-get update
+            sudo apt-get install -y curl
+            sudo wget https://github.com/jwilder/dockerize/releases/download/v0.6.1/dockerize-linux-amd64-v0.6.1.tar.gz
+            sudo tar -C /usr/local/bin -xzvf dockerize-linux-amd64-v0.6.1.tar.gz
+            sudo rm dockerize-linux-amd64-v0.6.1.tar.gz
+      - run:
+          name: Start outputting logs
+          command: docker logs -f my-app
+          background: true
+      - run:
+          name: Wait for app to be ready
+          command: dockerize -wait http://localhost:4000 -timeout 5m
+      - run:
+          name: Run tests
+          command: |
+            pwd
+            ls
+            python -m pytest -vv tests/spend_tracking_tests -x --junitxml=test-results/junit.xml --durations=5
+          no_output_timeout:
+            120m
+      # Clean up first container
+      - run:
+          name: Stop and remove first container
+          command: |
+            docker stop my-app
+            docker rm my-app

   proxy_multi_instance_tests:
     machine:
@@ -2536,6 +2646,12 @@ workflows:
               only:
                 - main
                 - /litellm_.*/
+      - proxy_spend_accuracy_tests:
+          filters:
+            branches:
+              only:
+                - main
+                - /litellm_.*/
       - proxy_multi_instance_tests:
           filters:
             branches:
@@ -2697,6 +2813,7 @@ workflows:
             - installing_litellm_on_python
             - installing_litellm_on_python_3_13
             - proxy_logging_guardrails_model_info_tests
+            - proxy_spend_accuracy_tests
             - proxy_multi_instance_tests
             - proxy_store_model_in_db_tests
             - proxy_build_from_pip_tests
@@ -8,7 +8,8 @@ redis==5.2.1
 redisvl==0.4.1
 anthropic
 orjson==3.9.15
-pydantic==2.7.1
+pydantic==2.10.2
 google-cloud-aiplatform==1.43.0
-fastapi-sso==0.10.0
+fastapi-sso==0.16.0
 uvloop==0.21.0
+mcp==1.5.0 # for MCP server
.github/workflows/publish-migrations.yml (new file)
@@ -0,0 +1,206 @@
name: Publish Prisma Migrations

permissions:
  contents: write
  pull-requests: write

on:
  push:
    paths:
      - 'schema.prisma' # Check root schema.prisma
    branches:
      - main

jobs:
  publish-migrations:
    runs-on: ubuntu-latest
    services:
      postgres:
        image: postgres:14
        env:
          POSTGRES_DB: temp_db
          POSTGRES_USER: postgres
          POSTGRES_PASSWORD: postgres
        ports:
          - 5432:5432
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5

      # Add shadow database service
      postgres_shadow:
        image: postgres:14
        env:
          POSTGRES_DB: shadow_db
          POSTGRES_USER: postgres
          POSTGRES_PASSWORD: postgres
        ports:
          - 5433:5432
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install Dependencies
        run: |
          pip install prisma
          pip install python-dotenv

      - name: Generate Initial Migration if None Exists
        env:
          DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
          DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
          SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
        run: |
          mkdir -p deploy/migrations
          echo 'provider = "postgresql"' > deploy/migrations/migration_lock.toml

          if [ -z "$(ls -A deploy/migrations/2* 2>/dev/null)" ]; then
            echo "No existing migrations found, creating baseline..."
            VERSION=$(date +%Y%m%d%H%M%S)
            mkdir -p deploy/migrations/${VERSION}_initial

            echo "Generating initial migration..."
            # Save raw output for debugging
            prisma migrate diff \
              --from-empty \
              --to-schema-datamodel schema.prisma \
              --shadow-database-url "${SHADOW_DATABASE_URL}" \
              --script > deploy/migrations/${VERSION}_initial/raw_migration.sql

            echo "Raw migration file content:"
            cat deploy/migrations/${VERSION}_initial/raw_migration.sql

            echo "Cleaning migration file..."
            # Clean the file
            sed '/^Installing/d' deploy/migrations/${VERSION}_initial/raw_migration.sql > deploy/migrations/${VERSION}_initial/migration.sql

            # Verify the migration file
            if [ ! -s deploy/migrations/${VERSION}_initial/migration.sql ]; then
              echo "ERROR: Migration file is empty after cleaning"
              echo "Original content was:"
              cat deploy/migrations/${VERSION}_initial/raw_migration.sql
              exit 1
            fi

            echo "Final migration file content:"
            cat deploy/migrations/${VERSION}_initial/migration.sql

            # Verify it starts with SQL
            if ! head -n 1 deploy/migrations/${VERSION}_initial/migration.sql | grep -q "^--\|^CREATE\|^ALTER"; then
              echo "ERROR: Migration file does not start with SQL command or comment"
              echo "First line is:"
              head -n 1 deploy/migrations/${VERSION}_initial/migration.sql
              echo "Full content is:"
              cat deploy/migrations/${VERSION}_initial/migration.sql
              exit 1
            fi

            echo "Initial migration generated at $(date -u)" > deploy/migrations/${VERSION}_initial/README.md
          fi

      - name: Compare and Generate Migration
        if: success()
        env:
          DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
          DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
          SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
        run: |
          # Create temporary migration workspace
          mkdir -p temp_migrations

          # Copy existing migrations (will not fail if directory is empty)
          cp -r deploy/migrations/* temp_migrations/ 2>/dev/null || true

          VERSION=$(date +%Y%m%d%H%M%S)

          # Generate diff against existing migrations or empty state
          prisma migrate diff \
            --from-migrations temp_migrations \
            --to-schema-datamodel schema.prisma \
            --shadow-database-url "${SHADOW_DATABASE_URL}" \
            --script > temp_migrations/migration_${VERSION}.sql

          # Check if there are actual changes
          if [ -s temp_migrations/migration_${VERSION}.sql ]; then
            echo "Changes detected, creating new migration"
            mkdir -p deploy/migrations/${VERSION}_schema_update
            mv temp_migrations/migration_${VERSION}.sql deploy/migrations/${VERSION}_schema_update/migration.sql
            echo "Migration generated at $(date -u)" > deploy/migrations/${VERSION}_schema_update/README.md
          else
            echo "No schema changes detected"
            exit 0
          fi

      - name: Verify Migration
        if: success()
        env:
          DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
          DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
          SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
        run: |
          # Create test database
          psql "${SHADOW_DATABASE_URL}" -c 'CREATE DATABASE migration_test;'

          # Apply all migrations in order to verify
          for migration in deploy/migrations/*/migration.sql; do
            echo "Applying migration: $migration"
            psql "${SHADOW_DATABASE_URL}" -f $migration
          done

      # Add this step before create-pull-request to debug permissions
      - name: Check Token Permissions
        run: |
          echo "Checking token permissions..."
          curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
            -H "Accept: application/vnd.github.v3+json" \
            https://api.github.com/repos/BerriAI/litellm/collaborators

          echo "\nChecking if token can create PRs..."
          curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
            -H "Accept: application/vnd.github.v3+json" \
            https://api.github.com/repos/BerriAI/litellm

      # Add this debug step before git push
      - name: Debug Changed Files
        run: |
          echo "Files staged for commit:"
          git diff --name-status --staged

          echo "\nAll changed files:"
          git status

      - name: Create Pull Request
        if: success()
        uses: peter-evans/create-pull-request@v5
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          commit-message: "chore: update prisma migrations"
          title: "Update Prisma Migrations"
          body: |
            Auto-generated migration based on schema.prisma changes.

            Generated files:
            - deploy/migrations/${VERSION}_schema_update/migration.sql
            - deploy/migrations/${VERSION}_schema_update/README.md
          branch: feat/prisma-migration-${{ env.VERSION }}
          base: main
          delete-branch: true

      - name: Generate and Save Migrations
        run: |
          # Only add migration files
          git add deploy/migrations/
          git status # Debug what's being committed
          git commit -m "chore: update prisma migrations"
.github/workflows/test-linting.yml (new file)
@@ -0,0 +1,53 @@
name: LiteLLM Linting

on:
  pull_request:
    branches: [ main ]

jobs:
  lint:
    runs-on: ubuntu-latest
    timeout-minutes: 5

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install Poetry
        uses: snok/install-poetry@v1

      - name: Install dependencies
        run: |
          poetry install --with dev

      - name: Run Black formatting
        run: |
          cd litellm
          poetry run black .
          cd ..

      - name: Run Ruff linting
        run: |
          cd litellm
          poetry run ruff check .
          cd ..

      - name: Run MyPy type checking
        run: |
          cd litellm
          poetry run mypy . --ignore-missing-imports
          cd ..

      - name: Check for circular imports
        run: |
          cd litellm
          poetry run python ../tests/documentation_tests/test_circular_imports.py
          cd ..

      - name: Check import safety
        run: |
          poetry run python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
.github/workflows/test-litellm.yml (new file)
@@ -0,0 +1,35 @@
name: LiteLLM Mock Tests (folder - tests/litellm)

on:
  pull_request:
    branches: [ main ]

jobs:
  test:
    runs-on: ubuntu-latest
    timeout-minutes: 5

    steps:
      - uses: actions/checkout@v4

      - name: Thank You Message
        run: |
          echo "### 🙏 Thank you for contributing to LiteLLM!" >> $GITHUB_STEP_SUMMARY
          echo "Your PR is being tested now. We appreciate your help in making LiteLLM better!" >> $GITHUB_STEP_SUMMARY

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12'

      - name: Install Poetry
        uses: snok/install-poetry@v1

      - name: Install dependencies
        run: |
          poetry install --with dev,proxy-dev --extras proxy
          poetry run pip install pytest-xdist

      - name: Run tests
        run: |
          poetry run pytest tests/litellm -x -vv -n 4
.gitignore
@@ -83,4 +83,5 @@ tests/llm_translation/test_vertex_key.json
 litellm/proxy/migrations/0_init/migration.sql
 litellm/proxy/db/migrations/0_init/migration.sql
 litellm/proxy/db/migrations/*
+litellm/proxy/migrations/*config.yaml
 litellm/proxy/migrations/*
@@ -6,44 +6,35 @@ repos:
         entry: pyright
         language: system
         types: [python]
-        files: ^litellm/
+        files: ^(litellm/|litellm_proxy_extras/)
       - id: isort
         name: isort
         entry: isort
         language: system
         types: [python]
-        files: litellm/.*\.py
+        files: (litellm/|litellm_proxy_extras/).*\.py
         exclude: ^litellm/__init__.py$
-  - repo: https://github.com/psf/black
-    rev: 24.2.0
-    hooks:
       - id: black
+        name: black
+        entry: poetry run black
+        language: system
+        types: [python]
+        files: (litellm/|litellm_proxy_extras/).*\.py
   - repo: https://github.com/pycqa/flake8
     rev: 7.0.0 # The version of flake8 to use
     hooks:
       - id: flake8
         exclude: ^litellm/tests/|^litellm/proxy/tests/|^litellm/tests/litellm/|^tests/litellm/
         additional_dependencies: [flake8-print]
-        files: litellm/.*\.py
+        files: (litellm/|litellm_proxy_extras/).*\.py
-      # - id: flake8
-      #   name: flake8 (router.py function length)
-      #   files: ^litellm/router\.py$
-      #   args: [--max-function-length=40]
-      #   # additional_dependencies: [flake8-functions]
   - repo: https://github.com/python-poetry/poetry
     rev: 1.8.0
     hooks:
       - id: poetry-check
+        files: ^(pyproject.toml|litellm-proxy-extras/pyproject.toml)$
   - repo: local
     hooks:
       - id: check-files-match
         name: Check if files match
         entry: python3 ci_cd/check_files_match.py
         language: system
-      # - id: check-file-length
-      #   name: Check file length
-      #   entry: python check_file_length.py
-      #   args: ["10000"] # set your desired maximum number of lines
-      #   language: python
-      #   files: litellm/.*\.py
-      #   exclude: ^litellm/tests/
@@ -12,8 +12,7 @@ WORKDIR /app
 USER root

 # Install build dependencies
-RUN apk update && \
-    apk add --no-cache gcc python3-dev openssl openssl-dev
+RUN apk add --no-cache gcc python3-dev openssl openssl-dev


 RUN pip install --upgrade pip && \
@@ -52,8 +51,7 @@ FROM $LITELLM_RUNTIME_IMAGE AS runtime
 USER root

 # Install runtime dependencies
-RUN apk update && \
-    apk add --no-cache openssl
+RUN apk add --no-cache openssl

 WORKDIR /app
 # Copy the current directory contents into the container at /app
Makefile
@@ -14,6 +14,9 @@ help:
 install-dev:
 	poetry install --with dev

+install-proxy-dev:
+	poetry install --with dev,proxy-dev
+
 lint: install-dev
 	poetry run pip install types-requests types-setuptools types-redis types-PyYAML
 	cd litellm && poetry run mypy . --ignore-missing-imports
@@ -16,9 +16,6 @@
     <a href="https://pypi.org/project/litellm/" target="_blank">
         <img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version">
     </a>
-    <a href="https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main" target="_blank">
-        <img src="https://dl.circleci.com/status-badge/img/gh/BerriAI/litellm/tree/main.svg?style=svg" alt="CircleCI">
-    </a>
     <a href="https://www.ycombinator.com/companies/berriai">
         <img src="https://img.shields.io/badge/Y%20Combinator-W23-orange?style=flat-square" alt="Y Combinator W23">
     </a>
ci_cd/baseline_db.py (new file)
@@ -0,0 +1,60 @@
import subprocess
from pathlib import Path
from datetime import datetime


def create_baseline():
    """Create baseline migration in deploy/migrations"""
    try:
        # Get paths
        root_dir = Path(__file__).parent.parent
        deploy_dir = root_dir / "deploy"
        migrations_dir = deploy_dir / "migrations"
        schema_path = root_dir / "schema.prisma"

        # Create migrations directory
        migrations_dir.mkdir(parents=True, exist_ok=True)

        # Create migration_lock.toml if it doesn't exist
        lock_file = migrations_dir / "migration_lock.toml"
        if not lock_file.exists():
            lock_file.write_text('provider = "postgresql"\n')

        # Create timestamp-based migration directory
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        migration_dir = migrations_dir / f"{timestamp}_baseline"
        migration_dir.mkdir(parents=True, exist_ok=True)

        # Generate migration SQL
        result = subprocess.run(
            [
                "prisma",
                "migrate",
                "diff",
                "--from-empty",
                "--to-schema-datamodel",
                str(schema_path),
                "--script",
            ],
            capture_output=True,
            text=True,
            check=True,
        )

        # Write the SQL to migration.sql
        migration_file = migration_dir / "migration.sql"
        migration_file.write_text(result.stdout)

        print(f"Created baseline migration in {migration_dir}")
        return True

    except subprocess.CalledProcessError as e:
        print(f"Error running prisma command: {e.stderr}")
        return False
    except Exception as e:
        print(f"Error creating baseline migration: {str(e)}")
        return False


if __name__ == "__main__":
    create_baseline()
ci_cd/publish-proxy-extras.sh (new file)
@@ -0,0 +1,19 @@
#!/bin/bash

# Exit on error
set -e

echo "🚀 Building and publishing litellm-proxy-extras"

# Navigate to litellm-proxy-extras directory
cd "$(dirname "$0")/../litellm-proxy-extras"

# Build the package
echo "📦 Building package..."
poetry build

# Publish to PyPI
echo "🌎 Publishing to PyPI..."
poetry publish

echo "✅ Done! Package published successfully"
ci_cd/run_migration.py (new file)
@@ -0,0 +1,95 @@
import os
import subprocess
from pathlib import Path
from datetime import datetime
import testing.postgresql
import shutil


def create_migration(migration_name: str = None):
    """
    Create a new migration SQL file in the migrations directory by comparing
    current database state with schema

    Args:
        migration_name (str): Name for the migration
    """
    try:
        # Get paths
        root_dir = Path(__file__).parent.parent
        migrations_dir = root_dir / "litellm-proxy-extras" / "litellm_proxy_extras" / "migrations"
        schema_path = root_dir / "schema.prisma"

        # Create temporary PostgreSQL database
        with testing.postgresql.Postgresql() as postgresql:
            db_url = postgresql.url()

            # Create temporary migrations directory next to schema.prisma
            temp_migrations_dir = schema_path.parent / "migrations"

            try:
                # Copy existing migrations to temp directory
                if temp_migrations_dir.exists():
                    shutil.rmtree(temp_migrations_dir)
                shutil.copytree(migrations_dir, temp_migrations_dir)

                # Apply existing migrations to temp database
                os.environ["DATABASE_URL"] = db_url
                subprocess.run(
                    ["prisma", "migrate", "deploy", "--schema", str(schema_path)],
                    check=True,
                )

                # Generate diff between current database and schema
                result = subprocess.run(
                    [
                        "prisma",
                        "migrate",
                        "diff",
                        "--from-url",
                        db_url,
                        "--to-schema-datamodel",
                        str(schema_path),
                        "--script",
                    ],
                    capture_output=True,
                    text=True,
                    check=True,
                )

                if result.stdout.strip():
                    # Generate timestamp and create migration directory
                    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
                    migration_name = migration_name or "unnamed_migration"
                    migration_dir = migrations_dir / f"{timestamp}_{migration_name}"
                    migration_dir.mkdir(parents=True, exist_ok=True)

                    # Write the SQL to migration.sql
                    migration_file = migration_dir / "migration.sql"
                    migration_file.write_text(result.stdout)

                    print(f"Created migration in {migration_dir}")
                    return True
                else:
                    print("No schema changes detected. Migration not needed.")
                    return False

            finally:
                # Clean up: remove temporary migrations directory
                if temp_migrations_dir.exists():
                    shutil.rmtree(temp_migrations_dir)

    except subprocess.CalledProcessError as e:
        print(f"Error generating migration: {e.stderr}")
        return False
    except Exception as e:
        print(f"Error creating migration: {str(e)}")
        return False


if __name__ == "__main__":
    # If running directly, can optionally pass migration name as argument
    import sys

    migration_name = sys.argv[1] if len(sys.argv) > 1 else None
    create_migration(migration_name)
cookbook/LiteLLM_HuggingFace.ipynb
@@ -7,7 +7,8 @@
 },
 "source": [
 "## LiteLLM Hugging Face\n",
-"Docs for huggingface: https://docs.litellm.ai/docs/providers/huggingface"
+"\n",
+"Docs for huggingface: https://docs.litellm.ai/docs/providers/huggingface\n"
 ]
 },
 {
@@ -27,23 +28,18 @@
 "id": "yp5UXRqtpu9f"
 },
 "source": [
-"## Hugging Face Free Serverless Inference API\n",
+"## Serverless Inference Providers\n",
-"Read more about the Free Serverless Inference API here: https://huggingface.co/docs/api-inference.\n",
 "\n",
-"In order to use litellm to call Serverless Inference API:\n",
+"Read more about Inference Providers here: https://huggingface.co/blog/inference-providers.\n",
 "\n",
-"* Browse Serverless Inference compatible models here: https://huggingface.co/models?inference=warm&pipeline_tag=text-generation.\n",
+"In order to use litellm with Hugging Face Inference Providers, you need to set `model=huggingface/<provider>/<model-id>`.\n",
-"* Copy the model name from hugging face\n",
-"* Set `model = \"huggingface/<model-name>\"`\n",
 "\n",
-"Example set `model=huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct` to call `meta-llama/Meta-Llama-3.1-8B-Instruct`\n",
+"Example: `huggingface/together/deepseek-ai/DeepSeek-R1` to run DeepSeek-R1 (https://huggingface.co/deepseek-ai/DeepSeek-R1) through Together AI.\n"
-"\n",
-"https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct"
 ]
 },
 {
 "cell_type": "code",
-"execution_count": 3,
+"execution_count": null,
 "metadata": {
 "colab": {
 "base_uri": "https://localhost:8080/"
@@ -51,107 +47,18 @@
 "id": "Pi5Oww8gpCUm",
 "outputId": "659a67c7-f90d-4c06-b94e-2c4aa92d897a"
 },
-"outputs": [
+"outputs": [],
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"ModelResponse(id='chatcmpl-c54dfb68-1491-4d68-a4dc-35e603ea718a', choices=[Choices(finish_reason='eos_token', index=0, message=Message(content=\"I'm just a computer program, so I don't have feelings, but thank you for asking! How can I assist you today?\", role='assistant', tool_calls=None, function_call=None))], created=1724858285, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=27, prompt_tokens=47, total_tokens=74))\n",
-"ModelResponse(id='chatcmpl-d2ae38e6-4974-431c-bb9b-3fa3f95e5a6d', choices=[Choices(finish_reason='length', index=0, message=Message(content=\"\\n\\nI’m doing well, thank you. I’ve been keeping busy with work and some personal projects. How about you?\\n\\nI'm doing well, thank you. I've been enjoying some time off and catching up on some reading. How can I assist you today?\\n\\nI'm looking for a good book to read. Do you have any recommendations?\\n\\nOf course! Here are a few book recommendations across different genres:\\n\\n1.\", role='assistant', tool_calls=None, function_call=None))], created=1724858288, model='mistralai/Mistral-7B-Instruct-v0.3', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=85, prompt_tokens=6, total_tokens=91))\n"
-]
-}
-],
 "source": [
 "import os\n",
-"import litellm\n",
+"from litellm import completion\n",
 "\n",
-"# Make sure to create an API_KEY with inference permissions at https://huggingface.co/settings/tokens/new?globalPermissions=inference.serverless.write&tokenType=fineGrained\n",
+"# You can create a HF token here: https://huggingface.co/settings/tokens\n",
-"os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n",
+"os.environ[\"HF_TOKEN\"] = \"hf_xxxxxx\"\n",
 "\n",
-"# Call https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct\n",
+"# Call DeepSeek-R1 model through Together AI\n",
-"# add the 'huggingface/' prefix to the model to set huggingface as the provider\n",
+"response = completion(\n",
-"response = litellm.completion(\n",
+" model=\"huggingface/together/deepseek-ai/DeepSeek-R1\",\n",
-" model=\"huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
+" messages=[{\"content\": \"How many r's are in the word `strawberry`?\", \"role\": \"user\"}],\n",
-" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
-")\n",
-"print(response)\n",
-"\n",
-"\n",
-"# Call https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3\n",
-"response = litellm.completion(\n",
-" model=\"huggingface/mistralai/Mistral-7B-Instruct-v0.3\",\n",
-" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
-")\n",
-"print(response)"
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {
-"id": "-klhAhjLtclv"
-},
-"source": [
-"## Hugging Face Dedicated Inference Endpoints\n",
-"\n",
-"Steps to use\n",
-"* Create your own Hugging Face dedicated endpoint here: https://ui.endpoints.huggingface.co/\n",
-"* Set `api_base` to your deployed api base\n",
-"* Add the `huggingface/` prefix to your model so litellm knows it's a huggingface Deployed Inference Endpoint"
-]
-},
-{
-"cell_type": "code",
-"execution_count": 9,
-"metadata": {
-"colab": {
-"base_uri": "https://localhost:8080/"
-},
-"id": "Lbmw8Gl_pHns",
-"outputId": "ea8408bf-1cc3-4670-ecea-f12666d204a8"
-},
-"outputs": [
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"{\n",
-" \"object\": \"chat.completion\",\n",
-" \"choices\": [\n",
-" {\n",
-" \"finish_reason\": \"length\",\n",
-" \"index\": 0,\n",
-" \"message\": {\n",
-" \"content\": \"\\n\\nI am doing well, thank you for asking. How about you?\\nI am doing\",\n",
-" \"role\": \"assistant\",\n",
-" \"logprobs\": -8.9481967812\n",
-" }\n",
-" }\n",
-" ],\n",
-" \"id\": \"chatcmpl-74dc9d89-3916-47ce-9bea-b80e66660f77\",\n",
-" \"created\": 1695871068.8413374,\n",
-" \"model\": \"glaiveai/glaive-coder-7b\",\n",
-" \"usage\": {\n",
-" \"prompt_tokens\": 6,\n",
-" \"completion_tokens\": 18,\n",
-" \"total_tokens\": 24\n",
-" }\n",
-" "}\n"
-]
-}
-],
-"source": [
-"import os\n",
-"import litellm\n",
-"\n",
-"os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n",
-"\n",
-"# TGI model: Call https://huggingface.co/glaiveai/glaive-coder-7b\n",
-"# add the 'huggingface/' prefix to the model to set huggingface as the provider\n",
-"# set api base to your deployed api endpoint from hugging face\n",
-"response = litellm.completion(\n",
-" model=\"huggingface/glaiveai/glaive-coder-7b\",\n",
-" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
-" api_base=\"https://wjiegasee9bmqke2.us-east-1.aws.endpoints.huggingface.cloud\"\n",
-")\n",
-"print(response)"
 ]
 },
@@ -162,13 +69,12 @@
 "id": "EU0UubrKzTFe"
 },
 "source": [
-"## HuggingFace - Streaming (Serveless or Dedicated)\n",
+"## Streaming\n"
-"Set stream = True"
 ]
 },
 {
 "cell_type": "code",
-"execution_count": 6,
+"execution_count": null,
 "metadata": {
 "colab": {
 "base_uri": "https://localhost:8080/"
@ -176,74 +82,147 @@
|
||||||
"id": "y-QfIvA-uJKX",
|
"id": "y-QfIvA-uJKX",
|
||||||
"outputId": "b007bb98-00d0-44a4-8264-c8a2caed6768"
|
"outputId": "b007bb98-00d0-44a4-8264-c8a2caed6768"
|
||||||
},
|
},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"<litellm.utils.CustomStreamWrapper object at 0x1278471d0>\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content='I', role='assistant', function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=\"'m\", role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' just', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' a', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' computer', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' program', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=',', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' so', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' I', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' don', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=\"'t\", role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' have', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' feelings', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=',', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' but', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' thank', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' you', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' for', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' asking', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content='!', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' How', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' can', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' I', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' assist', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' you', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content=' today', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content='?', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason=None, index=0, delta=Delta(content='<|eot_id|>', role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n",
|
|
||||||
"ModelResponse(id='chatcmpl-ffeb4491-624b-4ddf-8005-60358cf67d36', choices=[StreamingChoices(finish_reason='stop', index=0, delta=Delta(content=None, role=None, function_call=None, tool_calls=None), logprobs=None)], created=1724858353, model='meta-llama/Meta-Llama-3.1-8B-Instruct', object='chat.completion.chunk', system_fingerprint=None)\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"import os\n",
|
"import os\n",
|
||||||
"import litellm\n",
|
"from litellm import completion\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Make sure to create an API_KEY with inference permissions at https://huggingface.co/settings/tokens/new?globalPermissions=inference.serverless.write&tokenType=fineGrained\n",
|
"os.environ[\"HF_TOKEN\"] = \"hf_xxxxxx\"\n",
|
||||||
"os.environ[\"HUGGINGFACE_API_KEY\"] = \"\"\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"# Call https://huggingface.co/glaiveai/glaive-coder-7b\n",
|
"response = completion(\n",
|
||||||
"# add the 'huggingface/' prefix to the model to set huggingface as the provider\n",
|
" model=\"huggingface/together/deepseek-ai/DeepSeek-R1\",\n",
|
||||||
"# set api base to your deployed api endpoint from hugging face\n",
|
" messages=[\n",
|
||||||
"response = litellm.completion(\n",
|
" {\n",
|
||||||
" model=\"huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
|
" \"role\": \"user\",\n",
|
||||||
" messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
|
" \"content\": \"How many r's are in the word `strawberry`?\",\n",
|
||||||
" stream=True\n",
|
" \n",
|
||||||
|
" }\n",
|
||||||
|
" ],\n",
|
||||||
|
" stream=True,\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(response)\n",
|
|
||||||
"\n",
|
|
||||||
"for chunk in response:\n",
|
"for chunk in response:\n",
|
||||||
" print(chunk)"
|
" print(chunk)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## With images as input\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {},
|
||||||
"id": "CKXAnK55zQRl"
|
|
||||||
},
|
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": []
|
"source": [
|
||||||
|
"from litellm import completion\n",
|
||||||
|
"\n",
|
||||||
|
"# Set your Hugging Face Token\n",
|
||||||
|
"os.environ[\"HF_TOKEN\"] = \"hf_xxxxxx\"\n",
|
||||||
|
"\n",
|
||||||
|
"messages = [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"role\": \"user\",\n",
|
||||||
|
" \"content\": [\n",
|
||||||
|
" {\"type\": \"text\", \"text\": \"What's in this image?\"},\n",
|
||||||
|
" {\n",
|
||||||
|
" \"type\": \"image_url\",\n",
|
||||||
|
" \"image_url\": {\n",
|
||||||
|
" \"url\": \"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg\",\n",
|
||||||
|
" },\n",
|
||||||
|
" },\n",
|
||||||
|
" ],\n",
|
||||||
|
" }\n",
|
||||||
|
"]\n",
|
||||||
|
"\n",
|
||||||
|
"response = completion(\n",
|
||||||
|
" model=\"huggingface/sambanova/meta-llama/Llama-3.3-70B-Instruct\",\n",
|
||||||
|
" messages=messages,\n",
|
||||||
|
")\n",
|
||||||
|
"print(response.choices[0])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Tools - Function Calling\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"from litellm import completion\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"# Set your Hugging Face Token\n",
|
||||||
|
"os.environ[\"HF_TOKEN\"] = \"hf_xxxxxx\"\n",
|
||||||
|
"\n",
|
||||||
|
"tools = [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"type\": \"function\",\n",
|
||||||
|
" \"function\": {\n",
|
||||||
|
" \"name\": \"get_current_weather\",\n",
|
||||||
|
" \"description\": \"Get the current weather in a given location\",\n",
|
||||||
|
" \"parameters\": {\n",
|
||||||
|
" \"type\": \"object\",\n",
|
||||||
|
" \"properties\": {\n",
|
||||||
|
" \"location\": {\n",
|
||||||
|
" \"type\": \"string\",\n",
|
||||||
|
" \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
|
||||||
|
" },\n",
|
||||||
|
" \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
|
||||||
|
" },\n",
|
||||||
|
" \"required\": [\"location\"],\n",
|
||||||
|
" },\n",
|
||||||
|
" },\n",
|
||||||
|
" }\n",
|
||||||
|
"]\n",
|
||||||
|
"messages = [{\"role\": \"user\", \"content\": \"What's the weather like in Boston today?\"}]\n",
|
||||||
|
"\n",
|
||||||
|
"response = completion(\n",
|
||||||
|
" model=\"huggingface/sambanova/meta-llama/Llama-3.1-8B-Instruct\", messages=messages, tools=tools, tool_choice=\"auto\"\n",
|
||||||
|
")\n",
|
||||||
|
"print(response)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Hugging Face Dedicated Inference Endpoints\n",
|
||||||
|
"\n",
|
||||||
|
"Steps to use\n",
|
||||||
|
"\n",
|
||||||
|
"- Create your own Hugging Face dedicated endpoint here: https://ui.endpoints.huggingface.co/\n",
|
||||||
|
"- Set `api_base` to your deployed api base\n",
|
||||||
|
"- set the model to `huggingface/tgi` so that litellm knows it's a huggingface Deployed Inference Endpoint.\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"import litellm\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"response = litellm.completion(\n",
|
||||||
|
" model=\"huggingface/tgi\",\n",
|
||||||
|
" messages=[{\"content\": \"Hello, how are you?\", \"role\": \"user\"}],\n",
|
||||||
|
" api_base=\"https://my-endpoint.endpoints.huggingface.cloud/v1/\",\n",
|
||||||
|
")\n",
|
||||||
|
"print(response)"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
@ -251,7 +230,8 @@
|
||||||
"provenance": []
|
"provenance": []
|
||||||
},
|
},
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": ".venv",
|
||||||
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
"language_info": {
|
"language_info": {
|
||||||
|
@ -264,7 +244,7 @@
|
||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.12.2"
|
"version": "3.12.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|
|
@ -1,2 +1,11 @@
|
||||||
python3 -m build
|
python3 -m build
|
||||||
twine upload --verbose dist/litellm-1.18.13.dev4.tar.gz -u __token__ -
|
twine upload --verbose dist/litellm-1.18.13.dev4.tar.gz -u __token__ -
|
||||||
|
|
||||||
|
|
||||||
|
Note: You might need to create a MANIFEST.ini file at the repo root in case the build process fails
|
||||||
|
|
||||||
|
Place this in MANIFEST.ini
|
||||||
|
recursive-exclude venv *
|
||||||
|
recursive-exclude myenv *
|
||||||
|
recursive-exclude py313_env *
|
||||||
|
recursive-exclude **/.venv *
|
||||||
|
|
|
@ -2,6 +2,10 @@ apiVersion: v1
|
||||||
kind: Service
|
kind: Service
|
||||||
metadata:
|
metadata:
|
||||||
name: {{ include "litellm.fullname" . }}
|
name: {{ include "litellm.fullname" . }}
|
||||||
|
{{- with .Values.service.annotations }}
|
||||||
|
annotations:
|
||||||
|
{{- toYaml . | nindent 4 }}
|
||||||
|
{{- end }}
|
||||||
labels:
|
labels:
|
||||||
{{- include "litellm.labels" . | nindent 4 }}
|
{{- include "litellm.labels" . | nindent 4 }}
|
||||||
spec:
|
spec:
|
||||||
|
|
|
@ -66,5 +66,3 @@ volumes:
|
||||||
postgres_data:
|
postgres_data:
|
||||||
name: litellm_postgres_data # Named volume for Postgres data persistence
|
name: litellm_postgres_data # Named volume for Postgres data persistence
|
||||||
|
|
||||||
|
|
||||||
# ...rest of your docker-compose config if any
|
|
||||||
|
|
|
@ -35,7 +35,7 @@ RUN pip wheel --no-cache-dir --wheel-dir=/wheels/ -r requirements.txt
|
||||||
FROM $LITELLM_RUNTIME_IMAGE AS runtime
|
FROM $LITELLM_RUNTIME_IMAGE AS runtime
|
||||||
|
|
||||||
# Update dependencies and clean up
|
# Update dependencies and clean up
|
||||||
RUN apk update && apk upgrade && rm -rf /var/cache/apk/*
|
RUN apk upgrade --no-cache
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
|
|
|
@ -12,8 +12,7 @@ WORKDIR /app
|
||||||
USER root
|
USER root
|
||||||
|
|
||||||
# Install build dependencies
|
# Install build dependencies
|
||||||
RUN apk update && \
|
RUN apk add --no-cache gcc python3-dev openssl openssl-dev
|
||||||
apk add --no-cache gcc python3-dev openssl openssl-dev
|
|
||||||
|
|
||||||
|
|
||||||
RUN pip install --upgrade pip && \
|
RUN pip install --upgrade pip && \
|
||||||
|
@ -44,8 +43,7 @@ FROM $LITELLM_RUNTIME_IMAGE AS runtime
|
||||||
USER root
|
USER root
|
||||||
|
|
||||||
# Install runtime dependencies
|
# Install runtime dependencies
|
||||||
RUN apk update && \
|
RUN apk add --no-cache openssl
|
||||||
apk add --no-cache openssl
|
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
# Copy the current directory contents into the container at /app
|
# Copy the current directory contents into the container at /app
|
||||||
|
|
|
@ -3,9 +3,10 @@ import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# /v1/messages [BETA]
|
# /v1/messages [BETA]
|
||||||
|
|
||||||
LiteLLM provides a BETA endpoint in the spec of Anthropic's `/v1/messages` endpoint.
|
Use LiteLLM to call all your LLM APIs in the Anthropic `v1/messages` format.
|
||||||
|
|
||||||
This currently just supports the Anthropic API.
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
| Feature | Supported | Notes |
|
| Feature | Supported | Notes |
|
||||||
|-------|-------|-------|
|
|-------|-------|-------|
|
||||||
|
@ -21,9 +22,61 @@ Planned improvement:
|
||||||
- Bedrock Anthropic support
|
- Bedrock Anthropic support
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
---
|
||||||
|
|
||||||
|
### LiteLLM Python SDK
|
||||||
|
|
||||||
|
#### Non-streaming example
|
||||||
|
```python showLineNumbers title="Example using LiteLLM Python SDK"
|
||||||
|
import litellm
|
||||||
|
response = await litellm.anthropic.messages.acreate(
|
||||||
|
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
|
||||||
|
api_key=api_key,
|
||||||
|
model="anthropic/claude-3-haiku-20240307",
|
||||||
|
max_tokens=100,
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
Example response:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"text": "Hi! this is a very short joke",
|
||||||
|
"type": "text"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
|
||||||
|
"model": "claude-3-7-sonnet-20250219",
|
||||||
|
"role": "assistant",
|
||||||
|
"stop_reason": "end_turn",
|
||||||
|
"stop_sequence": null,
|
||||||
|
"type": "message",
|
||||||
|
"usage": {
|
||||||
|
"input_tokens": 2095,
|
||||||
|
"output_tokens": 503,
|
||||||
|
"cache_creation_input_tokens": 2095,
|
||||||
|
"cache_read_input_tokens": 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Streaming example
|
||||||
|
```python showLineNumbers title="Example using LiteLLM Python SDK"
|
||||||
|
import litellm
|
||||||
|
response = await litellm.anthropic.messages.acreate(
|
||||||
|
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
|
||||||
|
api_key=api_key,
|
||||||
|
model="anthropic/claude-3-haiku-20240307",
|
||||||
|
max_tokens=100,
|
||||||
|
stream=True,
|
||||||
|
)
|
||||||
|
async for chunk in response:
|
||||||
|
print(chunk)
|
||||||
|
```
|
||||||
|
|
||||||
|
### LiteLLM Proxy Server
|
||||||
|
|
||||||
<Tabs>
|
|
||||||
<TabItem label="PROXY" value="proxy">
|
|
||||||
|
|
||||||
1. Setup config.yaml
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
@ -42,7 +95,28 @@ litellm --config /path/to/config.yaml
|
||||||
|
|
||||||
3. Test it!
|
3. Test it!
|
||||||
|
|
||||||
```bash
|
<Tabs>
|
||||||
|
<TabItem label="Anthropic Python SDK" value="python">
|
||||||
|
|
||||||
|
```python showLineNumbers title="Example using LiteLLM Proxy Server"
|
||||||
|
import anthropic
|
||||||
|
|
||||||
|
# point anthropic sdk to litellm proxy
|
||||||
|
client = anthropic.Anthropic(
|
||||||
|
base_url="http://0.0.0.0:4000",
|
||||||
|
api_key="sk-1234",
|
||||||
|
)
|
||||||
|
|
||||||
|
response = client.messages.create(
|
||||||
|
messages=[{"role": "user", "content": "Hello, can you tell me a short joke?"}],
|
||||||
|
model="anthropic-claude",
|
||||||
|
max_tokens=100,
|
||||||
|
)
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
<TabItem label="curl" value="curl">
|
||||||
|
|
||||||
|
```bash showLineNumbers title="Example using LiteLLM Proxy Server"
|
||||||
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
|
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
|
||||||
-H 'content-type: application/json' \
|
-H 'content-type: application/json' \
|
||||||
-H 'x-api-key: $LITELLM_API_KEY' \
|
-H 'x-api-key: $LITELLM_API_KEY' \
|
||||||
|
@ -52,41 +126,176 @@ curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
|
||||||
"messages": [
|
"messages": [
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": [
|
"content": "Hello, can you tell me a short joke?"
|
||||||
{
|
|
||||||
"type": "text",
|
|
||||||
"text": "List 5 important events in the XIX century"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"max_tokens": 4096
|
"max_tokens": 100
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
</TabItem>
|
|
||||||
<TabItem value="sdk" label="SDK">
|
|
||||||
|
|
||||||
```python
|
|
||||||
from litellm.llms.anthropic.experimental_pass_through.messages.handler import anthropic_messages
|
|
||||||
import asyncio
|
|
||||||
import os
|
|
||||||
|
|
||||||
# set env
|
|
||||||
os.environ["ANTHROPIC_API_KEY"] = "my-api-key"
|
|
||||||
|
|
||||||
messages = [{"role": "user", "content": "Hello, can you tell me a short joke?"}]
|
|
||||||
|
|
||||||
# Call the handler
|
|
||||||
async def call():
|
|
||||||
response = await anthropic_messages(
|
|
||||||
messages=messages,
|
|
||||||
api_key=api_key,
|
|
||||||
model="claude-3-haiku-20240307",
|
|
||||||
max_tokens=100,
|
|
||||||
)
|
|
||||||
|
|
||||||
asyncio.run(call())
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
## Request Format
|
||||||
|
---
|
||||||
|
|
||||||
|
Request body will be in the Anthropic messages API format. **litellm follows the Anthropic messages specification for this endpoint.**
|
||||||
|
|
||||||
|
#### Example request body
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"model": "claude-3-7-sonnet-20250219",
|
||||||
|
"max_tokens": 1024,
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Hello, world"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Required Fields
|
||||||
|
- **model** (string):
|
||||||
|
The model identifier (e.g., `"claude-3-7-sonnet-20250219"`).
|
||||||
|
- **max_tokens** (integer):
|
||||||
|
The maximum number of tokens to generate before stopping.
|
||||||
|
_Note: The model may stop before reaching this limit; value must be greater than 1._
|
||||||
|
- **messages** (array of objects):
|
||||||
|
An ordered list of conversational turns.
|
||||||
|
Each message object must include:
|
||||||
|
- **role** (enum: `"user"` or `"assistant"`):
|
||||||
|
Specifies the speaker of the message.
|
||||||
|
- **content** (string or array of content blocks):
|
||||||
|
The text or content blocks (e.g., an array containing objects with a `type` such as `"text"`) that form the message.
|
||||||
|
_Example equivalence:_
|
||||||
|
```json
|
||||||
|
{"role": "user", "content": "Hello, Claude"}
|
||||||
|
```
|
||||||
|
is equivalent to:
|
||||||
|
```json
|
||||||
|
{"role": "user", "content": [{"type": "text", "text": "Hello, Claude"}]}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Optional Fields
|
||||||
|
- **metadata** (object):
|
||||||
|
Contains additional metadata about the request (e.g., `user_id` as an opaque identifier).
|
||||||
|
- **stop_sequences** (array of strings):
|
||||||
|
Custom sequences that, when encountered in the generated text, cause the model to stop.
|
||||||
|
- **stream** (boolean):
|
||||||
|
Indicates whether to stream the response using server-sent events.
|
||||||
|
- **system** (string or array):
|
||||||
|
A system prompt providing context or specific instructions to the model.
|
||||||
|
- **temperature** (number):
|
||||||
|
Controls randomness in the model’s responses. Valid range: `0 < temperature < 1`.
|
||||||
|
- **thinking** (object):
|
||||||
|
Configuration for enabling extended thinking. If enabled, it includes:
|
||||||
|
- **budget_tokens** (integer):
|
||||||
|
Minimum of 1024 tokens (and less than `max_tokens`).
|
||||||
|
- **type** (enum):
|
||||||
|
E.g., `"enabled"`.
|
||||||
|
- **tool_choice** (object):
|
||||||
|
Instructs how the model should utilize any provided tools.
|
||||||
|
- **tools** (array of objects):
|
||||||
|
Definitions for tools available to the model. Each tool includes:
|
||||||
|
- **name** (string):
|
||||||
|
The tool’s name.
|
||||||
|
- **description** (string):
|
||||||
|
A detailed description of the tool.
|
||||||
|
- **input_schema** (object):
|
||||||
|
A JSON schema describing the expected input format for the tool.
|
||||||
|
- **top_k** (integer):
|
||||||
|
Limits sampling to the top K options.
|
||||||
|
- **top_p** (number):
|
||||||
|
Enables nucleus sampling with a cumulative probability cutoff. Valid range: `0 < top_p < 1`.
|
||||||
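For illustration, here is a minimal sketch that combines several of the optional fields above in a single request through the LiteLLM Python SDK. It assumes `litellm.anthropic.messages.acreate` forwards these fields unchanged in the `/v1/messages` request body and that `ANTHROPIC_API_KEY` is set in the environment; the values shown are placeholders.

```python
import asyncio

import litellm


async def main():
    # Sketch: optional /v1/messages fields passed alongside the required ones.
    # Assumes ANTHROPIC_API_KEY is set in the environment.
    response = await litellm.anthropic.messages.acreate(
        model="anthropic/claude-3-haiku-20240307",
        max_tokens=256,
        messages=[{"role": "user", "content": "Summarize today's weather in one line."}],
        system="You are a terse assistant.",        # optional: system prompt
        temperature=0.2,                            # optional: 0 < temperature < 1
        stop_sequences=["###"],                     # optional: custom stop strings
        metadata={"user_id": "user-123"},           # optional: opaque request metadata
    )
    print(response)


asyncio.run(main())
```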
|
|
||||||
|
|
||||||
|
## Response Format
|
||||||
|
---
|
||||||
|
|
||||||
|
Responses will be in the Anthropic messages API format.
|
||||||
|
|
||||||
|
#### Example Response
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"text": "Hi! My name is Claude.",
|
||||||
|
"type": "text"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
|
||||||
|
"model": "claude-3-7-sonnet-20250219",
|
||||||
|
"role": "assistant",
|
||||||
|
"stop_reason": "end_turn",
|
||||||
|
"stop_sequence": null,
|
||||||
|
"type": "message",
|
||||||
|
"usage": {
|
||||||
|
"input_tokens": 2095,
|
||||||
|
"output_tokens": 503,
|
||||||
|
"cache_creation_input_tokens": 2095,
|
||||||
|
"cache_read_input_tokens": 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Response fields
|
||||||
|
|
||||||
|
- **content** (array of objects):
|
||||||
|
Contains the generated content blocks from the model. Each block includes:
|
||||||
|
- **type** (string):
|
||||||
|
Indicates the type of content (e.g., `"text"`, `"tool_use"`, `"thinking"`, or `"redacted_thinking"`).
|
||||||
|
- **text** (string):
|
||||||
|
The generated text from the model.
|
||||||
|
_Note: Maximum length is 5,000,000 characters._
|
||||||
|
- **citations** (array of objects or `null`):
|
||||||
|
Optional field providing citation details. Each citation includes:
|
||||||
|
- **cited_text** (string):
|
||||||
|
The excerpt being cited.
|
||||||
|
- **document_index** (integer):
|
||||||
|
An index referencing the cited document.
|
||||||
|
- **document_title** (string or `null`):
|
||||||
|
The title of the cited document.
|
||||||
|
- **start_char_index** (integer):
|
||||||
|
The starting character index for the citation.
|
||||||
|
- **end_char_index** (integer):
|
||||||
|
The ending character index for the citation.
|
||||||
|
- **type** (string):
|
||||||
|
Typically `"char_location"`.
|
||||||
|
|
||||||
|
- **id** (string):
|
||||||
|
A unique identifier for the response message.
|
||||||
|
_Note: The format and length of IDs may change over time._
|
||||||
|
|
||||||
|
- **model** (string):
|
||||||
|
Specifies the model that generated the response.
|
||||||
|
|
||||||
|
- **role** (string):
|
||||||
|
Indicates the role of the generated message. For responses, this is always `"assistant"`.
|
||||||
|
|
||||||
|
- **stop_reason** (string):
|
||||||
|
Explains why the model stopped generating text. Possible values include:
|
||||||
|
- `"end_turn"`: The model reached a natural stopping point.
|
||||||
|
- `"max_tokens"`: The generation stopped because the maximum token limit was reached.
|
||||||
|
- `"stop_sequence"`: A custom stop sequence was encountered.
|
||||||
|
- `"tool_use"`: The model invoked one or more tools.
|
||||||
|
|
||||||
|
- **stop_sequence** (string or `null`):
|
||||||
|
Contains the specific stop sequence that caused the generation to halt, if applicable; otherwise, it is `null`.
|
||||||
|
|
||||||
|
- **type** (string):
|
||||||
|
Denotes the type of response object, which is always `"message"`.
|
||||||
|
|
||||||
|
- **usage** (object):
|
||||||
|
Provides details on token usage for billing and rate limiting. This includes:
|
||||||
|
- **input_tokens** (integer):
|
||||||
|
Total number of input tokens processed.
|
||||||
|
- **output_tokens** (integer):
|
||||||
|
Total number of output tokens generated.
|
||||||
|
- **cache_creation_input_tokens** (integer or `null`):
|
||||||
|
Number of tokens used to create a cache entry.
|
||||||
|
- **cache_read_input_tokens** (integer or `null`):
|
||||||
|
Number of tokens read from the cache.
|
||||||
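As a quick illustration, here is a small sketch of reading these fields from a response; it assumes the response is available as a plain dict in the Anthropic messages format shown above.

```python
# Sketch: summarize the fields documented above from a /v1/messages-style
# response, assuming it is available as a plain dict.
def summarize_anthropic_response(response: dict) -> str:
    # Concatenate the generated text blocks from the content array
    text = "".join(
        block["text"] for block in response["content"] if block["type"] == "text"
    )
    usage = response["usage"]
    return (
        f"stop_reason={response['stop_reason']}, "
        f"input_tokens={usage['input_tokens']}, "
        f"output_tokens={usage['output_tokens']}, "
        f"text={text[:60]!r}"
    )
```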
|
|
|
@ -3,7 +3,7 @@ import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# Caching - In-Memory, Redis, s3, Redis Semantic Cache, Disk
|
# Caching - In-Memory, Redis, s3, Redis Semantic Cache, Disk
|
||||||
|
|
||||||
[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm.caching.caching.py)
|
[**See Code**](https://github.com/BerriAI/litellm/blob/main/litellm/caching/caching.py)
|
||||||
|
|
||||||
:::info
|
:::info
|
||||||
|
|
||||||
|
|
|
@ -27,16 +27,18 @@ os.environ["AWS_REGION_NAME"] = ""
|
||||||
|
|
||||||
|
|
||||||
# pdf url
|
# pdf url
|
||||||
image_url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
|
file_url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
|
||||||
|
|
||||||
# model
|
# model
|
||||||
model = "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"
|
model = "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"
|
||||||
|
|
||||||
image_content = [
|
file_content = [
|
||||||
{"type": "text", "text": "What's this file about?"},
|
{"type": "text", "text": "What's this file about?"},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": image_url, # OR {"url": image_url}
|
"file": {
|
||||||
|
"file_id": file_url,
|
||||||
|
}
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -46,7 +48,7 @@ if not supports_pdf_input(model, None):
|
||||||
|
|
||||||
response = completion(
|
response = completion(
|
||||||
model=model,
|
model=model,
|
||||||
messages=[{"role": "user", "content": image_content}],
|
messages=[{"role": "user", "content": file_content}],
|
||||||
)
|
)
|
||||||
assert response is not None
|
assert response is not None
|
||||||
```
|
```
|
||||||
|
@ -80,11 +82,15 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||||
-d '{
|
-d '{
|
||||||
"model": "bedrock-model",
|
"model": "bedrock-model",
|
||||||
"messages": [
|
"messages": [
|
||||||
{"role": "user", "content": {"type": "text", "text": "What's this file about?"}},
|
{"role": "user", "content": [
|
||||||
|
{"type": "text", "text": "What's this file about?"},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
|
"file": {
|
||||||
|
"file_id": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
]},
|
||||||
]
|
]
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
@ -116,11 +122,13 @@ base64_url = f"data:application/pdf;base64,{encoded_file}"
|
||||||
# model
|
# model
|
||||||
model = "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"
|
model = "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"
|
||||||
|
|
||||||
image_content = [
|
file_content = [
|
||||||
{"type": "text", "text": "What's this file about?"},
|
{"type": "text", "text": "What's this file about?"},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": base64_url, # OR {"url": base64_url}
|
"file": {
|
||||||
|
"file_data": base64_url,
|
||||||
|
}
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -130,11 +138,53 @@ if not supports_pdf_input(model, None):
|
||||||
|
|
||||||
response = completion(
|
response = completion(
|
||||||
model=model,
|
model=model,
|
||||||
messages=[{"role": "user", "content": image_content}],
|
messages=[{"role": "user", "content": file_content}],
|
||||||
)
|
)
|
||||||
assert response is not None
|
assert response is not None
|
||||||
```
|
```
|
||||||
</TabItem>
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: bedrock-model
|
||||||
|
litellm_params:
|
||||||
|
model: bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0
|
||||||
|
aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID
|
||||||
|
aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY
|
||||||
|
aws_region_name: os.environ/AWS_REGION_NAME
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start the proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config /path/to/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer sk-1234' \
|
||||||
|
-d '{
|
||||||
|
"model": "bedrock-model",
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": [
|
||||||
|
{"type": "text", "text": "What's this file about?"},
|
||||||
|
{
|
||||||
|
"type": "file",
|
||||||
|
"file": {
|
||||||
|
"file_data": "data:application/pdf;base64...",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]},
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
## Checking if a model supports pdf input
|
## Checking if a model supports pdf input
|
||||||
|
@ -200,92 +250,3 @@ Expected Response
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
## OpenAI 'file' message type
|
|
||||||
|
|
||||||
This is currently only supported for OpenAI models.
|
|
||||||
|
|
||||||
This will be supported for all providers soon.
|
|
||||||
|
|
||||||
<Tabs>
|
|
||||||
<TabItem value="sdk" label="SDK">
|
|
||||||
|
|
||||||
```python
|
|
||||||
import base64
|
|
||||||
from litellm import completion
|
|
||||||
|
|
||||||
with open("draconomicon.pdf", "rb") as f:
|
|
||||||
data = f.read()
|
|
||||||
|
|
||||||
base64_string = base64.b64encode(data).decode("utf-8")
|
|
||||||
|
|
||||||
completion = completion(
|
|
||||||
model="gpt-4o",
|
|
||||||
messages=[
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": [
|
|
||||||
{
|
|
||||||
"type": "file",
|
|
||||||
"file": {
|
|
||||||
"filename": "draconomicon.pdf",
|
|
||||||
"file_data": f"data:application/pdf;base64,{base64_string}",
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"type": "text",
|
|
||||||
"text": "What is the first dragon in the book?",
|
|
||||||
}
|
|
||||||
],
|
|
||||||
},
|
|
||||||
],
|
|
||||||
)
|
|
||||||
|
|
||||||
print(completion.choices[0].message.content)
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
|
|
||||||
<TabItem value="proxy" label="PROXY">
|
|
||||||
|
|
||||||
1. Setup config.yaml
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
model_list:
|
|
||||||
- model_name: openai-model
|
|
||||||
litellm_params:
|
|
||||||
model: gpt-4o
|
|
||||||
api_key: os.environ/OPENAI_API_KEY
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Start the proxy
|
|
||||||
|
|
||||||
```bash
|
|
||||||
litellm --config config.yaml
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Test it!
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
|
||||||
-H 'Content-Type: application/json' \
|
|
||||||
-H 'Authorization: Bearer sk-1234' \
|
|
||||||
-d '{
|
|
||||||
"model": "openai-model",
|
|
||||||
"messages": [
|
|
||||||
{"role": "user", "content": [
|
|
||||||
{
|
|
||||||
"type": "file",
|
|
||||||
"file": {
|
|
||||||
"filename": "draconomicon.pdf",
|
|
||||||
"file_data": f"data:application/pdf;base64,{base64_string}",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]}
|
|
||||||
]
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
</Tabs>
|
|
|
@ -108,3 +108,75 @@ response = litellm.completion(
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
**additional_drop_params**: List or null - A list of OpenAI params you want to drop when making a call to the model.
|
**additional_drop_params**: List or null - A list of OpenAI params you want to drop when making a call to the model.
|
||||||
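For illustration, a minimal sketch of using `additional_drop_params` with the LiteLLM Python SDK. It assumes `additional_drop_params` is accepted directly as a `litellm.completion` keyword; the deployment name is a placeholder and `temperature` is only an example of a param to drop.

```python
import os

import litellm

# Sketch: drop the `temperature` param before the call reaches the model.
response = litellm.completion(
    model="azure/o_series/<my-deployment-name>",   # placeholder deployment name
    api_key=os.environ["AZURE_API_KEY"],
    api_base=os.environ["AZURE_API_BASE"],
    messages=[{"role": "user", "content": "Hello!"}],
    temperature=0.2,                               # removed from the request
    additional_drop_params=["temperature"],        # openai params to drop for this call
)
print(response.choices[0].message.content)
```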
|
|
||||||
|
## Specify allowed openai params in a request
|
||||||
|
|
||||||
|
Tell litellm to allow specific openai params in a request. Use this if you get a `litellm.UnsupportedParamsError` and want to allow a param. LiteLLM will pass the param as is to the model.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="LiteLLM Python SDK">
|
||||||
|
|
||||||
|
In this example we pass `allowed_openai_params=["tools"]` to allow the `tools` param.
|
||||||
|
|
||||||
|
```python showLineNumbers title="Pass allowed_openai_params to LiteLLM Python SDK"
|
||||||
|
await litellm.acompletion(
|
||||||
|
model="azure/o_series/<my-deployment-name>",
|
||||||
|
api_key="xxxxx",
|
||||||
|
api_base=api_base,
|
||||||
|
messages=[{"role": "user", "content": "Hello! return a json object"}],
|
||||||
|
tools=[{"type": "function", "function": {"name": "get_current_time", "description": "Get the current time in a given location.", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city name, e.g. San Francisco"}}, "required": ["location"]}}}]
|
||||||
|
allowed_openai_params=["tools"],
|
||||||
|
)
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="LiteLLM Proxy">
|
||||||
|
|
||||||
|
When using litellm proxy you can pass `allowed_openai_params` in two ways:
|
||||||
|
|
||||||
|
1. Dynamically pass `allowed_openai_params` in a request
|
||||||
|
2. Set `allowed_openai_params` on the config.yaml file for a specific model
|
||||||
|
|
||||||
|
#### Dynamically pass allowed_openai_params in a request
|
||||||
|
In this example we pass `allowed_openai_params=["tools"]` to allow the `tools` param for a request sent to the model set on the proxy.
|
||||||
|
|
||||||
|
```python showLineNumbers title="Dynamically pass allowed_openai_params in a request"
|
||||||
|
import openai
|
||||||
|
|
||||||
|
client = openai.OpenAI(
|
||||||
|
api_key="anything",
|
||||||
|
base_url="http://0.0.0.0:4000"
|
||||||
|
)
|
||||||
|
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model="gpt-3.5-turbo",
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "this is a test request, write a short poem"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
extra_body={
|
||||||
|
"allowed_openai_params": ["tools"]
|
||||||
|
}
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Set allowed_openai_params on config.yaml
|
||||||
|
|
||||||
|
You can also set `allowed_openai_params` on the config.yaml file for a specific model. This means that all requests to this deployment are allowed to pass in the `tools` param.
|
||||||
|
|
||||||
|
```yaml showLineNumbers title="Set allowed_openai_params on config.yaml"
|
||||||
|
model_list:
|
||||||
|
- model_name: azure-o1-preview
|
||||||
|
litellm_params:
|
||||||
|
model: azure/o_series/<my-deployment-name>
|
||||||
|
api_key: xxxxx
|
||||||
|
api_base: https://openai-prod-test.openai.azure.com/openai/deployments/o1/chat/completions?api-version=2025-01-01-preview
|
||||||
|
allowed_openai_params: ["tools"]
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
|
@ -1,3 +1,5 @@
|
||||||
|
import Image from '@theme/IdealImage';
|
||||||
|
|
||||||
# Enterprise
|
# Enterprise
|
||||||
For companies that need SSO, user management and professional support for LiteLLM Proxy
|
For companies that need SSO, user management and professional support for LiteLLM Proxy
|
||||||
|
|
||||||
|
@ -7,6 +9,8 @@ Get free 7-day trial key [here](https://www.litellm.ai/#trial)
|
||||||
|
|
||||||
Includes all enterprise features.
|
Includes all enterprise features.
|
||||||
|
|
||||||
|
<Image img={require('../img/enterprise_vs_oss.png')} />
|
||||||
|
|
||||||
[**Procurement available via AWS / Azure Marketplace**](./data_security.md#legalcompliance-faqs)
|
[**Procurement available via AWS / Azure Marketplace**](./data_security.md#legalcompliance-faqs)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -14,48 +14,105 @@ Files are used to upload documents that can be used with features like Assistant
|
||||||
- Delete File
|
- Delete File
|
||||||
- Get File Content
|
- Get File Content
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<Tabs>
|
<Tabs>
|
||||||
<TabItem value="proxy" label="LiteLLM PROXY Server">
|
<TabItem value="proxy" label="LiteLLM PROXY Server">
|
||||||
|
|
||||||
```bash
|
### 1. Setup config.yaml
|
||||||
$ export OPENAI_API_KEY="sk-..."
|
|
||||||
|
|
||||||
$ litellm
|
```
|
||||||
|
# for /files endpoints
|
||||||
# RUNNING on http://0.0.0.0:4000
|
files_settings:
|
||||||
|
- custom_llm_provider: azure
|
||||||
|
api_base: https://exampleopenaiendpoint-production.up.railway.app
|
||||||
|
api_key: fake-key
|
||||||
|
api_version: "2023-03-15-preview"
|
||||||
|
- custom_llm_provider: openai
|
||||||
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
```
|
```
|
||||||
|
|
||||||
**Upload a File**
|
### 2. Start LiteLLM PROXY Server
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl http://localhost:4000/v1/files \
|
litellm --config /path/to/config.yaml
|
||||||
-H "Authorization: Bearer sk-1234" \
|
|
||||||
-F purpose="fine-tune" \
|
## RUNNING on http://0.0.0.0:4000
|
||||||
-F file="@mydata.jsonl"
|
|
||||||
```
|
```
|
||||||
|
|
||||||
**List Files**
|
### 3. Use OpenAI's /files endpoints
|
||||||
```bash
|
|
||||||
curl http://localhost:4000/v1/files \
|
Upload a File
|
||||||
-H "Authorization: Bearer sk-1234"
|
|
||||||
|
```python
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
client = OpenAI(
|
||||||
|
api_key="sk-...",
|
||||||
|
base_url="http://0.0.0.0:4000/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
client.files.create(
|
||||||
|
file=open("mydata.jsonl", "rb"),  # any file-like object or bytes to upload
|
||||||
|
purpose="user_data",
|
||||||
|
extra_body={"custom_llm_provider": "openai"}
|
||||||
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
**Retrieve File Information**
|
List Files
|
||||||
```bash
|
|
||||||
curl http://localhost:4000/v1/files/file-abc123 \
|
```python
|
||||||
-H "Authorization: Bearer sk-1234"
|
from openai import OpenAI
|
||||||
|
|
||||||
|
client = OpenAI(
|
||||||
|
api_key="sk-...",
|
||||||
|
base_url="http://0.0.0.0:4000/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
files = client.files.list(extra_body={"custom_llm_provider": "openai"})
|
||||||
|
print("files=", files)
|
||||||
```
|
```
|
||||||
|
|
||||||
**Delete File**
|
Retrieve File Information
|
||||||
```bash
|
|
||||||
curl http://localhost:4000/v1/files/file-abc123 \
|
```python
|
||||||
-X DELETE \
|
from openai import OpenAI
|
||||||
-H "Authorization: Bearer sk-1234"
|
|
||||||
|
client = OpenAI(
|
||||||
|
api_key="sk-...",
|
||||||
|
base_url="http://0.0.0.0:4000/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
file = client.files.retrieve(file_id="file-abc123", extra_body={"custom_llm_provider": "openai"})
|
||||||
|
print("file=", file)
|
||||||
```
|
```
|
||||||
|
|
||||||
**Get File Content**
|
Delete File
|
||||||
```bash
|
|
||||||
curl http://localhost:4000/v1/files/file-abc123/content \
|
```python
|
||||||
-H "Authorization: Bearer sk-1234"
|
from openai import OpenAI
|
||||||
|
|
||||||
|
client = OpenAI(
|
||||||
|
api_key="sk-...",
|
||||||
|
base_url="http://0.0.0.0:4000/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
response = client.files.delete(file_id="file-abc123", extra_body={"custom_llm_provider": "openai"})
|
||||||
|
print("delete response=", response)
|
||||||
|
```
|
||||||
|
|
||||||
|
Get File Content
|
||||||
|
|
||||||
|
```python
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
client = OpenAI(
|
||||||
|
api_key="sk-...",
|
||||||
|
base_url="http://0.0.0.0:4000/v1"
|
||||||
|
)
|
||||||
|
|
||||||
|
content = client.files.content(file_id="file-abc123", extra_body={"custom_llm_provider": "openai"})
|
||||||
|
print("content=", content)
|
||||||
```
|
```
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
|
@ -120,7 +177,7 @@ print("file content=", content)
|
||||||
|
|
||||||
### [OpenAI](#quick-start)
|
### [OpenAI](#quick-start)
|
||||||
|
|
||||||
## [Azure OpenAI](./providers/azure#azure-batches-api)
|
### [Azure OpenAI](./providers/azure#azure-batches-api)
|
||||||
|
|
||||||
### [Vertex AI](./providers/vertex#batch-apis)
|
### [Vertex AI](./providers/vertex#batch-apis)
|
||||||
|
|
||||||
|
|
|
@ -4,21 +4,177 @@ import Image from '@theme/IdealImage';
|
||||||
|
|
||||||
# /mcp [BETA] - Model Context Protocol
|
# /mcp [BETA] - Model Context Protocol
|
||||||
|
|
||||||
Use Model Context Protocol with LiteLLM
|
## Expose MCP tools on LiteLLM Proxy Server
|
||||||
|
|
||||||
|
This allows you to define tools that can be called by any MCP compatible client. Define your `mcp_servers` with LiteLLM and all your clients can list and call available tools.
|
||||||
|
|
||||||
<Image
|
<Image
|
||||||
img={require('../img/litellm_mcp.png')}
|
img={require('../img/mcp_2.png')}
|
||||||
style={{width: '100%', display: 'block', margin: '2rem auto'}}
|
style={{width: '100%', display: 'block', margin: '2rem auto'}}
|
||||||
/>
|
/>
|
||||||
<p style={{textAlign: 'left', color: '#666'}}>
|
<p style={{textAlign: 'left', color: '#666'}}>
|
||||||
LiteLLM MCP Architecture: Use MCP tools with all LiteLLM supported models
|
LiteLLM MCP Architecture: Use MCP tools with all LiteLLM supported models
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
#### How it works
|
||||||
|
|
||||||
## Overview
|
LiteLLM exposes the following MCP endpoints:
|
||||||
|
|
||||||
LiteLLM acts as a MCP bridge to utilize MCP tools with all LiteLLM supported models. LiteLLM offers the following features for using MCP
|
- `/mcp/tools/list` - List all available tools
|
||||||
|
- `/mcp/tools/call` - Call a specific tool with the provided arguments
|
||||||
|
|
||||||
|
When MCP clients connect to LiteLLM they can follow this workflow:
|
||||||
|
|
||||||
|
1. Connect to the LiteLLM MCP server
|
||||||
|
2. List all available tools on LiteLLM
|
||||||
|
3. Client makes LLM API request with tool call(s)
|
||||||
|
4. LLM API returns which tools to call and with what arguments
|
||||||
|
5. MCP client makes MCP tool calls to LiteLLM
|
||||||
|
6. LiteLLM makes the tool calls to the appropriate MCP server
|
||||||
|
7. LiteLLM returns the tool call results to the MCP client
|
||||||
|
|
||||||
|
#### Usage
|
||||||
|
|
||||||
|
#### 1. Define your tools under `mcp_servers` in your config.yaml file.
|
||||||
|
|
||||||
|
LiteLLM allows you to define your tools in the `mcp_servers` section of your config.yaml file. All tools listed here will be available to MCP clients (when they connect to LiteLLM and call `list_tools`).
|
||||||
|
|
||||||
|
```yaml title="config.yaml" showLineNumbers
|
||||||
|
model_list:
|
||||||
|
- model_name: gpt-4o
|
||||||
|
litellm_params:
|
||||||
|
model: openai/gpt-4o
|
||||||
|
api_key: sk-xxxxxxx
|
||||||
|
|
||||||
|
mcp_servers:
|
||||||
|
{
|
||||||
|
"zapier_mcp": {
|
||||||
|
"url": "https://actions.zapier.com/mcp/sk-akxxxxx/sse"
|
||||||
|
},
|
||||||
|
"fetch": {
|
||||||
|
"url": "http://localhost:8000/sse"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
#### 2. Start LiteLLM Gateway
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="docker" label="Docker Run">
|
||||||
|
|
||||||
|
```shell title="Docker Run" showLineNumbers
|
||||||
|
docker run -d \
|
||||||
|
-p 4000:4000 \
|
||||||
|
-e OPENAI_API_KEY=$OPENAI_API_KEY \
|
||||||
|
--name my-app \
|
||||||
|
-v $(pwd)/my_config.yaml:/app/config.yaml \
|
||||||
|
my-app:latest \
|
||||||
|
--config /app/config.yaml \
|
||||||
|
--port 4000 \
|
||||||
|
--detailed_debug
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="py" label="litellm pip">
|
||||||
|
|
||||||
|
```shell title="litellm pip" showLineNumbers
|
||||||
|
litellm --config config.yaml --detailed_debug
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
#### 3. Make an LLM API request
|
||||||
|
|
||||||
|
In this example we will do the following:
|
||||||
|
|
||||||
|
1. Use MCP client to list MCP tools on LiteLLM Proxy
|
||||||
|
2. Use `transform_mcp_tool_to_openai_tool` to convert MCP tools to OpenAI tools
|
||||||
|
3. Provide the MCP tools to `gpt-4o`
|
||||||
|
4. Handle tool call from `gpt-4o`
|
||||||
|
5. Convert OpenAI tool call to MCP tool call
|
||||||
|
6. Execute tool call on MCP server
|
||||||
|
|
||||||
|
```python title="MCP Client List Tools" showLineNumbers
|
||||||
|
import asyncio
|
||||||
|
from openai import AsyncOpenAI
|
||||||
|
from openai.types.chat import ChatCompletionUserMessageParam
|
||||||
|
from mcp import ClientSession
|
||||||
|
from mcp.client.sse import sse_client
|
||||||
|
from litellm.experimental_mcp_client.tools import (
|
||||||
|
transform_mcp_tool_to_openai_tool,
|
||||||
|
transform_openai_tool_call_request_to_mcp_tool_call_request,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
# Initialize clients
|
||||||
|
|
||||||
|
# point OpenAI client to LiteLLM Proxy
|
||||||
|
client = AsyncOpenAI(api_key="sk-1234", base_url="http://localhost:4000")
|
||||||
|
|
||||||
|
# Point MCP client to LiteLLM Proxy
|
||||||
|
async with sse_client("http://localhost:4000/mcp/") as (read, write):
|
||||||
|
async with ClientSession(read, write) as session:
|
||||||
|
await session.initialize()
|
||||||
|
|
||||||
|
# 1. List MCP tools on LiteLLM Proxy
|
||||||
|
mcp_tools = await session.list_tools()
|
||||||
|
print("List of MCP tools for MCP server:", mcp_tools.tools)
|
||||||
|
|
||||||
|
# Create message
|
||||||
|
messages = [
|
||||||
|
ChatCompletionUserMessageParam(
|
||||||
|
content="Send an email about LiteLLM supporting MCP", role="user"
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
# 2. Use `transform_mcp_tool_to_openai_tool` to convert MCP tools to OpenAI tools
|
||||||
|
# Since OpenAI only supports tools in the OpenAI format, we need to convert the MCP tools to the OpenAI format.
|
||||||
|
openai_tools = [
|
||||||
|
transform_mcp_tool_to_openai_tool(tool) for tool in mcp_tools.tools
|
||||||
|
]
|
||||||
|
|
||||||
|
# 3. Provide the MCP tools to `gpt-4o`
|
||||||
|
response = await client.chat.completions.create(
|
||||||
|
model="gpt-4o",
|
||||||
|
messages=messages,
|
||||||
|
tools=openai_tools,
|
||||||
|
tool_choice="auto",
|
||||||
|
)
|
||||||
|
|
||||||
|
# 4. Handle tool call from `gpt-4o`
|
||||||
|
if response.choices[0].message.tool_calls:
|
||||||
|
tool_call = response.choices[0].message.tool_calls[0]
|
||||||
|
if tool_call:
|
||||||
|
|
||||||
|
# 5. Convert OpenAI tool call to MCP tool call
|
||||||
|
# Since MCP servers expect tools in the MCP format, we need to convert the OpenAI tool call to the MCP format.
|
||||||
|
# This is done using litellm.experimental_mcp_client.tools.transform_openai_tool_call_request_to_mcp_tool_call_request
|
||||||
|
mcp_call = (
|
||||||
|
transform_openai_tool_call_request_to_mcp_tool_call_request(
|
||||||
|
openai_tool=tool_call.model_dump()
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# 6. Execute tool call on MCP server
|
||||||
|
result = await session.call_tool(
|
||||||
|
name=mcp_call.name, arguments=mcp_call.arguments
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Result:", result)
|
||||||
|
|
||||||
|
|
||||||
|
# Run it
|
||||||
|
asyncio.run(main())
|
||||||
|
```
|
||||||
|
|
||||||
|
## LiteLLM Python SDK MCP Bridge
|
||||||
|
|
||||||
|
LiteLLM Python SDK acts as a MCP bridge to utilize MCP tools with all LiteLLM supported models. LiteLLM offers the following features for using MCP
|
||||||
|
|
||||||
- **List** Available MCP Tools: OpenAI clients can view all available MCP tools
|
- **List** Available MCP Tools: OpenAI clients can view all available MCP tools
|
||||||
- `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools
|
- `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools
|
||||||
|
@ -26,8 +182,6 @@ LiteLLM acts as a MCP bridge to utilize MCP tools with all LiteLLM supported mod
|
||||||
- `litellm.experimental_mcp_client.call_openai_tool` to call an OpenAI tool on an MCP server
|
- `litellm.experimental_mcp_client.call_openai_tool` to call an OpenAI tool on an MCP server
|
||||||
|
|
||||||
|
|
||||||
## Usage
|
|
||||||
|
|
||||||
### 1. List Available MCP Tools
|
### 1. List Available MCP Tools
|
||||||
|
|
||||||
In this example we'll use `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools on any MCP server. This method can be used in two ways:
|
In this example we'll use `litellm.experimental_mcp_client.load_mcp_tools` to list all available MCP tools on any MCP server. This method can be used in two ways:
|
||||||
|
@ -271,215 +425,3 @@ async with stdio_client(server_params) as (read, write):
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
## Upcoming Features
|
|
||||||
|
|
||||||
:::info
|
|
||||||
|
|
||||||
**This feature is not live as yet** this is a beta interface. Expect this to be live on litellm `v1.63.15` and above.
|
|
||||||
|
|
||||||
:::
|
|
||||||
|
|
||||||
|
|
||||||
### Expose MCP tools on LiteLLM Proxy Server
|
|
||||||
|
|
||||||
This allows you to define tools that can be called by any MCP compatible client. Define your mcp_tools with LiteLLM and all your clients can list and call available tools.
|
|
||||||
|
|
||||||
#### How it works
|
|
||||||
|
|
||||||
LiteLLM exposes the following MCP endpoints:
|
|
||||||
|
|
||||||
- `/mcp/list_tools` - List all available tools
|
|
||||||
- `/mcp/call_tool` - Call a specific tool with the provided arguments
|
|
||||||
|
|
||||||
When MCP clients connect to LiteLLM they can follow this workflow:
|
|
||||||
|
|
||||||
1. Connect to the LiteLLM MCP server
|
|
||||||
2. List all available tools on LiteLLM
|
|
||||||
3. Client makes LLM API request with tool call(s)
|
|
||||||
4. LLM API returns which tools to call and with what arguments
|
|
||||||
5. MCP client makes tool calls to LiteLLM
|
|
||||||
6. LiteLLM makes the tool calls to the appropriate handlers
|
|
||||||
7. LiteLLM returns the tool call results to the MCP client
|
|
||||||
|
|
||||||
#### Usage
|
|
||||||
|
|
||||||
#### 1. Define your tools on mcp_tools
|
|
||||||
|
|
||||||
LiteLLM allows you to define your tools on the `mcp_tools` section in your config.yaml file. All tools listed here will be available to MCP clients (when they connect to LiteLLM and call `list_tools`).
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
model_list:
|
|
||||||
- model_name: gpt-4o
|
|
||||||
litellm_params:
|
|
||||||
model: openai/gpt-4o
|
|
||||||
api_key: sk-xxxxxxx
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
mcp_tools:
|
|
||||||
- name: "get_current_time"
|
|
||||||
description: "Get the current time"
|
|
||||||
input_schema: {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"format": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "The format of the time to return",
|
|
||||||
"enum": ["short"]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
handler: "mcp_tools.get_current_time"
|
|
||||||
```
|
|
||||||
|
|
||||||
#### 2. Define a handler for your tool
|
|
||||||
|
|
||||||
Create a new file called `mcp_tools.py` and add this code. The key method here is `get_current_time` which gets executed when the `get_current_time` tool is called.
|
|
||||||
|
|
||||||
```python
|
|
||||||
# mcp_tools.py
|
|
||||||
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
def get_current_time(format: str = "short"):
|
|
||||||
"""
|
|
||||||
Simple handler for the 'get_current_time' tool.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
format (str): The format of the time to return ('short').
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: The current time formatted as 'HH:MM'.
|
|
||||||
"""
|
|
||||||
# Get the current time
|
|
||||||
current_time = datetime.now()
|
|
||||||
|
|
||||||
# Format the time as 'HH:MM'
|
|
||||||
return current_time.strftime('%H:%M')
|
|
||||||
```
|
|
||||||
|
|
||||||
#### 3. Start LiteLLM Gateway
|
|
||||||
|
|
||||||
<Tabs>
|
|
||||||
<TabItem value="docker" label="Docker Run">
|
|
||||||
|
|
||||||
Mount your `mcp_tools.py` on the LiteLLM Docker container.
|
|
||||||
|
|
||||||
```shell
|
|
||||||
docker run -d \
|
|
||||||
-p 4000:4000 \
|
|
||||||
-e OPENAI_API_KEY=$OPENAI_API_KEY \
|
|
||||||
--name my-app \
|
|
||||||
-v $(pwd)/my_config.yaml:/app/config.yaml \
|
|
||||||
-v $(pwd)/mcp_tools.py:/app/mcp_tools.py \
|
|
||||||
my-app:latest \
|
|
||||||
--config /app/config.yaml \
|
|
||||||
--port 4000 \
|
|
||||||
--detailed_debug \
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
|
|
||||||
<TabItem value="py" label="litellm pip">
|
|
||||||
|
|
||||||
```shell
|
|
||||||
litellm --config config.yaml --detailed_debug
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
</Tabs>
|
|
||||||
|
|
||||||
|
|
||||||
#### 4. Make an LLM API request
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
```python
|
|
||||||
import asyncio
|
|
||||||
from langchain_mcp_adapters.tools import load_mcp_tools
|
|
||||||
from langchain_openai import ChatOpenAI
|
|
||||||
from langgraph.prebuilt import create_react_agent
|
|
||||||
from mcp import ClientSession
|
|
||||||
from mcp.client.sse import sse_client
|
|
||||||
|
|
||||||
|
|
||||||
async def main():
|
|
||||||
# Initialize the model with your API key
|
|
||||||
model = ChatOpenAI(model="gpt-4o")
|
|
||||||
|
|
||||||
# Connect to the MCP server
|
|
||||||
async with sse_client(url="http://localhost:4000/mcp/") as (read, write):
|
|
||||||
async with ClientSession(read, write) as session:
|
|
||||||
# Initialize the session
|
|
||||||
print("Initializing session...")
|
|
||||||
await session.initialize()
|
|
||||||
print("Session initialized")
|
|
||||||
|
|
||||||
# Load available tools from MCP
|
|
||||||
print("Loading tools...")
|
|
||||||
tools = await load_mcp_tools(session)
|
|
||||||
print(f"Loaded {len(tools)} tools")
|
|
||||||
|
|
||||||
# Create a ReAct agent with the model and tools
|
|
||||||
agent = create_react_agent(model, tools)
|
|
||||||
|
|
||||||
# Run the agent with a user query
|
|
||||||
user_query = "What's the weather in Tokyo?"
|
|
||||||
print(f"Asking: {user_query}")
|
|
||||||
agent_response = await agent.ainvoke({"messages": user_query})
|
|
||||||
print("Agent response:")
|
|
||||||
print(agent_response)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
asyncio.run(main())
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
### Specification for `mcp_tools`
|
|
||||||
|
|
||||||
The `mcp_tools` section in your LiteLLM config defines tools that can be called by MCP-compatible clients.
|
|
||||||
|
|
||||||
#### Tool Definition Format
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
mcp_tools:
|
|
||||||
- name: string # Required: Name of the tool
|
|
||||||
description: string # Required: Description of what the tool does
|
|
||||||
input_schema: object # Required: JSON Schema defining the tool's input parameters
|
|
||||||
handler: string # Required: Path to the function that implements the tool
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Field Details
|
|
||||||
|
|
||||||
- `name`: A unique identifier for the tool
|
|
||||||
- `description`: A clear description of what the tool does, used by LLMs to determine when to call it
|
|
||||||
- `input_schema`: JSON Schema object defining the expected input parameters
|
|
||||||
- `handler`: String path to the Python function that implements the tool (e.g., "module.submodule.function_name")
|
|
||||||
|
|
||||||
#### Example Tool Definition
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
mcp_tools:
|
|
||||||
- name: "get_current_time"
|
|
||||||
description: "Get the current time in a specified format"
|
|
||||||
input_schema: {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"format": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "The format of the time to return",
|
|
||||||
"enum": ["short", "long", "iso"]
|
|
||||||
},
|
|
||||||
"timezone": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "The timezone to use (e.g., 'UTC', 'America/New_York')",
|
|
||||||
"default": "UTC"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"required": ["format"]
|
|
||||||
}
|
|
||||||
handler: "mcp_tools.get_current_time"
|
|
||||||
```
|
|
||||||
|
|
|
@ -821,6 +821,14 @@ print(f"\nResponse: {resp}")
|
||||||
|
|
||||||
## Usage - Thinking / `reasoning_content`
|
## Usage - Thinking / `reasoning_content`
|
||||||
|
|
||||||
|
LiteLLM translates OpenAI's `reasoning_effort` to Anthropic's `thinking` parameter. [Code](https://github.com/BerriAI/litellm/blob/23051d89dd3611a81617d84277059cd88b2df511/litellm/llms/anthropic/chat/transformation.py#L298)
|
||||||
|
|
||||||
|
| reasoning_effort | thinking |
|
||||||
|
| ---------------- | -------- |
|
||||||
|
| "low" | "budget_tokens": 1024 |
|
||||||
|
| "medium" | "budget_tokens": 2048 |
|
||||||
|
| "high" | "budget_tokens": 4096 |
|
||||||
|
|
||||||
<Tabs>
|
<Tabs>
|
||||||
<TabItem value="sdk" label="SDK">
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
@ -830,7 +838,7 @@ from litellm import completion
|
||||||
resp = completion(
|
resp = completion(
|
||||||
model="anthropic/claude-3-7-sonnet-20250219",
|
model="anthropic/claude-3-7-sonnet-20250219",
|
||||||
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||||
thinking={"type": "enabled", "budget_tokens": 1024},
|
reasoning_effort="low",
|
||||||
)
|
)
|
||||||
|
|
||||||
```
|
```
|
||||||
|
@ -863,7 +871,7 @@ curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
-d '{
|
-d '{
|
||||||
"model": "claude-3-7-sonnet-20250219",
|
"model": "claude-3-7-sonnet-20250219",
|
||||||
"messages": [{"role": "user", "content": "What is the capital of France?"}],
|
"messages": [{"role": "user", "content": "What is the capital of France?"}],
|
||||||
"thinking": {"type": "enabled", "budget_tokens": 1024}
|
"reasoning_effort": "low"
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -927,6 +935,44 @@ ModelResponse(
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Pass `thinking` to Anthropic models
|
||||||
|
|
||||||
|
You can also pass the `thinking` parameter to Anthropic models.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
response = litellm.completion(
|
||||||
|
model="anthropic/claude-3-7-sonnet-20250219",
|
||||||
|
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
thinking={"type": "enabled", "budget_tokens": 1024},
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer $LITELLM_KEY" \
|
||||||
|
-d '{
|
||||||
|
"model": "anthropic/claude-3-7-sonnet-20250219",
|
||||||
|
"messages": [{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
"thinking": {"type": "enabled", "budget_tokens": 1024}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## **Passing Extra Headers to Anthropic API**
|
## **Passing Extra Headers to Anthropic API**
|
||||||
|
|
||||||
Pass `extra_headers: dict` to `litellm.completion`
|
Pass `extra_headers: dict` to `litellm.completion`
|
||||||
|
@ -1035,8 +1081,10 @@ response = completion(
|
||||||
"content": [
|
"content": [
|
||||||
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
|
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
|
"file": {
|
||||||
|
"file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
|
||||||
|
}
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
@ -1081,8 +1129,10 @@ curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
"text": "You are a very professional document summarization specialist. Please summarize the given document"
|
"text": "You are a very professional document summarization specialist. Please summarize the given document"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": "data:application/pdf;base64,{encoded_file}" # 👈 PDF
|
"file": {
|
||||||
|
"file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
@ -1076,32 +1076,24 @@ print(response)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
### Parallel Function calling
|
### Tool Calling / Function Calling
|
||||||
|
|
||||||
See a detailed walkthrough of parallel function calling with litellm [here](https://docs.litellm.ai/docs/completion/function_call)
|
See a detailed walkthrough of parallel function calling with litellm [here](https://docs.litellm.ai/docs/completion/function_call)
|
||||||
|
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
```python
|
```python
|
||||||
# set Azure env variables
|
# set Azure env variables
|
||||||
import os
|
import os
|
||||||
|
import litellm
|
||||||
|
import json
|
||||||
|
|
||||||
os.environ['AZURE_API_KEY'] = "" # litellm reads AZURE_API_KEY from .env and sends the request
|
os.environ['AZURE_API_KEY'] = "" # litellm reads AZURE_API_KEY from .env and sends the request
|
||||||
os.environ['AZURE_API_BASE'] = "https://openai-gpt-4-test-v-1.openai.azure.com/"
|
os.environ['AZURE_API_BASE'] = "https://openai-gpt-4-test-v-1.openai.azure.com/"
|
||||||
os.environ['AZURE_API_VERSION'] = "2023-07-01-preview"
|
os.environ['AZURE_API_VERSION'] = "2023-07-01-preview"
|
||||||
|
|
||||||
import litellm
|
|
||||||
import json
|
|
||||||
# Example dummy function hard coded to return the same weather
|
|
||||||
# In production, this could be your backend API or an external API
|
|
||||||
def get_current_weather(location, unit="fahrenheit"):
|
|
||||||
"""Get the current weather in a given location"""
|
|
||||||
if "tokyo" in location.lower():
|
|
||||||
return json.dumps({"location": "Tokyo", "temperature": "10", "unit": "celsius"})
|
|
||||||
elif "san francisco" in location.lower():
|
|
||||||
return json.dumps({"location": "San Francisco", "temperature": "72", "unit": "fahrenheit"})
|
|
||||||
elif "paris" in location.lower():
|
|
||||||
return json.dumps({"location": "Paris", "temperature": "22", "unit": "celsius"})
|
|
||||||
else:
|
|
||||||
return json.dumps({"location": location, "temperature": "unknown"})
|
|
||||||
|
|
||||||
## Step 1: send the conversation and available functions to the model
|
|
||||||
messages = [{"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris?"}]
|
|
||||||
tools = [
|
tools = [
|
||||||
{
|
{
|
||||||
"type": "function",
|
"type": "function",
|
||||||
|
@ -1125,7 +1117,7 @@ tools = [
|
||||||
|
|
||||||
response = litellm.completion(
|
response = litellm.completion(
|
||||||
model="azure/chatgpt-functioncalling", # model = azure/<your-azure-deployment-name>
|
model="azure/chatgpt-functioncalling", # model = azure/<your-azure-deployment-name>
|
||||||
messages=messages,
|
messages=[{"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris?"}],
|
||||||
tools=tools,
|
tools=tools,
|
||||||
tool_choice="auto", # auto is default, but we'll be explicit
|
tool_choice="auto", # auto is default, but we'll be explicit
|
||||||
)
|
)
|
||||||
|
@ -1134,8 +1126,49 @@ response_message = response.choices[0].message
|
||||||
tool_calls = response.choices[0].message.tool_calls
|
tool_calls = response.choices[0].message.tool_calls
|
||||||
print("\nTool Choice:\n", tool_calls)
|
print("\nTool Choice:\n", tool_calls)
|
||||||
```
|
```
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: azure-gpt-3.5
|
||||||
|
litellm_params:
|
||||||
|
model: azure/chatgpt-functioncalling
|
||||||
|
api_base: os.environ/AZURE_API_BASE
|
||||||
|
api_key: os.environ/AZURE_API_KEY
|
||||||
|
api_version: "2023-07-01-preview"
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://localhost:4000/v1/chat/completions' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer sk-1234' \
|
||||||
|
-d '{
|
||||||
|
"model": "azure-gpt-3.5",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Hey, how'\''s it going? Thinking long and hard before replying - what is the meaning of the world and life itself"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
### Spend Tracking for Azure OpenAI Models (PROXY)
|
### Spend Tracking for Azure OpenAI Models (PROXY)
|
||||||
|
|
||||||
Set the base model for cost tracking on Azure image generation calls
|
Set the base model for cost tracking on Azure image generation calls
|
||||||
|
|
|
@ -476,7 +476,7 @@ os.environ["AWS_REGION_NAME"] = ""
|
||||||
resp = completion(
|
resp = completion(
|
||||||
model="bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
|
model="bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
|
||||||
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||||
thinking={"type": "enabled", "budget_tokens": 1024},
|
reasoning_effort="low",
|
||||||
)
|
)
|
||||||
|
|
||||||
print(resp)
|
print(resp)
|
||||||
|
@ -491,7 +491,7 @@ model_list:
|
||||||
- model_name: bedrock-claude-3-7
|
- model_name: bedrock-claude-3-7
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0
|
model: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0
|
||||||
thinking: {"type": "enabled", "budget_tokens": 1024} # 👈 EITHER HERE OR ON REQUEST
|
reasoning_effort: "low" # 👈 EITHER HERE OR ON REQUEST
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Start proxy
|
2. Start proxy
|
||||||
|
@ -509,7 +509,7 @@ curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
-d '{
|
-d '{
|
||||||
"model": "bedrock-claude-3-7",
|
"model": "bedrock-claude-3-7",
|
||||||
"messages": [{"role": "user", "content": "What is the capital of France?"}],
|
"messages": [{"role": "user", "content": "What is the capital of France?"}],
|
||||||
"thinking": {"type": "enabled", "budget_tokens": 1024} # 👈 EITHER HERE OR ON CONFIG.YAML
|
"reasoning_effort": "low" # 👈 EITHER HERE OR ON CONFIG.YAML
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -558,6 +558,10 @@ Same as [Anthropic API response](../providers/anthropic#usage---thinking--reason
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Pass `thinking` to Anthropic models
|
||||||
|
|
||||||
|
Same as [Anthropic API response](../providers/anthropic#usage---thinking--reasoning_content).
|
||||||
|
|
||||||
|
|
||||||
## Usage - Structured Output / JSON mode
|
## Usage - Structured Output / JSON mode
|
||||||
|
|
||||||
|
@ -664,6 +668,58 @@ curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
## Usage - Latency Optimized Inference
|
||||||
|
|
||||||
|
Valid from v1.65.1+
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0",
|
||||||
|
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
performanceConfig={"latency": "optimized"},
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: bedrock-claude-3-7
|
||||||
|
litellm_params:
|
||||||
|
model: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0
|
||||||
|
performanceConfig: {"latency": "optimized"} # 👈 EITHER HERE OR ON REQUEST
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config /path/to/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer $LITELLM_KEY" \
|
||||||
|
-d '{
|
||||||
|
"model": "bedrock-claude-3-7",
|
||||||
|
"messages": [{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
"performanceConfig": {"latency": "optimized"} # 👈 EITHER HERE OR ON CONFIG.YAML
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
## Usage - Bedrock Guardrails
|
## Usage - Bedrock Guardrails
|
||||||
|
|
||||||
Example of using [Bedrock Guardrails with LiteLLM](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-use-converse-api.html)
|
Example of using [Bedrock Guardrails with LiteLLM](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-use-converse-api.html)
|
||||||
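The full guardrails example is elided by this hunk; a minimal sketch of a guardrails call looks roughly like the following. The guardrail identifier and version values are placeholders you would replace with your own.

```python
import os
from litellm import completion

# AWS credentials for Bedrock
os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""

response = completion(
    model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
    messages=[{"role": "user", "content": "Where can I buy coffee?"}],
    guardrailConfig={
        "guardrailIdentifier": "gr-abc123",  # placeholder - your guardrail ID or ARN
        "guardrailVersion": "DRAFT",         # placeholder - your guardrail version
        "trace": "disabled",                 # "enabled" to include the guardrail trace in the response
    },
)
print(response)
```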
|
@ -1116,14 +1172,22 @@ os.environ["AWS_REGION_NAME"] = ""
|
||||||
# pdf url
|
# pdf url
|
||||||
image_url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
|
image_url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
|
||||||
|
|
||||||
|
# Download the file
|
||||||
|
response = requests.get(image_url)
|
||||||
|
file_data = response.content
|
||||||
|
|
||||||
|
encoded_file = base64.b64encode(file_data).decode("utf-8")
|
||||||
|
|
||||||
# model
|
# model
|
||||||
model = "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"
|
model = "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"
|
||||||
|
|
||||||
image_content = [
|
image_content = [
|
||||||
{"type": "text", "text": "What's this file about?"},
|
{"type": "text", "text": "What's this file about?"},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": image_url, # OR {"url": image_url}
|
"file": {
|
||||||
|
"file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
|
||||||
|
}
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -1169,8 +1233,10 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||||
"messages": [
|
"messages": [
|
||||||
{"role": "user", "content": {"type": "text", "text": "What's this file about?"}},
|
{"role": "user", "content": {"type": "text", "text": "What's this file about?"}},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
|
"file": {
|
||||||
|
"file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}'
|
}'
|
||||||
|
@ -1776,6 +1842,7 @@ response = completion(
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
</TabItem>
|
</TabItem>
|
||||||
|
|
||||||
<TabItem value="proxy" label="PROXY">
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
1. Setup config.yaml
|
1. Setup config.yaml
|
||||||
|
@ -1820,11 +1887,13 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||||
```
|
```
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
|
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
### SSO Login (AWS Profile)
|
### SSO Login (AWS Profile)
|
||||||
- Set `AWS_PROFILE` environment variable
|
- Set `AWS_PROFILE` environment variable
|
||||||
- Make bedrock completion call
|
- Make bedrock completion call
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import os
|
import os
|
||||||
from litellm import completion
|
from litellm import completion
|
||||||
|
@ -1917,12 +1986,46 @@ model_list:
|
||||||
|
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
Text to Image:
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://0.0.0.0:4000/v1/images/generations' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer $LITELLM_VIRTUAL_KEY' \
|
||||||
|
-d '{
|
||||||
|
"model": "amazon.nova-canvas-v1:0",
|
||||||
|
"prompt": "A cute baby sea otter"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Color Guided Generation:
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://0.0.0.0:4000/v1/images/generations' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer $LITELLM_VIRTUAL_KEY' \
|
||||||
|
-d '{
|
||||||
|
"model": "amazon.nova-canvas-v1:0",
|
||||||
|
"prompt": "A cute baby sea otter",
|
||||||
|
"taskType": "COLOR_GUIDED_GENERATION",
|
||||||
|
"colorGuidedGenerationParams":{"colors":["#FFFFFF"]}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
| Model Name | Function Call |
|
||||||
|
|-------------------------|---------------------------------------------|
|
||||||
|
| Stable Diffusion 3 - v0 | `image_generation(model="bedrock/stability.stability.sd3-large-v1:0", prompt=prompt)` |
|
||||||
|
| Stable Diffusion - v0 | `image_generation(model="bedrock/stability.stable-diffusion-xl-v0", prompt=prompt)` |
|
||||||
|
| Stable Diffusion - v1 | `image_generation(model="bedrock/stability.stable-diffusion-xl-v1", prompt=prompt)` |
|
||||||
|
| Amazon Nova Canvas - v0 | `image_generation(model="bedrock/amazon.nova-canvas-v1:0", prompt=prompt)` |
|
||||||
|
|
||||||
|
|
||||||
### Passing an external BedrockRuntime.Client as a parameter - Completion()
|
### Passing an external BedrockRuntime.Client as a parameter - Completion()
|
||||||
|
|
||||||
|
This is a deprecated flow. Boto3 is not async. And boto3.client does not let us make the http call through httpx. Pass in your aws params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues)
|
||||||
|
|
||||||
:::warning
|
:::warning
|
||||||
|
|
||||||
This is a deprecated flow. Boto3 is not async. And boto3.client does not let us make the http call through httpx. Pass in your aws params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Experimental - 2024-Jun-23:
|
Experimental - 2024-Jun-23:
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import Tabs from '@theme/Tabs';
|
import Tabs from '@theme/Tabs';
|
||||||
import TabItem from '@theme/TabItem';
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# 🆕 Databricks
|
# Databricks
|
||||||
|
|
||||||
LiteLLM supports all models on Databricks
|
LiteLLM supports all models on Databricks
|
||||||
|
|
||||||
|
@ -154,7 +154,205 @@ response = completion(
|
||||||
temperature: 0.5
|
temperature: 0.5
|
||||||
```
|
```
|
||||||
|
|
||||||
## Passings Databricks specific params - 'instruction'
|
|
||||||
|
## Usage - Thinking / `reasoning_content`
|
||||||
|
|
||||||
|
LiteLLM translates OpenAI's `reasoning_effort` to Anthropic's `thinking` parameter. [Code](https://github.com/BerriAI/litellm/blob/23051d89dd3611a81617d84277059cd88b2df511/litellm/llms/anthropic/chat/transformation.py#L298)
|
||||||
|
|
||||||
|
| reasoning_effort | thinking |
|
||||||
|
| ---------------- | -------- |
|
||||||
|
| "low" | "budget_tokens": 1024 |
|
||||||
|
| "medium" | "budget_tokens": 2048 |
|
||||||
|
| "high" | "budget_tokens": 4096 |
|
||||||
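Conceptually, the translation amounts to a small lookup like the sketch below. This is illustrative only; the dict and function names are hypothetical, and LiteLLM applies the mapping internally.

```python
# Illustrative mapping from OpenAI-style reasoning_effort to Anthropic's thinking param
REASONING_EFFORT_TO_THINKING = {
    "low": {"type": "enabled", "budget_tokens": 1024},
    "medium": {"type": "enabled", "budget_tokens": 2048},
    "high": {"type": "enabled", "budget_tokens": 4096},
}

def to_thinking_param(reasoning_effort: str) -> dict:
    """Return the thinking dict corresponding to a reasoning_effort level."""
    return REASONING_EFFORT_TO_THINKING[reasoning_effort]

print(to_thinking_param("low"))  # {'type': 'enabled', 'budget_tokens': 1024}
```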
|
|
||||||
|
|
||||||
|
Known Limitations:
|
||||||
|
- Support for passing thinking blocks back to Claude [Issue](https://github.com/BerriAI/litellm/issues/9790)
|
||||||
|
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
import os
|
||||||
|
|
||||||
|
# set ENV variables (can also be passed in to .completion() - e.g. `api_base`, `api_key`)
|
||||||
|
os.environ["DATABRICKS_API_KEY"] = "databricks key"
|
||||||
|
os.environ["DATABRICKS_API_BASE"] = "databricks base url"
|
||||||
|
|
||||||
|
resp = completion(
|
||||||
|
model="databricks/databricks-claude-3-7-sonnet",
|
||||||
|
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
reasoning_effort="low",
|
||||||
|
)
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
- model_name: claude-3-7-sonnet
|
||||||
|
litellm_params:
|
||||||
|
model: databricks/databricks-claude-3-7-sonnet
|
||||||
|
api_key: os.environ/DATABRICKS_API_KEY
|
||||||
|
api_base: os.environ/DATABRICKS_API_BASE
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config /path/to/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer <YOUR-LITELLM-KEY>" \
|
||||||
|
-d '{
|
||||||
|
"model": "claude-3-7-sonnet",
|
||||||
|
"messages": [{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
"reasoning_effort": "low"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
**Expected Response**
|
||||||
|
|
||||||
|
```python
|
||||||
|
ModelResponse(
|
||||||
|
id='chatcmpl-c542d76d-f675-4e87-8e5f-05855f5d0f5e',
|
||||||
|
created=1740470510,
|
||||||
|
model='claude-3-7-sonnet-20250219',
|
||||||
|
object='chat.completion',
|
||||||
|
system_fingerprint=None,
|
||||||
|
choices=[
|
||||||
|
Choices(
|
||||||
|
finish_reason='stop',
|
||||||
|
index=0,
|
||||||
|
message=Message(
|
||||||
|
content="The capital of France is Paris.",
|
||||||
|
role='assistant',
|
||||||
|
tool_calls=None,
|
||||||
|
function_call=None,
|
||||||
|
provider_specific_fields={
|
||||||
|
'citations': None,
|
||||||
|
'thinking_blocks': [
|
||||||
|
{
|
||||||
|
'type': 'thinking',
|
||||||
|
'thinking': 'The capital of France is Paris. This is a very straightforward factual question.',
|
||||||
|
'signature': 'EuYBCkQYAiJAy6...'
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
),
|
||||||
|
thinking_blocks=[
|
||||||
|
{
|
||||||
|
'type': 'thinking',
|
||||||
|
'thinking': 'The capital of France is Paris. This is a very straightforward factual question.',
|
||||||
|
'signature': 'EuYBCkQYAiJAy6AGB...'
|
||||||
|
}
|
||||||
|
],
|
||||||
|
reasoning_content='The capital of France is Paris. This is a very straightforward factual question.'
|
||||||
|
)
|
||||||
|
],
|
||||||
|
usage=Usage(
|
||||||
|
completion_tokens=68,
|
||||||
|
prompt_tokens=42,
|
||||||
|
total_tokens=110,
|
||||||
|
completion_tokens_details=None,
|
||||||
|
prompt_tokens_details=PromptTokensDetailsWrapper(
|
||||||
|
audio_tokens=None,
|
||||||
|
cached_tokens=0,
|
||||||
|
text_tokens=None,
|
||||||
|
image_tokens=None
|
||||||
|
),
|
||||||
|
cache_creation_input_tokens=0,
|
||||||
|
cache_read_input_tokens=0
|
||||||
|
)
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pass `thinking` to Anthropic models
|
||||||
|
|
||||||
|
You can also pass the `thinking` parameter to Anthropic models.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
import os
|
||||||
|
|
||||||
|
# set ENV variables (can also be passed in to .completion() - e.g. `api_base`, `api_key`)
|
||||||
|
os.environ["DATABRICKS_API_KEY"] = "databricks key"
|
||||||
|
os.environ["DATABRICKS_API_BASE"] = "databricks base url"
|
||||||
|
|
||||||
|
response = litellm.completion(
|
||||||
|
model="databricks/databricks-claude-3-7-sonnet",
|
||||||
|
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
thinking={"type": "enabled", "budget_tokens": 1024},
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer $LITELLM_KEY" \
|
||||||
|
-d '{
|
||||||
|
"model": "databricks/databricks-claude-3-7-sonnet",
|
||||||
|
"messages": [{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
"thinking": {"type": "enabled", "budget_tokens": 1024}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Supported Databricks Chat Completion Models
|
||||||
|
|
||||||
|
:::tip
|
||||||
|
|
||||||
|
**We support ALL Databricks models, just set `model=databricks/<any-model-on-databricks>` as a prefix when sending litellm requests**
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
|
||||||
|
| Model Name | Command |
|
||||||
|
|----------------------------|------------------------------------------------------------------|
|
||||||
|
| databricks/databricks-claude-3-7-sonnet | `completion(model='databricks/databricks-claude-3-7-sonnet', messages=messages)` |
|
||||||
|
| databricks-meta-llama-3-1-70b-instruct | `completion(model='databricks/databricks-meta-llama-3-1-70b-instruct', messages=messages)` |
|
||||||
|
| databricks-meta-llama-3-1-405b-instruct | `completion(model='databricks/databricks-meta-llama-3-1-405b-instruct', messages=messages)` |
|
||||||
|
| databricks-dbrx-instruct | `completion(model='databricks/databricks-dbrx-instruct', messages=messages)` |
|
||||||
|
| databricks-meta-llama-3-70b-instruct | `completion(model='databricks/databricks-meta-llama-3-70b-instruct', messages=messages)` |
|
||||||
|
| databricks-llama-2-70b-chat | `completion(model='databricks/databricks-llama-2-70b-chat', messages=messages)` |
|
||||||
|
| databricks-mixtral-8x7b-instruct | `completion(model='databricks/databricks-mixtral-8x7b-instruct', messages=messages)` |
|
||||||
|
| databricks-mpt-30b-instruct | `completion(model='databricks/databricks-mpt-30b-instruct', messages=messages)` |
|
||||||
|
| databricks-mpt-7b-instruct | `completion(model='databricks/databricks-mpt-7b-instruct', messages=messages)` |
|
||||||
|
|
||||||
|
|
||||||
|
## Embedding Models
|
||||||
|
|
||||||
|
### Passing Databricks specific params - 'instruction'
|
||||||
|
|
||||||
For embedding models, databricks lets you pass in an additional param 'instruction'. [Full Spec](https://github.com/BerriAI/litellm/blob/43353c28b341df0d9992b45c6ce464222ebd7984/litellm/llms/databricks.py#L164)
|
For embedding models, databricks lets you pass in an additional param 'instruction'. [Full Spec](https://github.com/BerriAI/litellm/blob/43353c28b341df0d9992b45c6ce464222ebd7984/litellm/llms/databricks.py#L164)
|
||||||
|
|
||||||
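The surrounding embedding example is elided by this hunk; a minimal sketch of passing `instruction` looks like this (the model name is an example Databricks embedding endpoint).

```python
import os
import litellm

os.environ["DATABRICKS_API_KEY"] = "databricks key"
os.environ["DATABRICKS_API_BASE"] = "databricks base url"

response = litellm.embedding(
    model="databricks/databricks-bge-large-en",  # example embedding endpoint
    input=["good morning from litellm"],
    instruction="Represent this sentence for searching relevant passages:",  # Databricks-specific param
)
print(response)
```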
|
@ -187,27 +385,6 @@ response = litellm.embedding(
|
||||||
instruction: "Represent this sentence for searching relevant passages:"
|
instruction: "Represent this sentence for searching relevant passages:"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
## Supported Databricks Chat Completion Models
|
|
||||||
|
|
||||||
:::tip
|
|
||||||
|
|
||||||
**We support ALL Databricks models, just set `model=databricks/<any-model-on-databricks>` as a prefix when sending litellm requests**
|
|
||||||
|
|
||||||
:::
|
|
||||||
|
|
||||||
|
|
||||||
| Model Name | Command |
|
|
||||||
|----------------------------|------------------------------------------------------------------|
|
|
||||||
| databricks-meta-llama-3-1-70b-instruct | `completion(model='databricks/databricks-meta-llama-3-1-70b-instruct', messages=messages)` |
|
|
||||||
| databricks-meta-llama-3-1-405b-instruct | `completion(model='databricks/databricks-meta-llama-3-1-405b-instruct', messages=messages)` |
|
|
||||||
| databricks-dbrx-instruct | `completion(model='databricks/databricks-dbrx-instruct', messages=messages)` |
|
|
||||||
| databricks-meta-llama-3-70b-instruct | `completion(model='databricks/databricks-meta-llama-3-70b-instruct', messages=messages)` |
|
|
||||||
| databricks-llama-2-70b-chat | `completion(model='databricks/databricks-llama-2-70b-chat', messages=messages)` |
|
|
||||||
| databricks-mixtral-8x7b-instruct | `completion(model='databricks/databricks-mixtral-8x7b-instruct', messages=messages)` |
|
|
||||||
| databricks-mpt-30b-instruct | `completion(model='databricks/databricks-mpt-30b-instruct', messages=messages)` |
|
|
||||||
| databricks-mpt-7b-instruct | `completion(model='databricks/databricks-mpt-7b-instruct', messages=messages)` |
|
|
||||||
|
|
||||||
## Supported Databricks Embedding Models
|
## Supported Databricks Embedding Models
|
||||||
|
|
||||||
:::tip
|
:::tip
|
||||||
|
|
|
@ -365,7 +365,7 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
## Specifying Safety Settings
|
## Specifying Safety Settings
|
||||||
In certain use-cases you may need to make calls to the models and pass [safety settigns](https://ai.google.dev/docs/safety_setting_gemini) different from the defaults. To do so, simple pass the `safety_settings` argument to `completion` or `acompletion`. For example:
|
In certain use-cases you may need to make calls to the models and pass [safety settings](https://ai.google.dev/docs/safety_setting_gemini) different from the defaults. To do so, simply pass the `safety_settings` argument to `completion` or `acompletion`. For example:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
response = completion(
|
response = completion(
|
||||||
|
@ -438,6 +438,179 @@ assert isinstance(
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### Google Search Tool
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
import os
|
||||||
|
|
||||||
|
os.environ["GEMINI_API_KEY"] = ".."
|
||||||
|
|
||||||
|
tools = [{"googleSearch": {}}] # 👈 ADD GOOGLE SEARCH
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="gemini/gemini-2.0-flash",
|
||||||
|
messages=[{"role": "user", "content": "What is the weather in San Francisco?"}],
|
||||||
|
tools=tools,
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: gemini-2.0-flash
|
||||||
|
litellm_params:
|
||||||
|
model: gemini/gemini-2.0-flash
|
||||||
|
api_key: os.environ/GEMINI_API_KEY
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start Proxy
|
||||||
|
```bash
|
||||||
|
$ litellm --config /path/to/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Make Request!
|
||||||
|
```bash
|
||||||
|
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer sk-1234' \
|
||||||
|
-d '{
|
||||||
|
"model": "gemini-2.0-flash",
|
||||||
|
"messages": [{"role": "user", "content": "What is the weather in San Francisco?"}],
|
||||||
|
"tools": [{"googleSearch": {}}]
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
### Google Search Retrieval
|
||||||
|
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
import os
|
||||||
|
|
||||||
|
os.environ["GEMINI_API_KEY"] = ".."
|
||||||
|
|
||||||
|
tools = [{"googleSearchRetrieval": {}}] # 👈 ADD GOOGLE SEARCH
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="gemini/gemini-2.0-flash",
|
||||||
|
messages=[{"role": "user", "content": "What is the weather in San Francisco?"}],
|
||||||
|
tools=tools,
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: gemini-2.0-flash
|
||||||
|
litellm_params:
|
||||||
|
model: gemini/gemini-2.0-flash
|
||||||
|
api_key: os.environ/GEMINI_API_KEY
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start Proxy
|
||||||
|
```bash
|
||||||
|
$ litellm --config /path/to/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Make Request!
|
||||||
|
```bash
|
||||||
|
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer sk-1234' \
|
||||||
|
-d '{
|
||||||
|
"model": "gemini-2.0-flash",
|
||||||
|
"messages": [{"role": "user", "content": "What is the weather in San Francisco?"}],
|
||||||
|
"tools": [{"googleSearchRetrieval": {}}]
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
### Code Execution Tool
|
||||||
|
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
import os
|
||||||
|
|
||||||
|
os.environ["GEMINI_API_KEY"] = ".."
|
||||||
|
|
||||||
|
tools = [{"codeExecution": {}}] # 👈 ADD GOOGLE SEARCH
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="gemini/gemini-2.0-flash",
|
||||||
|
messages=[{"role": "user", "content": "What is the weather in San Francisco?"}],
|
||||||
|
tools=tools,
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: gemini-2.0-flash
|
||||||
|
litellm_params:
|
||||||
|
model: gemini/gemini-2.0-flash
|
||||||
|
api_key: os.environ/GEMINI_API_KEY
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start Proxy
|
||||||
|
```bash
|
||||||
|
$ litellm --config /path/to/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Make Request!
|
||||||
|
```bash
|
||||||
|
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer sk-1234' \
|
||||||
|
-d '{
|
||||||
|
"model": "gemini-2.0-flash",
|
||||||
|
"messages": [{"role": "user", "content": "What is the weather in San Francisco?"}],
|
||||||
|
"tools": [{"codeExecution": {}}]
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## JSON Mode
|
## JSON Mode
|
||||||
|
|
||||||
<Tabs>
|
<Tabs>
|
||||||
|
@ -589,8 +762,10 @@ response = litellm.completion(
|
||||||
"content": [
|
"content": [
|
||||||
{"type": "text", "text": "Please summarize the audio."},
|
{"type": "text", "text": "Please summarize the audio."},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": "data:audio/mp3;base64,{}".format(encoded_data), # 👈 SET MIME_TYPE + DATA
|
"file": {
|
||||||
|
"file_data": "data:audio/mp3;base64,{}".format(encoded_data), # 👈 SET MIME_TYPE + DATA
|
||||||
|
}
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
@ -640,8 +815,11 @@ response = litellm.completion(
|
||||||
"content": [
|
"content": [
|
||||||
{"type": "text", "text": "Please summarize the file."},
|
{"type": "text", "text": "Please summarize the file."},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": "https://storage..." # 👈 SET THE IMG URL
|
"file": {
|
||||||
|
"file_id": "https://storage...", # 👈 SET THE IMG URL
|
||||||
|
"format": "application/pdf" # OPTIONAL
|
||||||
|
}
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
@ -668,8 +846,11 @@ response = litellm.completion(
|
||||||
"content": [
|
"content": [
|
||||||
{"type": "text", "text": "Please summarize the file."},
|
{"type": "text", "text": "Please summarize the file."},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": "gs://..." # 👈 SET THE cloud storage bucket url
|
"file": {
|
||||||
|
"file_id": "gs://storage...", # 👈 SET THE IMG URL
|
||||||
|
"format": "application/pdf" # OPTIONAL
|
||||||
|
}
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
@ -879,3 +1060,54 @@ response = await client.chat.completions.create(
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
## Image Generation
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="gemini/gemini-2.0-flash-exp-image-generation",
|
||||||
|
messages=[{"role": "user", "content": "Generate an image of a cat"}],
|
||||||
|
modalities=["image", "text"],
|
||||||
|
)
|
||||||
|
assert response.choices[0].message.content is not None # "data:image/png;base64,e4rr.."
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: gemini-2.0-flash-exp-image-generation
|
||||||
|
litellm_params:
|
||||||
|
model: gemini/gemini-2.0-flash-exp-image-generation
|
||||||
|
api_key: os.environ/GEMINI_API_KEY
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config /path/to/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://localhost:4000/v1/chat/completions' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer sk-1234' \
|
||||||
|
-d '{
|
||||||
|
"model": "gemini-2.0-flash-exp-image-generation",
|
||||||
|
"messages": [{"role": "user", "content": "Generate an image of a cat"}],
|
||||||
|
"modalities": ["image", "text"]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
docs/my-website/docs/providers/google_ai_studio/files.md
|
@ -0,0 +1,161 @@
|
||||||
|
import Tabs from '@theme/Tabs';
|
||||||
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
|
# [BETA] Google AI Studio (Gemini) Files API
|
||||||
|
|
||||||
|
Use this to upload files to Google AI Studio (Gemini).
|
||||||
|
|
||||||
|
Useful to pass in large media files to Gemini's `/generateContent` endpoint.
|
||||||
|
|
||||||
|
| Action | Supported |
|
||||||
|
|----------|-----------|
|
||||||
|
| `create` | Yes |
|
||||||
|
| `delete` | No |
|
||||||
|
| `retrieve` | No |
|
||||||
|
| `list` | No |
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
import base64
|
||||||
|
import requests
|
||||||
|
from litellm import completion, create_file
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
### UPLOAD FILE ###
|
||||||
|
|
||||||
|
# Fetch the audio file and convert it to a base64 encoded string
|
||||||
|
url = "https://cdn.openai.com/API/docs/audio/alloy.wav"
|
||||||
|
response = requests.get(url)
|
||||||
|
response.raise_for_status()
|
||||||
|
wav_data = response.content
|
||||||
|
encoded_string = base64.b64encode(wav_data).decode('utf-8')
|
||||||
|
|
||||||
|
|
||||||
|
file = create_file(
|
||||||
|
file=wav_data,
|
||||||
|
purpose="user_data",
|
||||||
|
extra_body={"custom_llm_provider": "gemini"},
|
||||||
|
api_key=os.getenv("GEMINI_API_KEY"),
|
||||||
|
)
|
||||||
|
|
||||||
|
print(f"file: {file}")
|
||||||
|
|
||||||
|
assert file is not None
|
||||||
|
|
||||||
|
|
||||||
|
### GENERATE CONTENT ###
|
||||||
|
response = completion(
|
||||||
|
model="gemini-2.0-flash",
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": "What is in this recording?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "file",
|
||||||
|
"file": {
|
||||||
|
"file_id": file.id,
|
||||||
|
"filename": "my-test-name",
|
||||||
|
"format": "audio/wav"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response.choices[0].message)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: "gemini-2.0-flash"
|
||||||
|
litellm_params:
|
||||||
|
model: gemini/gemini-2.0-flash
|
||||||
|
api_key: os.environ/GEMINI_API_KEY
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it
|
||||||
|
|
||||||
|
```python
|
||||||
|
import base64
|
||||||
|
import requests
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
client = OpenAI(
|
||||||
|
base_url="http://0.0.0.0:4000",
|
||||||
|
api_key="sk-1234"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Fetch the audio file and convert it to a base64 encoded string
|
||||||
|
url = "https://cdn.openai.com/API/docs/audio/alloy.wav"
|
||||||
|
response = requests.get(url)
|
||||||
|
response.raise_for_status()
|
||||||
|
wav_data = response.content
|
||||||
|
encoded_string = base64.b64encode(wav_data).decode('utf-8')
|
||||||
|
|
||||||
|
|
||||||
|
file = client.files.create(
|
||||||
|
file=wav_data,
|
||||||
|
purpose="user_data",
|
||||||
|
extra_body={"target_model_names": "gemini-2.0-flash"}
|
||||||
|
)
|
||||||
|
|
||||||
|
print(f"file: {file}")
|
||||||
|
|
||||||
|
assert file is not None
|
||||||
|
|
||||||
|
completion = client.chat.completions.create(
|
||||||
|
model="gemini-2.0-flash",
|
||||||
|
modalities=["text", "audio"],
|
||||||
|
audio={"voice": "alloy", "format": "wav"},
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": "What is in this recording?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "file",
|
||||||
|
"file": {
|
||||||
|
"file_id": file.id,
|
||||||
|
"filename": "my-test-name",
|
||||||
|
"format": "audio/wav"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
],
|
||||||
|
extra_body={"drop_params": True}
|
||||||
|
)
|
||||||
|
|
||||||
|
print(completion.choices[0].message)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
|
@ -2,466 +2,392 @@ import Image from '@theme/IdealImage';
|
||||||
import Tabs from '@theme/Tabs';
|
import Tabs from '@theme/Tabs';
|
||||||
import TabItem from '@theme/TabItem';
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# Huggingface
|
# Hugging Face
|
||||||
|
LiteLLM supports running inference across multiple services for models hosted on the Hugging Face Hub.
|
||||||
|
|
||||||
LiteLLM supports the following types of Hugging Face models:
|
- **Serverless Inference Providers** - Hugging Face offers easy and unified access to serverless AI inference through multiple inference providers, like [Together AI](https://together.ai) and [Sambanova](https://sambanova.ai). This is the fastest way to integrate AI in your products with a maintenance-free and scalable solution. More details in the [Inference Providers documentation](https://huggingface.co/docs/inference-providers/index).
|
||||||
|
- **Dedicated Inference Endpoints** - a product to easily deploy models to production. Inference is run by Hugging Face in a dedicated, fully managed infrastructure on a cloud provider of your choice. You can deploy your model on Hugging Face Inference Endpoints by following [these steps](https://huggingface.co/docs/inference-endpoints/guides/create_endpoint).
|
||||||
|
|
||||||
- Serverless Inference API (free) - loaded and ready to use: https://huggingface.co/models?inference=warm&pipeline_tag=text-generation
|
|
||||||
- Dedicated Inference Endpoints (paid) - manual deployment: https://ui.endpoints.huggingface.co/
|
## Supported Models
|
||||||
- All LLMs served via Hugging Face's Inference use [Text-generation-inference](https://huggingface.co/docs/text-generation-inference).
|
|
||||||
|
### Serverless Inference Providers
|
||||||
|
You can check available models for an inference provider by going to [huggingface.co/models](https://huggingface.co/models), clicking the "Other" filter tab, and selecting your desired provider:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
For example, you can find all Fireworks supported models [here](https://huggingface.co/models?inference_provider=fireworks-ai&sort=trending).
|
||||||
|
|
||||||
|
|
||||||
|
### Dedicated Inference Endpoints
|
||||||
|
Refer to the [Inference Endpoints catalog](https://endpoints.huggingface.co/catalog) for a list of available models.
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/LiteLLM_HuggingFace.ipynb">
|
|
||||||
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
|
||||||
</a>
|
|
||||||
|
|
||||||
You need to tell LiteLLM when you're calling Huggingface.
|
|
||||||
This is done by adding the "huggingface/" prefix to `model`, example `completion(model="huggingface/<model_name>",...)`.
|
|
||||||
|
|
||||||
<Tabs>
|
<Tabs>
|
||||||
<TabItem value="serverless" label="Serverless Inference API">
|
<TabItem value="serverless" label="Serverless Inference Providers">
|
||||||
|
|
||||||
By default, LiteLLM will assume a Hugging Face call follows the [Messages API](https://huggingface.co/docs/text-generation-inference/messages_api), which is fully compatible with the OpenAI Chat Completion API.
|
### Authentication
|
||||||
|
With a single Hugging Face token, you can access inference through multiple providers. Your calls are routed through Hugging Face and the usage is billed directly to your Hugging Face account at the standard provider API rates.
|
||||||
|
|
||||||
<Tabs>
|
Simply set the `HF_TOKEN` environment variable with your Hugging Face token; you can create one here: https://huggingface.co/settings/tokens.
|
||||||
<TabItem value="sdk" label="SDK">
|
|
||||||
|
|
||||||
```python
|
|
||||||
import os
|
|
||||||
from litellm import completion
|
|
||||||
|
|
||||||
# [OPTIONAL] set env var
|
|
||||||
os.environ["HUGGINGFACE_API_KEY"] = "huggingface_api_key"
|
|
||||||
|
|
||||||
messages = [{ "content": "There's a llama in my garden 😱 What should I do?","role": "user"}]
|
|
||||||
|
|
||||||
# e.g. Call 'https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct' from Serverless Inference API
|
|
||||||
response = completion(
|
|
||||||
model="huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
||||||
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
|
||||||
stream=True
|
|
||||||
)
|
|
||||||
|
|
||||||
print(response)
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
<TabItem value="proxy" label="PROXY">
|
|
||||||
|
|
||||||
1. Add models to your config.yaml
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
model_list:
|
|
||||||
- model_name: llama-3.1-8B-instruct
|
|
||||||
litellm_params:
|
|
||||||
model: huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct
|
|
||||||
api_key: os.environ/HUGGINGFACE_API_KEY
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Start the proxy
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
$ litellm --config /path/to/config.yaml --debug
|
export HF_TOKEN="hf_xxxxxx"
|
||||||
|
```
|
||||||
|
Alternatively, you can pass your Hugging Face token as a parameter:
|
||||||
|
```python
|
||||||
|
completion(..., api_key="hf_xxxxxx")
|
||||||
```
|
```
|
||||||
|
|
||||||
3. Test it!
|
### Getting Started
|
||||||
|
|
||||||
```shell
|
To use a Hugging Face model, specify both the provider and model you want to use in the following format:
|
||||||
curl --location 'http://0.0.0.0:4000/chat/completions' \
|
|
||||||
--header 'Authorization: Bearer sk-1234' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--data '{
|
|
||||||
"model": "llama-3.1-8B-instruct",
|
|
||||||
"messages": [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "I like you!"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
}'
|
|
||||||
```
|
```
|
||||||
|
huggingface/<provider>/<hf_org_or_user>/<hf_model>
|
||||||
|
```
|
||||||
|
Where `<hf_org_or_user>/<hf_model>` is the Hugging Face model ID and `<provider>` is the inference provider.
|
||||||
|
By default, if you don't specify a provider, LiteLLM will use the [HF Inference API](https://huggingface.co/docs/api-inference/en/index).
|
||||||
|
|
||||||
</TabItem>
|
Examples:
|
||||||
</Tabs>
|
|
||||||
</TabItem>
|
|
||||||
<TabItem value="classification" label="Text Classification">
|
|
||||||
|
|
||||||
Append `text-classification` to the model name
|
|
||||||
|
|
||||||
e.g. `huggingface/text-classification/<model-name>`
|
|
||||||
|
|
||||||
<Tabs>
|
|
||||||
<TabItem value="sdk" label="SDK">
|
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import os
|
# Run DeepSeek-R1 inference through Together AI
|
||||||
from litellm import completion
|
completion(model="huggingface/together/deepseek-ai/DeepSeek-R1",...)
|
||||||
|
|
||||||
# [OPTIONAL] set env var
|
# Run Qwen2.5-72B-Instruct inference through Sambanova
|
||||||
os.environ["HUGGINGFACE_API_KEY"] = "huggingface_api_key"
|
completion(model="huggingface/sambanova/Qwen/Qwen2.5-72B-Instruct",...)
|
||||||
|
|
||||||
messages = [{ "content": "I like you, I love you!","role": "user"}]
|
# Run Llama-3.3-70B-Instruct inference through HF Inference API
|
||||||
|
completion(model="huggingface/meta-llama/Llama-3.3-70B-Instruct",...)
|
||||||
# e.g. Call 'shahrukhx01/question-vs-statement-classifier' hosted on HF Inference endpoints
|
|
||||||
response = completion(
|
|
||||||
model="huggingface/text-classification/shahrukhx01/question-vs-statement-classifier",
|
|
||||||
messages=messages,
|
|
||||||
api_base="https://my-endpoint.endpoints.huggingface.cloud",
|
|
||||||
)
|
|
||||||
|
|
||||||
print(response)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
<TabItem value="proxy" label="PROXY">
|
|
||||||
|
|
||||||
1. Add models to your config.yaml
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
model_list:
|
|
||||||
- model_name: bert-classifier
|
|
||||||
litellm_params:
|
|
||||||
model: huggingface/text-classification/shahrukhx01/question-vs-statement-classifier
|
|
||||||
api_key: os.environ/HUGGINGFACE_API_KEY
|
|
||||||
api_base: "https://my-endpoint.endpoints.huggingface.cloud"
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Start the proxy
|
|
||||||
|
|
||||||
```bash
|
|
||||||
$ litellm --config /path/to/config.yaml --debug
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Test it!
|
|
||||||
|
|
||||||
```shell
|
|
||||||
curl --location 'http://0.0.0.0:4000/chat/completions' \
|
|
||||||
--header 'Authorization: Bearer sk-1234' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--data '{
|
|
||||||
"model": "bert-classifier",
|
|
||||||
"messages": [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "I like you!"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
</Tabs>
|
|
||||||
</TabItem>
|
|
||||||
<TabItem value="dedicated" label="Dedicated Inference Endpoints">
|
|
||||||
|
|
||||||
Steps to use
|
|
||||||
* Create your own Hugging Face dedicated endpoint here: https://ui.endpoints.huggingface.co/
|
|
||||||
* Set `api_base` to your deployed api base
|
|
||||||
* Add the `huggingface/` prefix to your model so litellm knows it's a huggingface Deployed Inference Endpoint
|
|
||||||
|
|
||||||
<Tabs>
|
|
||||||
<TabItem value="sdk" label="SDK">
|
|
||||||
|
|
||||||
```python
|
|
||||||
import os
|
|
||||||
from litellm import completion
|
|
||||||
|
|
||||||
os.environ["HUGGINGFACE_API_KEY"] = ""
|
|
||||||
|
|
||||||
# TGI model: Call https://huggingface.co/glaiveai/glaive-coder-7b
|
|
||||||
# add the 'huggingface/' prefix to the model to set huggingface as the provider
|
|
||||||
# set api base to your deployed api endpoint from hugging face
|
|
||||||
response = completion(
|
|
||||||
model="huggingface/glaiveai/glaive-coder-7b",
|
|
||||||
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
|
||||||
api_base="https://wjiegasee9bmqke2.us-east-1.aws.endpoints.huggingface.cloud"
|
|
||||||
)
|
|
||||||
print(response)
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
<TabItem value="proxy" label="PROXY">
|
|
||||||
|
|
||||||
1. Add models to your config.yaml
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
model_list:
|
|
||||||
- model_name: glaive-coder
|
|
||||||
litellm_params:
|
|
||||||
model: huggingface/glaiveai/glaive-coder-7b
|
|
||||||
api_key: os.environ/HUGGINGFACE_API_KEY
|
|
||||||
api_base: "https://wjiegasee9bmqke2.us-east-1.aws.endpoints.huggingface.cloud"
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Start the proxy
|
|
||||||
|
|
||||||
```bash
|
|
||||||
$ litellm --config /path/to/config.yaml --debug
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Test it!
|
|
||||||
|
|
||||||
```shell
|
|
||||||
curl --location 'http://0.0.0.0:4000/chat/completions' \
|
|
||||||
--header 'Authorization: Bearer sk-1234' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--data '{
|
|
||||||
"model": "glaive-coder",
|
|
||||||
"messages": [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "I like you!"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
</Tabs>
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
</Tabs>
|
|
||||||
|
|
||||||
## Streaming
|
|
||||||
|
|
||||||
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/LiteLLM_HuggingFace.ipynb">
|
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/LiteLLM_HuggingFace.ipynb">
|
||||||
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
||||||
</a>
|
</a>
|
||||||
|
|
||||||
You need to tell LiteLLM when you're calling Huggingface.
|
### Basic Completion
|
||||||
This is done by adding the "huggingface/" prefix to `model`, example `completion(model="huggingface/<model_name>",...)`.
|
Here's an example of chat completion using the DeepSeek-R1 model through Together AI:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import os
|
import os
|
||||||
from litellm import completion
|
from litellm import completion
|
||||||
|
|
||||||
# [OPTIONAL] set env var
|
os.environ["HF_TOKEN"] = "hf_xxxxxx"
|
||||||
os.environ["HUGGINGFACE_API_KEY"] = "huggingface_api_key"
|
|
||||||
|
|
||||||
messages = [{ "content": "There's a llama in my garden 😱 What should I do?","role": "user"}]
|
|
||||||
|
|
||||||
# e.g. Call 'facebook/blenderbot-400M-distill' hosted on HF Inference endpoints
|
|
||||||
response = completion(
|
response = completion(
|
||||||
model="huggingface/facebook/blenderbot-400M-distill",
|
model="huggingface/together/deepseek-ai/DeepSeek-R1",
|
||||||
messages=messages,
|
messages=[
|
||||||
api_base="https://my-endpoint.huggingface.cloud",
|
{
|
||||||
stream=True
|
"role": "user",
|
||||||
|
"content": "How many r's are in the word 'strawberry'?",
|
||||||
|
}
|
||||||
|
],
|
||||||
|
)
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Streaming
|
||||||
|
Now, let's see what a streaming request looks like.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import os
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
os.environ["HF_TOKEN"] = "hf_xxxxxx"
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="huggingface/together/deepseek-ai/DeepSeek-R1",
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "How many r's are in the word `strawberry`?",
|
||||||
|
|
||||||
|
}
|
||||||
|
],
|
||||||
|
stream=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
print(response)
|
|
||||||
for chunk in response:
|
for chunk in response:
|
||||||
print(chunk)
|
print(chunk)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Image Input
|
||||||
|
You can also pass images when the model supports it. Here is an example using the [Llama-3.2-11B-Vision-Instruct](https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct) model through Sambanova.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
# Set your Hugging Face Token
|
||||||
|
os.environ["HF_TOKEN"] = "hf_xxxxxx"
|
||||||
|
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{"type": "text", "text": "What's in this image?"},
|
||||||
|
{
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {
|
||||||
|
"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
|
||||||
|
}
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="huggingface/sambanova/meta-llama/Llama-3.2-11B-Vision-Instruct",
|
||||||
|
messages=messages,
|
||||||
|
)
|
||||||
|
print(response.choices[0])
|
||||||
|
```
|
||||||
|
|
||||||
|
### Function Calling
|
||||||
|
You can extend the model's capabilities by giving it access to tools. Here is an example of function calling using the [Qwen2.5-72B-Instruct](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct) model through Sambanova.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import os
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
# Set your Hugging Face Token
|
||||||
|
os.environ["HF_TOKEN"] = "hf_xxxxxx"
|
||||||
|
|
||||||
|
tools = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "get_current_weather",
|
||||||
|
"description": "Get the current weather in a given location",
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"location": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The city and state, e.g. San Francisco, CA",
|
||||||
|
},
|
||||||
|
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
|
||||||
|
},
|
||||||
|
"required": ["location"],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "What's the weather like in Boston today?",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="huggingface/sambanova/meta-llama/Llama-3.3-70B-Instruct",
|
||||||
|
messages=messages,
|
||||||
|
tools=tools,
|
||||||
|
tool_choice="auto"
|
||||||
|
)
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="endpoints" label="Inference Endpoints">
|
||||||
|
|
||||||
|
<a target="_blank" href="https://colab.research.google.com/github/BerriAI/litellm/blob/main/cookbook/LiteLLM_HuggingFace.ipynb">
|
||||||
|
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
||||||
|
</a>
|
||||||
|
|
||||||
|
### Basic Completion
|
||||||
|
After you have [deployed your Hugging Face Inference Endpoint](https://endpoints.huggingface.co/new) on dedicated infrastructure, you can run inference on it by providing the endpoint base URL in `api_base`, and indicating `huggingface/tgi` as the model name.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import os
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
os.environ["HF_TOKEN"] = "hf_xxxxxx"
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="huggingface/tgi",
|
||||||
|
messages=[{"content": "Hello, how are you?", "role": "user"}],
|
||||||
|
api_base="https://my-endpoint.endpoints.huggingface.cloud/v1/"
|
||||||
|
)
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Streaming
|
||||||
|
|
||||||
|
```python
|
||||||
|
import os
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
os.environ["HF_TOKEN"] = "hf_xxxxxx"
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="huggingface/tgi",
|
||||||
|
messages=[{"content": "Hello, how are you?", "role": "user"}],
|
||||||
|
api_base="https://my-endpoint.endpoints.huggingface.cloud/v1/",
|
||||||
|
stream=True
|
||||||
|
)
|
||||||
|
|
||||||
|
for chunk in response:
|
||||||
|
print(chunk)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Image Input
|
||||||
|
|
||||||
|
```python
|
||||||
|
import os
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
os.environ["HF_TOKEN"] = "hf_xxxxxx"
|
||||||
|
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{"type": "text", "text": "What's in this image?"},
|
||||||
|
{
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {
|
||||||
|
"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
|
||||||
|
}
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
]
|
||||||
|
response = completion(
|
||||||
|
model="huggingface/tgi",
|
||||||
|
messages=messages,
|
||||||
|
api_base="https://my-endpoint.endpoints.huggingface.cloud/v1/""
|
||||||
|
)
|
||||||
|
print(response.choices[0])
|
||||||
|
```
|
||||||
|
|
||||||
|
### Function Calling
|
||||||
|
|
||||||
|
```python
|
||||||
|
import os
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
os.environ["HF_TOKEN"] = "hf_xxxxxx"
|
||||||
|
|
||||||
|
functions = [{
|
||||||
|
"name": "get_weather",
|
||||||
|
"description": "Get the weather in a given location",
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"location": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The location to get weather for"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["location"]
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="huggingface/tgi",
|
||||||
|
messages=[{"content": "What's the weather like in San Francisco?", "role": "user"}],
|
||||||
|
api_base="https://my-endpoint.endpoints.huggingface.cloud/v1/",
|
||||||
|
functions=functions
|
||||||
|
)
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
## LiteLLM Proxy Server with Hugging Face models
|
||||||
|
You can set up a [LiteLLM Proxy Server](https://docs.litellm.ai/#litellm-proxy-server-llm-gateway) to serve Hugging Face models through any of the supported Inference Providers. Here's how to do it:
|
||||||
|
|
||||||
|
### Step 1. Setup the config file
|
||||||
|
|
||||||
|
In this case, we are configuring a proxy to serve `DeepSeek R1` from Hugging Face, using Together AI as the backend Inference Provider.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: my-r1-model
|
||||||
|
litellm_params:
|
||||||
|
model: huggingface/together/deepseek-ai/DeepSeek-R1
|
||||||
|
api_key: os.environ/HF_TOKEN # ensure you have `HF_TOKEN` in your .env
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 2. Start the server
|
||||||
|
```bash
|
||||||
|
litellm --config /path/to/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 3. Make a request to the server
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="curl" label="curl">
|
||||||
|
|
||||||
|
```shell
|
||||||
|
curl --location 'http://0.0.0.0:4000/chat/completions' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--data '{
|
||||||
|
"model": "my-r1-model",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Hello, how are you?"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="python" label="python">
|
||||||
|
|
||||||
|
```python
|
||||||
|
# pip install openai
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
client = OpenAI(
|
||||||
|
base_url="http://0.0.0.0:4000",
|
||||||
|
api_key="anything",
|
||||||
|
)
|
||||||
|
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model="my-r1-model",
|
||||||
|
messages=[
|
||||||
|
{"role": "user", "content": "Hello, how are you?"}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
## Embedding
|
## Embedding
|
||||||
|
|
||||||
LiteLLM supports Hugging Face's [text-embedding-inference](https://github.com/huggingface/text-embeddings-inference) format.
|
LiteLLM supports Hugging Face's [text-embedding-inference](https://github.com/huggingface/text-embeddings-inference) models as well.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from litellm import embedding
|
from litellm import embedding
|
||||||
import os
|
import os
|
||||||
os.environ['HUGGINGFACE_API_KEY'] = ""
|
os.environ['HF_TOKEN'] = "hf_xxxxxx"
|
||||||
response = embedding(
|
response = embedding(
|
||||||
model='huggingface/microsoft/codebert-base',
|
model='huggingface/microsoft/codebert-base',
|
||||||
input=["good morning from litellm"]
|
input=["good morning from litellm"]
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Advanced
|
|
||||||
|
|
||||||
### Setting API KEYS + API BASE
|
|
||||||
|
|
||||||
If required, you can set the api key and api base in your os environment. [Code for how it's sent](https://github.com/BerriAI/litellm/blob/0100ab2382a0e720c7978fbf662cc6e6920e7e03/litellm/llms/huggingface_restapi.py#L25)
|
|
||||||
|
|
||||||
```python
|
|
||||||
import os
|
|
||||||
os.environ["HUGGINGFACE_API_KEY"] = ""
|
|
||||||
os.environ["HUGGINGFACE_API_BASE"] = ""
|
|
||||||
```
|
|
||||||
|
|
||||||
### Viewing Log probs
|
|
||||||
|
|
||||||
#### Using `decoder_input_details` - OpenAI `echo`
|
|
||||||
|
|
||||||
The `echo` param is supported by OpenAI Completions - Use `litellm.text_completion()` for this
|
|
||||||
|
|
||||||
```python
|
|
||||||
from litellm import text_completion
|
|
||||||
response = text_completion(
|
|
||||||
model="huggingface/bigcode/starcoder",
|
|
||||||
prompt="good morning",
|
|
||||||
max_tokens=10, logprobs=10,
|
|
||||||
echo=True
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Output
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"id": "chatcmpl-3fc71792-c442-4ba1-a611-19dd0ac371ad",
|
|
||||||
"object": "text_completion",
|
|
||||||
"created": 1698801125.936519,
|
|
||||||
"model": "bigcode/starcoder",
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"text": ", I'm going to make you a sand",
|
|
||||||
"index": 0,
|
|
||||||
"logprobs": {
|
|
||||||
"tokens": [
|
|
||||||
"good",
|
|
||||||
" morning",
|
|
||||||
",",
|
|
||||||
" I",
|
|
||||||
"'m",
|
|
||||||
" going",
|
|
||||||
" to",
|
|
||||||
" make",
|
|
||||||
" you",
|
|
||||||
" a",
|
|
||||||
" s",
|
|
||||||
"and"
|
|
||||||
],
|
|
||||||
"token_logprobs": [
|
|
||||||
"None",
|
|
||||||
-14.96875,
|
|
||||||
-2.2285156,
|
|
||||||
-2.734375,
|
|
||||||
-2.0957031,
|
|
||||||
-2.0917969,
|
|
||||||
-0.09429932,
|
|
||||||
-3.1132812,
|
|
||||||
-1.3203125,
|
|
||||||
-1.2304688,
|
|
||||||
-1.6201172,
|
|
||||||
-0.010292053
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"finish_reason": "length"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"usage": {
|
|
||||||
"completion_tokens": 9,
|
|
||||||
"prompt_tokens": 2,
|
|
||||||
"total_tokens": 11
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Models with Prompt Formatting
|
|
||||||
|
|
||||||
For models with special prompt templates (e.g. Llama2), we format the prompt to fit their template.
|
|
||||||
|
|
||||||
#### Models with natively Supported Prompt Templates
|
|
||||||
|
|
||||||
| Model Name | Works for Models | Function Call | Required OS Variables |
|
|
||||||
| ------------------------------------ | ---------------------------------- | ----------------------------------------------------------------------------------------------------------------------- | ----------------------------------- |
|
|
||||||
| mistralai/Mistral-7B-Instruct-v0.1 | mistralai/Mistral-7B-Instruct-v0.1 | `completion(model='huggingface/mistralai/Mistral-7B-Instruct-v0.1', messages=messages, api_base="your_api_endpoint")` | `os.environ['HUGGINGFACE_API_KEY']` |
|
|
||||||
| meta-llama/Llama-2-7b-chat | All meta-llama llama2 chat models | `completion(model='huggingface/meta-llama/Llama-2-7b', messages=messages, api_base="your_api_endpoint")` | `os.environ['HUGGINGFACE_API_KEY']` |
|
|
||||||
| tiiuae/falcon-7b-instruct | All falcon instruct models | `completion(model='huggingface/tiiuae/falcon-7b-instruct', messages=messages, api_base="your_api_endpoint")` | `os.environ['HUGGINGFACE_API_KEY']` |
|
|
||||||
| mosaicml/mpt-7b-chat | All mpt chat models | `completion(model='huggingface/mosaicml/mpt-7b-chat', messages=messages, api_base="your_api_endpoint")` | `os.environ['HUGGINGFACE_API_KEY']` |
|
|
||||||
| codellama/CodeLlama-34b-Instruct-hf | All codellama instruct models | `completion(model='huggingface/codellama/CodeLlama-34b-Instruct-hf', messages=messages, api_base="your_api_endpoint")` | `os.environ['HUGGINGFACE_API_KEY']` |
|
|
||||||
| WizardLM/WizardCoder-Python-34B-V1.0 | All wizardcoder models | `completion(model='huggingface/WizardLM/WizardCoder-Python-34B-V1.0', messages=messages, api_base="your_api_endpoint")` | `os.environ['HUGGINGFACE_API_KEY']` |
|
|
||||||
| Phind/Phind-CodeLlama-34B-v2 | All phind-codellama models | `completion(model='huggingface/Phind/Phind-CodeLlama-34B-v2', messages=messages, api_base="your_api_endpoint")` | `os.environ['HUGGINGFACE_API_KEY']` |
|
|
||||||
|
|
||||||
**What if we don't support a model you need?**
|
|
||||||
You can also specify your own custom prompt formatting, in case we don't have your model covered yet.
|
|
||||||
|
|
||||||
**Does this mean you have to specify a prompt for all models?**
|
|
||||||
No. By default we'll concatenate your message content to make a prompt.
|
|
||||||
|
|
||||||
**Default Prompt Template**
|
|
||||||
|
|
||||||
```python
|
|
||||||
def default_pt(messages):
|
|
||||||
return " ".join(message["content"] for message in messages)
|
|
||||||
```
|
|
||||||
|
|
||||||
[Code for how prompt formats work in LiteLLM](https://github.com/BerriAI/litellm/blob/main/litellm/llms/prompt_templates/factory.py)
|
|
||||||
|
|
||||||
#### Custom prompt templates
|
|
||||||
|
|
||||||
```python
|
|
||||||
import litellm
from litellm import completion

messages = [{"role": "user", "content": "Hey, how's it going?"}]
|
|
||||||
|
|
||||||
# Register your own custom prompt template
|
|
||||||
litellm.register_prompt_template(
|
|
||||||
model="togethercomputer/LLaMA-2-7B-32K",
|
|
||||||
roles={
|
|
||||||
"system": {
|
|
||||||
"pre_message": "[INST] <<SYS>>\n",
|
|
||||||
"post_message": "\n<</SYS>>\n [/INST]\n"
|
|
||||||
},
|
|
||||||
"user": {
|
|
||||||
"pre_message": "[INST] ",
|
|
||||||
"post_message": " [/INST]\n"
|
|
||||||
},
|
|
||||||
"assistant": {
|
|
||||||
"post_message": "\n"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_huggingface_custom_model():
|
|
||||||
model = "huggingface/togethercomputer/LLaMA-2-7B-32K"
|
|
||||||
response = completion(model=model, messages=messages, api_base="https://ecd4sb5n09bo4ei2.us-east-1.aws.endpoints.huggingface.cloud")
|
|
||||||
print(response['choices'][0]['message']['content'])
|
|
||||||
return response
|
|
||||||
|
|
||||||
test_huggingface_custom_model()
|
|
||||||
```
|
|
||||||
|
|
||||||
[Implementation Code](https://github.com/BerriAI/litellm/blob/c0b3da2c14c791a0b755f0b1e5a9ef065951ecbf/litellm/llms/huggingface_restapi.py#L52)
|
|
||||||
|
|
||||||
### Deploying a model on huggingface
|
|
||||||
|
|
||||||
You can use any chat/text model from Hugging Face with the following steps:
|
|
||||||
|
|
||||||
- Copy your model id/url from Huggingface Inference Endpoints
|
|
||||||
- [ ] Go to https://ui.endpoints.huggingface.co/
|
|
||||||
- [ ] Copy the url of the specific model you'd like to use
|
|
||||||
<Image img={require('../../img/hf_inference_endpoint.png')} alt="HF_Dashboard" style={{ maxWidth: '50%', height: 'auto' }}/>
|
|
||||||
- Set it as your model name
|
|
||||||
- Set your HUGGINGFACE_API_KEY as an environment variable
|
|
||||||
|
|
||||||
Need help deploying a model on huggingface? [Check out this guide.](https://huggingface.co/docs/inference-endpoints/guides/create_endpoint)
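Putting the steps together, a minimal sketch (the endpoint URL and model id below are placeholders for your own deployment):

```python
import os
from litellm import completion

os.environ["HUGGINGFACE_API_KEY"] = "hf_xxxxxx"

messages = [{"role": "user", "content": "Hello, how are you?"}]

# placeholder endpoint URL copied from the Inference Endpoints dashboard
response = completion(
    model="huggingface/meta-llama/Llama-2-7b-chat",
    messages=messages,
    api_base="https://my-endpoint.endpoints.huggingface.cloud",
)
print(response.choices[0].message.content)
```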
|
|
||||||
|
|
||||||
### Output
|
|
||||||
|
|
||||||
Same as the OpenAI format, but also includes logprobs. [See the code](https://github.com/BerriAI/litellm/blob/b4b2dbf005142e0a483d46a07a88a19814899403/litellm/llms/huggingface_restapi.py#L115)
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"choices": [
|
|
||||||
{
|
|
||||||
"finish_reason": "stop",
|
|
||||||
"index": 0,
|
|
||||||
"message": {
|
|
||||||
"content": "\ud83d\ude31\n\nComment: @SarahSzabo I'm",
|
|
||||||
"role": "assistant",
|
|
||||||
"logprobs": -22.697942825499993
|
|
||||||
}
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"created": 1693436637.38206,
|
|
||||||
"model": "https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud",
|
|
||||||
"usage": {
|
|
||||||
"prompt_tokens": 14,
|
|
||||||
"completion_tokens": 11,
|
|
||||||
"total_tokens": 25
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
# FAQ
|
# FAQ
|
||||||
|
|
||||||
**Does this support stop sequences?**
|
**How does billing work with Hugging Face Inference Providers?**
|
||||||
|
|
||||||
Yes, we support stop sequences - and you can pass as many as allowed by Hugging Face (or any provider!)
|
> Billing is centralized on your Hugging Face account, no matter which providers you are using. You are billed the standard provider API rates with no additional markup - Hugging Face simply passes through the provider costs. Note that [Hugging Face PRO](https://huggingface.co/subscribe/pro) users get $2 worth of Inference credits every month that can be used across providers.
|
||||||
|
|
||||||
**How do you deal with repetition penalty?**
|
**Do I need to create an account for each Inference Provider?**
|
||||||
|
|
||||||
We map the presence penalty parameter in openai to the repetition penalty parameter on Hugging Face. [See code](https://github.com/BerriAI/litellm/blob/b4b2dbf005142e0a483d46a07a88a19814899403/litellm/utils.py#L757).
|
> No, you don't need to create separate accounts. All requests are routed through Hugging Face, so you only need your HF token. This allows you to easily benchmark different providers and choose the one that best fits your needs.
|
||||||
|
|
||||||
|
**Will more inference providers be supported by Hugging Face in the future?**
|
||||||
|
|
||||||
|
> Yes! New inference providers (and models) are being added gradually.
|
||||||
|
|
||||||
We welcome any suggestions for improving our Hugging Face integration - Create an [issue](https://github.com/BerriAI/litellm/issues/new/choose)/[Join the Discord](https://discord.com/invite/wuPM9dRgDw)!
|
We welcome any suggestions for improving our Hugging Face integration - Create an [issue](https://github.com/BerriAI/litellm/issues/new/choose)/[Join the Discord](https://discord.com/invite/wuPM9dRgDw)!
|
|
@ -325,6 +325,74 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||||
| fine tuned `gpt-3.5-turbo-0613` | `response = completion(model="ft:gpt-3.5-turbo-0613", messages=messages)` |
|
| fine tuned `gpt-3.5-turbo-0613` | `response = completion(model="ft:gpt-3.5-turbo-0613", messages=messages)` |
|
||||||
|
|
||||||
|
|
||||||
|
## OpenAI Audio Transcription
|
||||||
|
|
||||||
|
LiteLLM supports OpenAI Audio Transcription endpoint.
|
||||||
|
|
||||||
|
Supported models:
|
||||||
|
|
||||||
|
| Model Name | Function Call |
|
||||||
|
|---------------------------|-----------------------------------------------------------------|
|
||||||
|
| `whisper-1` | `response = completion(model="whisper-1", file=audio_file)` |
|
||||||
|
| `gpt-4o-transcribe` | `response = completion(model="gpt-4o-transcribe", file=audio_file)` |
|
||||||
|
| `gpt-4o-mini-transcribe` | `response = completion(model="gpt-4o-mini-transcribe", file=audio_file)` |
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import transcription
|
||||||
|
import os
|
||||||
|
|
||||||
|
# set api keys
|
||||||
|
os.environ["OPENAI_API_KEY"] = ""
|
||||||
|
audio_file = open("/path/to/audio.mp3", "rb")
|
||||||
|
|
||||||
|
response = transcription(model="gpt-4o-transcribe", file=audio_file)
|
||||||
|
|
||||||
|
print(f"response: {response}")
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: gpt-4o-transcribe
|
||||||
|
litellm_params:
|
||||||
|
model: gpt-4o-transcribe
|
||||||
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
|
model_info:
|
||||||
|
mode: audio_transcription
|
||||||
|
|
||||||
|
general_settings:
|
||||||
|
master_key: sk-1234
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start the proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl --location 'http://0.0.0.0:8000/v1/audio/transcriptions' \
|
||||||
|
--header 'Authorization: Bearer sk-1234' \
|
||||||
|
--form 'file=@"/Users/krrishdholakia/Downloads/gettysburg.wav"' \
|
||||||
|
--form 'model="gpt-4o-transcribe"'
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Advanced
|
## Advanced
|
||||||
|
|
||||||
### Getting OpenAI API Response Headers
|
### Getting OpenAI API Response Headers
|
||||||
|
|
|
@ -398,6 +398,8 @@ curl http://localhost:4000/v1/chat/completions \
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
You can also use the `enterpriseWebSearch` tool for an [enterprise compliant search](https://cloud.google.com/vertex-ai/generative-ai/docs/grounding/web-grounding-enterprise).
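A minimal sketch of passing it (assuming the tool is passed the same way as LiteLLM's other Vertex grounding tools; the model name below is a placeholder):

```python
from litellm import completion

# assumption: enterpriseWebSearch is passed like the googleSearch grounding tool
response = completion(
    model="vertex_ai/gemini-2.0-flash",
    messages=[{"role": "user", "content": "Who won the 2022 World Cup?"}],
    tools=[{"enterpriseWebSearch": {}}],
)
print(response.choices[0].message.content)
```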
|
||||||
|
|
||||||
#### **Moving from Vertex AI SDK to LiteLLM (GROUNDING)**
|
#### **Moving from Vertex AI SDK to LiteLLM (GROUNDING)**
|
||||||
|
|
||||||
|
|
||||||
|
@ -1369,6 +1371,103 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
## Gemini Pro
|
||||||
|
| Model Name | Function Call |
|
||||||
|
|------------------|--------------------------------------|
|
||||||
|
| gemini-pro | `completion('gemini-pro', messages)`, `completion('vertex_ai/gemini-pro', messages)` |
|
||||||
|
|
||||||
|
## Fine-tuned Models
|
||||||
|
|
||||||
|
You can call fine-tuned Vertex AI Gemini models through LiteLLM
|
||||||
|
|
||||||
|
| Property | Details |
|
||||||
|
|----------|---------|
|
||||||
|
| Provider Route | `vertex_ai/gemini/{MODEL_ID}` |
|
||||||
|
| Vertex Documentation | [Vertex AI - Fine-tuned Gemini Models](https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini-use-supervised-tuning#test_the_tuned_model_with_a_prompt)|
|
||||||
|
| Supported Operations | `/chat/completions`, `/completions`, `/embeddings`, `/images` |
|
||||||
|
|
||||||
|
To use a model that follows the `/gemini` request/response format, simply set the model parameter as
|
||||||
|
|
||||||
|
```python title="Model parameter for calling fine-tuned gemini models"
|
||||||
|
model="vertex_ai/gemini/<your-finetuned-model>"
|
||||||
|
```
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="LiteLLM Python SDK">
|
||||||
|
|
||||||
|
```python showLineNumbers title="Example"
|
||||||
|
import litellm
|
||||||
|
import os
|
||||||
|
|
||||||
|
## set ENV variables
|
||||||
|
os.environ["VERTEXAI_PROJECT"] = "hardy-device-38811"
|
||||||
|
os.environ["VERTEXAI_LOCATION"] = "us-central1"
|
||||||
|
|
||||||
|
response = litellm.completion(
|
||||||
|
model="vertex_ai/gemini/<your-finetuned-model>", # e.g. vertex_ai/gemini/4965075652664360960
|
||||||
|
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="LiteLLM Proxy">
|
||||||
|
|
||||||
|
1. Add Vertex Credentials to your env
|
||||||
|
|
||||||
|
```bash title="Authenticate to Vertex AI"
|
||||||
|
gcloud auth application-default login
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml showLineNumbers title="Add to litellm config"
|
||||||
|
- model_name: finetuned-gemini
|
||||||
|
litellm_params:
|
||||||
|
model: vertex_ai/gemini/<ENDPOINT_ID>
|
||||||
|
vertex_project: <PROJECT_ID>
|
||||||
|
vertex_location: <LOCATION>
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="openai" label="OpenAI Python SDK">
|
||||||
|
|
||||||
|
```python showLineNumbers title="Example request"
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
client = OpenAI(
|
||||||
|
api_key="your-litellm-key",
|
||||||
|
base_url="http://0.0.0.0:4000"
|
||||||
|
)
|
||||||
|
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model="finetuned-gemini",
|
||||||
|
messages=[
|
||||||
|
{"role": "user", "content": "hi"}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="curl" label="curl">
|
||||||
|
|
||||||
|
```bash showLineNumbers title="Example request"
|
||||||
|
curl --location 'https://0.0.0.0:4000/v1/chat/completions' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--header 'Authorization: <LITELLM_KEY>' \
|
||||||
|
--data '{"model": "finetuned-gemini" ,"messages":[{"role": "user", "content":[{"type": "text", "text": "hi"}]}]}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Model Garden
|
## Model Garden
|
||||||
|
|
||||||
:::tip
|
:::tip
|
||||||
|
@ -1479,67 +1578,6 @@ response = completion(
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
## Gemini Pro
|
|
||||||
| Model Name | Function Call |
|
|
||||||
|------------------|--------------------------------------|
|
|
||||||
| gemini-pro | `completion('gemini-pro', messages)`, `completion('vertex_ai/gemini-pro', messages)` |
|
|
||||||
|
|
||||||
## Fine-tuned Models
|
|
||||||
|
|
||||||
Fine tuned models on vertex have a numerical model/endpoint id.
|
|
||||||
|
|
||||||
<Tabs>
|
|
||||||
<TabItem value="sdk" label="SDK">
|
|
||||||
|
|
||||||
```python
|
|
||||||
from litellm import completion
|
|
||||||
import os
|
|
||||||
|
|
||||||
## set ENV variables
|
|
||||||
os.environ["VERTEXAI_PROJECT"] = "hardy-device-38811"
|
|
||||||
os.environ["VERTEXAI_LOCATION"] = "us-central1"
|
|
||||||
|
|
||||||
response = completion(
|
|
||||||
model="vertex_ai/<your-finetuned-model>", # e.g. vertex_ai/4965075652664360960
|
|
||||||
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
|
||||||
base_model="vertex_ai/gemini-1.5-pro" # the base model - used for routing
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
<TabItem value="proxy" label="PROXY">
|
|
||||||
|
|
||||||
1. Add Vertex Credentials to your env
|
|
||||||
|
|
||||||
```bash
|
|
||||||
gcloud auth application-default login
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Setup config.yaml
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
- model_name: finetuned-gemini
|
|
||||||
litellm_params:
|
|
||||||
model: vertex_ai/<ENDPOINT_ID>
|
|
||||||
vertex_project: <PROJECT_ID>
|
|
||||||
vertex_location: <LOCATION>
|
|
||||||
model_info:
|
|
||||||
base_model: vertex_ai/gemini-1.5-pro # IMPORTANT
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Test it!
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl --location 'https://0.0.0.0:4000/v1/chat/completions' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--header 'Authorization: <LITELLM_KEY>' \
|
|
||||||
--data '{"model": "finetuned-gemini" ,"messages":[{"role": "user", "content":[{"type": "text", "text": "hi"}]}]}'
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
</Tabs>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Gemini Pro Vision
|
## Gemini Pro Vision
|
||||||
| Model Name | Function Call |
|
| Model Name | Function Call |
|
||||||
|
@ -1684,23 +1722,25 @@ assert isinstance(
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
## Usage - PDF / Videos / etc. Files
|
## Usage - PDF / Videos / Audio etc. Files
|
||||||
|
|
||||||
Pass any file supported by Vertex AI, through LiteLLM.
|
Pass any file supported by Vertex AI, through LiteLLM.
|
||||||
|
|
||||||
LiteLLM Supports the following image types passed in url
|
LiteLLM Supports the following file types passed in url.
|
||||||
|
|
||||||
|
Using `file` message type for VertexAI is live from v1.65.1+
|
||||||
|
|
||||||
```
|
```
|
||||||
Images with Cloud Storage URIs - gs://cloud-samples-data/generative-ai/image/boats.jpeg
|
Files with Cloud Storage URIs - gs://cloud-samples-data/generative-ai/image/boats.jpeg
|
||||||
Images with direct links - https://storage.googleapis.com/github-repo/img/gemini/intro/landmark3.jpg
|
Files with direct links - https://storage.googleapis.com/github-repo/img/gemini/intro/landmark3.jpg
|
||||||
Videos with Cloud Storage URIs - https://storage.googleapis.com/github-repo/img/gemini/multimodality_usecases_overview/pixel8.mp4
|
Videos with Cloud Storage URIs - https://storage.googleapis.com/github-repo/img/gemini/multimodality_usecases_overview/pixel8.mp4
|
||||||
Base64 Encoded Local Images
|
Base64 Encoded Local Files
|
||||||
```
|
```
|
||||||
|
|
||||||
<Tabs>
|
<Tabs>
|
||||||
<TabItem value="sdk" label="SDK">
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
### **Using `gs://`**
|
### **Using `gs://` or any URL**
|
||||||
```python
|
```python
|
||||||
from litellm import completion
|
from litellm import completion
|
||||||
|
|
||||||
|
@ -1712,8 +1752,11 @@ response = completion(
|
||||||
"content": [
|
"content": [
|
||||||
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
|
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf", # 👈 PDF
|
"file": {
|
||||||
|
"file_id": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf",
|
||||||
|
"format": "application/pdf" # OPTIONAL - specify mime-type
|
||||||
|
}
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
@ -1747,8 +1790,16 @@ response = completion(
|
||||||
"content": [
|
"content": [
|
||||||
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
|
{"type": "text", "text": "You are a very professional document summarization specialist. Please summarize the given document."},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
|
"file": {
|
||||||
|
"file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "audio_input",
|
||||||
|
"audio_input {
|
||||||
|
"audio_input": f"data:audio/mp3;base64,{encoded_file}", # 👈 AUDIO File ('file' message works as too)
|
||||||
|
}
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
@ -1794,8 +1845,11 @@ curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
"text": "You are a very professional document summarization specialist. Please summarize the given document"
|
"text": "You are a very professional document summarization specialist. Please summarize the given document"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf" # 👈 PDF
|
"file": {
|
||||||
|
"file_id": "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf",
|
||||||
|
"format": "application/pdf" # OPTIONAL
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
@ -1822,10 +1876,17 @@ curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
"text": "You are a very professional document summarization specialist. Please summarize the given document"
|
"text": "You are a very professional document summarization specialist. Please summarize the given document"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "file",
|
||||||
"image_url": "data:application/pdf;base64,{encoded_file}" # 👈 PDF
|
"file": {
|
||||||
}
|
"file_data": f"data:application/pdf;base64,{encoded_file}", # 👈 PDF
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "audio_input",
|
||||||
|
"audio_input {
|
||||||
|
"audio_input": f"data:audio/mp3;base64,{encoded_file}", # 👈 AUDIO File ('file' message works as too)
|
||||||
}
|
}
|
||||||
|
},
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
@ -1836,6 +1897,7 @@ curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
## Chat Models
|
## Chat Models
|
||||||
| Model Name | Function Call |
|
| Model Name | Function Call |
|
||||||
|------------------|--------------------------------------|
|
|------------------|--------------------------------------|
|
||||||
|
@ -2044,7 +2106,12 @@ print(response)
|
||||||
|
|
||||||
## **Multi-Modal Embeddings**
|
## **Multi-Modal Embeddings**
|
||||||
|
|
||||||
Usage
|
|
||||||
|
Known Limitations:
|
||||||
|
- Only supports 1 image / video per request
|
||||||
|
- Only supports GCS or base64 encoded images / videos
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
<Tabs>
|
<Tabs>
|
||||||
<TabItem value="sdk" label="SDK">
|
<TabItem value="sdk" label="SDK">
|
||||||
|
@ -2260,6 +2327,115 @@ print(f"Text Embedding: {embeddings.text_embedding}")
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
### Text + Image + Video Embeddings
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
Text + Image
|
||||||
|
|
||||||
|
```python
|
||||||
|
response = await litellm.aembedding(
|
||||||
|
model="vertex_ai/multimodalembedding@001",
|
||||||
|
input=["hey", "gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png"] # will be sent as a gcs image
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
Text + Video
|
||||||
|
|
||||||
|
```python
|
||||||
|
response = await litellm.aembedding(
|
||||||
|
model="vertex_ai/multimodalembedding@001",
|
||||||
|
input=["hey", "gs://my-bucket/embeddings/supermarket-video.mp4"] # will be sent as a gcs image
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
Image + Video
|
||||||
|
|
||||||
|
```python
|
||||||
|
response = await litellm.aembedding(
|
||||||
|
model="vertex_ai/multimodalembedding@001",
|
||||||
|
input=["gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png", "gs://my-bucket/embeddings/supermarket-video.mp4"] # will be sent as a gcs image
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="LiteLLM PROXY (Unified Endpoint)">
|
||||||
|
|
||||||
|
1. Add model to config.yaml
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: multimodalembedding@001
|
||||||
|
litellm_params:
|
||||||
|
model: vertex_ai/multimodalembedding@001
|
||||||
|
vertex_project: "adroit-crow-413218"
|
||||||
|
vertex_location: "us-central1"
|
||||||
|
vertex_credentials: adroit-crow-413218-a956eef1a2a8.json
|
||||||
|
|
||||||
|
litellm_settings:
|
||||||
|
drop_params: True
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start Proxy
|
||||||
|
|
||||||
|
```
|
||||||
|
$ litellm --config /path/to/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Make Request use OpenAI Python SDK, Langchain Python SDK
|
||||||
|
|
||||||
|
|
||||||
|
Text + Image
|
||||||
|
|
||||||
|
```python
|
||||||
|
import openai
|
||||||
|
|
||||||
|
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
|
||||||
|
|
||||||
|
# request sent to model set on litellm proxy, `litellm --model`
|
||||||
|
response = client.embeddings.create(
|
||||||
|
model="multimodalembedding@001",
|
||||||
|
input = ["hey", "gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png"],
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
Text + Video
|
||||||
|
```python
|
||||||
|
import openai
|
||||||
|
|
||||||
|
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
|
||||||
|
|
||||||
|
# request sent to model set on litellm proxy, `litellm --model`
|
||||||
|
response = client.embeddings.create(
|
||||||
|
model="multimodalembedding@001",
|
||||||
|
input = ["hey", "gs://my-bucket/embeddings/supermarket-video.mp4"],
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
Image + Video
|
||||||
|
```python
|
||||||
|
import openai
|
||||||
|
|
||||||
|
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
|
||||||
|
|
||||||
|
# request sent to model set on litellm proxy, `litellm --model`
|
||||||
|
response = client.embeddings.create(
|
||||||
|
model="multimodalembedding@001",
|
||||||
|
input = ["gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png", "gs://my-bucket/embeddings/supermarket-video.mp4"],
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
## **Image Generation Models**
|
## **Image Generation Models**
|
||||||
|
|
||||||
Usage
|
Usage
|
||||||
|
|
|
@ -82,7 +82,7 @@ from litellm import completion
|
||||||
os.environ["XAI_API_KEY"] = "your-api-key"
|
os.environ["XAI_API_KEY"] = "your-api-key"
|
||||||
|
|
||||||
response = completion(
|
response = completion(
|
||||||
model="xai/grok-2-latest",
|
model="xai/grok-2-vision-latest",
|
||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
|
|
|
@ -147,11 +147,16 @@ Some SSO providers require a specific redirect url for login and logout. You can
|
||||||
- Login: `<your-proxy-base-url>/sso/key/generate`
|
- Login: `<your-proxy-base-url>/sso/key/generate`
|
||||||
- Logout: `<your-proxy-base-url>`
|
- Logout: `<your-proxy-base-url>`
|
||||||
|
|
||||||
|
Here's the env var to set the logout url on the proxy
|
||||||
|
```bash
|
||||||
|
PROXY_LOGOUT_URL="https://www.google.com"
|
||||||
|
```
|
||||||
|
|
||||||
#### Step 3. Set `PROXY_BASE_URL` in your .env
|
#### Step 3. Set `PROXY_BASE_URL` in your .env
|
||||||
|
|
||||||
Set this in your .env (so the proxy can set the correct redirect url)
|
Set this in your .env (so the proxy can set the correct redirect url)
|
||||||
```shell
|
```shell
|
||||||
PROXY_BASE_URL=https://litellm-api.up.railway.app/
|
PROXY_BASE_URL=https://litellm-api.up.railway.app
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Step 4. Test flow
|
#### Step 4. Test flow
|
||||||
|
|
|
@ -160,7 +160,7 @@ general_settings:
|
||||||
| database_url | string | The URL for the database connection [Set up Virtual Keys](virtual_keys) |
|
| database_url | string | The URL for the database connection [Set up Virtual Keys](virtual_keys) |
|
||||||
| database_connection_pool_limit | integer | The limit for database connection pool [Setting DB Connection Pool limit](#configure-db-pool-limits--connection-timeouts) |
|
| database_connection_pool_limit | integer | The limit for database connection pool [Setting DB Connection Pool limit](#configure-db-pool-limits--connection-timeouts) |
|
||||||
| database_connection_timeout | integer | The timeout for database connections in seconds [Setting DB Connection Pool limit, timeout](#configure-db-pool-limits--connection-timeouts) |
|
| database_connection_timeout | integer | The timeout for database connections in seconds [Setting DB Connection Pool limit, timeout](#configure-db-pool-limits--connection-timeouts) |
|
||||||
| allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if DB is unreachable. **Only use this if running LiteLLM in your VPC** This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key |
|
| allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if DB is unreachable. **Only use this if running LiteLLM in your VPC** This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key [Doc on graceful db unavailability](prod#5-if-running-litellm-on-vpc-gracefully-handle-db-unavailability) |
|
||||||
| custom_auth | string | Write your own custom authentication logic [Doc Custom Auth](virtual_keys#custom-auth) |
|
| custom_auth | string | Write your own custom authentication logic [Doc Custom Auth](virtual_keys#custom-auth) |
|
||||||
| max_parallel_requests | integer | The max parallel requests allowed per deployment |
|
| max_parallel_requests | integer | The max parallel requests allowed per deployment |
|
||||||
| global_max_parallel_requests | integer | The max parallel requests allowed on the proxy overall |
|
| global_max_parallel_requests | integer | The max parallel requests allowed on the proxy overall |
|
||||||
|
@ -406,6 +406,7 @@ router_settings:
|
||||||
| HELICONE_API_KEY | API key for Helicone service
|
| HELICONE_API_KEY | API key for Helicone service
|
||||||
| HOSTNAME | Hostname for the server, this will be [emitted to `datadog` logs](https://docs.litellm.ai/docs/proxy/logging#datadog)
|
| HOSTNAME | Hostname for the server, this will be [emitted to `datadog` logs](https://docs.litellm.ai/docs/proxy/logging#datadog)
|
||||||
| HUGGINGFACE_API_BASE | Base URL for Hugging Face API
|
| HUGGINGFACE_API_BASE | Base URL for Hugging Face API
|
||||||
|
| HUGGINGFACE_API_KEY | API key for Hugging Face API
|
||||||
| IAM_TOKEN_DB_AUTH | IAM token for database authentication
|
| IAM_TOKEN_DB_AUTH | IAM token for database authentication
|
||||||
| JSON_LOGS | Enable JSON formatted logging
|
| JSON_LOGS | Enable JSON formatted logging
|
||||||
| JWT_AUDIENCE | Expected audience for JWT tokens
|
| JWT_AUDIENCE | Expected audience for JWT tokens
|
||||||
|
@ -448,6 +449,7 @@ router_settings:
|
||||||
| MICROSOFT_CLIENT_ID | Client ID for Microsoft services
|
| MICROSOFT_CLIENT_ID | Client ID for Microsoft services
|
||||||
| MICROSOFT_CLIENT_SECRET | Client secret for Microsoft services
|
| MICROSOFT_CLIENT_SECRET | Client secret for Microsoft services
|
||||||
| MICROSOFT_TENANT | Tenant ID for Microsoft Azure
|
| MICROSOFT_TENANT | Tenant ID for Microsoft Azure
|
||||||
|
| MICROSOFT_SERVICE_PRINCIPAL_ID | Service Principal ID for Microsoft Enterprise Application. (This is an advanced feature if you want litellm to auto-assign members to Litellm Teams based on their Microsoft Entra ID Groups)
|
||||||
| NO_DOCS | Flag to disable documentation generation
|
| NO_DOCS | Flag to disable documentation generation
|
||||||
| NO_PROXY | List of addresses to bypass proxy
|
| NO_PROXY | List of addresses to bypass proxy
|
||||||
| OAUTH_TOKEN_INFO_ENDPOINT | Endpoint for OAuth token info retrieval
|
| OAUTH_TOKEN_INFO_ENDPOINT | Endpoint for OAuth token info retrieval
|
||||||
|
@ -479,7 +481,7 @@ router_settings:
|
||||||
| PROXY_ADMIN_ID | Admin identifier for proxy server
|
| PROXY_ADMIN_ID | Admin identifier for proxy server
|
||||||
| PROXY_BASE_URL | Base URL for proxy service
|
| PROXY_BASE_URL | Base URL for proxy service
|
||||||
| PROXY_LOGOUT_URL | URL for logging out of the proxy service
|
| PROXY_LOGOUT_URL | URL for logging out of the proxy service
|
||||||
| PROXY_MASTER_KEY | Master key for proxy authentication
|
| LITELLM_MASTER_KEY | Master key for proxy authentication
|
||||||
| QDRANT_API_BASE | Base URL for Qdrant API
|
| QDRANT_API_BASE | Base URL for Qdrant API
|
||||||
| QDRANT_API_KEY | API key for Qdrant service
|
| QDRANT_API_KEY | API key for Qdrant service
|
||||||
| QDRANT_URL | Connection URL for Qdrant database
|
| QDRANT_URL | Connection URL for Qdrant database
|
||||||
|
@ -515,4 +517,5 @@ router_settings:
|
||||||
| UPSTREAM_LANGFUSE_RELEASE | Release version identifier for upstream Langfuse
|
| UPSTREAM_LANGFUSE_RELEASE | Release version identifier for upstream Langfuse
|
||||||
| UPSTREAM_LANGFUSE_SECRET_KEY | Secret key for upstream Langfuse authentication
|
| UPSTREAM_LANGFUSE_SECRET_KEY | Secret key for upstream Langfuse authentication
|
||||||
| USE_AWS_KMS | Flag to enable AWS Key Management Service for encryption
|
| USE_AWS_KMS | Flag to enable AWS Key Management Service for encryption
|
||||||
|
| USE_PRISMA_MIGRATE | Flag to use prisma migrate instead of prisma db push. Recommended for production environments.
|
||||||
| WEBHOOK_URL | URL for receiving webhooks from external services
|
| WEBHOOK_URL | URL for receiving webhooks from external services
|
||||||
|
|
|
@ -6,6 +6,8 @@ import Image from '@theme/IdealImage';
|
||||||
|
|
||||||
Track spend for keys, users, and teams across 100+ LLMs.
|
Track spend for keys, users, and teams across 100+ LLMs.
|
||||||
|
|
||||||
|
LiteLLM automatically tracks spend for all known models. See our [model cost map](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json)
|
||||||
|
|
||||||
### How to Track Spend with LiteLLM
|
### How to Track Spend with LiteLLM
|
||||||
|
|
||||||
**Step 1**
|
**Step 1**
|
||||||
|
@ -35,10 +37,10 @@ response = client.chat.completions.create(
|
||||||
"content": "this is a test request, write a short poem"
|
"content": "this is a test request, write a short poem"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
user="palantir",
|
user="palantir", # OPTIONAL: pass user to track spend by user
|
||||||
extra_body={
|
extra_body={
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"]
|
"tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"] # ENTERPRISE: pass tags to track spend by tags
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
@ -63,9 +65,9 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
|
||||||
"content": "what llm are you"
|
"content": "what llm are you"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"user": "palantir",
|
"user": "palantir", # OPTIONAL: pass user to track spend by user
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"]
|
"tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"] # ENTERPRISE: pass tags to track spend by tags
|
||||||
}
|
}
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
@ -90,7 +92,7 @@ chat = ChatOpenAI(
|
||||||
user="palantir",
|
user="palantir",
|
||||||
extra_body={
|
extra_body={
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"]
|
"tags": ["jobID:214590dsff09fds", "taskName:run_page_classification"] # ENTERPRISE: pass tags to track spend by tags
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
@ -150,8 +152,134 @@ Navigate to the Usage Tab on the LiteLLM UI (found on https://your-proxy-endpoin
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
## ✨ (Enterprise) API Endpoints to get Spend
|
### Allowing Non-Proxy Admins to access `/spend` endpoints
|
||||||
### Getting Spend Reports - To Charge Other Teams, Customers, Users
|
|
||||||
|
Use this when you want non-proxy admins to access `/spend` endpoints
|
||||||
|
|
||||||
|
:::info
|
||||||
|
|
||||||
|
Schedule a [meeting with us to get your Enterprise License](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
##### Create Key
|
||||||
|
Create Key with `permissions={"get_spend_routes": true}`
|
||||||
|
```shell
|
||||||
|
curl --location 'http://0.0.0.0:4000/key/generate' \
|
||||||
|
--header 'Authorization: Bearer sk-1234' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--data '{
|
||||||
|
"permissions": {"get_spend_routes": true}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Use generated key on `/spend` endpoints
|
||||||
|
|
||||||
|
Access spend routes with the newly generated key
|
||||||
|
```shell
|
||||||
|
curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30' \
|
||||||
|
-H 'Authorization: Bearer sk-H16BKvrSNConSsBYLGc_7A'
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#### Reset Team, API Key Spend - MASTER KEY ONLY
|
||||||
|
|
||||||
|
Use `/global/spend/reset` if you want to:
|
||||||
|
- Reset the Spend for all API Keys, Teams. The `spend` for ALL Teams and Keys in `LiteLLM_TeamTable` and `LiteLLM_VerificationToken` will be set to `spend=0`
|
||||||
|
|
||||||
|
- LiteLLM will maintain all the logs in `LiteLLMSpendLogs` for Auditing Purposes
|
||||||
|
|
||||||
|
##### Request
|
||||||
|
Only the `LITELLM_MASTER_KEY` you set can access this route
|
||||||
|
```shell
|
||||||
|
curl -X POST \
|
||||||
|
'http://localhost:4000/global/spend/reset' \
|
||||||
|
-H 'Authorization: Bearer sk-1234' \
|
||||||
|
-H 'Content-Type: application/json'
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Expected Responses
|
||||||
|
|
||||||
|
```shell
|
||||||
|
{"message":"Spend for all API Keys and Teams reset successfully","status":"success"}
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Set 'base_model' for Cost Tracking (e.g. Azure deployments)
|
||||||
|
|
||||||
|
**Problem**: Azure returns `gpt-4` in the response when `azure/gpt-4-1106-preview` is used. This leads to inaccurate cost tracking
|
||||||
|
|
||||||
|
**Solution** ✅ : Set `base_model` on your config so litellm uses the correct model for calculating azure cost
|
||||||
|
|
||||||
|
Get the base model name from [here](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json)
|
||||||
|
|
||||||
|
Example config with `base_model`
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: azure-gpt-3.5
|
||||||
|
litellm_params:
|
||||||
|
model: azure/chatgpt-v-2
|
||||||
|
api_base: os.environ/AZURE_API_BASE
|
||||||
|
api_key: os.environ/AZURE_API_KEY
|
||||||
|
api_version: "2023-07-01-preview"
|
||||||
|
model_info:
|
||||||
|
base_model: azure/gpt-4-1106-preview
|
||||||
|
```
|
||||||
|
|
||||||
|
## Daily Spend Breakdown API
|
||||||
|
|
||||||
|
Retrieve granular daily usage data for a user (by model, provider, and API key) with a single endpoint.
|
||||||
|
|
||||||
|
Example Request:
|
||||||
|
|
||||||
|
```shell title="Daily Spend Breakdown API" showLineNumbers
|
||||||
|
curl -L -X GET 'http://localhost:4000/user/daily/activity?start_date=2025-03-20&end_date=2025-03-27' \
|
||||||
|
-H 'Authorization: Bearer sk-...'
|
||||||
|
```
|
||||||
|
|
||||||
|
```json title="Daily Spend Breakdown API Response" showLineNumbers
|
||||||
|
{
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"date": "2025-03-27",
|
||||||
|
"metrics": {
|
||||||
|
"spend": 0.0177072,
|
||||||
|
"prompt_tokens": 111,
|
||||||
|
"completion_tokens": 1711,
|
||||||
|
"total_tokens": 1822,
|
||||||
|
"api_requests": 11
|
||||||
|
},
|
||||||
|
"breakdown": {
|
||||||
|
"models": {
|
||||||
|
"gpt-4o-mini": {
|
||||||
|
"spend": 1.095e-05,
|
||||||
|
"prompt_tokens": 37,
|
||||||
|
"completion_tokens": 9,
|
||||||
|
"total_tokens": 46,
|
||||||
|
"api_requests": 1
|
||||||
|
},
|
||||||
|
"providers": { "openai": { ... }, "azure_ai": { ... } },
|
||||||
|
"api_keys": { "3126b6eaf1...": { ... } }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"total_spend": 0.7274667,
|
||||||
|
"total_prompt_tokens": 280990,
|
||||||
|
"total_completion_tokens": 376674,
|
||||||
|
"total_api_requests": 14
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### API Reference
|
||||||
|
|
||||||
|
See our [Swagger API](https://litellm-api.up.railway.app/#/Budget%20%26%20Spend%20Tracking/get_user_daily_activity_user_daily_activity_get) for more details on the `/user/daily/activity` endpoint
|
||||||
|
|
||||||
|
## ✨ (Enterprise) Generate Spend Reports
|
||||||
|
|
||||||
|
Use this to charge other teams, customers, users
|
||||||
|
|
||||||
Use the `/global/spend/report` endpoint to get spend reports
|
Use the `/global/spend/report` endpoint to get spend reports
|
||||||
|
|
||||||
|
@ -470,105 +598,6 @@ curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end
|
||||||
|
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
### Allowing Non-Proxy Admins to access `/spend` endpoints
|
|
||||||
|
|
||||||
Use this when you want non-proxy admins to access `/spend` endpoints
|
|
||||||
|
|
||||||
:::info
|
|
||||||
|
|
||||||
Schedule a [meeting with us to get your Enterprise License](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)
|
|
||||||
|
|
||||||
:::
|
|
||||||
|
|
||||||
##### Create Key
|
|
||||||
Create Key with `permissions={"get_spend_routes": true}`
|
|
||||||
```shell
|
|
||||||
curl --location 'http://0.0.0.0:4000/key/generate' \
|
|
||||||
--header 'Authorization: Bearer sk-1234' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--data '{
|
|
||||||
"permissions": {"get_spend_routes": true}
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
##### Use generated key on `/spend` endpoints
|
|
||||||
|
|
||||||
Access spend routes with the newly generated key
|
|
||||||
```shell
|
|
||||||
curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30' \
|
|
||||||
-H 'Authorization: Bearer sk-H16BKvrSNConSsBYLGc_7A'
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#### Reset Team, API Key Spend - MASTER KEY ONLY
|
|
||||||
|
|
||||||
Use `/global/spend/reset` if you want to:
|
|
||||||
- Reset the Spend for all API Keys, Teams. The `spend` for ALL Teams and Keys in `LiteLLM_TeamTable` and `LiteLLM_VerificationToken` will be set to `spend=0`
|
|
||||||
|
|
||||||
- LiteLLM will maintain all the logs in `LiteLLMSpendLogs` for Auditing Purposes
|
|
||||||
|
|
||||||
##### Request
|
|
||||||
Only the `LITELLM_MASTER_KEY` you set can access this route
|
|
||||||
```shell
|
|
||||||
curl -X POST \
|
|
||||||
'http://localhost:4000/global/spend/reset' \
|
|
||||||
-H 'Authorization: Bearer sk-1234' \
|
|
||||||
-H 'Content-Type: application/json'
|
|
||||||
```
|
|
||||||
|
|
||||||
##### Expected Responses
|
|
||||||
|
|
||||||
```shell
|
|
||||||
{"message":"Spend for all API Keys and Teams reset successfully","status":"success"}
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Spend Tracking for Azure OpenAI Models
|
|
||||||
|
|
||||||
Set base model for cost tracking azure image-gen call
|
|
||||||
|
|
||||||
#### Image Generation
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
model_list:
|
|
||||||
- model_name: dall-e-3
|
|
||||||
litellm_params:
|
|
||||||
model: azure/dall-e-3-test
|
|
||||||
api_version: 2023-06-01-preview
|
|
||||||
api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
|
|
||||||
api_key: os.environ/AZURE_API_KEY
|
|
||||||
base_model: dall-e-3 # 👈 set dall-e-3 as base model
|
|
||||||
model_info:
|
|
||||||
mode: image_generation
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Chat Completions / Embeddings
|
|
||||||
|
|
||||||
**Problem**: Azure returns `gpt-4` in the response when `azure/gpt-4-1106-preview` is used. This leads to inaccurate cost tracking
|
|
||||||
|
|
||||||
**Solution** ✅ : Set `base_model` on your config so litellm uses the correct model for calculating azure cost
|
|
||||||
|
|
||||||
Get the base model name from [here](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json)
|
|
||||||
|
|
||||||
Example config with `base_model`
|
|
||||||
```yaml
|
|
||||||
model_list:
|
|
||||||
- model_name: azure-gpt-3.5
|
|
||||||
litellm_params:
|
|
||||||
model: azure/chatgpt-v-2
|
|
||||||
api_base: os.environ/AZURE_API_BASE
|
|
||||||
api_key: os.environ/AZURE_API_KEY
|
|
||||||
api_version: "2023-07-01-preview"
|
|
||||||
model_info:
|
|
||||||
base_model: azure/gpt-4-1106-preview
|
|
||||||
```
|
|
||||||
|
|
||||||
## Custom Input/Output Pricing
|
|
||||||
|
|
||||||
👉 Head to [Custom Input/Output Pricing](https://docs.litellm.ai/docs/proxy/custom_pricing) to setup custom pricing or your models
|
|
||||||
|
|
||||||
## ✨ Custom Spend Log metadata
|
## ✨ Custom Spend Log metadata
|
||||||
|
|
||||||
|
@ -588,3 +617,4 @@ Logging specific key,value pairs in spend logs metadata is an enterprise feature
|
||||||
Tracking spend with Custom tags is an enterprise feature. [See here](./enterprise.md#tracking-spend-for-custom-tags)
|
Tracking spend with Custom tags is an enterprise feature. [See here](./enterprise.md#tracking-spend-for-custom-tags)
|
||||||
|
|
||||||
:::
|
:::
|
||||||
|
|
||||||
|
|
|
@ -26,10 +26,12 @@ model_list:
|
||||||
- model_name: sagemaker-completion-model
|
- model_name: sagemaker-completion-model
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4
|
model: sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4
|
||||||
|
model_info:
|
||||||
input_cost_per_second: 0.000420
|
input_cost_per_second: 0.000420
|
||||||
- model_name: sagemaker-embedding-model
|
- model_name: sagemaker-embedding-model
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: sagemaker/berri-benchmarking-gpt-j-6b-fp16
|
model: sagemaker/berri-benchmarking-gpt-j-6b-fp16
|
||||||
|
model_info:
|
||||||
input_cost_per_second: 0.000420
|
input_cost_per_second: 0.000420
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -55,11 +57,33 @@ model_list:
|
||||||
api_key: os.environ/AZURE_API_KEY
|
api_key: os.environ/AZURE_API_KEY
|
||||||
api_base: os.environ/AZURE_API_BASE
|
api_base: os.environ/AZURE_API_BASE
|
||||||
api_version: os.environ/AZURE_API_VERSION
|
api_version: os.environ/AZURE_API_VERSION
|
||||||
|
model_info:
|
||||||
input_cost_per_token: 0.000421 # 👈 ONLY to track cost per token
|
input_cost_per_token: 0.000421 # 👈 ONLY to track cost per token
|
||||||
output_cost_per_token: 0.000520 # 👈 ONLY to track cost per token
|
output_cost_per_token: 0.000520 # 👈 ONLY to track cost per token
|
||||||
```
|
```
|
||||||
|
|
||||||
### Debugging
|
## Override Model Cost Map
|
||||||
|
|
||||||
|
You can override [our model cost map](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json) with your own custom pricing for a mapped model.
|
||||||
|
|
||||||
|
Just add a `model_info` key to your model in the config, and override the desired keys.
|
||||||
|
|
||||||
|
Example: Override Anthropic's model cost map for the `prod/claude-3-5-sonnet-20241022` model.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: "prod/claude-3-5-sonnet-20241022"
|
||||||
|
litellm_params:
|
||||||
|
model: "anthropic/claude-3-5-sonnet-20241022"
|
||||||
|
api_key: os.environ/ANTHROPIC_PROD_API_KEY
|
||||||
|
model_info:
|
||||||
|
input_cost_per_token: 0.000006
|
||||||
|
output_cost_per_token: 0.00003
|
||||||
|
cache_creation_input_token_cost: 0.0000075
|
||||||
|
cache_read_input_token_cost: 0.0000006
|
||||||
|
```
|
||||||
|
|
||||||
|
## Debugging
|
||||||
|
|
||||||
If your custom pricing is not being used or you're seeing errors, please check the following:
|
If your custom pricing is not being used or you're seeing errors, please check the following:
|
||||||
|
|
||||||
|
|
86
docs/my-website/docs/proxy/db_deadlocks.md
Normal file
|
@ -0,0 +1,86 @@
|
||||||
|
import Image from '@theme/IdealImage';
|
||||||
|
import Tabs from '@theme/Tabs';
|
||||||
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
|
# High Availability Setup (Resolve DB Deadlocks)
|
||||||
|
|
||||||
|
Resolve any Database Deadlocks you see in high traffic by using this setup
|
||||||
|
|
||||||
|
## What causes the problem?
|
||||||
|
|
||||||
|
LiteLLM writes `UPDATE` and `UPSERT` queries to the DB. When using 10+ instances of LiteLLM, these queries can cause deadlocks since each instance could simultaneously attempt to update the same `user_id`, `team_id`, `key` etc.
|
||||||
|
|
||||||
|
## How the high availability setup fixes the problem
|
||||||
|
- All instances will write to a Redis queue instead of the DB.
|
||||||
|
- A single instance will acquire a lock on the DB and flush the redis queue to the DB.
|
||||||
|
|
||||||
|
|
||||||
|
## How it works
|
||||||
|
|
||||||
|
### Stage 1. Each instance writes updates to redis
|
||||||
|
|
||||||
|
Each instance will accumulate the spend updates for a key, user, team, etc. and write the updates to a redis queue.
|
||||||
|
|
||||||
|
<Image img={require('../../img/deadlock_fix_1.png')} style={{ width: '900px', height: 'auto' }} />
|
||||||
|
<p style={{textAlign: 'left', color: '#666'}}>
|
||||||
|
Each instance writes updates to redis
|
||||||
|
</p>
|
||||||
|
|
||||||
|
|
||||||
|
### Stage 2. A single instance flushes the redis queue to the DB
|
||||||
|
|
||||||
|
A single instance will acquire a lock on the DB and flush all elements in the redis queue to the DB.
|
||||||
|
|
||||||
|
- 1 instance will attempt to acquire the lock for the DB update job
|
||||||
|
- The status of the lock is stored in redis
|
||||||
|
- If the instance acquires the lock to write to DB
|
||||||
|
- It will read all updates from redis
|
||||||
|
- Aggregate all updates into 1 transaction
|
||||||
|
- Write updates to DB
|
||||||
|
- Release the lock
|
||||||
|
- Note: Only 1 instance can acquire the lock at a time; this limits the number of instances that can write to the DB at once
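Conceptually, the flush job looks something like the sketch below: a simplified illustration of the lock-then-flush pattern, not LiteLLM's actual implementation (the `db` handle, key names, and pod id are hypothetical):

```python
import json
import redis

r = redis.Redis(host="localhost", port=6379)

def flush_spend_updates(db):
    # Try to become the single DB writer; the lock expires so a crashed pod can't hold it forever
    if not r.set("litellm:spend_update_lock", "this-pod-id", nx=True, ex=60):
        return  # another instance currently holds the lock

    try:
        # Read every queued update and aggregate them into one value per key
        raw_updates = r.lrange("litellm:spend_update_queue", 0, -1)
        aggregated = {}
        for raw in raw_updates:
            update = json.loads(raw)
            aggregated[update["key"]] = aggregated.get(update["key"], 0.0) + update["spend"]

        # Write all aggregated updates in a single transaction, then clear the queue
        with db.transaction():  # hypothetical DB handle
            for key, spend in aggregated.items():
                db.increment_spend(key, spend)
        r.delete("litellm:spend_update_queue")
    finally:
        r.delete("litellm:spend_update_lock")
```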
|
||||||
|
|
||||||
|
|
||||||
|
<Image img={require('../../img/deadlock_fix_2.png')} style={{ width: '900px', height: 'auto' }} />
|
||||||
|
<p style={{textAlign: 'left', color: '#666'}}>
|
||||||
|
A single instance flushes the redis queue to the DB
|
||||||
|
</p>
|
||||||
|
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
### Required components
|
||||||
|
|
||||||
|
- Redis
|
||||||
|
- Postgres
|
||||||
|
|
||||||
|
### Setup on LiteLLM config
|
||||||
|
|
||||||
|
You can enable the redis buffer by setting `use_redis_transaction_buffer: true` in the `general_settings` section of your `proxy_config.yaml` file.
|
||||||
|
|
||||||
|
Note: This setup requires litellm to be connected to a redis instance.
|
||||||
|
|
||||||
|
```yaml showLineNumbers title="litellm proxy_config.yaml"
|
||||||
|
general_settings:
|
||||||
|
use_redis_transaction_buffer: true
|
||||||
|
|
||||||
|
litellm_settings:
|
||||||
|
cache: True
|
||||||
|
cache_params:
|
||||||
|
type: redis
|
||||||
|
supported_call_types: [] # Optional: Set cache for proxy, but not on the actual llm api call
|
||||||
|
```
|
||||||
|
|
||||||
|
## Monitoring
|
||||||
|
|
||||||
|
LiteLLM emits the following prometheus metrics to monitor the health/status of the in memory buffer and redis buffer.
|
||||||
|
|
||||||
|
|
||||||
|
| Metric Name | Description | Storage Type |
|
||||||
|
|-----------------------------------------------------|-----------------------------------------------------------------------------|--------------|
|
||||||
|
| `litellm_pod_lock_manager_size` | Indicates which pod has the lock to write updates to the database. | Redis |
|
||||||
|
| `litellm_in_memory_daily_spend_update_queue_size` | Number of items in the in-memory daily spend update queue. These are the aggregate spend logs for each user. | In-Memory |
|
||||||
|
| `litellm_redis_daily_spend_update_queue_size` | Number of items in the Redis daily spend update queue. These are the aggregate spend logs for each user. | Redis |
|
||||||
|
| `litellm_in_memory_spend_update_queue_size` | In-memory aggregate spend values for keys, users, teams, team members, etc.| In-Memory |
|
||||||
|
| `litellm_redis_spend_update_queue_size` | Redis aggregate spend values for keys, users, teams, etc. | Redis |
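To spot-check these values, you can scrape the proxy's Prometheus endpoint directly (a sketch; assumes the proxy runs locally on port 4000 and Prometheus metrics are enabled for your deployment):

```shell
curl -s 'http://localhost:4000/metrics' | grep -E 'litellm_(pod_lock_manager_size|redis_spend_update_queue_size)'
```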
|
||||||
|
|
|
@ -23,6 +23,12 @@ In the newly created guard's page, you can find a reference to the prompt policy
|
||||||
|
|
||||||
You can decide which detections will be enabled, and set the threshold for each detection.
|
You can decide which detections will be enabled, and set the threshold for each detection.
|
||||||
|
|
||||||
|
:::info
|
||||||
|
When using LiteLLM with virtual keys, key-specific policies can be set directly in Aim's guards page by specifying the virtual key alias when creating the guard.
|
||||||
|
|
||||||
|
Only the aliases of your virtual keys (and not the actual key secrets) will be sent to Aim.
|
||||||
|
:::
|
||||||
|
|
||||||
### 3. Add Aim Guardrail on your LiteLLM config.yaml
|
### 3. Add Aim Guardrail on your LiteLLM config.yaml
|
||||||
|
|
||||||
Define your guardrails under the `guardrails` section
|
Define your guardrails under the `guardrails` section
|
||||||
|
@ -134,7 +140,7 @@ The above request should not be blocked, and you should receive a regular LLM re
|
||||||
|
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
# Advanced
|
## Advanced
|
||||||
|
|
||||||
Aim Guard provides user-specific Guardrail policies, enabling you to apply tailored policies to individual users.
|
Aim Guard provides user-specific Guardrail policies, enabling you to apply tailored policies to individual users.
|
||||||
To utilize this feature, include the end-user's email in the request by setting the `x-aim-user-email` header of your request.
|
To utilize this feature, include the end-user's email in the request by setting the `x-aim-user-email` header of your request.
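For example, with the OpenAI Python SDK pointed at the LiteLLM proxy, the header can be passed via `extra_headers`. A minimal sketch; the base URL, virtual key, model name, and email below are placeholders:

```python
from openai import OpenAI

# Point the OpenAI SDK at the LiteLLM proxy (placeholder URL and virtual key).
client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hi, how are you?"}],
    # Aim applies the user-specific guardrail policy for this end-user.
    extra_headers={"x-aim-user-email": "user@example.com"},
)
print(response.choices[0].message.content)
```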
|
||||||
|
|
|
@ -17,6 +17,14 @@ model_list:
|
||||||
api_key: os.environ/OPENAI_API_KEY
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
|
|
||||||
guardrails:
|
guardrails:
|
||||||
|
- guardrail_name: general-guard
|
||||||
|
litellm_params:
|
||||||
|
guardrail: aim
|
||||||
|
mode: [pre_call, post_call]
|
||||||
|
api_key: os.environ/AIM_API_KEY
|
||||||
|
api_base: os.environ/AIM_API_BASE
|
||||||
|
default_on: true # Optional
|
||||||
|
|
||||||
- guardrail_name: "aporia-pre-guard"
|
- guardrail_name: "aporia-pre-guard"
|
||||||
litellm_params:
|
litellm_params:
|
||||||
guardrail: aporia # supported values: "aporia", "lakera"
|
guardrail: aporia # supported values: "aporia", "lakera"
|
||||||
|
@ -45,6 +53,7 @@ guardrails:
|
||||||
- `pre_call` Run **before** LLM call, on **input**
|
- `pre_call` Run **before** LLM call, on **input**
|
||||||
- `post_call` Run **after** LLM call, on **input & output**
|
- `post_call` Run **after** LLM call, on **input & output**
|
||||||
- `during_call` Run **during** LLM call, on **input** Same as `pre_call` but runs in parallel as LLM call. Response not returned until guardrail check completes
|
- `during_call` Run **during** LLM call, on **input** Same as `pre_call` but runs in parallel as LLM call. Response not returned until guardrail check completes
|
||||||
|
- A list of the above values to run multiple modes, e.g. `mode: [pre_call, post_call]`
|
||||||
|
|
||||||
|
|
||||||
## 2. Start LiteLLM Gateway
|
## 2. Start LiteLLM Gateway
|
||||||
|
|
|
@ -94,15 +94,31 @@ This disables the load_dotenv() functionality, which will automatically load you
|
||||||
|
|
||||||
## 5. If running LiteLLM on VPC, gracefully handle DB unavailability
|
## 5. If running LiteLLM on VPC, gracefully handle DB unavailability
|
||||||
|
|
||||||
This will allow LiteLLM to continue to process requests even if the DB is unavailable. This is better handling for DB unavailability.
|
When running LiteLLM on a VPC (and inaccessible from the public internet), you can enable graceful degradation so that request processing continues even if the database is temporarily unavailable.
|
||||||
|
|
||||||
|
|
||||||
**WARNING: Only do this if you're running LiteLLM on a VPC that cannot be accessed from the public internet.**
|
**WARNING: Only do this if you're running LiteLLM on a VPC that cannot be accessed from the public internet.**
|
||||||
|
|
||||||
```yaml
|
#### Configuration
|
||||||
|
|
||||||
|
```yaml showLineNumbers title="litellm config.yaml"
|
||||||
general_settings:
|
general_settings:
|
||||||
allow_requests_on_db_unavailable: True
|
allow_requests_on_db_unavailable: True
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### Expected Behavior
|
||||||
|
|
||||||
|
When `allow_requests_on_db_unavailable` is set to `true`, LiteLLM will handle errors as follows:
|
||||||
|
|
||||||
|
| Type of Error | Expected Behavior | Details |
|
||||||
|
|---------------|-------------------|----------------|
|
||||||
|
| Prisma Errors | ✅ Request will be allowed | Covers issues like DB connection resets or rejections from the DB via Prisma, the ORM used by LiteLLM. |
|
||||||
|
| Httpx Errors | ✅ Request will be allowed | Occurs when the database is unreachable, allowing the request to proceed despite the DB outage. |
|
||||||
|
| Pod Startup Behavior | ✅ Pods start regardless | LiteLLM Pods will start even if the database is down or unreachable, ensuring higher uptime guarantees for deployments. |
|
||||||
|
| Health/Readiness Check | ✅ Always returns 200 OK | The /health/readiness endpoint returns a 200 OK status to ensure that pods remain operational even when the database is unavailable. |
|
||||||
|
| LiteLLM Budget Errors or Model Errors | ❌ Request will be blocked | Triggered when the DB is reachable but the authentication token is invalid, lacks access, or exceeds budget limits. |
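A quick way to sanity-check this behavior is to poll the readiness endpoint while the DB is down. A minimal sketch, assuming the proxy is reachable at `localhost:4000`:

```python
import requests

# With allow_requests_on_db_unavailable: True, /health/readiness should keep
# returning 200 OK even while the database is unreachable.
resp = requests.get("http://localhost:4000/health/readiness", timeout=5)
print(resp.status_code)  # expected: 200
print(resp.json())
```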
|
||||||
|
|
||||||
|
|
||||||
## 6. Disable spend_logs & error_logs if not using the LiteLLM UI
|
## 6. Disable spend_logs & error_logs if not using the LiteLLM UI
|
||||||
|
|
||||||
By default, LiteLLM writes several types of logs to the database:
|
By default, LiteLLM writes several types of logs to the database:
|
||||||
|
@ -161,6 +177,50 @@ export LITELLM_SALT_KEY="sk-1234"
|
||||||
|
|
||||||
[**See Code**](https://github.com/BerriAI/litellm/blob/036a6821d588bd36d170713dcf5a72791a694178/litellm/proxy/common_utils/encrypt_decrypt_utils.py#L15)
|
[**See Code**](https://github.com/BerriAI/litellm/blob/036a6821d588bd36d170713dcf5a72791a694178/litellm/proxy/common_utils/encrypt_decrypt_utils.py#L15)
|
||||||
|
|
||||||
|
|
||||||
|
## 9. Use `prisma migrate deploy`
|
||||||
|
|
||||||
|
Use this to handle DB migrations across LiteLLM versions in production.
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="env" label="ENV">
|
||||||
|
|
||||||
|
```bash
|
||||||
|
USE_PRISMA_MIGRATE="True"
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="cli" label="CLI">
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --use_prisma_migrate
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
Benefits:
|
||||||
|
|
||||||
|
The migrate deploy command:
|
||||||
|
|
||||||
|
- **Does not** issue a warning if an already applied migration is missing from migration history
|
||||||
|
- **Does not** detect drift (production database schema differs from migration history end state - for example, due to a hotfix)
|
||||||
|
- **Does not** reset the database or generate artifacts (such as Prisma Client)
|
||||||
|
- **Does not** rely on a shadow database
|
||||||
|
|
||||||
|
|
||||||
|
### How does LiteLLM handle DB migrations in production?
|
||||||
|
|
||||||
|
1. A new migration file is written to our `litellm-proxy-extras` package. [See all](https://github.com/BerriAI/litellm/tree/main/litellm-proxy-extras/litellm_proxy_extras/migrations)
|
||||||
|
|
||||||
|
2. The core litellm pip package is bumped to point to the new `litellm-proxy-extras` package. This ensures older versions of LiteLLM will continue to use the old migrations. [See code](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/pyproject.toml#L58)
|
||||||
|
|
||||||
|
3. When you upgrade to a new version of LiteLLM, the migration file is applied to the database. [See code](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/litellm-proxy-extras/litellm_proxy_extras/utils.py#L42)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Extras
|
## Extras
|
||||||
### Expected Performance in Production
|
### Expected Performance in Production
|
||||||
|
|
||||||
|
@ -183,93 +243,3 @@ You should only see the following level of details in logs on the proxy server
|
||||||
# INFO: 192.168.2.205:34717 - "POST /chat/completions HTTP/1.1" 200 OK
|
# INFO: 192.168.2.205:34717 - "POST /chat/completions HTTP/1.1" 200 OK
|
||||||
# INFO: 192.168.2.205:29734 - "POST /chat/completions HTTP/1.1" 200 OK
|
# INFO: 192.168.2.205:29734 - "POST /chat/completions HTTP/1.1" 200 OK
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
### Machine Specifications to Deploy LiteLLM
|
|
||||||
|
|
||||||
| Service | Spec | CPUs | Memory | Architecture | Version|
|
|
||||||
| --- | --- | --- | --- | --- | --- |
|
|
||||||
| Server | `t2.small` | `1 vCPU` | `8GB` | `x86` | - |
|
|
||||||
| Redis Cache | - | - | - | - | 7.0+ Redis Engine|
|
|
||||||
|
|
||||||
|
|
||||||
### Reference Kubernetes Deployment YAML
|
|
||||||
|
|
||||||
Reference Kubernetes `deployment.yaml` that was load tested by us
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: litellm-deployment
|
|
||||||
spec:
|
|
||||||
replicas: 3
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: litellm
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: litellm
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: litellm-container
|
|
||||||
image: ghcr.io/berriai/litellm:main-latest
|
|
||||||
imagePullPolicy: Always
|
|
||||||
env:
|
|
||||||
- name: AZURE_API_KEY
|
|
||||||
value: "d6******"
|
|
||||||
- name: AZURE_API_BASE
|
|
||||||
value: "https://ope******"
|
|
||||||
- name: LITELLM_MASTER_KEY
|
|
||||||
value: "sk-1234"
|
|
||||||
- name: DATABASE_URL
|
|
||||||
value: "po**********"
|
|
||||||
args:
|
|
||||||
- "--config"
|
|
||||||
- "/app/proxy_config.yaml" # Update the path to mount the config file
|
|
||||||
volumeMounts: # Define volume mount for proxy_config.yaml
|
|
||||||
- name: config-volume
|
|
||||||
mountPath: /app
|
|
||||||
readOnly: true
|
|
||||||
livenessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /health/liveliness
|
|
||||||
port: 4000
|
|
||||||
initialDelaySeconds: 120
|
|
||||||
periodSeconds: 15
|
|
||||||
successThreshold: 1
|
|
||||||
failureThreshold: 3
|
|
||||||
timeoutSeconds: 10
|
|
||||||
readinessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /health/readiness
|
|
||||||
port: 4000
|
|
||||||
initialDelaySeconds: 120
|
|
||||||
periodSeconds: 15
|
|
||||||
successThreshold: 1
|
|
||||||
failureThreshold: 3
|
|
||||||
timeoutSeconds: 10
|
|
||||||
volumes: # Define volume to mount proxy_config.yaml
|
|
||||||
- name: config-volume
|
|
||||||
configMap:
|
|
||||||
name: litellm-config
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
Reference Kubernetes `service.yaml` that was load tested by us
|
|
||||||
```yaml
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: litellm-service
|
|
||||||
spec:
|
|
||||||
selector:
|
|
||||||
app: litellm
|
|
||||||
ports:
|
|
||||||
- protocol: TCP
|
|
||||||
port: 4000
|
|
||||||
targetPort: 4000
|
|
||||||
type: LoadBalancer
|
|
||||||
```
|
|
||||||
|
|
|
@ -242,6 +242,19 @@ litellm_settings:
|
||||||
| `litellm_redis_fails` | Number of failed redis calls |
|
| `litellm_redis_fails` | Number of failed redis calls |
|
||||||
| `litellm_self_latency` | Histogram latency for successful litellm api call |
|
| `litellm_self_latency` | Histogram latency for successful litellm api call |
|
||||||
|
|
||||||
|
#### DB Transaction Queue Health Metrics
|
||||||
|
|
||||||
|
Use these metrics to monitor the health of the DB Transaction Queue, e.g. monitoring the size of the in-memory and redis buffers.
|
||||||
|
|
||||||
|
| Metric Name | Description | Storage Type |
|
||||||
|
|-----------------------------------------------------|-----------------------------------------------------------------------------|--------------|
|
||||||
|
| `litellm_pod_lock_manager_size` | Indicates which pod has the lock to write updates to the database. | Redis |
|
||||||
|
| `litellm_in_memory_daily_spend_update_queue_size` | Number of items in the in-memory daily spend update queue. These are the aggregate spend logs for each user. | In-Memory |
|
||||||
|
| `litellm_redis_daily_spend_update_queue_size` | Number of items in the Redis daily spend update queue. These are the aggregate spend logs for each user. | Redis |
|
||||||
|
| `litellm_in_memory_spend_update_queue_size` | In-memory aggregate spend values for keys, users, teams, team members, etc.| In-Memory |
|
||||||
|
| `litellm_redis_spend_update_queue_size` | Redis aggregate spend values for keys, users, teams, etc. | Redis |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## **🔥 LiteLLM Maintained Grafana Dashboards**
|
## **🔥 LiteLLM Maintained Grafana Dashboards**
|
||||||
|
|
||||||
|
@ -268,6 +281,17 @@ Here is a screenshot of the metrics you can monitor with the LiteLLM Grafana Das
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Add authentication on /metrics endpoint
|
||||||
|
|
||||||
|
**By default, the /metrics endpoint is unauthenticated.**
|
||||||
|
|
||||||
|
You can opt into running LiteLLM authentication on the /metrics endpoint by setting the following in the config (an example request is shown after the config block):
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
litellm_settings:
|
||||||
|
require_auth_for_metrics_endpoint: true
|
||||||
|
```
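Once enabled, requests to `/metrics` need to be authenticated. A minimal sketch, assuming the proxy runs at `localhost:4000` and accepts a LiteLLM key in the `Authorization` header (placeholder key shown):

```python
import requests

# Call the now-authenticated /metrics endpoint with a LiteLLM key.
resp = requests.get(
    "http://localhost:4000/metrics",
    headers={"Authorization": "Bearer sk-1234"},
    timeout=10,
)
print(resp.status_code)
print(resp.text[:500])  # first few metric lines
```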
|
||||||
|
|
||||||
## FAQ
|
## FAQ
|
||||||
|
|
||||||
### What are `_created` vs. `_total` metrics?
|
### What are `_created` vs. `_total` metrics?
|
||||||
|
|
|
@ -161,6 +161,89 @@ Here's the available UI roles for a LiteLLM Internal User:
|
||||||
- `internal_user`: can login, view/create/delete their own keys, view their spend. **Cannot** add new users.
|
- `internal_user`: can login, view/create/delete their own keys, view their spend. **Cannot** add new users.
|
||||||
- `internal_user_viewer`: can login, view their own keys, view their own spend. **Cannot** create/delete keys, add new users.
|
- `internal_user_viewer`: can login, view their own keys, view their own spend. **Cannot** create/delete keys, add new users.
|
||||||
|
|
||||||
|
## Auto-add SSO users to teams
|
||||||
|
|
||||||
|
This walks through setting up sso auto-add for **Okta, Google SSO**
|
||||||
|
|
||||||
|
### Okta, Google SSO
|
||||||
|
|
||||||
|
1. Specify the JWT field that contains the team IDs that the user belongs to.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
general_settings:
|
||||||
|
master_key: sk-1234
|
||||||
|
litellm_jwtauth:
|
||||||
|
team_ids_jwt_field: "groups" # 👈 CAN BE ANY FIELD
|
||||||
|
```
|
||||||
|
|
||||||
|
This is assuming your SSO token looks like this. **If you need to inspect the JWT fields received from your SSO provider by LiteLLM, follow these instructions [here](#debugging-sso-jwt-fields)**
|
||||||
|
|
||||||
|
```
|
||||||
|
{
|
||||||
|
...,
|
||||||
|
"groups": ["team_id_1", "team_id_2"]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Create the teams on LiteLLM
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST '<PROXY_BASE_URL>/team/new' \
|
||||||
|
-H 'Authorization: Bearer <PROXY_MASTER_KEY>' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-D '{
|
||||||
|
"team_alias": "team_1",
|
||||||
|
"team_id": "team_id_1" # 👈 MUST BE THE SAME AS THE SSO GROUP ID
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test the SSO flow
|
||||||
|
|
||||||
|
Here's a walkthrough of [how it works](https://www.loom.com/share/8959be458edf41fd85937452c29a33f3?sid=7ebd6d37-569a-4023-866e-e0cde67cb23e)
|
||||||
|
|
||||||
|
### Microsoft Entra ID SSO group assignment
|
||||||
|
|
||||||
|
This walks through setting up sso auto-add for **Microsoft Entra ID**
|
||||||
|
|
||||||
|
Follow along with this video for a walkthrough of how to set this up with Microsoft Entra ID
|
||||||
|
|
||||||
|
|
||||||
|
<iframe width="840" height="500" src="https://www.loom.com/embed/ea711323aa9a496d84a01fd7b2a12f54?sid=c53e238c-5bfd-4135-b8fb-b5b1a08632cf" frameborder="0" webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe>
|
||||||
|
|
||||||
|
|
||||||
|
### Debugging SSO JWT fields
|
||||||
|
|
||||||
|
If you need to inspect the JWT fields received from your SSO provider by LiteLLM, follow these instructions. This guide walks you through setting up a debug callback to view the JWT data during the SSO process.
|
||||||
|
|
||||||
|
|
||||||
|
<Image img={require('../../img/debug_sso.png')} style={{ width: '500px', height: 'auto' }} />
|
||||||
|
<br />
|
||||||
|
|
||||||
|
1. Add `/sso/debug/callback` as a redirect URL in your SSO provider
|
||||||
|
|
||||||
|
In your SSO provider's settings, add the following URL as a new redirect (callback) URL:
|
||||||
|
|
||||||
|
```bash showLineNumbers title="Redirect URL"
|
||||||
|
http://<proxy_base_url>/sso/debug/callback
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
2. Navigate to the debug login page on your browser
|
||||||
|
|
||||||
|
Navigate to the following URL on your browser:
|
||||||
|
|
||||||
|
```bash showLineNumbers title="URL to navigate to"
|
||||||
|
https://<proxy_base_url>/sso/debug/login
|
||||||
|
```
|
||||||
|
|
||||||
|
This will initiate the standard SSO flow. You will be redirected to your SSO provider's login screen, and after successful authentication, you will be redirected back to LiteLLM's debug callback route.
|
||||||
|
|
||||||
|
|
||||||
|
3. View the JWT fields
|
||||||
|
|
||||||
|
Once redirected, you should see a page called "SSO Debug Information". This page displays the JWT fields received from your SSO provider (as shown in the image above).
|
||||||
|
|
||||||
|
|
||||||
## Advanced
|
## Advanced
|
||||||
### Setting custom logout URLs
|
### Setting custom logout URLs
|
||||||
|
|
||||||
|
@ -196,40 +279,6 @@ This budget does not apply to keys created under non-default teams.
|
||||||
|
|
||||||
[**Go Here**](./team_budgets.md)
|
[**Go Here**](./team_budgets.md)
|
||||||
|
|
||||||
### Auto-add SSO users to teams
|
|
||||||
|
|
||||||
1. Specify the JWT field that contains the team ids, that the user belongs to.
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
general_settings:
|
|
||||||
master_key: sk-1234
|
|
||||||
litellm_jwtauth:
|
|
||||||
team_ids_jwt_field: "groups" # 👈 CAN BE ANY FIELD
|
|
||||||
```
|
|
||||||
|
|
||||||
This is assuming your SSO token looks like this:
|
|
||||||
```
|
|
||||||
{
|
|
||||||
...,
|
|
||||||
"groups": ["team_id_1", "team_id_2"]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Create the teams on LiteLLM
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl -X POST '<PROXY_BASE_URL>/team/new' \
|
|
||||||
-H 'Authorization: Bearer <PROXY_MASTER_KEY>' \
|
|
||||||
-H 'Content-Type: application/json' \
|
|
||||||
-D '{
|
|
||||||
"team_alias": "team_1",
|
|
||||||
"team_id": "team_id_1" # 👈 MUST BE THE SAME AS THE SSO GROUP ID
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Test the SSO flow
|
|
||||||
|
|
||||||
Here's a walkthrough of [how it works](https://www.loom.com/share/8959be458edf41fd85937452c29a33f3?sid=7ebd6d37-569a-4023-866e-e0cde67cb23e)
|
|
||||||
|
|
||||||
### Restrict Users from creating personal keys
|
### Restrict Users from creating personal keys
|
||||||
|
|
||||||
|
|
|
@ -48,7 +48,7 @@ response = completion(
|
||||||
messages=[
|
messages=[
|
||||||
{"role": "user", "content": "What is the capital of France?"},
|
{"role": "user", "content": "What is the capital of France?"},
|
||||||
],
|
],
|
||||||
thinking={"type": "enabled", "budget_tokens": 1024} # 👈 REQUIRED FOR ANTHROPIC models (on `anthropic/`, `bedrock/`, `vertexai/`)
|
reasoning_effort="low",
|
||||||
)
|
)
|
||||||
print(response.choices[0].message.content)
|
print(response.choices[0].message.content)
|
||||||
```
|
```
|
||||||
|
@ -68,7 +68,7 @@ curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
"content": "What is the capital of France?"
|
"content": "What is the capital of France?"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"thinking": {"type": "enabled", "budget_tokens": 1024}
|
"reasoning_effort": "low"
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
</TabItem>
|
</TabItem>
|
||||||
|
@ -150,7 +150,7 @@ response = litellm.completion(
|
||||||
messages=messages,
|
messages=messages,
|
||||||
tools=tools,
|
tools=tools,
|
||||||
tool_choice="auto", # auto is default, but we'll be explicit
|
tool_choice="auto", # auto is default, but we'll be explicit
|
||||||
thinking={"type": "enabled", "budget_tokens": 1024},
|
reasoning_effort="low",
|
||||||
)
|
)
|
||||||
print("Response\n", response)
|
print("Response\n", response)
|
||||||
response_message = response.choices[0].message
|
response_message = response.choices[0].message
|
||||||
|
@ -198,9 +198,9 @@ if tool_calls:
|
||||||
model=model,
|
model=model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
seed=22,
|
seed=22,
|
||||||
|
reasoning_effort="low",
|
||||||
# tools=tools,
|
# tools=tools,
|
||||||
drop_params=True,
|
drop_params=True,
|
||||||
thinking={"type": "enabled", "budget_tokens": 1024},
|
|
||||||
) # get a new response from the model where it can see the function response
|
) # get a new response from the model where it can see the function response
|
||||||
print("second response\n", second_response)
|
print("second response\n", second_response)
|
||||||
```
|
```
|
||||||
|
@ -340,7 +340,7 @@ litellm.drop_params = True # 👈 EITHER GLOBALLY or per request
|
||||||
response = litellm.completion(
|
response = litellm.completion(
|
||||||
model="anthropic/claude-3-7-sonnet-20250219",
|
model="anthropic/claude-3-7-sonnet-20250219",
|
||||||
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||||
thinking={"type": "enabled", "budget_tokens": 1024},
|
reasoning_effort="low",
|
||||||
drop_params=True,
|
drop_params=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -348,7 +348,7 @@ response = litellm.completion(
|
||||||
response = litellm.completion(
|
response = litellm.completion(
|
||||||
model="deepseek/deepseek-chat",
|
model="deepseek/deepseek-chat",
|
||||||
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||||
thinking={"type": "enabled", "budget_tokens": 1024},
|
reasoning_effort="low",
|
||||||
drop_params=True,
|
drop_params=True,
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
@ -364,3 +364,36 @@ These fields can be accessed via `response.choices[0].message.reasoning_content`
|
||||||
- `thinking` - str: The thinking from the model.
|
- `thinking` - str: The thinking from the model.
|
||||||
- `signature` - str: The signature delta from the model.
|
- `signature` - str: The signature delta from the model.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Pass `thinking` to Anthropic models
|
||||||
|
|
||||||
|
You can also pass the `thinking` parameter to Anthropic models.
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
response = litellm.completion(
|
||||||
|
model="anthropic/claude-3-7-sonnet-20250219",
|
||||||
|
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
thinking={"type": "enabled", "budget_tokens": 1024},
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer $LITELLM_KEY" \
|
||||||
|
-d '{
|
||||||
|
"model": "anthropic/claude-3-7-sonnet-20250219",
|
||||||
|
"messages": [{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
"thinking": {"type": "enabled", "budget_tokens": 1024}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
|
@ -188,7 +188,13 @@ Currently implemented for:
|
||||||
- OpenAI (if OPENAI_API_KEY is set)
|
- OpenAI (if OPENAI_API_KEY is set)
|
||||||
- Fireworks AI (if FIREWORKS_AI_API_KEY is set)
|
- Fireworks AI (if FIREWORKS_AI_API_KEY is set)
|
||||||
- LiteLLM Proxy (if LITELLM_PROXY_API_KEY is set)
|
- LiteLLM Proxy (if LITELLM_PROXY_API_KEY is set)
|
||||||
|
- Gemini (if GEMINI_API_KEY is set)
|
||||||
|
- XAI (if XAI_API_KEY is set)
|
||||||
|
- Anthropic (if ANTHROPIC_API_KEY is set)
|
||||||
|
|
||||||
|
You can also specify a custom provider to check:
|
||||||
|
|
||||||
|
**All providers**:
|
||||||
```python
|
```python
|
||||||
from litellm import get_valid_models
|
from litellm import get_valid_models
|
||||||
|
|
||||||
|
@ -196,6 +202,14 @@ valid_models = get_valid_models(check_provider_endpoint=True)
|
||||||
print(valid_models)
|
print(valid_models)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Specific provider**:
|
||||||
|
```python
|
||||||
|
from litellm import get_valid_models
|
||||||
|
|
||||||
|
valid_models = get_valid_models(check_provider_endpoint=True, custom_llm_provider="openai")
|
||||||
|
print(valid_models)
|
||||||
|
```
|
||||||
|
|
||||||
### `validate_environment(model: str)`
|
### `validate_environment(model: str)`
|
||||||
|
|
||||||
This helper tells you if you have all the required environment variables for a model and, if not, what's missing.
|
This helper tells you if you have all the required environment variables for a model and, if not, what's missing.
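For example (the exact output shape may differ slightly between versions):

```python
from litellm import validate_environment

# Check whether the env vars required for this model are set.
result = validate_environment(model="gpt-3.5-turbo")
print(result)
# e.g. {'keys_in_environment': False, 'missing_keys': ['OPENAI_API_KEY']}
```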
|
||||||
|
|
|
@ -98,6 +98,5 @@ On the models dropdown select `thinking-anthropic-claude-3-7-sonnet`
|
||||||
|
|
||||||
<Image img={require('../../img/litellm_thinking_openweb.gif')} />
|
<Image img={require('../../img/litellm_thinking_openweb.gif')} />
|
||||||
|
|
||||||
|
## Additional Resources
|
||||||
|
- Running LiteLLM and OpenWebUI on Windows Localhost: A Comprehensive Guide [https://www.tanyongsheng.com/note/running-litellm-and-openwebui-on-windows-localhost-a-comprehensive-guide/](https://www.tanyongsheng.com/note/running-litellm-and-openwebui-on-windows-localhost-a-comprehensive-guide/)
|
||||||
|
|
BIN
docs/my-website/img/deadlock_fix_1.png
Normal file
After Width: | Height: | Size: 60 KiB |
BIN
docs/my-website/img/deadlock_fix_2.png
Normal file
After Width: | Height: | Size: 70 KiB |
BIN
docs/my-website/img/debug_sso.png
Normal file
After Width: | Height: | Size: 167 KiB |
BIN
docs/my-website/img/enterprise_vs_oss.png
Normal file
After Width: | Height: | Size: 61 KiB |
BIN
docs/my-website/img/hf_filter_inference_providers.png
Normal file
After Width: | Height: | Size: 120 KiB |
BIN
docs/my-website/img/mcp_2.png
Normal file
After Width: | Height: | Size: 133 KiB |
BIN
docs/my-website/img/mcp_ui.png
Normal file
After Width: | Height: | Size: 93 KiB |
BIN
docs/my-website/img/prevent_deadlocks.jpg
Normal file
After Width: | Height: | Size: 325 KiB |
BIN
docs/my-website/img/release_notes/mcp_ui.png
Normal file
After Width: | Height: | Size: 237 KiB |
BIN
docs/my-website/img/release_notes/new_activity_tab.png
Normal file
After Width: | Height: | Size: 326 KiB |
BIN
docs/my-website/img/release_notes/spend_by_model.jpg
Normal file
After Width: | Height: | Size: 488 KiB |
BIN
docs/my-website/img/release_notes/team_model_add.png
Normal file
After Width: | Height: | Size: 70 KiB |
BIN
docs/my-website/img/release_notes/ui_usage.png
Normal file
After Width: | Height: | Size: 66 KiB |
7
docs/my-website/package-lock.json
generated
|
@ -12559,9 +12559,10 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/image-size": {
|
"node_modules/image-size": {
|
||||||
"version": "1.1.1",
|
"version": "1.2.1",
|
||||||
"resolved": "https://registry.npmjs.org/image-size/-/image-size-1.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/image-size/-/image-size-1.2.1.tgz",
|
||||||
"integrity": "sha512-541xKlUw6jr/6gGuk92F+mYM5zaFAc5ahphvkqvNe2bQ6gVBkd6bfrmVJ2t4KDAfikAYZyIqTnktX3i6/aQDrQ==",
|
"integrity": "sha512-rH+46sQJ2dlwfjfhCyNx5thzrv+dtmBIhPHk0zgRUukHzZ/kRueTJXoYYsclBaKcSMBWuGbOFXtioLpzTb5euw==",
|
||||||
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"queue": "6.0.2"
|
"queue": "6.0.2"
|
||||||
},
|
},
|
||||||
|
|
|
@ -6,7 +6,7 @@ authors:
|
||||||
- name: Krrish Dholakia
|
- name: Krrish Dholakia
|
||||||
title: CEO, LiteLLM
|
title: CEO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/krish-d/
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8
|
||||||
- name: Ishaan Jaffer
|
- name: Ishaan Jaffer
|
||||||
title: CTO, LiteLLM
|
title: CTO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/reffajnaahsi/
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
|
|
@ -6,7 +6,7 @@ authors:
|
||||||
- name: Krrish Dholakia
|
- name: Krrish Dholakia
|
||||||
title: CEO, LiteLLM
|
title: CEO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/krish-d/
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8
|
||||||
- name: Ishaan Jaffer
|
- name: Ishaan Jaffer
|
||||||
title: CTO, LiteLLM
|
title: CTO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/reffajnaahsi/
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
|
|
@ -6,7 +6,7 @@ authors:
|
||||||
- name: Krrish Dholakia
|
- name: Krrish Dholakia
|
||||||
title: CEO, LiteLLM
|
title: CEO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/krish-d/
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8
|
||||||
- name: Ishaan Jaffer
|
- name: Ishaan Jaffer
|
||||||
title: CTO, LiteLLM
|
title: CTO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/reffajnaahsi/
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
|
|
@ -6,7 +6,7 @@ authors:
|
||||||
- name: Krrish Dholakia
|
- name: Krrish Dholakia
|
||||||
title: CEO, LiteLLM
|
title: CEO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/krish-d/
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8
|
||||||
- name: Ishaan Jaffer
|
- name: Ishaan Jaffer
|
||||||
title: CTO, LiteLLM
|
title: CTO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/reffajnaahsi/
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
|
|
@ -6,7 +6,7 @@ authors:
|
||||||
- name: Krrish Dholakia
|
- name: Krrish Dholakia
|
||||||
title: CEO, LiteLLM
|
title: CEO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/krish-d/
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8
|
||||||
- name: Ishaan Jaffer
|
- name: Ishaan Jaffer
|
||||||
title: CTO, LiteLLM
|
title: CTO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/reffajnaahsi/
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
|
|
@ -6,7 +6,7 @@ authors:
|
||||||
- name: Krrish Dholakia
|
- name: Krrish Dholakia
|
||||||
title: CEO, LiteLLM
|
title: CEO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/krish-d/
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8
|
||||||
- name: Ishaan Jaffer
|
- name: Ishaan Jaffer
|
||||||
title: CTO, LiteLLM
|
title: CTO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/reffajnaahsi/
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
|
|
@ -6,7 +6,7 @@ authors:
|
||||||
- name: Krrish Dholakia
|
- name: Krrish Dholakia
|
||||||
title: CEO, LiteLLM
|
title: CEO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/krish-d/
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8
|
||||||
- name: Ishaan Jaffer
|
- name: Ishaan Jaffer
|
||||||
title: CTO, LiteLLM
|
title: CTO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/reffajnaahsi/
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
|
|
@ -6,7 +6,7 @@ authors:
|
||||||
- name: Krrish Dholakia
|
- name: Krrish Dholakia
|
||||||
title: CEO, LiteLLM
|
title: CEO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/krish-d/
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8
|
||||||
- name: Ishaan Jaffer
|
- name: Ishaan Jaffer
|
||||||
title: CTO, LiteLLM
|
title: CTO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/reffajnaahsi/
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
|
|
@ -6,7 +6,7 @@ authors:
|
||||||
- name: Krrish Dholakia
|
- name: Krrish Dholakia
|
||||||
title: CEO, LiteLLM
|
title: CEO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/krish-d/
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8
|
||||||
- name: Ishaan Jaffer
|
- name: Ishaan Jaffer
|
||||||
title: CTO, LiteLLM
|
title: CTO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/reffajnaahsi/
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
|
|
@ -6,7 +6,7 @@ authors:
|
||||||
- name: Krrish Dholakia
|
- name: Krrish Dholakia
|
||||||
title: CEO, LiteLLM
|
title: CEO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/krish-d/
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8
|
||||||
- name: Ishaan Jaffer
|
- name: Ishaan Jaffer
|
||||||
title: CTO, LiteLLM
|
title: CTO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/reffajnaahsi/
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
|
|
@ -6,7 +6,7 @@ authors:
|
||||||
- name: Krrish Dholakia
|
- name: Krrish Dholakia
|
||||||
title: CEO, LiteLLM
|
title: CEO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/krish-d/
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8
|
||||||
- name: Ishaan Jaffer
|
- name: Ishaan Jaffer
|
||||||
title: CTO, LiteLLM
|
title: CTO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/reffajnaahsi/
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
|
|
@ -6,7 +6,7 @@ authors:
|
||||||
- name: Krrish Dholakia
|
- name: Krrish Dholakia
|
||||||
title: CEO, LiteLLM
|
title: CEO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/krish-d/
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8
|
||||||
- name: Ishaan Jaffer
|
- name: Ishaan Jaffer
|
||||||
title: CTO, LiteLLM
|
title: CTO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/reffajnaahsi/
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
|
|
@ -6,7 +6,7 @@ authors:
|
||||||
- name: Krrish Dholakia
|
- name: Krrish Dholakia
|
||||||
title: CEO, LiteLLM
|
title: CEO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/krish-d/
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8
|
||||||
- name: Ishaan Jaffer
|
- name: Ishaan Jaffer
|
||||||
title: CTO, LiteLLM
|
title: CTO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/reffajnaahsi/
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
|
|
@ -6,7 +6,7 @@ authors:
|
||||||
- name: Krrish Dholakia
|
- name: Krrish Dholakia
|
||||||
title: CEO, LiteLLM
|
title: CEO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/krish-d/
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8
|
||||||
- name: Ishaan Jaffer
|
- name: Ishaan Jaffer
|
||||||
title: CTO, LiteLLM
|
title: CTO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/reffajnaahsi/
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
@ -24,6 +24,7 @@ This release brings:
|
||||||
- LLM Translation Improvements (MCP Support and Bedrock Application Profiles)
|
- LLM Translation Improvements (MCP Support and Bedrock Application Profiles)
|
||||||
- Perf improvements for Usage-based Routing
|
- Perf improvements for Usage-based Routing
|
||||||
- Streaming guardrail support via websockets
|
- Streaming guardrail support via websockets
|
||||||
|
- Azure OpenAI client perf fix (from previous release)
|
||||||
|
|
||||||
## Docker Run LiteLLM Proxy
|
## Docker Run LiteLLM Proxy
|
||||||
|
|
||||||
|
@ -31,7 +32,7 @@ This release brings:
|
||||||
docker run
|
docker run
|
||||||
-e STORE_MODEL_IN_DB=True
|
-e STORE_MODEL_IN_DB=True
|
||||||
-p 4000:4000
|
-p 4000:4000
|
||||||
ghcr.io/berriai/litellm:main-v1.63.14-stable
|
ghcr.io/berriai/litellm:main-v1.63.14-stable.patch1
|
||||||
```
|
```
|
||||||
|
|
||||||
## Demo Instance
|
## Demo Instance
|
||||||
|
|
|
@ -6,7 +6,7 @@ authors:
|
||||||
- name: Krrish Dholakia
|
- name: Krrish Dholakia
|
||||||
title: CEO, LiteLLM
|
title: CEO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/krish-d/
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8
|
||||||
- name: Ishaan Jaffer
|
- name: Ishaan Jaffer
|
||||||
title: CTO, LiteLLM
|
title: CTO, LiteLLM
|
||||||
url: https://www.linkedin.com/in/reffajnaahsi/
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
|
160
docs/my-website/release_notes/v1.65.0-stable/index.md
Normal file
|
@ -0,0 +1,160 @@
|
||||||
|
---
|
||||||
|
title: v1.65.0-stable - Model Context Protocol
|
||||||
|
slug: v1.65.0-stable
|
||||||
|
date: 2025-03-30T10:00:00
|
||||||
|
authors:
|
||||||
|
- name: Krrish Dholakia
|
||||||
|
title: CEO, LiteLLM
|
||||||
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8
|
||||||
|
- name: Ishaan Jaffer
|
||||||
|
title: CTO, LiteLLM
|
||||||
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
|
||||||
|
tags: [mcp, custom_prompt_management]
|
||||||
|
hide_table_of_contents: false
|
||||||
|
---
|
||||||
|
import Image from '@theme/IdealImage';
|
||||||
|
|
||||||
|
v1.65.0-stable is live now. Here are the key highlights of this release:
|
||||||
|
- **MCP Support**: Support for adding and using MCP servers on the LiteLLM proxy.
|
||||||
|
- **UI view total usage after 1M+ logs**: You can now view usage analytics after crossing 1M+ logs in DB.
|
||||||
|
|
||||||
|
## Model Context Protocol (MCP)
|
||||||
|
|
||||||
|
This release introduces support for centrally adding MCP servers on LiteLLM. This allows you to add MCP server endpoints and your developers can `list` and `call` MCP tools through LiteLLM.
|
||||||
|
|
||||||
|
Read more about MCP [here](https://docs.litellm.ai/docs/mcp).
|
||||||
|
|
||||||
|
<Image
|
||||||
|
img={require('../../img/release_notes/mcp_ui.png')}
|
||||||
|
style={{width: '100%', display: 'block', margin: '2rem auto'}}
|
||||||
|
/>
|
||||||
|
<p style={{textAlign: 'left', color: '#666'}}>
|
||||||
|
Expose and use MCP servers through LiteLLM
|
||||||
|
</p>
|
||||||
|
|
||||||
|
## UI view total usage after 1M+ logs
|
||||||
|
|
||||||
|
This release brings the ability to view total usage analytics even after exceeding 1M+ logs in your database. We've implemented a scalable architecture that stores only aggregate usage data, resulting in significantly more efficient queries and reduced database CPU utilization.
|
||||||
|
|
||||||
|
|
||||||
|
<Image
|
||||||
|
img={require('../../img/release_notes/ui_usage.png')}
|
||||||
|
style={{width: '100%', display: 'block', margin: '2rem auto'}}
|
||||||
|
/>
|
||||||
|
<p style={{textAlign: 'left', color: '#666'}}>
|
||||||
|
View total usage after 1M+ logs
|
||||||
|
</p>
|
||||||
|
|
||||||
|
|
||||||
|
- How this works:
|
||||||
|
- We now aggregate usage data into a dedicated DailyUserSpend table, significantly reducing query load and CPU usage even beyond 1M+ logs.
|
||||||
|
|
||||||
|
- Daily Spend Breakdown API:
|
||||||
|
|
||||||
|
- Retrieve granular daily usage data (by model, provider, and API key) with a single endpoint.
|
||||||
|
Example Request:
|
||||||
|
|
||||||
|
```shell title="Daily Spend Breakdown API" showLineNumbers
|
||||||
|
curl -L -X GET 'http://localhost:4000/user/daily/activity?start_date=2025-03-20&end_date=2025-03-27' \
|
||||||
|
-H 'Authorization: Bearer sk-...'
|
||||||
|
```
|
||||||
|
|
||||||
|
```json title="Daily Spend Breakdown API Response" showLineNumbers
|
||||||
|
{
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"date": "2025-03-27",
|
||||||
|
"metrics": {
|
||||||
|
"spend": 0.0177072,
|
||||||
|
"prompt_tokens": 111,
|
||||||
|
"completion_tokens": 1711,
|
||||||
|
"total_tokens": 1822,
|
||||||
|
"api_requests": 11
|
||||||
|
},
|
||||||
|
"breakdown": {
|
||||||
|
"models": {
|
||||||
|
"gpt-4o-mini": {
|
||||||
|
"spend": 1.095e-05,
|
||||||
|
"prompt_tokens": 37,
|
||||||
|
"completion_tokens": 9,
|
||||||
|
"total_tokens": 46,
|
||||||
|
"api_requests": 1
|
||||||
|
},
|
||||||
|
"providers": { "openai": { ... }, "azure_ai": { ... } },
|
||||||
|
"api_keys": { "3126b6eaf1...": { ... } }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"total_spend": 0.7274667,
|
||||||
|
"total_prompt_tokens": 280990,
|
||||||
|
"total_completion_tokens": 376674,
|
||||||
|
"total_api_requests": 14
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## New Models / Updated Models
|
||||||
|
- Support for Vertex AI gemini-2.0-flash-lite & Google AI Studio gemini-2.0-flash-lite [PR](https://github.com/BerriAI/litellm/pull/9523)
|
||||||
|
- Support for Vertex AI Fine-Tuned LLMs [PR](https://github.com/BerriAI/litellm/pull/9542)
|
||||||
|
- Nova Canvas image generation support [PR](https://github.com/BerriAI/litellm/pull/9525)
|
||||||
|
- OpenAI gpt-4o-transcribe support [PR](https://github.com/BerriAI/litellm/pull/9517)
|
||||||
|
- Added new Vertex AI text embedding model [PR](https://github.com/BerriAI/litellm/pull/9476)
|
||||||
|
|
||||||
|
## LLM Translation
|
||||||
|
- OpenAI Web Search Tool Call Support [PR](https://github.com/BerriAI/litellm/pull/9465)
|
||||||
|
- Vertex AI topLogprobs support [PR](https://github.com/BerriAI/litellm/pull/9518)
|
||||||
|
- Support for sending images and video to Vertex AI multimodal embedding [Doc](https://docs.litellm.ai/docs/providers/vertex#multi-modal-embeddings)
|
||||||
|
- Support litellm.api_base for Vertex AI + Gemini across completion, embedding, image_generation [PR](https://github.com/BerriAI/litellm/pull/9516)
|
||||||
|
- Bug fix for returning `response_cost` when using litellm python SDK with LiteLLM Proxy [PR](https://github.com/BerriAI/litellm/commit/6fd18651d129d606182ff4b980e95768fc43ca3d)
|
||||||
|
- Support for `max_completion_tokens` on Mistral API [PR](https://github.com/BerriAI/litellm/pull/9606)
|
||||||
|
- Refactored Vertex AI passthrough routes - fixes unpredictable behaviour with auto-setting default_vertex_region on router model add [PR](https://github.com/BerriAI/litellm/pull/9467)
|
||||||
|
|
||||||
|
## Spend Tracking Improvements
|
||||||
|
- Log 'api_base' on spend logs [PR](https://github.com/BerriAI/litellm/pull/9509)
|
||||||
|
- Support for Gemini audio token cost tracking [PR](https://github.com/BerriAI/litellm/pull/9535)
|
||||||
|
- Fixed OpenAI audio input token cost tracking [PR](https://github.com/BerriAI/litellm/pull/9535)
|
||||||
|
|
||||||
|
## UI
|
||||||
|
|
||||||
|
### Model Management
|
||||||
|
- Allowed team admins to add/update/delete models on UI [PR](https://github.com/BerriAI/litellm/pull/9572)
|
||||||
|
- Added render supports_web_search on model hub [PR](https://github.com/BerriAI/litellm/pull/9469)
|
||||||
|
|
||||||
|
### Request Logs
|
||||||
|
- Show API base and model ID on request logs [PR](https://github.com/BerriAI/litellm/pull/9572)
|
||||||
|
- Allow viewing keyinfo on request logs [PR](https://github.com/BerriAI/litellm/pull/9568)
|
||||||
|
|
||||||
|
### Usage Tab
|
||||||
|
- Added Daily User Spend Aggregate view - allows UI Usage tab to work > 1m rows [PR](https://github.com/BerriAI/litellm/pull/9538)
|
||||||
|
- Connected UI to "LiteLLM_DailyUserSpend" spend table [PR](https://github.com/BerriAI/litellm/pull/9603)
|
||||||
|
|
||||||
|
## Logging Integrations
|
||||||
|
- Fixed StandardLoggingPayload for GCS Pub Sub Logging Integration [PR](https://github.com/BerriAI/litellm/pull/9508)
|
||||||
|
- Track `litellm_model_name` on `StandardLoggingPayload` [Docs](https://docs.litellm.ai/docs/proxy/logging_spec#standardlogginghiddenparams)
|
||||||
|
|
||||||
|
## Performance / Reliability Improvements
|
||||||
|
- LiteLLM Redis semantic caching implementation [PR](https://github.com/BerriAI/litellm/pull/9356)
|
||||||
|
- Gracefully handle exceptions when DB is having an outage [PR](https://github.com/BerriAI/litellm/pull/9533)
|
||||||
|
- Allow Pods to startup + passing /health/readiness when allow_requests_on_db_unavailable: True and DB is down [PR](https://github.com/BerriAI/litellm/pull/9569)
|
||||||
|
|
||||||
|
|
||||||
|
## General Improvements
|
||||||
|
- Support for exposing MCP tools on litellm proxy [PR](https://github.com/BerriAI/litellm/pull/9426)
|
||||||
|
- Support discovering Gemini, Anthropic, xAI models by calling their /v1/model endpoint [PR](https://github.com/BerriAI/litellm/pull/9530)
|
||||||
|
- Fixed route check for non-proxy admins on JWT auth [PR](https://github.com/BerriAI/litellm/pull/9454)
|
||||||
|
- Added baseline Prisma database migrations [PR](https://github.com/BerriAI/litellm/pull/9565)
|
||||||
|
- View all wildcard models on /model/info [PR](https://github.com/BerriAI/litellm/pull/9572)
|
||||||
|
|
||||||
|
|
||||||
|
## Security
|
||||||
|
- Bumped next from 14.2.21 to 14.2.25 in UI dashboard [PR](https://github.com/BerriAI/litellm/pull/9458)
|
||||||
|
|
||||||
|
## Complete Git Diff
|
||||||
|
|
||||||
|
[Here's the complete git diff](https://github.com/BerriAI/litellm/compare/v1.63.14-stable.patch1...v1.65.0-stable)
|
34
docs/my-website/release_notes/v1.65.0/index.md
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
---
|
||||||
|
title: v1.65.0 - Team Model Add - update
|
||||||
|
slug: v1.65.0
|
||||||
|
date: 2025-03-28T10:00:00
|
||||||
|
authors:
|
||||||
|
- name: Krrish Dholakia
|
||||||
|
title: CEO, LiteLLM
|
||||||
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8
|
||||||
|
- name: Ishaan Jaffer
|
||||||
|
title: CTO, LiteLLM
|
||||||
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
|
||||||
|
tags: [management endpoints, team models, ui]
|
||||||
|
hide_table_of_contents: false
|
||||||
|
---
|
||||||
|
|
||||||
|
import Image from '@theme/IdealImage';
|
||||||
|
|
||||||
|
v1.65.0 updates the `/model/new` endpoint to prevent non-team admins from creating team models.
|
||||||
|
|
||||||
|
This means that only proxy admins or team admins can create team models.
|
||||||
|
|
||||||
|
## Additional Changes
|
||||||
|
|
||||||
|
- Allows team admins to call `/model/update` to update team models.
|
||||||
|
- Allows team admins to call `/model/delete` to delete team models.
|
||||||
|
- Introduces new `user_models_only` param to `/v2/model/info` - only return models added by this user.
|
||||||
|
|
||||||
|
|
||||||
|
These changes enable team admins to add and manage models for their team on the LiteLLM UI + API.
|
||||||
|
|
||||||
|
|
||||||
|
<Image img={require('../../img/release_notes/team_model_add.png')} />
|
176
docs/my-website/release_notes/v1.65.4-stable/index.md
Normal file
|
@ -0,0 +1,176 @@
|
||||||
|
---
|
||||||
|
title: v1.65.4-stable
|
||||||
|
slug: v1.65.4-stable
|
||||||
|
date: 2025-04-05T10:00:00
|
||||||
|
authors:
|
||||||
|
- name: Krrish Dholakia
|
||||||
|
title: CEO, LiteLLM
|
||||||
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1749686400&v=beta&t=Hkl3U8Ps0VtvNxX0BNNq24b4dtX5wQaPFp6oiKCIHD8
|
||||||
|
- name: Ishaan Jaffer
|
||||||
|
title: CTO, LiteLLM
|
||||||
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
|
||||||
|
|
||||||
|
tags: []
|
||||||
|
hide_table_of_contents: false
|
||||||
|
---
|
||||||
|
|
||||||
|
import Image from '@theme/IdealImage';
|
||||||
|
import Tabs from '@theme/Tabs';
|
||||||
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
|
## Deploy this version
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="docker" label="Docker">
|
||||||
|
|
||||||
|
``` showLineNumbers title="docker run litellm"
|
||||||
|
docker run
|
||||||
|
-e STORE_MODEL_IN_DB=True
|
||||||
|
-p 4000:4000
|
||||||
|
ghcr.io/berriai/litellm:main-v1.65.4-stable
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="pip" label="Pip">
|
||||||
|
|
||||||
|
``` showLineNumbers title="pip install litellm"
|
||||||
|
pip install litellm==1.65.4.post1
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
v1.65.4-stable is live. Here are the improvements since v1.65.0-stable.
|
||||||
|
|
||||||
|
## Key Highlights
|
||||||
|
- **Preventing DB Deadlocks**: Fixes a high-traffic issue when multiple instances were writing to the DB at the same time.
|
||||||
|
- **New Usage Tab**: Enables viewing spend by model and customizing date range
|
||||||
|
|
||||||
|
Let's dive in.
|
||||||
|
|
||||||
|
### Preventing DB Deadlocks
|
||||||
|
|
||||||
|
<Image img={require('../../img/prevent_deadlocks.jpg')} />
|
||||||
|
|
||||||
|
This release fixes the DB deadlocking issue that users faced in high traffic (10K+ RPS). This is great because it enables user/key/team spend tracking to work at that scale.
|
||||||
|
|
||||||
|
Read more about the new architecture [here](https://docs.litellm.ai/docs/proxy/db_deadlocks)
|
||||||
|
|
||||||
|
|
||||||
|
### New Usage Tab
|
||||||
|
|
||||||
|
<Image img={require('../../img/release_notes/spend_by_model.jpg')} />
|
||||||
|
|
||||||
|
The new Usage tab now brings the ability to track daily spend by model. Combined with the ability to view successful requests and token usage, this makes it easier to catch any spend tracking or token counting errors.
|
||||||
|
|
||||||
|
To test this out, just go to Experimental > New Usage > Activity.
|
||||||
|
|
||||||
|
|
||||||
|
## New Models / Updated Models
|
||||||
|
|
||||||
|
1. Databricks - claude-3-7-sonnet cost tracking [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L10350)
|
||||||
|
2. VertexAI - `gemini-2.5-pro-exp-03-25` cost tracking [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L4492)
|
||||||
|
3. VertexAI - `gemini-2.0-flash` cost tracking [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L4689)
|
||||||
|
4. Groq - add whisper ASR models to model cost map [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L3324)
|
||||||
|
5. IBM - Add watsonx/ibm/granite-3-8b-instruct to model cost map [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L91)
|
||||||
|
6. Google AI Studio - add gemini/gemini-2.5-pro-preview-03-25 to model cost map [PR](https://github.com/BerriAI/litellm/blob/52b35cd8093b9ad833987b24f494586a1e923209/model_prices_and_context_window.json#L4850)
|
||||||
|
|
||||||
|

## LLM Translation

1. Vertex AI - support anyOf param for OpenAI json schema translation [Get Started](https://docs.litellm.ai/docs/providers/vertex#json-schema)
2. Anthropic - response_format + thinking param support (works across Anthropic API, Bedrock, Vertex) [Get Started](https://docs.litellm.ai/docs/reasoning_content)
3. Anthropic - if a thinking token budget is specified and max tokens is not, ensure the max tokens sent to Anthropic is higher than the thinking tokens (works across Anthropic API, Bedrock, Vertex) [PR](https://github.com/BerriAI/litellm/pull/9594)
4. Bedrock - latency optimized inference support [Get Started](https://docs.litellm.ai/docs/providers/bedrock#usage---latency-optimized-inference)
5. Sagemaker - handle special tokens + multibyte character codes in responses [Get Started](https://docs.litellm.ai/docs/providers/aws_sagemaker)
6. MCP - add support for using SSE MCP servers [Get Started](https://docs.litellm.ai/docs/mcp#usage)
7. Anthropic - new `litellm.messages.create` interface for calling Anthropic `/v1/messages` via passthrough [Get Started](https://docs.litellm.ai/docs/anthropic_unified#usage)
8. Anthropic - support 'file' content type in message param (works across Anthropic API, Bedrock, Vertex) [Get Started](https://docs.litellm.ai/docs/providers/anthropic#usage---pdf)
9. Anthropic - map openai 'reasoning_effort' to anthropic 'thinking' param (works across Anthropic API, Bedrock, Vertex) [Get Started](https://docs.litellm.ai/docs/providers/anthropic#usage---thinking--reasoning_content) - see the sketch after this list
10. Google AI Studio (Gemini) - [BETA] `/v1/files` upload support [Get Started](../../docs/providers/google_ai_studio/files)
11. Azure - fix o-series tool calling [Get Started](../../docs/providers/azure#tool-calling--function-calling)
12. Unified file id - [ALPHA] allow calling multiple providers with the same file id [PR](https://github.com/BerriAI/litellm/pull/9718)
    - This is experimental, and not recommended for production use.
    - We plan to have a production-ready implementation by next week.
13. Google AI Studio (Gemini) - return logprobs [PR](https://github.com/BerriAI/litellm/pull/9713)
14. Anthropic - support prompt caching for Anthropic tool calls [Get Started](https://docs.litellm.ai/docs/completion/prompt_caching)
15. OpenRouter - unwrap extra body on OpenRouter calls [PR](https://github.com/BerriAI/litellm/pull/9747)
16. VertexAI - fix credential caching issue [PR](https://github.com/BerriAI/litellm/pull/9756)
17. XAI - filter out 'name' param for XAI [PR](https://github.com/BerriAI/litellm/pull/9761)
18. Gemini - image generation output support [Get Started](../../docs/providers/gemini#image-generation)
19. Databricks - support claude-3-7-sonnet w/ thinking + response_format [Get Started](../../docs/providers/databricks#usage---thinking--reasoning_content)
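
A minimal sketch of the `reasoning_effort` to `thinking` mapping from item 9 above (assumes `ANTHROPIC_API_KEY` is set; the model name is only an example):

```python
import litellm

response = litellm.completion(
    model="anthropic/claude-3-7-sonnet-20250219",  # example model name
    messages=[{"role": "user", "content": "How many r's are in 'strawberry'?"}],
    reasoning_effort="low",  # mapped to Anthropic's 'thinking' param by LiteLLM
)

# reasoning_content carries the thinking trace, per the docs linked above.
print(response.choices[0].message.reasoning_content)
print(response.choices[0].message.content)
```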

## Spend Tracking Improvements

1. Reliability fix - check the sent and received model for cost calculation [PR](https://github.com/BerriAI/litellm/pull/9669)
2. Vertex AI - multimodal embedding cost tracking [Get Started](https://docs.litellm.ai/docs/providers/vertex#multi-modal-embeddings), [PR](https://github.com/BerriAI/litellm/pull/9623)

## Management Endpoints / UI

<Image img={require('../../img/release_notes/new_activity_tab.png')} />

1. New Usage Tab
    - Report 'total_tokens' + report success/failure calls
    - Remove double bars on scroll
    - Ensure the 'daily spend' chart is ordered from earliest to latest date
    - Show spend per model per day
    - Show key alias on usage tab
    - Allow non-admins to view their activity
    - Add date picker to new usage tab
2. Virtual Keys Tab
    - Remove 'default key' on user signup
    - Fix showing user models available for personal key creation
3. Test Key Tab
    - Allow testing image generation models
4. Models Tab
    - Fix bulk adding models
    - Support reusable credentials for passthrough endpoints
    - Allow team members to see team models
5. Teams Tab
    - Fix json serialization error on update team metadata
6. Request Logs Tab
    - Add reasoning_content token tracking across all providers on streaming
7. API
    - Return key alias on `/user/daily/activity` [Get Started](../../docs/proxy/cost_tracking#daily-spend-breakdown-api) - see the sketch after this list
8. SSO
    - Allow assigning SSO users to teams on MSFT SSO [PR](https://github.com/BerriAI/litellm/pull/9745)
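
To exercise the `/user/daily/activity` change in item 7, a minimal sketch against a locally running proxy (the base URL, key, query parameters, and response shape here are assumptions; see the linked docs for the authoritative request format):

```python
import requests

# Assumes a LiteLLM proxy at http://localhost:4000 and a valid virtual key.
resp = requests.get(
    "http://localhost:4000/user/daily/activity",
    headers={"Authorization": "Bearer sk-1234"},
    params={"start_date": "2025-04-01", "end_date": "2025-04-07"},  # assumed params
)

for row in resp.json().get("results", []):  # response shape is an assumption
    print(row.get("key_alias"), row.get("spend"))
```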

## Logging / Guardrail Integrations

1. Console Logs - add json formatting for uncaught exceptions [PR](https://github.com/BerriAI/litellm/pull/9619)
2. Guardrails - AIM Guardrails support for virtual key based policies [Get Started](../../docs/proxy/guardrails/aim_security)
3. Logging - fix completion start time tracking [PR](https://github.com/BerriAI/litellm/pull/9688)
4. Prometheus
    - Allow adding authentication on Prometheus /metrics endpoints [PR](https://github.com/BerriAI/litellm/pull/9766)
    - Distinguish LLM Provider Exception vs. LiteLLM Exception in metric naming [PR](https://github.com/BerriAI/litellm/pull/9760)
    - Emit operational metrics for the new DB Transaction architecture [PR](https://github.com/BerriAI/litellm/pull/9719)

## Performance / Loadbalancing / Reliability improvements

1. Preventing Deadlocks - see the sketch after this list
    - Reduce DB deadlocks by storing spend updates in Redis and then committing them to the DB [PR](https://github.com/BerriAI/litellm/pull/9608)
    - Ensure no deadlocks occur when updating DailyUserSpendTransaction [PR](https://github.com/BerriAI/litellm/pull/9690)
    - High-traffic fix - ensure the new DB + Redis architecture accurately tracks spend [PR](https://github.com/BerriAI/litellm/pull/9673)
    - Use Redis for the PodLock Manager instead of PG (ensures no deadlocks occur) [PR](https://github.com/BerriAI/litellm/pull/9715)
    - v2 DB deadlock reduction architecture - add max size for in-memory queue + backpressure mechanism [PR](https://github.com/BerriAI/litellm/pull/9759)
2. Prisma Migrations [Get Started](../../docs/proxy/prod#9-use-prisma-migrate-deploy)
    - Connect litellm proxy to litellm's prisma migration files
    - Handle db schema updates from the new `litellm-proxy-extras` sdk
3. Redis - support password for sync sentinel clients [PR](https://github.com/BerriAI/litellm/pull/9622)
4. Fix "Circular reference detected" error when max_parallel_requests = 0 [PR](https://github.com/BerriAI/litellm/pull/9671)
5. Code QA - ban hardcoded numbers [PR](https://github.com/BerriAI/litellm/pull/9709)
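
To illustrate the buffering pattern behind item 1, here is a minimal, illustrative sketch (not LiteLLM's actual implementation) of accumulating spend increments in Redis and committing them to the database in one periodic transaction; the key names, flush strategy, and DB statement are assumptions:

```python
import redis

r = redis.Redis(host="localhost", port=6379)

def record_spend(user_id: str, amount: float) -> None:
    # Hot path: accumulate in Redis instead of taking a row lock in Postgres.
    r.incrbyfloat(f"spend_buffer:{user_id}", amount)

def flush_spend_buffer(db_conn) -> None:
    # Periodic job: drain the buffer and commit once, so concurrent proxy
    # instances never contend on the same DB rows.
    for key in r.scan_iter("spend_buffer:*"):
        amount = float(r.getset(key, 0) or 0)
        if amount:
            user_id = key.decode().split(":", 1)[1]
            with db_conn.cursor() as cur:
                cur.execute(
                    'UPDATE "LiteLLM_UserTable" SET spend = spend + %s WHERE user_id = %s',
                    (amount, user_id),
                )
    db_conn.commit()
```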

## Helm

1. Fix: wrong indentation of ttlSecondsAfterFinished in chart [PR](https://github.com/BerriAI/litellm/pull/9611)

## General Proxy Improvements

1. Fix - only apply service_account_settings.enforced_params on service accounts [PR](https://github.com/BerriAI/litellm/pull/9683)
2. Fix - handle null metadata on `/chat/completion` [PR](https://github.com/BerriAI/litellm/issues/9717)
3. Fix - move daily user transaction logging outside of the 'disable_spend_logs' flag, as they're unrelated [PR](https://github.com/BerriAI/litellm/pull/9772)

## Demo

Try this on the demo instance [today](https://docs.litellm.ai/docs/proxy/demo)

## Complete Git Diff

See the complete git diff since v1.65.0-stable [here](https://github.com/BerriAI/litellm/releases/tag/v1.65.4-stable)

@ -53,7 +53,7 @@ const sidebars = {
    {
      type: "category",
      label: "Architecture",
      items: ["proxy/architecture", "proxy/db_info", "router_architecture", "proxy/user_management_heirarchy", "proxy/jwt_auth_arch", "proxy/image_handling"],
      items: ["proxy/architecture", "proxy/db_info", "proxy/db_deadlocks", "router_architecture", "proxy/user_management_heirarchy", "proxy/jwt_auth_arch", "proxy/image_handling"],
    },
    {
      type: "link",
@ -137,6 +137,7 @@ const sidebars = {
      label: "[Beta] Guardrails",
      items: [
        "proxy/guardrails/quick_start",
        ...[
          "proxy/guardrails/aim_security",
          "proxy/guardrails/aporia_api",
          "proxy/guardrails/bedrock",
@ -145,7 +146,8 @@ const sidebars = {
          "proxy/guardrails/pii_masking_v2",
          "proxy/guardrails/secret_detection",
          "proxy/guardrails/custom_guardrail",
          "prompt_injection"
          "proxy/guardrails/prompt_injection",
        ].sort(),
      ],
    },
    {
@ -186,7 +188,15 @@ const sidebars = {
      "providers/azure_ai",
      "providers/aiml",
      "providers/vertex",
      {
        type: "category",
        label: "Google AI Studio",
        items: [
          "providers/gemini",
          "providers/google_ai_studio/files",
        ]
      },
      "providers/anthropic",
      "providers/aws_sagemaker",
      "providers/bedrock",

161 docs/my-website/src/components/TransformRequestPlayground.tsx (Normal file)

@ -0,0 +1,161 @@
import React, { useState } from 'react';
import styles from './transform_request.module.css';

const DEFAULT_REQUEST = {
  "model": "bedrock/gpt-4",
  "messages": [
    {
      "role": "system",
      "content": "You are a helpful assistant."
    },
    {
      "role": "user",
      "content": "Explain quantum computing in simple terms"
    }
  ],
  "temperature": 0.7,
  "max_tokens": 500,
  "stream": true
};

type ViewMode = 'split' | 'request' | 'transformed';

const TransformRequestPlayground: React.FC = () => {
  const [request, setRequest] = useState(JSON.stringify(DEFAULT_REQUEST, null, 2));
  const [transformedRequest, setTransformedRequest] = useState('');
  const [viewMode, setViewMode] = useState<ViewMode>('split');

  const handleTransform = async () => {
    try {
      // Here you would make the actual API call to transform the request
      // For now, we'll just set a sample response
      const sampleResponse = `curl -X POST \\
https://api.openai.com/v1/chat/completions \\
-H 'Authorization: Bearer sk-xxx' \\
-H 'Content-Type: application/json' \\
-d '{
"model": "gpt-4",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
}
],
"temperature": 0.7
}'`;
      setTransformedRequest(sampleResponse);
    } catch (error) {
      console.error('Error transforming request:', error);
    }
  };

  const handleCopy = () => {
    navigator.clipboard.writeText(transformedRequest);
  };

  const renderContent = () => {
    switch (viewMode) {
      case 'request':
        return (
          <div className={styles.panel}>
            <div className={styles['panel-header']}>
              <h2>Original Request</h2>
              <p>The request you would send to LiteLLM /chat/completions endpoint.</p>
            </div>
            <textarea
              className={styles['code-input']}
              value={request}
              onChange={(e) => setRequest(e.target.value)}
              spellCheck={false}
            />
            <div className={styles['panel-footer']}>
              <button className={styles['transform-button']} onClick={handleTransform}>
                Transform →
              </button>
            </div>
          </div>
        );
      case 'transformed':
        return (
          <div className={styles.panel}>
            <div className={styles['panel-header']}>
              <h2>Transformed Request</h2>
              <p>How LiteLLM transforms your request for the specified provider.</p>
              <p className={styles.note}>Note: Sensitive headers are not shown.</p>
            </div>
            <div className={styles['code-output-container']}>
              <pre className={styles['code-output']}>{transformedRequest}</pre>
              <button className={styles['copy-button']} onClick={handleCopy}>
                Copy
              </button>
            </div>
          </div>
        );
      default:
        return (
          <>
            <div className={styles.panel}>
              <div className={styles['panel-header']}>
                <h2>Original Request</h2>
                <p>The request you would send to LiteLLM /chat/completions endpoint.</p>
              </div>
              <textarea
                className={styles['code-input']}
                value={request}
                onChange={(e) => setRequest(e.target.value)}
                spellCheck={false}
              />
              <div className={styles['panel-footer']}>
                <button className={styles['transform-button']} onClick={handleTransform}>
                  Transform →
                </button>
              </div>
            </div>
            <div className={styles.panel}>
              <div className={styles['panel-header']}>
                <h2>Transformed Request</h2>
                <p>How LiteLLM transforms your request for the specified provider.</p>
                <p className={styles.note}>Note: Sensitive headers are not shown.</p>
              </div>
              <div className={styles['code-output-container']}>
                <pre className={styles['code-output']}>{transformedRequest}</pre>
                <button className={styles['copy-button']} onClick={handleCopy}>
                  Copy
                </button>
              </div>
            </div>
          </>
        );
    }
  };

  return (
    <div className={styles['transform-playground']}>
      <div className={styles['view-toggle']}>
        <button
          className={viewMode === 'split' ? styles.active : ''}
          onClick={() => setViewMode('split')}
        >
          Split View
        </button>
        <button
          className={viewMode === 'request' ? styles.active : ''}
          onClick={() => setViewMode('request')}
        >
          Request
        </button>
        <button
          className={viewMode === 'transformed' ? styles.active : ''}
          onClick={() => setViewMode('transformed')}
        >
          Transformed
        </button>
      </div>
      <div className={styles['playground-container']}>
        {renderContent()}
      </div>
    </div>
  );
};

export default TransformRequestPlayground;

@ -444,9 +444,7 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):

        detected_secrets = []
        for file in secrets.files:
            for found_secret in secrets[file]:
                if found_secret.secret_value is None:
                    continue
                detected_secrets.append(
@ -471,14 +469,12 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
        data: dict,
        call_type: str,  # "completion", "embeddings", "image_generation", "moderation"
    ):
        if await self.should_run_check(user_api_key_dict) is False:
            return

        if "messages" in data and isinstance(data["messages"], list):
            for message in data["messages"]:
                if "content" in message and isinstance(message["content"], str):
                    detected_secrets = self.scan_message_for_secrets(message["content"])

                    for secret in detected_secrets:

26 litellm-proxy-extras/LICENSE (Normal file)

@ -0,0 +1,26 @@
Portions of this software are licensed as follows:

* All content that resides under the "enterprise/" directory of this repository, if that directory exists, is licensed under the license defined in "enterprise/LICENSE".
* Content outside of the above mentioned directories or restrictions above is available under the MIT license as defined below.
---
MIT License

Copyright (c) 2023 Berri AI

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

21 litellm-proxy-extras/README.md (Normal file)

@ -0,0 +1,21 @@
Additional files for the proxy. Reduces the size of the main litellm package.

Currently, it only stores the migration.sql files for litellm-proxy.

To install, run:

```bash
pip install litellm-proxy-extras
```

OR

```bash
pip install litellm[proxy] # installs litellm-proxy-extras and other proxy dependencies.
```

To use the migrations, run:

```bash
litellm --use_prisma_migrate
```

BIN litellm-proxy-extras/dist/litellm_proxy_extras-0.1.0-py3-none-any.whl (vendored, Normal file)
BIN litellm-proxy-extras/dist/litellm_proxy_extras-0.1.0.tar.gz (vendored, Normal file)
BIN litellm-proxy-extras/dist/litellm_proxy_extras-0.1.1-py3-none-any.whl (vendored, Normal file)
BIN litellm-proxy-extras/dist/litellm_proxy_extras-0.1.1.tar.gz (vendored, Normal file)
BIN litellm-proxy-extras/dist/litellm_proxy_extras-0.1.2-py3-none-any.whl (vendored, Normal file)
BIN litellm-proxy-extras/dist/litellm_proxy_extras-0.1.2.tar.gz (vendored, Normal file)
BIN litellm-proxy-extras/dist/litellm_proxy_extras-0.1.3-py3-none-any.whl (vendored, Normal file)
BIN litellm-proxy-extras/dist/litellm_proxy_extras-0.1.3.tar.gz (vendored, Normal file)
0 litellm-proxy-extras/litellm_proxy_extras/__init__.py (Normal file)
12 litellm-proxy-extras/litellm_proxy_extras/_logging.py (Normal file)

@ -0,0 +1,12 @@
import logging

# Set up package logger
logger = logging.getLogger("litellm_proxy_extras")
if not logger.handlers:  # Only add handler if none exists
    handler = logging.StreamHandler()
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)
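
A minimal usage sketch for the package logger above (assuming `litellm-proxy-extras` is installed; the message is illustrative):

```python
from litellm_proxy_extras._logging import logger

# Logs through the StreamHandler configured above, at INFO level.
logger.info("applying migration 20240101_init")
```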

@ -0,0 +1,360 @@
-- CreateTable
CREATE TABLE "LiteLLM_BudgetTable" (
    "budget_id" TEXT NOT NULL,
    "max_budget" DOUBLE PRECISION,
    "soft_budget" DOUBLE PRECISION,
    "max_parallel_requests" INTEGER,
    "tpm_limit" BIGINT,
    "rpm_limit" BIGINT,
    "model_max_budget" JSONB,
    "budget_duration" TEXT,
    "budget_reset_at" TIMESTAMP(3),
    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "created_by" TEXT NOT NULL,
    "updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updated_by" TEXT NOT NULL,

    CONSTRAINT "LiteLLM_BudgetTable_pkey" PRIMARY KEY ("budget_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_CredentialsTable" (
    "credential_id" TEXT NOT NULL,
    "credential_name" TEXT NOT NULL,
    "credential_values" JSONB NOT NULL,
    "credential_info" JSONB,
    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "created_by" TEXT NOT NULL,
    "updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updated_by" TEXT NOT NULL,

    CONSTRAINT "LiteLLM_CredentialsTable_pkey" PRIMARY KEY ("credential_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_ProxyModelTable" (
    "model_id" TEXT NOT NULL,
    "model_name" TEXT NOT NULL,
    "litellm_params" JSONB NOT NULL,
    "model_info" JSONB,
    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "created_by" TEXT NOT NULL,
    "updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updated_by" TEXT NOT NULL,

    CONSTRAINT "LiteLLM_ProxyModelTable_pkey" PRIMARY KEY ("model_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_OrganizationTable" (
    "organization_id" TEXT NOT NULL,
    "organization_alias" TEXT NOT NULL,
    "budget_id" TEXT NOT NULL,
    "metadata" JSONB NOT NULL DEFAULT '{}',
    "models" TEXT[],
    "spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
    "model_spend" JSONB NOT NULL DEFAULT '{}',
    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "created_by" TEXT NOT NULL,
    "updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updated_by" TEXT NOT NULL,

    CONSTRAINT "LiteLLM_OrganizationTable_pkey" PRIMARY KEY ("organization_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_ModelTable" (
    "id" SERIAL NOT NULL,
    "aliases" JSONB,
    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "created_by" TEXT NOT NULL,
    "updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updated_by" TEXT NOT NULL,

    CONSTRAINT "LiteLLM_ModelTable_pkey" PRIMARY KEY ("id")
);

-- CreateTable
CREATE TABLE "LiteLLM_TeamTable" (
    "team_id" TEXT NOT NULL,
    "team_alias" TEXT,
    "organization_id" TEXT,
    "admins" TEXT[],
    "members" TEXT[],
    "members_with_roles" JSONB NOT NULL DEFAULT '{}',
    "metadata" JSONB NOT NULL DEFAULT '{}',
    "max_budget" DOUBLE PRECISION,
    "spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
    "models" TEXT[],
    "max_parallel_requests" INTEGER,
    "tpm_limit" BIGINT,
    "rpm_limit" BIGINT,
    "budget_duration" TEXT,
    "budget_reset_at" TIMESTAMP(3),
    "blocked" BOOLEAN NOT NULL DEFAULT false,
    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "model_spend" JSONB NOT NULL DEFAULT '{}',
    "model_max_budget" JSONB NOT NULL DEFAULT '{}',
    "model_id" INTEGER,

    CONSTRAINT "LiteLLM_TeamTable_pkey" PRIMARY KEY ("team_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_UserTable" (
    "user_id" TEXT NOT NULL,
    "user_alias" TEXT,
    "team_id" TEXT,
    "sso_user_id" TEXT,
    "organization_id" TEXT,
    "password" TEXT,
    "teams" TEXT[] DEFAULT ARRAY[]::TEXT[],
    "user_role" TEXT,
    "max_budget" DOUBLE PRECISION,
    "spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
    "user_email" TEXT,
    "models" TEXT[],
    "metadata" JSONB NOT NULL DEFAULT '{}',
    "max_parallel_requests" INTEGER,
    "tpm_limit" BIGINT,
    "rpm_limit" BIGINT,
    "budget_duration" TEXT,
    "budget_reset_at" TIMESTAMP(3),
    "allowed_cache_controls" TEXT[] DEFAULT ARRAY[]::TEXT[],
    "model_spend" JSONB NOT NULL DEFAULT '{}',
    "model_max_budget" JSONB NOT NULL DEFAULT '{}',
    "created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
    "updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,

    CONSTRAINT "LiteLLM_UserTable_pkey" PRIMARY KEY ("user_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_VerificationToken" (
    "token" TEXT NOT NULL,
    "key_name" TEXT,
    "key_alias" TEXT,
    "soft_budget_cooldown" BOOLEAN NOT NULL DEFAULT false,
    "spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
    "expires" TIMESTAMP(3),
    "models" TEXT[],
    "aliases" JSONB NOT NULL DEFAULT '{}',
    "config" JSONB NOT NULL DEFAULT '{}',
    "user_id" TEXT,
    "team_id" TEXT,
    "permissions" JSONB NOT NULL DEFAULT '{}',
    "max_parallel_requests" INTEGER,
    "metadata" JSONB NOT NULL DEFAULT '{}',
    "blocked" BOOLEAN,
    "tpm_limit" BIGINT,
    "rpm_limit" BIGINT,
    "max_budget" DOUBLE PRECISION,
    "budget_duration" TEXT,
    "budget_reset_at" TIMESTAMP(3),
    "allowed_cache_controls" TEXT[] DEFAULT ARRAY[]::TEXT[],
    "model_spend" JSONB NOT NULL DEFAULT '{}',
    "model_max_budget" JSONB NOT NULL DEFAULT '{}',
    "budget_id" TEXT,
    "organization_id" TEXT,
    "created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
    "created_by" TEXT,
    "updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
    "updated_by" TEXT,

    CONSTRAINT "LiteLLM_VerificationToken_pkey" PRIMARY KEY ("token")
);

-- CreateTable
CREATE TABLE "LiteLLM_EndUserTable" (
    "user_id" TEXT NOT NULL,
    "alias" TEXT,
    "spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
    "allowed_model_region" TEXT,
    "default_model" TEXT,
    "budget_id" TEXT,
    "blocked" BOOLEAN NOT NULL DEFAULT false,

    CONSTRAINT "LiteLLM_EndUserTable_pkey" PRIMARY KEY ("user_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_Config" (
    "param_name" TEXT NOT NULL,
    "param_value" JSONB,

    CONSTRAINT "LiteLLM_Config_pkey" PRIMARY KEY ("param_name")
);

-- CreateTable
CREATE TABLE "LiteLLM_SpendLogs" (
    "request_id" TEXT NOT NULL,
    "call_type" TEXT NOT NULL,
    "api_key" TEXT NOT NULL DEFAULT '',
    "spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
    "total_tokens" INTEGER NOT NULL DEFAULT 0,
    "prompt_tokens" INTEGER NOT NULL DEFAULT 0,
    "completion_tokens" INTEGER NOT NULL DEFAULT 0,
    "startTime" TIMESTAMP(3) NOT NULL,
    "endTime" TIMESTAMP(3) NOT NULL,
    "completionStartTime" TIMESTAMP(3),
    "model" TEXT NOT NULL DEFAULT '',
    "model_id" TEXT DEFAULT '',
    "model_group" TEXT DEFAULT '',
    "custom_llm_provider" TEXT DEFAULT '',
    "api_base" TEXT DEFAULT '',
    "user" TEXT DEFAULT '',
    "metadata" JSONB DEFAULT '{}',
    "cache_hit" TEXT DEFAULT '',
    "cache_key" TEXT DEFAULT '',
    "request_tags" JSONB DEFAULT '[]',
    "team_id" TEXT,
    "end_user" TEXT,
    "requester_ip_address" TEXT,
    "messages" JSONB DEFAULT '{}',
    "response" JSONB DEFAULT '{}',

    CONSTRAINT "LiteLLM_SpendLogs_pkey" PRIMARY KEY ("request_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_ErrorLogs" (
    "request_id" TEXT NOT NULL,
    "startTime" TIMESTAMP(3) NOT NULL,
    "endTime" TIMESTAMP(3) NOT NULL,
    "api_base" TEXT NOT NULL DEFAULT '',
    "model_group" TEXT NOT NULL DEFAULT '',
    "litellm_model_name" TEXT NOT NULL DEFAULT '',
    "model_id" TEXT NOT NULL DEFAULT '',
    "request_kwargs" JSONB NOT NULL DEFAULT '{}',
    "exception_type" TEXT NOT NULL DEFAULT '',
    "exception_string" TEXT NOT NULL DEFAULT '',
    "status_code" TEXT NOT NULL DEFAULT '',

    CONSTRAINT "LiteLLM_ErrorLogs_pkey" PRIMARY KEY ("request_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_UserNotifications" (
    "request_id" TEXT NOT NULL,
    "user_id" TEXT NOT NULL,
    "models" TEXT[],
    "justification" TEXT NOT NULL,
    "status" TEXT NOT NULL,

    CONSTRAINT "LiteLLM_UserNotifications_pkey" PRIMARY KEY ("request_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_TeamMembership" (
    "user_id" TEXT NOT NULL,
    "team_id" TEXT NOT NULL,
    "spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
    "budget_id" TEXT,

    CONSTRAINT "LiteLLM_TeamMembership_pkey" PRIMARY KEY ("user_id","team_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_OrganizationMembership" (
    "user_id" TEXT NOT NULL,
    "organization_id" TEXT NOT NULL,
    "user_role" TEXT,
    "spend" DOUBLE PRECISION DEFAULT 0.0,
    "budget_id" TEXT,
    "created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
    "updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,

    CONSTRAINT "LiteLLM_OrganizationMembership_pkey" PRIMARY KEY ("user_id","organization_id")
);

-- CreateTable
CREATE TABLE "LiteLLM_InvitationLink" (
    "id" TEXT NOT NULL,
    "user_id" TEXT NOT NULL,
    "is_accepted" BOOLEAN NOT NULL DEFAULT false,
    "accepted_at" TIMESTAMP(3),
    "expires_at" TIMESTAMP(3) NOT NULL,
    "created_at" TIMESTAMP(3) NOT NULL,
    "created_by" TEXT NOT NULL,
    "updated_at" TIMESTAMP(3) NOT NULL,
    "updated_by" TEXT NOT NULL,

    CONSTRAINT "LiteLLM_InvitationLink_pkey" PRIMARY KEY ("id")
);

-- CreateTable
CREATE TABLE "LiteLLM_AuditLog" (
    "id" TEXT NOT NULL,
    "updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "changed_by" TEXT NOT NULL DEFAULT '',
    "changed_by_api_key" TEXT NOT NULL DEFAULT '',
    "action" TEXT NOT NULL,
    "table_name" TEXT NOT NULL,
    "object_id" TEXT NOT NULL,
    "before_value" JSONB,
    "updated_values" JSONB,

    CONSTRAINT "LiteLLM_AuditLog_pkey" PRIMARY KEY ("id")
);

-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_CredentialsTable_credential_name_key" ON "LiteLLM_CredentialsTable"("credential_name");

-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_TeamTable_model_id_key" ON "LiteLLM_TeamTable"("model_id");

-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_UserTable_sso_user_id_key" ON "LiteLLM_UserTable"("sso_user_id");

-- CreateIndex
CREATE INDEX "LiteLLM_SpendLogs_startTime_idx" ON "LiteLLM_SpendLogs"("startTime");

-- CreateIndex
CREATE INDEX "LiteLLM_SpendLogs_end_user_idx" ON "LiteLLM_SpendLogs"("end_user");

-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_OrganizationMembership_user_id_organization_id_key" ON "LiteLLM_OrganizationMembership"("user_id", "organization_id");

-- AddForeignKey
ALTER TABLE "LiteLLM_OrganizationTable" ADD CONSTRAINT "LiteLLM_OrganizationTable_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE RESTRICT ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_TeamTable" ADD CONSTRAINT "LiteLLM_TeamTable_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_TeamTable" ADD CONSTRAINT "LiteLLM_TeamTable_model_id_fkey" FOREIGN KEY ("model_id") REFERENCES "LiteLLM_ModelTable"("id") ON DELETE SET NULL ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_UserTable" ADD CONSTRAINT "LiteLLM_UserTable_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_VerificationToken" ADD CONSTRAINT "LiteLLM_VerificationToken_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_VerificationToken" ADD CONSTRAINT "LiteLLM_VerificationToken_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_EndUserTable" ADD CONSTRAINT "LiteLLM_EndUserTable_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_TeamMembership" ADD CONSTRAINT "LiteLLM_TeamMembership_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_user_id_fkey" FOREIGN KEY ("user_id") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE RESTRICT ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_user_id_fkey" FOREIGN KEY ("user_id") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_created_by_fkey" FOREIGN KEY ("created_by") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;

-- AddForeignKey
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_updated_by_fkey" FOREIGN KEY ("updated_by") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;

@ -0,0 +1,33 @@
-- CreateTable
CREATE TABLE "LiteLLM_DailyUserSpend" (
    "id" TEXT NOT NULL,
    "user_id" TEXT NOT NULL,
    "date" TEXT NOT NULL,
    "api_key" TEXT NOT NULL,
    "model" TEXT NOT NULL,
    "model_group" TEXT,
    "custom_llm_provider" TEXT,
    "prompt_tokens" INTEGER NOT NULL DEFAULT 0,
    "completion_tokens" INTEGER NOT NULL DEFAULT 0,
    "spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updated_at" TIMESTAMP(3) NOT NULL,

    CONSTRAINT "LiteLLM_DailyUserSpend_pkey" PRIMARY KEY ("id")
);

-- CreateIndex
CREATE INDEX "LiteLLM_DailyUserSpend_date_idx" ON "LiteLLM_DailyUserSpend"("date");

-- CreateIndex
CREATE INDEX "LiteLLM_DailyUserSpend_user_id_idx" ON "LiteLLM_DailyUserSpend"("user_id");

-- CreateIndex
CREATE INDEX "LiteLLM_DailyUserSpend_api_key_idx" ON "LiteLLM_DailyUserSpend"("api_key");

-- CreateIndex
CREATE INDEX "LiteLLM_DailyUserSpend_model_idx" ON "LiteLLM_DailyUserSpend"("model");

-- CreateIndex
CREATE UNIQUE INDEX "LiteLLM_DailyUserSpend_user_id_date_api_key_model_custom_ll_key" ON "LiteLLM_DailyUserSpend"("user_id", "date", "api_key", "model", "custom_llm_provider");
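
The unique index above is what lets daily spend be written idempotently, one row per (user, date, key, model, provider). A minimal, illustrative upsert sketch using psycopg2 follows; the connection string, values, and write pattern are assumptions for illustration, not LiteLLM's actual write path.

```python
import uuid
import psycopg2

conn = psycopg2.connect("dbname=litellm user=postgres")  # assumed DSN

with conn, conn.cursor() as cur:
    cur.execute(
        """
        INSERT INTO "LiteLLM_DailyUserSpend"
            (id, user_id, date, api_key, model, custom_llm_provider,
             prompt_tokens, completion_tokens, spend, updated_at)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, NOW())
        ON CONFLICT (user_id, date, api_key, model, custom_llm_provider)
        DO UPDATE SET
            prompt_tokens = "LiteLLM_DailyUserSpend".prompt_tokens + EXCLUDED.prompt_tokens,
            completion_tokens = "LiteLLM_DailyUserSpend".completion_tokens + EXCLUDED.completion_tokens,
            spend = "LiteLLM_DailyUserSpend".spend + EXCLUDED.spend,
            updated_at = NOW();
        """,
        (str(uuid.uuid4()), "user_123", "2025-04-07", "sk-1234",
         "gpt-4o", "openai", 100, 50, 0.0015),  # illustrative values
    )
```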

@ -0,0 +1,3 @@
-- AlterTable
ALTER TABLE "LiteLLM_DailyUserSpend" ADD COLUMN "api_requests" INTEGER NOT NULL DEFAULT 0;