Merge remote-tracking branch 'upstream/main' into fix/reset-end-user-budget-by-duration
|
@ -49,7 +49,7 @@ jobs:
|
||||||
pip install opentelemetry-api==1.25.0
|
pip install opentelemetry-api==1.25.0
|
||||||
pip install opentelemetry-sdk==1.25.0
|
pip install opentelemetry-sdk==1.25.0
|
||||||
pip install opentelemetry-exporter-otlp==1.25.0
|
pip install opentelemetry-exporter-otlp==1.25.0
|
||||||
pip install openai==1.54.0
|
pip install openai==1.66.1
|
||||||
pip install prisma==0.11.0
|
pip install prisma==0.11.0
|
||||||
pip install "detect_secrets==1.5.0"
|
pip install "detect_secrets==1.5.0"
|
||||||
pip install "httpx==0.24.1"
|
pip install "httpx==0.24.1"
|
||||||
|
@ -71,7 +71,7 @@ jobs:
|
||||||
pip install "Pillow==10.3.0"
|
pip install "Pillow==10.3.0"
|
||||||
pip install "jsonschema==4.22.0"
|
pip install "jsonschema==4.22.0"
|
||||||
pip install "pytest-xdist==3.6.1"
|
pip install "pytest-xdist==3.6.1"
|
||||||
pip install "websockets==10.4"
|
pip install "websockets==13.1.0"
|
||||||
pip uninstall posthog -y
|
pip uninstall posthog -y
|
||||||
- save_cache:
|
- save_cache:
|
||||||
paths:
|
paths:
|
||||||
|
@ -168,7 +168,7 @@ jobs:
|
||||||
pip install opentelemetry-api==1.25.0
|
pip install opentelemetry-api==1.25.0
|
||||||
pip install opentelemetry-sdk==1.25.0
|
pip install opentelemetry-sdk==1.25.0
|
||||||
pip install opentelemetry-exporter-otlp==1.25.0
|
pip install opentelemetry-exporter-otlp==1.25.0
|
||||||
pip install openai==1.54.0
|
pip install openai==1.66.1
|
||||||
pip install prisma==0.11.0
|
pip install prisma==0.11.0
|
||||||
pip install "detect_secrets==1.5.0"
|
pip install "detect_secrets==1.5.0"
|
||||||
pip install "httpx==0.24.1"
|
pip install "httpx==0.24.1"
|
||||||
|
@ -189,6 +189,7 @@ jobs:
|
||||||
pip install "diskcache==5.6.1"
|
pip install "diskcache==5.6.1"
|
||||||
pip install "Pillow==10.3.0"
|
pip install "Pillow==10.3.0"
|
||||||
pip install "jsonschema==4.22.0"
|
pip install "jsonschema==4.22.0"
|
||||||
|
pip install "websockets==13.1.0"
|
||||||
- save_cache:
|
- save_cache:
|
||||||
paths:
|
paths:
|
||||||
- ./venv
|
- ./venv
|
||||||
|
@ -267,7 +268,7 @@ jobs:
|
||||||
pip install opentelemetry-api==1.25.0
|
pip install opentelemetry-api==1.25.0
|
||||||
pip install opentelemetry-sdk==1.25.0
|
pip install opentelemetry-sdk==1.25.0
|
||||||
pip install opentelemetry-exporter-otlp==1.25.0
|
pip install opentelemetry-exporter-otlp==1.25.0
|
||||||
pip install openai==1.54.0
|
pip install openai==1.66.1
|
||||||
pip install prisma==0.11.0
|
pip install prisma==0.11.0
|
||||||
pip install "detect_secrets==1.5.0"
|
pip install "detect_secrets==1.5.0"
|
||||||
pip install "httpx==0.24.1"
|
pip install "httpx==0.24.1"
|
||||||
|
@ -288,6 +289,7 @@ jobs:
|
||||||
pip install "diskcache==5.6.1"
|
pip install "diskcache==5.6.1"
|
||||||
pip install "Pillow==10.3.0"
|
pip install "Pillow==10.3.0"
|
||||||
pip install "jsonschema==4.22.0"
|
pip install "jsonschema==4.22.0"
|
||||||
|
pip install "websockets==13.1.0"
|
||||||
- save_cache:
|
- save_cache:
|
||||||
paths:
|
paths:
|
||||||
- ./venv
|
- ./venv
|
||||||
|
@ -511,7 +513,7 @@ jobs:
|
||||||
pip install opentelemetry-api==1.25.0
|
pip install opentelemetry-api==1.25.0
|
||||||
pip install opentelemetry-sdk==1.25.0
|
pip install opentelemetry-sdk==1.25.0
|
||||||
pip install opentelemetry-exporter-otlp==1.25.0
|
pip install opentelemetry-exporter-otlp==1.25.0
|
||||||
pip install openai==1.54.0
|
pip install openai==1.66.1
|
||||||
pip install prisma==0.11.0
|
pip install prisma==0.11.0
|
||||||
pip install "detect_secrets==1.5.0"
|
pip install "detect_secrets==1.5.0"
|
||||||
pip install "httpx==0.24.1"
|
pip install "httpx==0.24.1"
|
||||||
|
@ -678,6 +680,92 @@ jobs:
|
||||||
paths:
|
paths:
|
||||||
- llm_translation_coverage.xml
|
- llm_translation_coverage.xml
|
||||||
- llm_translation_coverage
|
- llm_translation_coverage
|
||||||
|
llm_responses_api_testing:
|
||||||
|
docker:
|
||||||
|
- image: cimg/python:3.11
|
||||||
|
auth:
|
||||||
|
username: ${DOCKERHUB_USERNAME}
|
||||||
|
password: ${DOCKERHUB_PASSWORD}
|
||||||
|
working_directory: ~/project
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- checkout
|
||||||
|
- run:
|
||||||
|
name: Install Dependencies
|
||||||
|
command: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
python -m pip install -r requirements.txt
|
||||||
|
pip install "pytest==7.3.1"
|
||||||
|
pip install "pytest-retry==1.6.3"
|
||||||
|
pip install "pytest-cov==5.0.0"
|
||||||
|
pip install "pytest-asyncio==0.21.1"
|
||||||
|
pip install "respx==0.21.1"
|
||||||
|
# Run pytest and generate JUnit XML report
|
||||||
|
- run:
|
||||||
|
name: Run tests
|
||||||
|
command: |
|
||||||
|
pwd
|
||||||
|
ls
|
||||||
|
python -m pytest -vv tests/llm_responses_api_testing --cov=litellm --cov-report=xml -x -s -v --junitxml=test-results/junit.xml --durations=5
|
||||||
|
no_output_timeout: 120m
|
||||||
|
- run:
|
||||||
|
name: Rename the coverage files
|
||||||
|
command: |
|
||||||
|
mv coverage.xml llm_responses_api_coverage.xml
|
||||||
|
mv .coverage llm_responses_api_coverage
|
||||||
|
|
||||||
|
# Store test results
|
||||||
|
- store_test_results:
|
||||||
|
path: test-results
|
||||||
|
- persist_to_workspace:
|
||||||
|
root: .
|
||||||
|
paths:
|
||||||
|
- llm_responses_api_coverage.xml
|
||||||
|
- llm_responses_api_coverage
|
||||||
|
litellm_mapped_tests:
|
||||||
|
docker:
|
||||||
|
- image: cimg/python:3.11
|
||||||
|
auth:
|
||||||
|
username: ${DOCKERHUB_USERNAME}
|
||||||
|
password: ${DOCKERHUB_PASSWORD}
|
||||||
|
working_directory: ~/project
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- checkout
|
||||||
|
- run:
|
||||||
|
name: Install Dependencies
|
||||||
|
command: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
python -m pip install -r requirements.txt
|
||||||
|
pip install "pytest-mock==3.12.0"
|
||||||
|
pip install "pytest==7.3.1"
|
||||||
|
pip install "pytest-retry==1.6.3"
|
||||||
|
pip install "pytest-cov==5.0.0"
|
||||||
|
pip install "pytest-asyncio==0.21.1"
|
||||||
|
pip install "respx==0.21.1"
|
||||||
|
pip install "hypercorn==0.17.3"
|
||||||
|
# Run pytest and generate JUnit XML report
|
||||||
|
- run:
|
||||||
|
name: Run tests
|
||||||
|
command: |
|
||||||
|
pwd
|
||||||
|
ls
|
||||||
|
python -m pytest -vv tests/litellm --cov=litellm --cov-report=xml -x -s -v --junitxml=test-results/junit.xml --durations=5
|
||||||
|
no_output_timeout: 120m
|
||||||
|
- run:
|
||||||
|
name: Rename the coverage files
|
||||||
|
command: |
|
||||||
|
mv coverage.xml litellm_mapped_tests_coverage.xml
|
||||||
|
mv .coverage litellm_mapped_tests_coverage
|
||||||
|
|
||||||
|
# Store test results
|
||||||
|
- store_test_results:
|
||||||
|
path: test-results
|
||||||
|
- persist_to_workspace:
|
||||||
|
root: .
|
||||||
|
paths:
|
||||||
|
- litellm_mapped_tests_coverage.xml
|
||||||
|
- litellm_mapped_tests_coverage
|
||||||
batches_testing:
|
batches_testing:
|
||||||
docker:
|
docker:
|
||||||
- image: cimg/python:3.11
|
- image: cimg/python:3.11
|
||||||
|
@ -1046,6 +1134,7 @@ jobs:
|
||||||
- run: python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
|
- run: python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
|
||||||
- run: ruff check ./litellm
|
- run: ruff check ./litellm
|
||||||
# - run: python ./tests/documentation_tests/test_general_setting_keys.py
|
# - run: python ./tests/documentation_tests/test_general_setting_keys.py
|
||||||
|
- run: python ./tests/code_coverage_tests/check_licenses.py
|
||||||
- run: python ./tests/code_coverage_tests/router_code_coverage.py
|
- run: python ./tests/code_coverage_tests/router_code_coverage.py
|
||||||
- run: python ./tests/code_coverage_tests/callback_manager_test.py
|
- run: python ./tests/code_coverage_tests/callback_manager_test.py
|
||||||
- run: python ./tests/code_coverage_tests/recursive_detector.py
|
- run: python ./tests/code_coverage_tests/recursive_detector.py
|
||||||
|
@ -1058,6 +1147,7 @@ jobs:
|
||||||
- run: python ./tests/code_coverage_tests/ensure_async_clients_test.py
|
- run: python ./tests/code_coverage_tests/ensure_async_clients_test.py
|
||||||
- run: python ./tests/code_coverage_tests/enforce_llms_folder_style.py
|
- run: python ./tests/code_coverage_tests/enforce_llms_folder_style.py
|
||||||
- run: python ./tests/documentation_tests/test_circular_imports.py
|
- run: python ./tests/documentation_tests/test_circular_imports.py
|
||||||
|
- run: python ./tests/code_coverage_tests/prevent_key_leaks_in_exceptions.py
|
||||||
- run: helm lint ./deploy/charts/litellm-helm
|
- run: helm lint ./deploy/charts/litellm-helm
|
||||||
|
|
||||||
db_migration_disable_update_check:
|
db_migration_disable_update_check:
|
||||||
|
@ -1067,6 +1157,23 @@ jobs:
|
||||||
working_directory: ~/project
|
working_directory: ~/project
|
||||||
steps:
|
steps:
|
||||||
- checkout
|
- checkout
|
||||||
|
- run:
|
||||||
|
name: Install Python 3.9
|
||||||
|
command: |
|
||||||
|
curl https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh --output miniconda.sh
|
||||||
|
bash miniconda.sh -b -p $HOME/miniconda
|
||||||
|
export PATH="$HOME/miniconda/bin:$PATH"
|
||||||
|
conda init bash
|
||||||
|
source ~/.bashrc
|
||||||
|
conda create -n myenv python=3.9 -y
|
||||||
|
conda activate myenv
|
||||||
|
python --version
|
||||||
|
- run:
|
||||||
|
name: Install Dependencies
|
||||||
|
command: |
|
||||||
|
pip install "pytest==7.3.1"
|
||||||
|
pip install "pytest-asyncio==0.21.1"
|
||||||
|
pip install aiohttp
|
||||||
- run:
|
- run:
|
||||||
name: Build Docker image
|
name: Build Docker image
|
||||||
command: |
|
command: |
|
||||||
|
@ -1074,29 +1181,48 @@ jobs:
|
||||||
- run:
|
- run:
|
||||||
name: Run Docker container
|
name: Run Docker container
|
||||||
command: |
|
command: |
|
||||||
docker run --name my-app \
|
docker run -d \
|
||||||
-p 4000:4000 \
|
-p 4000:4000 \
|
||||||
-e DATABASE_URL=$PROXY_DATABASE_URL \
|
-e DATABASE_URL=$PROXY_DATABASE_URL \
|
||||||
-e DISABLE_SCHEMA_UPDATE="True" \
|
-e DISABLE_SCHEMA_UPDATE="True" \
|
||||||
-v $(pwd)/litellm/proxy/example_config_yaml/bad_schema.prisma:/app/schema.prisma \
|
-v $(pwd)/litellm/proxy/example_config_yaml/bad_schema.prisma:/app/schema.prisma \
|
||||||
-v $(pwd)/litellm/proxy/example_config_yaml/bad_schema.prisma:/app/litellm/proxy/schema.prisma \
|
-v $(pwd)/litellm/proxy/example_config_yaml/bad_schema.prisma:/app/litellm/proxy/schema.prisma \
|
||||||
-v $(pwd)/litellm/proxy/example_config_yaml/disable_schema_update.yaml:/app/config.yaml \
|
-v $(pwd)/litellm/proxy/example_config_yaml/disable_schema_update.yaml:/app/config.yaml \
|
||||||
|
--name my-app \
|
||||||
myapp:latest \
|
myapp:latest \
|
||||||
--config /app/config.yaml \
|
--config /app/config.yaml \
|
||||||
--port 4000 > docker_output.log 2>&1 || true
|
--port 4000
|
||||||
- run:
|
- run:
|
||||||
name: Display Docker logs
|
name: Install curl and dockerize
|
||||||
command: cat docker_output.log
|
|
||||||
- run:
|
|
||||||
name: Check for expected error
|
|
||||||
command: |
|
command: |
|
||||||
if grep -q "prisma schema out of sync with db. Consider running these sql_commands to sync the two" docker_output.log; then
|
sudo apt-get update
|
||||||
echo "Expected error found. Test passed."
|
sudo apt-get install -y curl
|
||||||
|
sudo wget https://github.com/jwilder/dockerize/releases/download/v0.6.1/dockerize-linux-amd64-v0.6.1.tar.gz
|
||||||
|
sudo tar -C /usr/local/bin -xzvf dockerize-linux-amd64-v0.6.1.tar.gz
|
||||||
|
sudo rm dockerize-linux-amd64-v0.6.1.tar.gz
|
||||||
|
|
||||||
|
- run:
|
||||||
|
name: Wait for container to be ready
|
||||||
|
command: dockerize -wait http://localhost:4000 -timeout 1m
|
||||||
|
- run:
|
||||||
|
name: Check container logs for expected message
|
||||||
|
command: |
|
||||||
|
echo "=== Printing Full Container Startup Logs ==="
|
||||||
|
docker logs my-app
|
||||||
|
echo "=== End of Full Container Startup Logs ==="
|
||||||
|
|
||||||
|
if docker logs my-app 2>&1 | grep -q "prisma schema out of sync with db. Consider running these sql_commands to sync the two"; then
|
||||||
|
echo "Expected message found in logs. Test passed."
|
||||||
else
|
else
|
||||||
echo "Expected error not found. Test failed."
|
echo "Expected message not found in logs. Test failed."
|
||||||
cat docker_output.log
|
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
- run:
|
||||||
|
name: Run Basic Proxy Startup Tests (Health Readiness and Chat Completion)
|
||||||
|
command: |
|
||||||
|
python -m pytest -vv tests/basic_proxy_startup_tests -x --junitxml=test-results/junit-2.xml --durations=5
|
||||||
|
no_output_timeout: 120m
|
||||||
|
|
||||||
|
|
||||||
build_and_test:
|
build_and_test:
|
||||||
machine:
|
machine:
|
||||||
|
@ -1152,7 +1278,7 @@ jobs:
|
||||||
pip install "aiodynamo==23.10.1"
|
pip install "aiodynamo==23.10.1"
|
||||||
pip install "asyncio==3.4.3"
|
pip install "asyncio==3.4.3"
|
||||||
pip install "PyGithub==1.59.1"
|
pip install "PyGithub==1.59.1"
|
||||||
pip install "openai==1.54.0 "
|
pip install "openai==1.66.1"
|
||||||
- run:
|
- run:
|
||||||
name: Install Grype
|
name: Install Grype
|
||||||
command: |
|
command: |
|
||||||
|
@ -1227,13 +1353,13 @@ jobs:
|
||||||
command: |
|
command: |
|
||||||
pwd
|
pwd
|
||||||
ls
|
ls
|
||||||
python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests
|
python -m pytest -s -vv tests/*.py -x --junitxml=test-results/junit.xml --durations=5 --ignore=tests/otel_tests --ignore=tests/pass_through_tests --ignore=tests/proxy_admin_ui_tests --ignore=tests/load_tests --ignore=tests/llm_translation --ignore=tests/llm_responses_api_testing --ignore=tests/image_gen_tests --ignore=tests/pass_through_unit_tests
|
||||||
no_output_timeout: 120m
|
no_output_timeout: 120m
|
||||||
|
|
||||||
# Store test results
|
# Store test results
|
||||||
- store_test_results:
|
- store_test_results:
|
||||||
path: test-results
|
path: test-results
|
||||||
e2e_openai_misc_endpoints:
|
e2e_openai_endpoints:
|
||||||
machine:
|
machine:
|
||||||
image: ubuntu-2204:2023.10.1
|
image: ubuntu-2204:2023.10.1
|
||||||
resource_class: xlarge
|
resource_class: xlarge
|
||||||
|
@ -1288,7 +1414,7 @@ jobs:
|
||||||
pip install "aiodynamo==23.10.1"
|
pip install "aiodynamo==23.10.1"
|
||||||
pip install "asyncio==3.4.3"
|
pip install "asyncio==3.4.3"
|
||||||
pip install "PyGithub==1.59.1"
|
pip install "PyGithub==1.59.1"
|
||||||
pip install "openai==1.54.0 "
|
pip install "openai==1.66.1"
|
||||||
# Run pytest and generate JUnit XML report
|
# Run pytest and generate JUnit XML report
|
||||||
- run:
|
- run:
|
||||||
name: Build Docker image
|
name: Build Docker image
|
||||||
|
@ -1350,7 +1476,7 @@ jobs:
|
||||||
command: |
|
command: |
|
||||||
pwd
|
pwd
|
||||||
ls
|
ls
|
||||||
python -m pytest -s -vv tests/openai_misc_endpoints_tests --junitxml=test-results/junit.xml --durations=5
|
python -m pytest -s -vv tests/openai_endpoints_tests --junitxml=test-results/junit.xml --durations=5
|
||||||
no_output_timeout: 120m
|
no_output_timeout: 120m
|
||||||
|
|
||||||
# Store test results
|
# Store test results
|
||||||
|
@ -1410,7 +1536,7 @@ jobs:
|
||||||
pip install "aiodynamo==23.10.1"
|
pip install "aiodynamo==23.10.1"
|
||||||
pip install "asyncio==3.4.3"
|
pip install "asyncio==3.4.3"
|
||||||
pip install "PyGithub==1.59.1"
|
pip install "PyGithub==1.59.1"
|
||||||
pip install "openai==1.54.0 "
|
pip install "openai==1.66.1"
|
||||||
- run:
|
- run:
|
||||||
name: Build Docker image
|
name: Build Docker image
|
||||||
command: docker build -t my-app:latest -f ./docker/Dockerfile.database .
|
command: docker build -t my-app:latest -f ./docker/Dockerfile.database .
|
||||||
|
@ -1839,7 +1965,7 @@ jobs:
|
||||||
pip install "pytest-asyncio==0.21.1"
|
pip install "pytest-asyncio==0.21.1"
|
||||||
pip install "google-cloud-aiplatform==1.43.0"
|
pip install "google-cloud-aiplatform==1.43.0"
|
||||||
pip install aiohttp
|
pip install aiohttp
|
||||||
pip install "openai==1.54.0 "
|
pip install "openai==1.66.1"
|
||||||
pip install "assemblyai==0.37.0"
|
pip install "assemblyai==0.37.0"
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install "pydantic==2.7.1"
|
pip install "pydantic==2.7.1"
|
||||||
|
@ -1853,12 +1979,12 @@ jobs:
|
||||||
pip install prisma
|
pip install prisma
|
||||||
pip install fastapi
|
pip install fastapi
|
||||||
pip install jsonschema
|
pip install jsonschema
|
||||||
pip install "httpx==0.24.1"
|
pip install "httpx==0.27.0"
|
||||||
pip install "anyio==3.7.1"
|
pip install "anyio==3.7.1"
|
||||||
pip install "asyncio==3.4.3"
|
pip install "asyncio==3.4.3"
|
||||||
pip install "PyGithub==1.59.1"
|
pip install "PyGithub==1.59.1"
|
||||||
pip install "google-cloud-aiplatform==1.59.0"
|
pip install "google-cloud-aiplatform==1.59.0"
|
||||||
pip install anthropic
|
pip install "anthropic==0.49.0"
|
||||||
# Run pytest and generate JUnit XML report
|
# Run pytest and generate JUnit XML report
|
||||||
- run:
|
- run:
|
||||||
name: Build Docker image
|
name: Build Docker image
|
||||||
|
@ -1900,11 +2026,44 @@ jobs:
|
||||||
- run:
|
- run:
|
||||||
name: Wait for app to be ready
|
name: Wait for app to be ready
|
||||||
command: dockerize -wait http://localhost:4000 -timeout 5m
|
command: dockerize -wait http://localhost:4000 -timeout 5m
|
||||||
|
# Add Ruby installation and testing before the existing Node.js and Python tests
|
||||||
|
- run:
|
||||||
|
name: Install Ruby and Bundler
|
||||||
|
command: |
|
||||||
|
# Import GPG keys first
|
||||||
|
gpg --keyserver hkp://keyserver.ubuntu.com --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3 7D2BAF1CF37B13E2069D6956105BD0E739499BDB || {
|
||||||
|
curl -sSL https://rvm.io/mpapis.asc | gpg --import -
|
||||||
|
curl -sSL https://rvm.io/pkuczynski.asc | gpg --import -
|
||||||
|
}
|
||||||
|
|
||||||
|
# Install Ruby version manager (RVM)
|
||||||
|
curl -sSL https://get.rvm.io | bash -s stable
|
||||||
|
|
||||||
|
# Source RVM from the correct location
|
||||||
|
source $HOME/.rvm/scripts/rvm
|
||||||
|
|
||||||
|
# Install Ruby 3.2.2
|
||||||
|
rvm install 3.2.2
|
||||||
|
rvm use 3.2.2 --default
|
||||||
|
|
||||||
|
# Install latest Bundler
|
||||||
|
gem install bundler
|
||||||
|
|
||||||
|
- run:
|
||||||
|
name: Run Ruby tests
|
||||||
|
command: |
|
||||||
|
source $HOME/.rvm/scripts/rvm
|
||||||
|
cd tests/pass_through_tests/ruby_passthrough_tests
|
||||||
|
bundle install
|
||||||
|
bundle exec rspec
|
||||||
|
no_output_timeout: 30m
|
||||||
# New steps to run Node.js test
|
# New steps to run Node.js test
|
||||||
- run:
|
- run:
|
||||||
name: Install Node.js
|
name: Install Node.js
|
||||||
command: |
|
command: |
|
||||||
|
export DEBIAN_FRONTEND=noninteractive
|
||||||
curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash -
|
curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash -
|
||||||
|
sudo apt-get update
|
||||||
sudo apt-get install -y nodejs
|
sudo apt-get install -y nodejs
|
||||||
node --version
|
node --version
|
||||||
npm --version
|
npm --version
|
||||||
|
@ -1953,7 +2112,7 @@ jobs:
|
||||||
python -m venv venv
|
python -m venv venv
|
||||||
. venv/bin/activate
|
. venv/bin/activate
|
||||||
pip install coverage
|
pip install coverage
|
||||||
coverage combine llm_translation_coverage logging_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_coverage image_gen_coverage pass_through_unit_tests_coverage batches_coverage litellm_proxy_security_tests_coverage
|
coverage combine llm_translation_coverage llm_responses_api_coverage logging_coverage litellm_router_coverage local_testing_coverage litellm_assistants_api_coverage auth_ui_unit_tests_coverage langfuse_coverage caching_coverage litellm_proxy_unit_tests_coverage image_gen_coverage pass_through_unit_tests_coverage batches_coverage litellm_proxy_security_tests_coverage
|
||||||
coverage xml
|
coverage xml
|
||||||
- codecov/upload:
|
- codecov/upload:
|
||||||
file: ./coverage.xml
|
file: ./coverage.xml
|
||||||
|
@ -2017,7 +2176,7 @@ jobs:
|
||||||
circleci step halt
|
circleci step halt
|
||||||
fi
|
fi
|
||||||
- run:
|
- run:
|
||||||
name: Trigger Github Action for new Docker Container + Trigger Stable Release Testing
|
name: Trigger Github Action for new Docker Container + Trigger Load Testing
|
||||||
command: |
|
command: |
|
||||||
echo "Install TOML package."
|
echo "Install TOML package."
|
||||||
python3 -m pip install toml
|
python3 -m pip install toml
|
||||||
|
@ -2027,9 +2186,9 @@ jobs:
|
||||||
-H "Accept: application/vnd.github.v3+json" \
|
-H "Accept: application/vnd.github.v3+json" \
|
||||||
-H "Authorization: Bearer $GITHUB_TOKEN" \
|
-H "Authorization: Bearer $GITHUB_TOKEN" \
|
||||||
"https://api.github.com/repos/BerriAI/litellm/actions/workflows/ghcr_deploy.yml/dispatches" \
|
"https://api.github.com/repos/BerriAI/litellm/actions/workflows/ghcr_deploy.yml/dispatches" \
|
||||||
-d "{\"ref\":\"main\", \"inputs\":{\"tag\":\"v${VERSION}\", \"commit_hash\":\"$CIRCLE_SHA1\"}}"
|
-d "{\"ref\":\"main\", \"inputs\":{\"tag\":\"v${VERSION}-nightly\", \"commit_hash\":\"$CIRCLE_SHA1\"}}"
|
||||||
echo "triggering stable release server for version ${VERSION} and commit ${CIRCLE_SHA1}"
|
echo "triggering load testing server for version ${VERSION} and commit ${CIRCLE_SHA1}"
|
||||||
curl -X POST "https://proxyloadtester-production.up.railway.app/start/load/test?version=${VERSION}&commit_hash=${CIRCLE_SHA1}"
|
curl -X POST "https://proxyloadtester-production.up.railway.app/start/load/test?version=${VERSION}&commit_hash=${CIRCLE_SHA1}&release_type=nightly"
|
||||||
|
|
||||||
e2e_ui_testing:
|
e2e_ui_testing:
|
||||||
machine:
|
machine:
|
||||||
|
@ -2082,7 +2241,7 @@ jobs:
|
||||||
pip install "pytest-retry==1.6.3"
|
pip install "pytest-retry==1.6.3"
|
||||||
pip install "pytest-asyncio==0.21.1"
|
pip install "pytest-asyncio==0.21.1"
|
||||||
pip install aiohttp
|
pip install aiohttp
|
||||||
pip install "openai==1.54.0 "
|
pip install "openai==1.66.1"
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install "pydantic==2.7.1"
|
pip install "pydantic==2.7.1"
|
||||||
pip install "pytest==7.3.1"
|
pip install "pytest==7.3.1"
|
||||||
|
@ -2272,7 +2431,7 @@ workflows:
|
||||||
only:
|
only:
|
||||||
- main
|
- main
|
||||||
- /litellm_.*/
|
- /litellm_.*/
|
||||||
- e2e_openai_misc_endpoints:
|
- e2e_openai_endpoints:
|
||||||
filters:
|
filters:
|
||||||
branches:
|
branches:
|
||||||
only:
|
only:
|
||||||
|
@ -2314,6 +2473,18 @@ workflows:
|
||||||
only:
|
only:
|
||||||
- main
|
- main
|
||||||
- /litellm_.*/
|
- /litellm_.*/
|
||||||
|
- llm_responses_api_testing:
|
||||||
|
filters:
|
||||||
|
branches:
|
||||||
|
only:
|
||||||
|
- main
|
||||||
|
- /litellm_.*/
|
||||||
|
- litellm_mapped_tests:
|
||||||
|
filters:
|
||||||
|
branches:
|
||||||
|
only:
|
||||||
|
- main
|
||||||
|
- /litellm_.*/
|
||||||
- batches_testing:
|
- batches_testing:
|
||||||
filters:
|
filters:
|
||||||
branches:
|
branches:
|
||||||
|
@ -2347,6 +2518,8 @@ workflows:
|
||||||
- upload-coverage:
|
- upload-coverage:
|
||||||
requires:
|
requires:
|
||||||
- llm_translation_testing
|
- llm_translation_testing
|
||||||
|
- llm_responses_api_testing
|
||||||
|
- litellm_mapped_tests
|
||||||
- batches_testing
|
- batches_testing
|
||||||
- litellm_utils_testing
|
- litellm_utils_testing
|
||||||
- pass_through_unit_testing
|
- pass_through_unit_testing
|
||||||
|
@ -2400,10 +2573,12 @@ workflows:
|
||||||
requires:
|
requires:
|
||||||
- local_testing
|
- local_testing
|
||||||
- build_and_test
|
- build_and_test
|
||||||
- e2e_openai_misc_endpoints
|
- e2e_openai_endpoints
|
||||||
- load_testing
|
- load_testing
|
||||||
- test_bad_database_url
|
- test_bad_database_url
|
||||||
- llm_translation_testing
|
- llm_translation_testing
|
||||||
|
- llm_responses_api_testing
|
||||||
|
- litellm_mapped_tests
|
||||||
- batches_testing
|
- batches_testing
|
||||||
- litellm_utils_testing
|
- litellm_utils_testing
|
||||||
- pass_through_unit_testing
|
- pass_through_unit_testing
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
# used by CI/CD testing
|
# used by CI/CD testing
|
||||||
openai==1.54.0
|
openai==1.66.1
|
||||||
python-dotenv
|
python-dotenv
|
||||||
tiktoken
|
tiktoken
|
||||||
importlib_metadata
|
importlib_metadata
|
||||||
|
|
16
.github/pull_request_template.md
vendored
|
@ -6,6 +6,16 @@
|
||||||
|
|
||||||
<!-- e.g. "Fixes #000" -->
|
<!-- e.g. "Fixes #000" -->
|
||||||
|
|
||||||
|
## Pre-Submission checklist
|
||||||
|
|
||||||
|
**Please complete all items before asking a LiteLLM maintainer to review your PR**
|
||||||
|
|
||||||
|
- [ ] I have added testing in the `tests/litellm/` directory. **Adding at least 1 test is a hard requirement** - [see details](https://docs.litellm.ai/docs/extras/contributing_code)
|
||||||
|
- [ ] I have added a screenshot of my new test passing locally
|
||||||
|
- [ ] My PR passes all unit tests on [`make test-unit`](https://docs.litellm.ai/docs/extras/contributing_code)
|
||||||
|
- [ ] My PR's scope is as isolated as possible; it only solves 1 specific problem
|
||||||
|
|
||||||
|
|
||||||
## Type
|
## Type
|
||||||
|
|
||||||
<!-- Select the type of Pull Request -->
|
<!-- Select the type of Pull Request -->
|
||||||
|
@ -20,10 +30,4 @@
|
||||||
|
|
||||||
## Changes
|
## Changes
|
||||||
|
|
||||||
<!-- List of changes -->
|
|
||||||
|
|
||||||
## [REQUIRED] Testing - Attach a screenshot of any new tests passing locally
|
|
||||||
If UI changes, send a screenshot/GIF of working UI fixes
|
|
||||||
|
|
||||||
<!-- Test procedure -->
|
|
||||||
|
|
||||||
|
|
25
.github/workflows/ghcr_deploy.yml
vendored
|
@ -80,7 +80,6 @@ jobs:
|
||||||
permissions:
|
permissions:
|
||||||
contents: read
|
contents: read
|
||||||
packages: write
|
packages: write
|
||||||
#
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
@ -112,7 +111,11 @@ jobs:
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
push: true
|
push: true
|
||||||
tags: ${{ steps.meta.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta.outputs.tags }}-${{ github.event.inputs.release_type }} # if a tag is provided, use that, otherwise use the release tag, and if neither is available, use 'latest'
|
tags: |
|
||||||
|
${{ steps.meta.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }},
|
||||||
|
${{ steps.meta.outputs.tags }}-${{ github.event.inputs.release_type }}
|
||||||
|
${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm:main-{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
|
||||||
|
${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm:main-stable', env.REGISTRY) || '' }}
|
||||||
labels: ${{ steps.meta.outputs.labels }}
|
labels: ${{ steps.meta.outputs.labels }}
|
||||||
platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
|
platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
|
||||||
|
|
||||||
|
@ -151,7 +154,11 @@ jobs:
|
||||||
context: .
|
context: .
|
||||||
file: ./docker/Dockerfile.database
|
file: ./docker/Dockerfile.database
|
||||||
push: true
|
push: true
|
||||||
tags: ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.release_type }}
|
tags: |
|
||||||
|
${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }},
|
||||||
|
${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.release_type }}
|
||||||
|
${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm-database:main-{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
|
||||||
|
${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm-database:main-stable', env.REGISTRY) || '' }}
|
||||||
labels: ${{ steps.meta-database.outputs.labels }}
|
labels: ${{ steps.meta-database.outputs.labels }}
|
||||||
platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
|
platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
|
||||||
|
|
||||||
|
@ -190,7 +197,11 @@ jobs:
|
||||||
context: .
|
context: .
|
||||||
file: ./docker/Dockerfile.non_root
|
file: ./docker/Dockerfile.non_root
|
||||||
push: true
|
push: true
|
||||||
tags: ${{ steps.meta-non_root.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-non_root.outputs.tags }}-${{ github.event.inputs.release_type }}
|
tags: |
|
||||||
|
${{ steps.meta-non_root.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }},
|
||||||
|
${{ steps.meta-non_root.outputs.tags }}-${{ github.event.inputs.release_type }}
|
||||||
|
${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm-non_root:main-{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
|
||||||
|
${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm-non_root:main-stable', env.REGISTRY) || '' }}
|
||||||
labels: ${{ steps.meta-non_root.outputs.labels }}
|
labels: ${{ steps.meta-non_root.outputs.labels }}
|
||||||
platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
|
platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
|
||||||
|
|
||||||
|
@ -229,7 +240,11 @@ jobs:
|
||||||
context: .
|
context: .
|
||||||
file: ./litellm-js/spend-logs/Dockerfile
|
file: ./litellm-js/spend-logs/Dockerfile
|
||||||
push: true
|
push: true
|
||||||
tags: ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.release_type }}
|
tags: |
|
||||||
|
${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }},
|
||||||
|
${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.release_type }}
|
||||||
|
${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm-spend_logs:main-{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
|
||||||
|
${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm-spend_logs:main-stable', env.REGISTRY) || '' }}
|
||||||
platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
|
platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
|
||||||
|
|
||||||
build-and-push-helm-chart:
|
build-and-push-helm-chart:
|
||||||
|
|
27
.github/workflows/helm_unit_test.yml
vendored
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
name: Helm unit test
|
||||||
|
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
unit-test:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- name: Set up Helm 3.11.1
|
||||||
|
uses: azure/setup-helm@v1
|
||||||
|
with:
|
||||||
|
version: '3.11.1'
|
||||||
|
|
||||||
|
- name: Install Helm Unit Test Plugin
|
||||||
|
run: |
|
||||||
|
helm plugin install https://github.com/helm-unittest/helm-unittest --version v0.4.4
|
||||||
|
|
||||||
|
- name: Run unit tests
|
||||||
|
run:
|
||||||
|
helm unittest -f 'tests/*.yaml' deploy/charts/litellm-helm
|
4
.github/workflows/interpret_load_test.py
vendored
|
@ -61,7 +61,8 @@ def _get_docker_run_command_stable_release(release_version):
|
||||||
docker run \\
|
docker run \\
|
||||||
-e STORE_MODEL_IN_DB=True \\
|
-e STORE_MODEL_IN_DB=True \\
|
||||||
-p 4000:4000 \\
|
-p 4000:4000 \\
|
||||||
ghcr.io/berriai/litellm_stable_release_branch-{release_version}
|
ghcr.io/berriai/litellm:litellm_stable_release_branch-{release_version}
|
||||||
|
```
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
@ -75,6 +76,7 @@ def _get_docker_run_command(release_version):
|
||||||
-e STORE_MODEL_IN_DB=True \\
|
-e STORE_MODEL_IN_DB=True \\
|
||||||
-p 4000:4000 \\
|
-p 4000:4000 \\
|
||||||
ghcr.io/berriai/litellm:main-{release_version}
|
ghcr.io/berriai/litellm:main-{release_version}
|
||||||
|
```
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
|
2
.github/workflows/locustfile.py
vendored
|
@ -8,7 +8,7 @@ class MyUser(HttpUser):
|
||||||
def chat_completion(self):
|
def chat_completion(self):
|
||||||
headers = {
|
headers = {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
"Authorization": "Bearer sk-ZoHqrLIs2-5PzJrqBaviAA",
|
"Authorization": "Bearer sk-8N1tLOOyH8TIxwOLahhIVg",
|
||||||
# Include any additional headers you may need for authentication, etc.
|
# Include any additional headers you may need for authentication, etc.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
2
.gitignore
vendored
|
@ -77,3 +77,5 @@ litellm/proxy/_experimental/out/404.html
|
||||||
litellm/proxy/_experimental/out/model_hub.html
|
litellm/proxy/_experimental/out/model_hub.html
|
||||||
.mypy_cache/*
|
.mypy_cache/*
|
||||||
litellm/proxy/application.log
|
litellm/proxy/application.log
|
||||||
|
tests/llm_translation/vertex_test_account.json
|
||||||
|
tests/llm_translation/test_vertex_key.json
|
||||||
|
|
|
@ -22,7 +22,7 @@ repos:
|
||||||
rev: 7.0.0 # The version of flake8 to use
|
rev: 7.0.0 # The version of flake8 to use
|
||||||
hooks:
|
hooks:
|
||||||
- id: flake8
|
- id: flake8
|
||||||
exclude: ^litellm/tests/|^litellm/proxy/tests/
|
exclude: ^litellm/tests/|^litellm/proxy/tests/|^litellm/tests/litellm/|^tests/litellm/
|
||||||
additional_dependencies: [flake8-print]
|
additional_dependencies: [flake8-print]
|
||||||
files: litellm/.*\.py
|
files: litellm/.*\.py
|
||||||
# - id: flake8
|
# - id: flake8
|
||||||
|
|
32
Makefile
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
# LiteLLM Makefile
|
||||||
|
# Simple Makefile for running tests and basic development tasks
|
||||||
|
|
||||||
|
.PHONY: help test test-unit test-integration lint format
|
||||||
|
|
||||||
|
# Default target
|
||||||
|
help:
|
||||||
|
@echo "Available commands:"
|
||||||
|
@echo " make test - Run all tests"
|
||||||
|
@echo " make test-unit - Run unit tests"
|
||||||
|
@echo " make test-integration - Run integration tests"
|
||||||
|
@echo " make test-unit-helm - Run helm unit tests"
|
||||||
|
|
||||||
|
install-dev:
|
||||||
|
poetry install --with dev
|
||||||
|
|
||||||
|
lint: install-dev
|
||||||
|
poetry run pip install types-requests types-setuptools types-redis types-PyYAML
|
||||||
|
cd litellm && poetry run mypy . --ignore-missing-imports
|
||||||
|
|
||||||
|
# Testing
|
||||||
|
test:
|
||||||
|
poetry run pytest tests/
|
||||||
|
|
||||||
|
test-unit:
|
||||||
|
poetry run pytest tests/litellm/
|
||||||
|
|
||||||
|
test-integration:
|
||||||
|
poetry run pytest tests/ -k "not litellm"
|
||||||
|
|
||||||
|
test-unit-helm:
|
||||||
|
helm unittest -f 'tests/*.yaml' deploy/charts/litellm-helm
|
67
README.md
|
@ -40,7 +40,7 @@ LiteLLM manages:
|
||||||
[**Jump to LiteLLM Proxy (LLM Gateway) Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
|
[**Jump to LiteLLM Proxy (LLM Gateway) Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
|
||||||
[**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-providers-docs)
|
[**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-providers-docs)
|
||||||
|
|
||||||
🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12 hour load tests, before being published.
|
🚨 **Stable Release:** Use docker images with the `-stable` tag. These have undergone 12-hour load tests before being published. [More information about the release cycle here](https://docs.litellm.ai/docs/proxy/release_cycle)
|
||||||
|
|
||||||
Support for more providers. Missing a provider or LLM Platform, raise a [feature request](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+).
|
Support for more providers. Missing a provider or LLM Platform, raise a [feature request](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+).
|
||||||
|
|
||||||
|
@ -64,7 +64,7 @@ import os
|
||||||
|
|
||||||
## set ENV variables
|
## set ENV variables
|
||||||
os.environ["OPENAI_API_KEY"] = "your-openai-key"
|
os.environ["OPENAI_API_KEY"] = "your-openai-key"
|
||||||
os.environ["ANTHROPIC_API_KEY"] = "your-cohere-key"
|
os.environ["ANTHROPIC_API_KEY"] = "your-anthropic-key"
|
||||||
|
|
||||||
messages = [{ "content": "Hello, how are you?","role": "user"}]
|
messages = [{ "content": "Hello, how are you?","role": "user"}]
|
||||||
|
|
||||||
|
@ -187,13 +187,13 @@ os.environ["LANGFUSE_PUBLIC_KEY"] = ""
|
||||||
os.environ["LANGFUSE_SECRET_KEY"] = ""
|
os.environ["LANGFUSE_SECRET_KEY"] = ""
|
||||||
os.environ["ATHINA_API_KEY"] = "your-athina-api-key"
|
os.environ["ATHINA_API_KEY"] = "your-athina-api-key"
|
||||||
|
|
||||||
os.environ["OPENAI_API_KEY"]
|
os.environ["OPENAI_API_KEY"] = "your-openai-key"
|
||||||
|
|
||||||
# set callbacks
|
# set callbacks
|
||||||
litellm.success_callback = ["lunary", "mlflow", "langfuse", "athina", "helicone"] # log input/output to lunary, langfuse, supabase, athina, helicone etc
|
litellm.success_callback = ["lunary", "mlflow", "langfuse", "athina", "helicone"] # log input/output to lunary, langfuse, supabase, athina, helicone etc
|
||||||
|
|
||||||
#openai call
|
#openai call
|
||||||
response = completion(model="anthropic/claude-3-sonnet-20240229", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
|
response = completion(model="openai/gpt-4o", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
|
||||||
```
|
```
|
||||||
|
|
||||||
# LiteLLM Proxy Server (LLM Gateway) - ([Docs](https://docs.litellm.ai/docs/simple_proxy))
|
# LiteLLM Proxy Server (LLM Gateway) - ([Docs](https://docs.litellm.ai/docs/simple_proxy))
|
||||||
|
@ -340,64 +340,7 @@ curl 'http://0.0.0.0:4000/key/generate' \
|
||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
|
|
||||||
To contribute: Clone the repo locally -> Make a change -> Submit a PR with the change.
|
Interested in contributing? Contributions to the LiteLLM Python SDK, the Proxy Server, and LLM integrations are both accepted and highly encouraged! [See our Contribution Guide for more details](https://docs.litellm.ai/docs/extras/contributing_code)
|
||||||
|
|
||||||
Here's how to modify the repo locally:
|
|
||||||
Step 1: Clone the repo
|
|
||||||
|
|
||||||
```
|
|
||||||
git clone https://github.com/BerriAI/litellm.git
|
|
||||||
```
|
|
||||||
|
|
||||||
Step 2: Navigate into the project, and install dependencies:
|
|
||||||
|
|
||||||
```
|
|
||||||
cd litellm
|
|
||||||
poetry install -E extra_proxy -E proxy
|
|
||||||
```
|
|
||||||
|
|
||||||
Step 3: Test your change:
|
|
||||||
|
|
||||||
```
|
|
||||||
cd tests # pwd: Documents/litellm/litellm/tests
|
|
||||||
poetry run flake8
|
|
||||||
poetry run pytest .
|
|
||||||
```
|
|
||||||
|
|
||||||
Step 4: Submit a PR with your changes! 🚀
|
|
||||||
|
|
||||||
- push your fork to your GitHub repo
|
|
||||||
- submit a PR from there
|
|
||||||
|
|
||||||
### Building LiteLLM Docker Image
|
|
||||||
|
|
||||||
Follow these instructions if you want to build / run the LiteLLM Docker Image yourself.
|
|
||||||
|
|
||||||
Step 1: Clone the repo
|
|
||||||
|
|
||||||
```
|
|
||||||
git clone https://github.com/BerriAI/litellm.git
|
|
||||||
```
|
|
||||||
|
|
||||||
Step 2: Build the Docker Image
|
|
||||||
|
|
||||||
Build using Dockerfile.non_root
|
|
||||||
```
|
|
||||||
docker build -f docker/Dockerfile.non_root -t litellm_test_image .
|
|
||||||
```
|
|
||||||
|
|
||||||
Step 3: Run the Docker Image
|
|
||||||
|
|
||||||
Make sure config.yaml is present in the root directory. This is your litellm proxy config file.
|
|
||||||
```
|
|
||||||
docker run \
|
|
||||||
-v $(pwd)/proxy_config.yaml:/app/config.yaml \
|
|
||||||
-e DATABASE_URL="postgresql://xxxxxxxx" \
|
|
||||||
-e LITELLM_MASTER_KEY="sk-1234" \
|
|
||||||
-p 4000:4000 \
|
|
||||||
litellm_test_image \
|
|
||||||
--config /app/config.yaml --detailed_debug
|
|
||||||
```
|
|
||||||
|
|
||||||
# Enterprise
|
# Enterprise
|
||||||
For companies that need better security, user management and professional support
|
For companies that need better security, user management and professional support
|
||||||
|
|
252
cookbook/logging_observability/LiteLLM_Proxy_Langfuse.ipynb
vendored
Normal file
|
@ -0,0 +1,252 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## LLM Ops Stack - LiteLLM Proxy + Langfuse \n",
|
||||||
|
"\n",
|
||||||
|
"This notebook demonstrates how to use LiteLLM Proxy with Langfuse \n",
|
||||||
|
"- Use LiteLLM Proxy for calling 100+ LLMs in OpenAI format\n",
|
||||||
|
"- Use Langfuse for viewing request / response traces \n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"In this notebook we will setup LiteLLM Proxy to make requests to OpenAI, Anthropic, Bedrock and automatically log traces to Langfuse."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 1. Setup LiteLLM Proxy\n",
|
||||||
|
"\n",
|
||||||
|
"### 1.1 Define .env variables \n",
|
||||||
|
"Define .env variables on the container that litellm proxy is running on.\n",
|
||||||
|
"```bash\n",
|
||||||
|
"## LLM API Keys\n",
|
||||||
|
"OPENAI_API_KEY=sk-proj-1234567890\n",
|
||||||
|
"ANTHROPIC_API_KEY=sk-ant-api03-1234567890\n",
|
||||||
|
"AWS_ACCESS_KEY_ID=1234567890\n",
|
||||||
|
"AWS_SECRET_ACCESS_KEY=1234567890\n",
|
||||||
|
"\n",
|
||||||
|
"## Langfuse Logging \n",
|
||||||
|
"LANGFUSE_PUBLIC_KEY=\"pk-lf-xxxx9\"\n",
|
||||||
|
"LANGFUSE_SECRET_KEY=\"sk-lf-xxxx9\"\n",
|
||||||
|
"LANGFUSE_HOST=\"https://us.cloud.langfuse.com\"\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"### 1.1 Setup LiteLLM Proxy Config yaml \n",
|
||||||
|
"```yaml\n",
|
||||||
|
"model_list:\n",
|
||||||
|
" - model_name: gpt-4o\n",
|
||||||
|
" litellm_params:\n",
|
||||||
|
" model: openai/gpt-4o\n",
|
||||||
|
" api_key: os.environ/OPENAI_API_KEY\n",
|
||||||
|
" - model_name: claude-3-5-sonnet-20241022\n",
|
||||||
|
" litellm_params:\n",
|
||||||
|
" model: anthropic/claude-3-5-sonnet-20241022\n",
|
||||||
|
" api_key: os.environ/ANTHROPIC_API_KEY\n",
|
||||||
|
" - model_name: us.amazon.nova-micro-v1:0\n",
|
||||||
|
" litellm_params:\n",
|
||||||
|
" model: bedrock/us.amazon.nova-micro-v1:0\n",
|
||||||
|
" aws_access_key_id: os.environ/AWS_ACCESS_KEY_ID\n",
|
||||||
|
" aws_secret_access_key: os.environ/AWS_SECRET_ACCESS_KEY\n",
|
||||||
|
"\n",
|
||||||
|
"litellm_settings:\n",
|
||||||
|
" callbacks: [\"langfuse\"]\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"```"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 2. Make LLM Requests to LiteLLM Proxy\n",
|
||||||
|
"\n",
|
||||||
|
"Now we will make our first LLM request to LiteLLM Proxy"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### 2.1 Setup Client Side Variables to point to LiteLLM Proxy\n",
|
||||||
|
"Set `LITELLM_PROXY_BASE_URL` to the base url of the LiteLLM Proxy and `LITELLM_VIRTUAL_KEY` to the virtual key you want to use for Authentication to LiteLLM Proxy. (Note: In this initial setup you can)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 23,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"LITELLM_PROXY_BASE_URL=\"http://0.0.0.0:4000\"\n",
|
||||||
|
"LITELLM_VIRTUAL_KEY=\"sk-oXXRa1xxxxxxxxxxx\""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 22,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"ChatCompletion(id='chatcmpl-B0sq6QkOKNMJ0dwP3x7OoMqk1jZcI', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Langfuse is a platform designed to monitor, observe, and troubleshoot AI and large language model (LLM) applications. It provides features that help developers gain insights into how their AI systems are performing, make debugging easier, and optimize the deployment of models. Langfuse allows for tracking of model interactions, collecting telemetry, and visualizing data, which is crucial for understanding the behavior of AI models in production environments. This kind of tool is particularly useful for developers working with language models who need to ensure reliability and efficiency in their applications.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1739550502, model='gpt-4o-2024-08-06', object='chat.completion', service_tier='default', system_fingerprint='fp_523b9b6e5f', usage=CompletionUsage(completion_tokens=109, prompt_tokens=13, total_tokens=122, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 22,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import openai\n",
|
||||||
|
"client = openai.OpenAI(\n",
|
||||||
|
" api_key=LITELLM_VIRTUAL_KEY,\n",
|
||||||
|
" base_url=LITELLM_PROXY_BASE_URL\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"response = client.chat.completions.create(\n",
|
||||||
|
" model=\"gpt-4o\",\n",
|
||||||
|
" messages = [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"role\": \"user\",\n",
|
||||||
|
" \"content\": \"what is Langfuse?\"\n",
|
||||||
|
" }\n",
|
||||||
|
" ],\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"response"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### 2.3 View Traces on Langfuse\n",
|
||||||
|
"LiteLLM will send the request / response, model, tokens (input + output), cost to Langfuse.\n",
|
||||||
|
"\n",
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### 2.4 Call Anthropic, Bedrock models \n",
|
||||||
|
"\n",
|
||||||
|
"Now we can call `us.amazon.nova-micro-v1:0` and `claude-3-5-sonnet-20241022` models defined on your config.yaml both in the OpenAI request / response format."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 24,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"ChatCompletion(id='chatcmpl-7756e509-e61f-4f5e-b5ae-b7a41013522a', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Langfuse is an observability tool designed specifically for machine learning models and applications built with natural language processing (NLP) and large language models (LLMs). It focuses on providing detailed insights into how these models perform in real-world scenarios. Here are some key features and purposes of Langfuse:\\n\\n1. **Real-time Monitoring**: Langfuse allows developers to monitor the performance of their NLP and LLM applications in real time. This includes tracking the inputs and outputs of the models, as well as any errors or issues that arise during operation.\\n\\n2. **Error Tracking**: It helps in identifying and tracking errors in the models' outputs. By analyzing incorrect or unexpected responses, developers can pinpoint where and why errors occur, facilitating more effective debugging and improvement.\\n\\n3. **Performance Metrics**: Langfuse provides various performance metrics, such as latency, throughput, and error rates. These metrics help developers understand how well their models are performing under different conditions and workloads.\\n\\n4. **Traceability**: It offers detailed traceability of requests and responses, allowing developers to follow the path of a request through the system and see how it is processed by the model at each step.\\n\\n5. **User Feedback Integration**: Langfuse can integrate user feedback to provide context for model outputs. This helps in understanding how real users are interacting with the model and how its outputs align with user expectations.\\n\\n6. **Customizable Dashboards**: Users can create custom dashboards to visualize the data collected by Langfuse. These dashboards can be tailored to highlight the most important metrics and insights for a specific application or team.\\n\\n7. 
**Alerting and Notifications**: It can set up alerts for specific conditions or errors, notifying developers when something goes wrong or when performance metrics fall outside of acceptable ranges.\\n\\nBy providing comprehensive observability for NLP and LLM applications, Langfuse helps developers to build more reliable, accurate, and user-friendly models and services.\", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1739554005, model='us.amazon.nova-micro-v1:0', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=380, prompt_tokens=5, total_tokens=385, completion_tokens_details=None, prompt_tokens_details=None))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 24,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import openai\n",
|
||||||
|
"client = openai.OpenAI(\n",
|
||||||
|
" api_key=LITELLM_VIRTUAL_KEY,\n",
|
||||||
|
" base_url=LITELLM_PROXY_BASE_URL\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"response = client.chat.completions.create(\n",
|
||||||
|
" model=\"us.amazon.nova-micro-v1:0\",\n",
|
||||||
|
" messages = [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"role\": \"user\",\n",
|
||||||
|
" \"content\": \"what is Langfuse?\"\n",
|
||||||
|
" }\n",
|
||||||
|
" ],\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"response"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 3. Advanced - Set Langfuse Trace ID, Tags, Metadata \n",
|
||||||
|
"\n",
|
||||||
|
"Here is an example of how you can set Langfuse specific params on your client side request. See full list of supported langfuse params [here](https://docs.litellm.ai/docs/observability/langfuse_integration)\n",
|
||||||
|
"\n",
|
||||||
|
"You can view the logged trace of this request [here](https://us.cloud.langfuse.com/project/clvlhdfat0007vwb74m9lvfvi/traces/567890?timestamp=2025-02-14T17%3A30%3A26.709Z)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 27,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"ChatCompletion(id='chatcmpl-789babd5-c064-4939-9093-46e4cd2e208a', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"Langfuse is an observability platform designed specifically for monitoring and improving the performance of natural language processing (NLP) models and applications. It provides developers with tools to track, analyze, and optimize how their language models interact with users and handle natural language inputs.\\n\\nHere are some key features and benefits of Langfuse:\\n\\n1. **Real-Time Monitoring**: Langfuse allows developers to monitor their NLP applications in real time. This includes tracking user interactions, model responses, and overall performance metrics.\\n\\n2. **Error Tracking**: It helps in identifying and tracking errors in the model's responses. This can include incorrect, irrelevant, or unsafe outputs.\\n\\n3. **User Feedback Integration**: Langfuse enables the collection of user feedback directly within the platform. This feedback can be used to identify areas for improvement in the model's performance.\\n\\n4. **Performance Metrics**: The platform provides detailed metrics and analytics on model performance, including latency, throughput, and accuracy.\\n\\n5. **Alerts and Notifications**: Developers can set up alerts to notify them of any significant issues or anomalies in model performance.\\n\\n6. **Debugging Tools**: Langfuse offers tools to help developers debug and refine their models by providing insights into how the model processes different types of inputs.\\n\\n7. **Integration with Development Workflows**: It integrates seamlessly with various development environments and CI/CD pipelines, making it easier to incorporate observability into the development process.\\n\\n8. 
**Customizable Dashboards**: Users can create custom dashboards to visualize the data in a way that best suits their needs.\\n\\nLangfuse aims to help developers build more reliable, accurate, and user-friendly NLP applications by providing them with the tools to observe and improve how their models perform in real-world scenarios.\", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1739554281, model='us.amazon.nova-micro-v1:0', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=346, prompt_tokens=5, total_tokens=351, completion_tokens_details=None, prompt_tokens_details=None))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 27,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import openai\n",
|
||||||
|
"client = openai.OpenAI(\n",
|
||||||
|
" api_key=LITELLM_VIRTUAL_KEY,\n",
|
||||||
|
" base_url=LITELLM_PROXY_BASE_URL\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"response = client.chat.completions.create(\n",
|
||||||
|
" model=\"us.amazon.nova-micro-v1:0\",\n",
|
||||||
|
" messages = [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"role\": \"user\",\n",
|
||||||
|
" \"content\": \"what is Langfuse?\"\n",
|
||||||
|
" }\n",
|
||||||
|
" ],\n",
|
||||||
|
" extra_body={\n",
|
||||||
|
" \"metadata\": {\n",
|
||||||
|
" \"generation_id\": \"1234567890\",\n",
|
||||||
|
" \"trace_id\": \"567890\",\n",
|
||||||
|
" \"trace_user_id\": \"user_1234567890\",\n",
|
||||||
|
" \"tags\": [\"tag1\", \"tag2\"]\n",
|
||||||
|
" }\n",
|
||||||
|
" }\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"response"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## "
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"language_info": {
|
||||||
|
"name": "python"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
BIN
cookbook/logging_observability/litellm_proxy_langfuse.png
Normal file
After Width: | Height: | Size: 308 KiB |
|
@ -18,7 +18,7 @@ type: application
|
||||||
# This is the chart version. This version number should be incremented each time you make changes
|
# This is the chart version. This version number should be incremented each time you make changes
|
||||||
# to the chart and its templates, including the app version.
|
# to the chart and its templates, including the app version.
|
||||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||||
version: 0.3.0
|
version: 0.4.2
|
||||||
|
|
||||||
# This is the version number of the application being deployed. This version number should be
|
# This is the version number of the application being deployed. This version number should be
|
||||||
# incremented each time you make changes to the application. Versions are not expected to
|
# incremented each time you make changes to the application. Versions are not expected to
|
||||||
|
|
|
@ -22,6 +22,8 @@ If `db.useStackgresOperator` is used (not yet implemented):
|
||||||
| Name | Description | Value |
|
| Name | Description | Value |
|
||||||
| ---------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----- |
|
| ---------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----- |
|
||||||
| `replicaCount` | The number of LiteLLM Proxy pods to be deployed | `1` |
|
| `replicaCount` | The number of LiteLLM Proxy pods to be deployed | `1` |
|
||||||
|
| `masterkeySecretName` | The name of the Kubernetes Secret that contains the Master API Key for LiteLLM. If not specified, use the generated secret name. | N/A |
|
||||||
|
| `masterkeySecretKey` | The key within the Kubernetes Secret that contains the Master API Key for LiteLLM. If not specified, use `masterkey` as the key. | N/A |
|
||||||
| `masterkey` | The Master API Key for LiteLLM. If not specified, a random key is generated. | N/A |
|
| `masterkey` | The Master API Key for LiteLLM. If not specified, a random key is generated. | N/A |
|
||||||
| `environmentSecrets` | An optional array of Secret object names. The keys and values in these secrets will be presented to the LiteLLM proxy pod as environment variables. See below for an example Secret object. | `[]` |
|
| `environmentSecrets` | An optional array of Secret object names. The keys and values in these secrets will be presented to the LiteLLM proxy pod as environment variables. See below for an example Secret object. | `[]` |
|
||||||
| `environmentConfigMaps` | An optional array of ConfigMap object names. The keys and values in these configmaps will be presented to the LiteLLM proxy pod as environment variables. See below for an example Secret object. | `[]` |
|
| `environmentConfigMaps` | An optional array of ConfigMap object names. The keys and values in these configmaps will be presented to the LiteLLM proxy pod as environment variables. See below for an example Secret object. | `[]` |
|
||||||
|
|
|
@ -78,8 +78,8 @@ spec:
|
||||||
- name: PROXY_MASTER_KEY
|
- name: PROXY_MASTER_KEY
|
||||||
valueFrom:
|
valueFrom:
|
||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
name: {{ include "litellm.fullname" . }}-masterkey
|
name: {{ .Values.masterkeySecretName | default (printf "%s-masterkey" (include "litellm.fullname" .)) }}
|
||||||
key: masterkey
|
key: {{ .Values.masterkeySecretKey | default "masterkey" }}
|
||||||
{{- if .Values.redis.enabled }}
|
{{- if .Values.redis.enabled }}
|
||||||
- name: REDIS_HOST
|
- name: REDIS_HOST
|
||||||
value: {{ include "litellm.redis.serviceName" . }}
|
value: {{ include "litellm.redis.serviceName" . }}
|
||||||
|
|
|
@ -48,6 +48,23 @@ spec:
|
||||||
{{- end }}
|
{{- end }}
|
||||||
- name: DISABLE_SCHEMA_UPDATE
|
- name: DISABLE_SCHEMA_UPDATE
|
||||||
value: "false" # always run the migration from the Helm PreSync hook, override the value set
|
value: "false" # always run the migration from the Helm PreSync hook, override the value set
|
||||||
|
{{- with .Values.volumeMounts }}
|
||||||
|
volumeMounts:
|
||||||
|
{{- toYaml . | nindent 12 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.volumes }}
|
||||||
|
volumes:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
restartPolicy: OnFailure
|
restartPolicy: OnFailure
|
||||||
|
{{- with .Values.affinity }}
|
||||||
|
affinity:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
{{- with .Values.tolerations }}
|
||||||
|
tolerations:
|
||||||
|
{{- toYaml . | nindent 8 }}
|
||||||
|
{{- end }}
|
||||||
|
ttlSecondsAfterFinished: {{ .Values.migrationJob.ttlSecondsAfterFinished }}
|
||||||
backoffLimit: {{ .Values.migrationJob.backoffLimit }}
|
backoffLimit: {{ .Values.migrationJob.backoffLimit }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
{{- if not .Values.masterkeySecretName }}
|
||||||
{{ $masterkey := (.Values.masterkey | default (randAlphaNum 17)) }}
|
{{ $masterkey := (.Values.masterkey | default (randAlphaNum 17)) }}
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Secret
|
kind: Secret
|
||||||
|
@ -6,3 +7,4 @@ metadata:
|
||||||
data:
|
data:
|
||||||
masterkey: {{ $masterkey | b64enc }}
|
masterkey: {{ $masterkey | b64enc }}
|
||||||
type: Opaque
|
type: Opaque
|
||||||
|
{{- end }}
|
||||||
|
|
82
deploy/charts/litellm-helm/tests/deployment_tests.yaml
Normal file
|
@ -0,0 +1,82 @@
|
||||||
|
suite: test deployment
|
||||||
|
templates:
|
||||||
|
- deployment.yaml
|
||||||
|
- configmap-litellm.yaml
|
||||||
|
tests:
|
||||||
|
- it: should work
|
||||||
|
template: deployment.yaml
|
||||||
|
set:
|
||||||
|
image.tag: test
|
||||||
|
asserts:
|
||||||
|
- isKind:
|
||||||
|
of: Deployment
|
||||||
|
- matchRegex:
|
||||||
|
path: metadata.name
|
||||||
|
pattern: -litellm$
|
||||||
|
- equal:
|
||||||
|
path: spec.template.spec.containers[0].image
|
||||||
|
value: ghcr.io/berriai/litellm-database:test
|
||||||
|
- it: should work with tolerations
|
||||||
|
template: deployment.yaml
|
||||||
|
set:
|
||||||
|
tolerations:
|
||||||
|
- key: node-role.kubernetes.io/master
|
||||||
|
operator: Exists
|
||||||
|
effect: NoSchedule
|
||||||
|
asserts:
|
||||||
|
- equal:
|
||||||
|
path: spec.template.spec.tolerations[0].key
|
||||||
|
value: node-role.kubernetes.io/master
|
||||||
|
- equal:
|
||||||
|
path: spec.template.spec.tolerations[0].operator
|
||||||
|
value: Exists
|
||||||
|
- it: should work with affinity
|
||||||
|
template: deployment.yaml
|
||||||
|
set:
|
||||||
|
affinity:
|
||||||
|
nodeAffinity:
|
||||||
|
requiredDuringSchedulingIgnoredDuringExecution:
|
||||||
|
nodeSelectorTerms:
|
||||||
|
- matchExpressions:
|
||||||
|
- key: topology.kubernetes.io/zone
|
||||||
|
operator: In
|
||||||
|
values:
|
||||||
|
- antarctica-east1
|
||||||
|
asserts:
|
||||||
|
- equal:
|
||||||
|
path: spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].key
|
||||||
|
value: topology.kubernetes.io/zone
|
||||||
|
- equal:
|
||||||
|
path: spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].operator
|
||||||
|
value: In
|
||||||
|
- equal:
|
||||||
|
path: spec.template.spec.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[0]
|
||||||
|
value: antarctica-east1
|
||||||
|
- it: should work without masterkeySecretName or masterkeySecretKey
|
||||||
|
template: deployment.yaml
|
||||||
|
set:
|
||||||
|
masterkeySecretName: ""
|
||||||
|
masterkeySecretKey: ""
|
||||||
|
asserts:
|
||||||
|
- contains:
|
||||||
|
path: spec.template.spec.containers[0].env
|
||||||
|
content:
|
||||||
|
name: PROXY_MASTER_KEY
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: RELEASE-NAME-litellm-masterkey
|
||||||
|
key: masterkey
|
||||||
|
- it: should work with masterkeySecretName and masterkeySecretKey
|
||||||
|
template: deployment.yaml
|
||||||
|
set:
|
||||||
|
masterkeySecretName: my-secret
|
||||||
|
masterkeySecretKey: my-key
|
||||||
|
asserts:
|
||||||
|
- contains:
|
||||||
|
path: spec.template.spec.containers[0].env
|
||||||
|
content:
|
||||||
|
name: PROXY_MASTER_KEY
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: my-secret
|
||||||
|
key: my-key
|
18
deploy/charts/litellm-helm/tests/masterkey-secret_tests.yaml
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
suite: test masterkey secret
|
||||||
|
templates:
|
||||||
|
- secret-masterkey.yaml
|
||||||
|
tests:
|
||||||
|
- it: should create a secret if masterkeySecretName is not set
|
||||||
|
template: secret-masterkey.yaml
|
||||||
|
set:
|
||||||
|
masterkeySecretName: ""
|
||||||
|
asserts:
|
||||||
|
- isKind:
|
||||||
|
of: Secret
|
||||||
|
- it: should not create a secret if masterkeySecretName is set
|
||||||
|
template: secret-masterkey.yaml
|
||||||
|
set:
|
||||||
|
masterkeySecretName: my-secret
|
||||||
|
asserts:
|
||||||
|
- hasDocuments:
|
||||||
|
count: 0
|
|
@ -75,6 +75,12 @@ ingress:
|
||||||
|
|
||||||
# masterkey: changeit
|
# masterkey: changeit
|
||||||
|
|
||||||
|
# if set, use this secret for the master key; otherwise, autogenerate a new one
|
||||||
|
masterkeySecretName: ""
|
||||||
|
|
||||||
|
# if set, use this secret key for the master key; otherwise, use the default key
|
||||||
|
masterkeySecretKey: ""
|
||||||
|
|
||||||
# The elements within proxy_config are rendered as config.yaml for the proxy
|
# The elements within proxy_config are rendered as config.yaml for the proxy
|
||||||
# Examples: https://github.com/BerriAI/litellm/tree/main/litellm/proxy/example_config_yaml
|
# Examples: https://github.com/BerriAI/litellm/tree/main/litellm/proxy/example_config_yaml
|
||||||
# Reference: https://docs.litellm.ai/docs/proxy/configs
|
# Reference: https://docs.litellm.ai/docs/proxy/configs
|
||||||
|
@ -187,6 +193,7 @@ migrationJob:
|
||||||
backoffLimit: 4 # Backoff limit for Job restarts
|
backoffLimit: 4 # Backoff limit for Job restarts
|
||||||
disableSchemaUpdate: false # Skip schema migrations for specific environments. When True, the job will exit with code 0.
|
disableSchemaUpdate: false # Skip schema migrations for specific environments. When True, the job will exit with code 0.
|
||||||
annotations: {}
|
annotations: {}
|
||||||
|
ttlSecondsAfterFinished: 120
|
||||||
|
|
||||||
# Additional environment variables to be added to the deployment
|
# Additional environment variables to be added to the deployment
|
||||||
envVars: {
|
envVars: {
|
||||||
|
|
|
@ -20,10 +20,18 @@ services:
|
||||||
STORE_MODEL_IN_DB: "True" # allows adding models to proxy via UI
|
STORE_MODEL_IN_DB: "True" # allows adding models to proxy via UI
|
||||||
env_file:
|
env_file:
|
||||||
- .env # Load local .env file
|
- .env # Load local .env file
|
||||||
|
depends_on:
|
||||||
|
- db # Indicates that this service depends on the 'db' service, ensuring 'db' starts first
|
||||||
|
healthcheck: # Defines the health check configuration for the container
|
||||||
|
test: [ "CMD-SHELL", "curl -f http://localhost:4000/health/liveliness || exit 1" ] # Command to execute for health check (CMD-SHELL so that `|| exit 1` is interpreted by the shell)
|
||||||
|
interval: 30s # Perform health check every 30 seconds
|
||||||
|
timeout: 10s # Health check command times out after 10 seconds
|
||||||
|
retries: 3 # Retry up to 3 times if health check fails
|
||||||
|
start_period: 40s # Wait 40 seconds after container start before beginning health checks
|
||||||
|
|
||||||
|
|
||||||
db:
|
db:
|
||||||
image: postgres
|
image: postgres:16
|
||||||
restart: always
|
restart: always
|
||||||
environment:
|
environment:
|
||||||
POSTGRES_DB: litellm
|
POSTGRES_DB: litellm
|
||||||
|
@ -31,6 +39,8 @@ services:
|
||||||
POSTGRES_PASSWORD: dbpassword9090
|
POSTGRES_PASSWORD: dbpassword9090
|
||||||
ports:
|
ports:
|
||||||
- "5432:5432"
|
- "5432:5432"
|
||||||
|
volumes:
|
||||||
|
- postgres_data:/var/lib/postgresql/data # Persists Postgres data across container restarts
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
|
test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
|
||||||
interval: 1s
|
interval: 1s
|
||||||
|
@ -53,6 +63,8 @@ services:
|
||||||
volumes:
|
volumes:
|
||||||
prometheus_data:
|
prometheus_data:
|
||||||
driver: local
|
driver: local
|
||||||
|
postgres_data:
|
||||||
|
name: litellm_postgres_data # Named volume for Postgres data persistence
|
||||||
|
|
||||||
|
|
||||||
# ...rest of your docker-compose config if any
|
# ...rest of your docker-compose config if any
|
||||||
|
|
|
@ -11,9 +11,7 @@ FROM $LITELLM_BUILD_IMAGE AS builder
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Install build dependencies
|
# Install build dependencies
|
||||||
RUN apk update && \
|
RUN apk add --no-cache gcc python3-dev musl-dev
|
||||||
apk add --no-cache gcc python3-dev musl-dev && \
|
|
||||||
rm -rf /var/cache/apk/*
|
|
||||||
|
|
||||||
RUN pip install --upgrade pip && \
|
RUN pip install --upgrade pip && \
|
||||||
pip install build
|
pip install build
|
||||||
|
|
92
docs/my-website/docs/anthropic_unified.md
Normal file
|
@ -0,0 +1,92 @@
|
||||||
|
import Tabs from '@theme/Tabs';
|
||||||
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
|
# /v1/messages [BETA]
|
||||||
|
|
||||||
|
LiteLLM provides a BETA endpoint in the spec of Anthropic's `/v1/messages` endpoint.
|
||||||
|
|
||||||
|
This currently just supports the Anthropic API.
|
||||||
|
|
||||||
|
| Feature | Supported | Notes |
|
||||||
|
|-------|-------|-------|
|
||||||
|
| Cost Tracking | ✅ | |
|
||||||
|
| Logging | ✅ | works across all integrations |
|
||||||
|
| End-user Tracking | ✅ | |
|
||||||
|
| Streaming | ✅ | |
|
||||||
|
| Fallbacks | ✅ | between anthropic models |
|
||||||
|
| Loadbalancing | ✅ | between anthropic models |
|
||||||
|
|
||||||
|
Planned improvements:
|
||||||
|
- Vertex AI Anthropic support
|
||||||
|
- Bedrock Anthropic support
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem label="PROXY" value="proxy">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: anthropic-claude
|
||||||
|
litellm_params:
|
||||||
|
model: claude-3-7-sonnet-latest
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config /path/to/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://0.0.0.0:4000/v1/messages' \
|
||||||
|
-H 'content-type: application/json' \
|
||||||
|
-H "x-api-key: $LITELLM_API_KEY" \
|
||||||
|
-H 'anthropic-version: 2023-06-01' \
|
||||||
|
-d '{
|
||||||
|
"model": "anthropic-claude",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": "List 5 important events in the XIX century"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"max_tokens": 4096
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm.llms.anthropic.experimental_pass_through.messages.handler import anthropic_messages
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
|
||||||
|
# set env
|
||||||
|
os.environ["ANTHROPIC_API_KEY"] = "my-api-key"
|
||||||
|
|
||||||
|
messages = [{"role": "user", "content": "Hello, can you tell me a short joke?"}]
|
||||||
|
|
||||||
|
# Call the handler
|
||||||
|
async def call():
|
||||||
|
response = await anthropic_messages(
|
||||||
|
messages=messages,
|
||||||
|
api_key=os.environ["ANTHROPIC_API_KEY"],
|
||||||
|
model="claude-3-haiku-20240307",
|
||||||
|
max_tokens=100,
|
||||||
|
)
|
||||||
|
|
||||||
|
asyncio.run(call())
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
|
@ -1,7 +1,7 @@
|
||||||
import Tabs from '@theme/Tabs';
|
import Tabs from '@theme/Tabs';
|
||||||
import TabItem from '@theme/TabItem';
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# Assistants API
|
# /assistants
|
||||||
|
|
||||||
Covers Threads, Messages, Assistants.
|
Covers Threads, Messages, Assistants.
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import Tabs from '@theme/Tabs';
|
import Tabs from '@theme/Tabs';
|
||||||
import TabItem from '@theme/TabItem';
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# [BETA] Batches API
|
# /batches
|
||||||
|
|
||||||
Covers Batches, Files
|
Covers Batches, Files
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,13 @@ import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# Prompt Caching
|
# Prompt Caching
|
||||||
|
|
||||||
For OpenAI + Anthropic + Deepseek, LiteLLM follows the OpenAI prompt caching usage object format:
|
Supported Providers:
|
||||||
|
- OpenAI (`openai/`)
|
||||||
|
- Anthropic API (`anthropic/`)
|
||||||
|
- Bedrock (`bedrock/`, `bedrock/invoke/`, `bedrock/converse`) ([All models bedrock supports prompt caching on](https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html))
|
||||||
|
- Deepseek API (`deepseek/`)
|
||||||
|
|
||||||
|
For the supported providers, LiteLLM follows the OpenAI prompt caching usage object format:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
"usage": {
|
"usage": {
|
||||||
|
|
|
@ -46,7 +46,7 @@ from litellm import completion
|
||||||
fallback_dict = {"gpt-3.5-turbo": "gpt-3.5-turbo-16k"}
|
fallback_dict = {"gpt-3.5-turbo": "gpt-3.5-turbo-16k"}
|
||||||
messages = [{"content": "how does a court case get to the Supreme Court?" * 500, "role": "user"}]
|
messages = [{"content": "how does a court case get to the Supreme Court?" * 500, "role": "user"}]
|
||||||
|
|
||||||
completion(model="gpt-3.5-turbo", messages=messages, context_window_fallback_dict=ctx_window_fallback_dict)
|
completion(model="gpt-3.5-turbo", messages=messages, context_window_fallback_dict=fallback_dict)
|
||||||
```
|
```
|
||||||
|
|
||||||
### Fallbacks - Switch Models/API Keys/API Bases (SDK)
|
### Fallbacks - Switch Models/API Keys/API Bases (SDK)
|
||||||
|
|
|
@ -190,3 +190,137 @@ Expected Response
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
## Explicitly specify image type
|
||||||
|
|
||||||
|
If you have images without a mime-type, or if litellm is incorrectly inferring the mime type of your image (e.g. calling `gs://` URLs with Vertex AI), you can set this explicitly via the `format` param.
|
||||||
|
|
||||||
|
```python
|
||||||
|
"image_url": {
|
||||||
|
"url": "gs://my-gs-image",
|
||||||
|
"format": "image/jpeg"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
LiteLLM will use this for any API endpoint that supports specifying the mime-type (e.g. anthropic/bedrock/vertex ai).
|
||||||
|
|
||||||
|
For others (e.g. openai), it will be ignored.
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem label="SDK" value="sdk">
|
||||||
|
|
||||||
|
```python
|
||||||
|
import os
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
os.environ["ANTHROPIC_API_KEY"] = "your-api-key"
|
||||||
|
|
||||||
|
# anthropic call
|
||||||
|
response = completion(
|
||||||
|
model = "claude-3-7-sonnet-latest",
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": "What’s in this image?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {
|
||||||
|
"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
|
||||||
|
"format": "image/jpeg"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem label="PROXY" value="proxy">
|
||||||
|
|
||||||
|
1. Define vision models on config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: gpt-4-vision-preview # OpenAI gpt-4-vision-preview
|
||||||
|
litellm_params:
|
||||||
|
model: openai/gpt-4-vision-preview
|
||||||
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
|
- model_name: llava-hf # Custom OpenAI compatible model
|
||||||
|
litellm_params:
|
||||||
|
model: openai/llava-hf/llava-v1.6-vicuna-7b-hf
|
||||||
|
api_base: http://localhost:8000
|
||||||
|
api_key: fake-key
|
||||||
|
model_info:
|
||||||
|
supports_vision: True # set supports_vision to True so /model/info returns this attribute as True
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Run proxy server
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it using the OpenAI Python SDK
|
||||||
|
|
||||||
|
|
||||||
|
```python
|
||||||
|
import os
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
client = OpenAI(
|
||||||
|
api_key="sk-1234", base_url="http://0.0.0.0:4000", # your litellm proxy api key + proxy base url
|
||||||
|
)
|
||||||
|
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model = "gpt-4-vision-preview", # use model="llava-hf" to test your custom OpenAI endpoint
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": "What’s in this image?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {
|
||||||
|
"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
|
||||||
|
"format": "image/jpeg"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Spec
|
||||||
|
|
||||||
|
```
|
||||||
|
"image_url": str
|
||||||
|
|
||||||
|
OR
|
||||||
|
|
||||||
|
"image_url": {
|
||||||
|
"url": "url OR base64 encoded str",
|
||||||
|
"detail": "openai-only param",
|
||||||
|
"format": "specify mime-type of image"
|
||||||
|
}
|
||||||
|
```
|
|
@ -46,7 +46,7 @@ For security inquiries, please contact us at support@berri.ai
|
||||||
|-------------------|-------------------------------------------------------------------------------------------------|
|
|-------------------|-------------------------------------------------------------------------------------------------|
|
||||||
| SOC 2 Type I | Certified. Report available upon request on Enterprise plan. |
|
| SOC 2 Type I | Certified. Report available upon request on Enterprise plan. |
|
||||||
| SOC 2 Type II | In progress. Certificate available by April 15th, 2025 |
|
| SOC 2 Type II | In progress. Certificate available by April 15th, 2025 |
|
||||||
| ISO27001 | In progress. Certificate available by February 7th, 2025 |
|
| ISO 27001 | Certified. Report available upon request on Enterprise plan. |
|
||||||
|
|
||||||
|
|
||||||
## Supported Data Regions for LiteLLM Cloud
|
## Supported Data Regions for LiteLLM Cloud
|
||||||
|
@ -137,7 +137,7 @@ Point of contact email address for general security-related questions: krrish@be
|
||||||
Has the Vendor been audited / certified?
|
Has the Vendor been audited / certified?
|
||||||
- SOC 2 Type I. Certified. Report available upon request on Enterprise plan.
|
- SOC 2 Type I. Certified. Report available upon request on Enterprise plan.
|
||||||
- SOC 2 Type II. In progress. Certificate available by April 15th, 2025.
|
- SOC 2 Type II. In progress. Certificate available by April 15th, 2025.
|
||||||
- ISO27001. In progress. Certificate available by February 7th, 2025.
|
- ISO 27001. Certified. Report available upon request on Enterprise plan.
|
||||||
|
|
||||||
Has an information security management system been implemented?
|
Has an information security management system been implemented?
|
||||||
- Yes - [CodeQL](https://codeql.github.com/) and a comprehensive ISMS covering multiple security domains.
|
- Yes - [CodeQL](https://codeql.github.com/) and a comprehensive ISMS covering multiple security domains.
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import Tabs from '@theme/Tabs';
|
import Tabs from '@theme/Tabs';
|
||||||
import TabItem from '@theme/TabItem';
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# Embeddings
|
# /embeddings
|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
```python
|
```python
|
||||||
|
|
106
docs/my-website/docs/extras/contributing_code.md
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
# Contributing Code
|
||||||
|
|
||||||
|
## **Checklist before submitting a PR**
|
||||||
|
|
||||||
|
Here are the core requirements for any PR submitted to LiteLLM
|
||||||
|
|
||||||
|
|
||||||
|
- [ ] Add testing, **Adding at least 1 test is a hard requirement** - [see details](#2-adding-testing-to-your-pr)
|
||||||
|
- [ ] Ensure your PR passes the following tests:
|
||||||
|
- [ ] [Unit Tests](#3-running-unit-tests)
|
||||||
|
- [ ] [Formatting / Linting Tests](#35-running-linting-tests)
|
||||||
|
- [ ] Keep scope as isolated as possible. As a general rule, your changes should address 1 specific problem at a time
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Quick start
|
||||||
|
|
||||||
|
## 1. Setup your local dev environment
|
||||||
|
|
||||||
|
|
||||||
|
Here's how to modify the repo locally:
|
||||||
|
|
||||||
|
Step 1: Clone the repo
|
||||||
|
|
||||||
|
```shell
|
||||||
|
git clone https://github.com/BerriAI/litellm.git
|
||||||
|
```
|
||||||
|
|
||||||
|
Step 2: Install dev dependencies:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
poetry install --with dev --extras proxy
|
||||||
|
```
|
||||||
|
|
||||||
|
That's it, your local dev environment is ready!
|
||||||
|
|
||||||
|
## 2. Adding Testing to your PR
|
||||||
|
|
||||||
|
- Add your test to the [`tests/litellm/` directory](https://github.com/BerriAI/litellm/tree/main/tests/litellm)
|
||||||
|
|
||||||
|
- This directory maps 1:1 to the `litellm/` directory, and can only contain mocked tests.
|
||||||
|
- Do not add real llm api calls to this directory.
|
||||||
|
|
||||||
|
### 2.1 File Naming Convention for `tests/litellm/`
|
||||||
|
|
||||||
|
The `tests/litellm/` directory follows the same directory structure as `litellm/`.
|
||||||
|
|
||||||
|
- `tests/litellm/proxy/test_caching_routes.py` maps to `litellm/proxy/caching_routes.py`
|
||||||
|
- `test_{filename}.py` maps to `litellm/{filename}.py`
|
||||||
|
|
||||||
|
## 3. Running Unit Tests
|
||||||
|
|
||||||
|
Run the following command from the root of the litellm directory:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
make test-unit
|
||||||
|
```
|
||||||
|
|
||||||
|
## 3.5 Running Linting Tests
|
||||||
|
|
||||||
|
Run the following command from the root of the litellm directory:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
make lint
|
||||||
|
```
|
||||||
|
|
||||||
|
LiteLLM uses mypy for linting. On CI/CD we also run `black` for formatting.
|
||||||
|
|
||||||
|
## 4. Submit a PR with your changes!
|
||||||
|
|
||||||
|
- push your fork to your GitHub repo
|
||||||
|
- submit a PR from there
|
||||||
|
|
||||||
|
|
||||||
|
## Advanced
|
||||||
|
### Building LiteLLM Docker Image
|
||||||
|
|
||||||
|
Some people might want to build the LiteLLM docker image themselves. Follow these instructions if you want to build / run the LiteLLM Docker Image yourself.
|
||||||
|
|
||||||
|
Step 1: Clone the repo
|
||||||
|
|
||||||
|
```shell
|
||||||
|
git clone https://github.com/BerriAI/litellm.git
|
||||||
|
```
|
||||||
|
|
||||||
|
Step 2: Build the Docker Image
|
||||||
|
|
||||||
|
Build using Dockerfile.non_root
|
||||||
|
|
||||||
|
```shell
|
||||||
|
docker build -f docker/Dockerfile.non_root -t litellm_test_image .
|
||||||
|
```
|
||||||
|
|
||||||
|
Step 3: Run the Docker Image
|
||||||
|
|
||||||
|
Make sure `proxy_config.yaml` is present in the root directory. This is your litellm proxy config file; the command below mounts it into the container as `/app/config.yaml`.
|
||||||
|
|
||||||
|
```shell
|
||||||
|
docker run \
|
||||||
|
-v $(pwd)/proxy_config.yaml:/app/config.yaml \
|
||||||
|
-e DATABASE_URL="postgresql://xxxxxxxx" \
|
||||||
|
-e LITELLM_MASTER_KEY="sk-1234" \
|
||||||
|
-p 4000:4000 \
|
||||||
|
litellm_test_image \
|
||||||
|
--config /app/config.yaml --detailed_debug
|
||||||
|
```
|
|
@ -2,7 +2,7 @@
|
||||||
import TabItem from '@theme/TabItem';
|
import TabItem from '@theme/TabItem';
|
||||||
import Tabs from '@theme/Tabs';
|
import Tabs from '@theme/Tabs';
|
||||||
|
|
||||||
# Files API
|
# /files
|
||||||
|
|
||||||
Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API.
|
Files are used to upload documents that can be used with features like Assistants, Fine-tuning, and Batch API.
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import Tabs from '@theme/Tabs';
|
import Tabs from '@theme/Tabs';
|
||||||
import TabItem from '@theme/TabItem';
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# [Beta] Fine-tuning API
|
# /fine_tuning
|
||||||
|
|
||||||
|
|
||||||
:::info
|
:::info
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import Tabs from '@theme/Tabs';
|
import Tabs from '@theme/Tabs';
|
||||||
import TabItem from '@theme/TabItem';
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# Moderation
|
# /moderations
|
||||||
|
|
||||||
|
|
||||||
### Usage
|
### Usage
|
||||||
|
|
|
@ -19,6 +19,7 @@ Make an account on [Arize AI](https://app.arize.com/auth/login)
|
||||||
## Quick Start
|
## Quick Start
|
||||||
Use just 2 lines of code, to instantly log your responses **across all providers** with arize
|
Use just 2 lines of code, to instantly log your responses **across all providers** with arize
|
||||||
|
|
||||||
|
You can also use the instrumentor option instead of the callback, which you can find [here](https://docs.arize.com/arize/llm-tracing/tracing-integrations-auto/litellm).
|
||||||
|
|
||||||
```python
|
```python
|
||||||
litellm.callbacks = ["arize"]
|
litellm.callbacks = ["arize"]
|
||||||
|
|
|
@ -78,7 +78,10 @@ Following are the allowed fields in metadata, their types, and their description
|
||||||
* `context: Optional[Union[dict, str]]` - This is the context used as information for the prompt. For RAG applications, this is the "retrieved" data. You may log context as a string or as an object (dictionary).
|
* `context: Optional[Union[dict, str]]` - This is the context used as information for the prompt. For RAG applications, this is the "retrieved" data. You may log context as a string or as an object (dictionary).
|
||||||
* `expected_response: Optional[str]` - This is the reference response to compare against for evaluation purposes. This is useful for segmenting inference calls by expected response.
|
* `expected_response: Optional[str]` - This is the reference response to compare against for evaluation purposes. This is useful for segmenting inference calls by expected response.
|
||||||
* `user_query: Optional[str]` - This is the user's query. For conversational applications, this is the user's last message.
|
* `user_query: Optional[str]` - This is the user's query. For conversational applications, this is the user's last message.
|
||||||
|
* `tags: Optional[list]` - This is a list of tags. This is useful for segmenting inference calls by tags.
|
||||||
|
* `user_feedback: Optional[str]` - The end user’s feedback.
|
||||||
|
* `model_options: Optional[dict]` - This is a dictionary of model options. This is useful for getting insights into how model behavior affects your end users.
|
||||||
|
* `custom_attributes: Optional[dict]` - This is a dictionary of custom attributes. This is useful for additional information about the inference.
|
||||||
|
|
||||||
## Using a self hosted deployment of Athina
|
## Using a self hosted deployment of Athina
|
||||||
|
|
||||||
|
|
75
docs/my-website/docs/observability/phoenix_integration.md
Normal file
|
@ -0,0 +1,75 @@
|
||||||
|
import Image from '@theme/IdealImage';
|
||||||
|
|
||||||
|
# Phoenix OSS
|
||||||
|
|
||||||
|
Open source tracing and evaluation platform
|
||||||
|
|
||||||
|
:::tip
|
||||||
|
|
||||||
|
This is community maintained. Please open an issue if you run into a bug:
|
||||||
|
https://github.com/BerriAI/litellm
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
|
||||||
|
## Pre-Requisites
|
||||||
|
Make an account on [Phoenix OSS](https://phoenix.arize.com)
|
||||||
|
OR self-host your own instance of [Phoenix](https://docs.arize.com/phoenix/deployment)
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
Use just 2 lines of code, to instantly log your responses **across all providers** with Phoenix
|
||||||
|
|
||||||
|
You can also use the instrumentor option instead of the callback, which you can find [here](https://docs.arize.com/phoenix/tracing/integrations-tracing/litellm).
|
||||||
|
|
||||||
|
```python
|
||||||
|
litellm.callbacks = ["arize_phoenix"]
|
||||||
|
```
|
||||||
|
```python
|
||||||
|
import litellm
|
||||||
|
import os
|
||||||
|
|
||||||
|
os.environ["PHOENIX_API_KEY"] = "" # Necessary only when using Phoenix Cloud
|
||||||
|
os.environ["PHOENIX_COLLECTOR_HTTP_ENDPOINT"] = "" # The URL of your Phoenix OSS instance
|
||||||
|
# This defaults to https://app.phoenix.arize.com/v1/traces for Phoenix Cloud
|
||||||
|
|
||||||
|
# LLM API Keys
|
||||||
|
os.environ['OPENAI_API_KEY']=""
|
||||||
|
|
||||||
|
# set arize_phoenix as a callback, litellm will send the data to Phoenix
|
||||||
|
litellm.callbacks = ["arize_phoenix"]
|
||||||
|
|
||||||
|
# openai call
|
||||||
|
response = litellm.completion(
|
||||||
|
model="gpt-3.5-turbo",
|
||||||
|
messages=[
|
||||||
|
{"role": "user", "content": "Hi 👋 - i'm openai"}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Using with LiteLLM Proxy
|
||||||
|
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: gpt-4o
|
||||||
|
litellm_params:
|
||||||
|
model: openai/fake
|
||||||
|
api_key: fake-key
|
||||||
|
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||||
|
|
||||||
|
litellm_settings:
|
||||||
|
callbacks: ["arize_phoenix"]
|
||||||
|
|
||||||
|
environment_variables:
|
||||||
|
PHOENIX_API_KEY: "d0*****"
|
||||||
|
PHOENIX_COLLECTOR_ENDPOINT: "https://app.phoenix.arize.com/v1/traces" # OPTIONAL, for setting the GRPC endpoint
|
||||||
|
PHOENIX_COLLECTOR_HTTP_ENDPOINT: "https://app.phoenix.arize.com/v1/traces" # OPTIONAL, for setting the HTTP endpoint
|
||||||
|
```
|
||||||
|
|
||||||
|
## Support & Talk to Founders
|
||||||
|
|
||||||
|
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
|
||||||
|
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
|
||||||
|
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
|
||||||
|
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
|
95
docs/my-website/docs/pass_through/openai_passthrough.md
Normal file
|
@ -0,0 +1,95 @@
|
||||||
|
# OpenAI Passthrough
|
||||||
|
|
||||||
|
Pass-through endpoints for `/openai`
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
| Feature | Supported | Notes |
|
||||||
|
|-------|-------|-------|
|
||||||
|
| Cost Tracking | ❌ | Not supported |
|
||||||
|
| Logging | ✅ | Works across all integrations |
|
||||||
|
| Streaming | ✅ | Fully supported |
|
||||||
|
|
||||||
|
### When to use this?
|
||||||
|
|
||||||
|
- For 90% of your use cases, you should use the [native LiteLLM OpenAI Integration](https://docs.litellm.ai/docs/providers/openai) (`/chat/completions`, `/embeddings`, `/completions`, `/images`, `/batches`, etc.)
|
||||||
|
- Use this passthrough to call less popular or newer OpenAI endpoints that LiteLLM doesn't fully support yet, such as `/assistants`, `/threads`, `/vector_stores`
|
||||||
|
|
||||||
|
Simply replace `https://api.openai.com` with `LITELLM_PROXY_BASE_URL/openai`
|
||||||
|
|
||||||
|
## Usage Examples
|
||||||
|
|
||||||
|
### Assistants API
|
||||||
|
|
||||||
|
#### Create OpenAI Client
|
||||||
|
|
||||||
|
Make sure you do the following:
|
||||||
|
- Point `base_url` to your `LITELLM_PROXY_BASE_URL/openai`
|
||||||
|
- Use your `LITELLM_API_KEY` as the `api_key`
|
||||||
|
|
||||||
|
```python
|
||||||
|
import openai
|
||||||
|
|
||||||
|
client = openai.OpenAI(
|
||||||
|
base_url="http://0.0.0.0:4000/openai", # <your-proxy-url>/openai
|
||||||
|
api_key="sk-anything" # <your-proxy-api-key>
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Create an Assistant
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Create an assistant
|
||||||
|
assistant = client.beta.assistants.create(
|
||||||
|
name="Math Tutor",
|
||||||
|
instructions="You are a math tutor. Help solve equations.",
|
||||||
|
model="gpt-4o",
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Create a Thread
|
||||||
|
```python
|
||||||
|
# Create a thread
|
||||||
|
thread = client.beta.threads.create()
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Add a Message to the Thread
|
||||||
|
```python
|
||||||
|
# Add a message
|
||||||
|
message = client.beta.threads.messages.create(
|
||||||
|
thread_id=thread.id,
|
||||||
|
role="user",
|
||||||
|
content="Solve 3x + 11 = 14",
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Run the Assistant
|
||||||
|
```python
|
||||||
|
# Create a run to get the assistant's response
|
||||||
|
run = client.beta.threads.runs.create(
|
||||||
|
thread_id=thread.id,
|
||||||
|
assistant_id=assistant.id,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check run status
|
||||||
|
run_status = client.beta.threads.runs.retrieve(
|
||||||
|
thread_id=thread.id,
|
||||||
|
run_id=run.id
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Retrieve Messages
|
||||||
|
```python
|
||||||
|
# List messages after the run completes
|
||||||
|
messages = client.beta.threads.messages.list(
|
||||||
|
thread_id=thread.id
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Delete the Assistant
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Delete the assistant when done
|
||||||
|
client.beta.assistants.delete(assistant.id)
|
||||||
|
```
|
||||||
|
|
14
docs/my-website/docs/projects/Elroy.md
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
# 🐕 Elroy
|
||||||
|
|
||||||
|
Elroy is a scriptable AI assistant that remembers and sets goals.
|
||||||
|
|
||||||
|
Interact through the command line, share memories via MCP, or build your own tools using Python.
|
||||||
|
|
||||||
|
|
||||||
|
[![Static Badge][github-shield]][github-url]
|
||||||
|
[![Discord][discord-shield]][discord-url]
|
||||||
|
|
||||||
|
[github-shield]: https://img.shields.io/badge/Github-repo-white?logo=github
|
||||||
|
[github-url]: https://github.com/elroy-bot/elroy
|
||||||
|
[discord-shield]:https://img.shields.io/discord/1200684659277832293?color=7289DA&label=Discord&logo=discord&logoColor=white
|
||||||
|
[discord-url]: https://discord.gg/5PJUY4eMce
|
5
docs/my-website/docs/projects/PDL.md
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
PDL - A YAML-based approach to prompt programming
|
||||||
|
|
||||||
|
Github: https://github.com/IBM/prompt-declaration-language
|
||||||
|
|
||||||
|
PDL is a declarative approach to prompt programming, helping users to accumulate messages implicitly, with support for model chaining and tool use.
|
9
docs/my-website/docs/projects/pgai.md
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
# pgai
|
||||||
|
|
||||||
|
[pgai](https://github.com/timescale/pgai) is a suite of tools to develop RAG, semantic search, and other AI applications more easily with PostgreSQL.
|
||||||
|
|
||||||
|
If you don't know what pgai is yet, check out the [README](https://github.com/timescale/pgai)!
|
||||||
|
|
||||||
|
If you're already familiar with pgai, you can find litellm specific docs here:
|
||||||
|
- Litellm for [model calling](https://github.com/timescale/pgai/blob/main/docs/model_calling/litellm.md) in pgai
|
||||||
|
- Use the [litellm provider](https://github.com/timescale/pgai/blob/main/docs/vectorizer/api-reference.md#aiembedding_litellm) to automatically create embeddings for your data via the pgai vectorizer.
|
|
@ -819,6 +819,114 @@ resp = litellm.completion(
|
||||||
print(f"\nResponse: {resp}")
|
print(f"\nResponse: {resp}")
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Usage - Thinking / `reasoning_content`
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
resp = completion(
|
||||||
|
model="anthropic/claude-3-7-sonnet-20250219",
|
||||||
|
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
thinking={"type": "enabled", "budget_tokens": 1024},
|
||||||
|
)
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
- model_name: claude-3-7-sonnet-20250219
|
||||||
|
litellm_params:
|
||||||
|
model: anthropic/claude-3-7-sonnet-20250219
|
||||||
|
api_key: os.environ/ANTHROPIC_API_KEY
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config /path/to/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer <YOUR-LITELLM-KEY>" \
|
||||||
|
-d '{
|
||||||
|
"model": "claude-3-7-sonnet-20250219",
|
||||||
|
"messages": [{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
"thinking": {"type": "enabled", "budget_tokens": 1024}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
**Expected Response**
|
||||||
|
|
||||||
|
```python
|
||||||
|
ModelResponse(
|
||||||
|
id='chatcmpl-c542d76d-f675-4e87-8e5f-05855f5d0f5e',
|
||||||
|
created=1740470510,
|
||||||
|
model='claude-3-7-sonnet-20250219',
|
||||||
|
object='chat.completion',
|
||||||
|
system_fingerprint=None,
|
||||||
|
choices=[
|
||||||
|
Choices(
|
||||||
|
finish_reason='stop',
|
||||||
|
index=0,
|
||||||
|
message=Message(
|
||||||
|
content="The capital of France is Paris.",
|
||||||
|
role='assistant',
|
||||||
|
tool_calls=None,
|
||||||
|
function_call=None,
|
||||||
|
provider_specific_fields={
|
||||||
|
'citations': None,
|
||||||
|
'thinking_blocks': [
|
||||||
|
{
|
||||||
|
'type': 'thinking',
|
||||||
|
'thinking': 'The capital of France is Paris. This is a very straightforward factual question.',
|
||||||
|
'signature': 'EuYBCkQYAiJAy6...'
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
),
|
||||||
|
thinking_blocks=[
|
||||||
|
{
|
||||||
|
'type': 'thinking',
|
||||||
|
'thinking': 'The capital of France is Paris. This is a very straightforward factual question.',
|
||||||
|
'signature': 'EuYBCkQYAiJAy6AGB...'
|
||||||
|
}
|
||||||
|
],
|
||||||
|
reasoning_content='The capital of France is Paris. This is a very straightforward factual question.'
|
||||||
|
)
|
||||||
|
],
|
||||||
|
usage=Usage(
|
||||||
|
completion_tokens=68,
|
||||||
|
prompt_tokens=42,
|
||||||
|
total_tokens=110,
|
||||||
|
completion_tokens_details=None,
|
||||||
|
prompt_tokens_details=PromptTokensDetailsWrapper(
|
||||||
|
audio_tokens=None,
|
||||||
|
cached_tokens=0,
|
||||||
|
text_tokens=None,
|
||||||
|
image_tokens=None
|
||||||
|
),
|
||||||
|
cache_creation_input_tokens=0,
|
||||||
|
cache_read_input_tokens=0
|
||||||
|
)
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
## **Passing Extra Headers to Anthropic API**
|
## **Passing Extra Headers to Anthropic API**
|
||||||
|
|
||||||
Pass `extra_headers: dict` to `litellm.completion`
|
Pass `extra_headers: dict` to `litellm.completion`
|
||||||
|
@ -1135,3 +1243,4 @@ curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
|
@ -23,14 +23,16 @@ import os
|
||||||
|
|
||||||
os.environ['CEREBRAS_API_KEY'] = ""
|
os.environ['CEREBRAS_API_KEY'] = ""
|
||||||
response = completion(
|
response = completion(
|
||||||
model="cerebras/meta/llama3-70b-instruct",
|
model="cerebras/llama3-70b-instruct",
|
||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": "What's the weather like in Boston today in Fahrenheit?",
|
"content": "What's the weather like in Boston today in Fahrenheit? (Write in JSON)",
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
max_tokens=10,
|
max_tokens=10,
|
||||||
|
|
||||||
|
# The prompt should include JSON if 'json_object' is selected; otherwise, you will get error code 400.
|
||||||
response_format={ "type": "json_object" },
|
response_format={ "type": "json_object" },
|
||||||
seed=123,
|
seed=123,
|
||||||
stop=["\n\n"],
|
stop=["\n\n"],
|
||||||
|
@ -50,15 +52,17 @@ import os
|
||||||
|
|
||||||
os.environ['CEREBRAS_API_KEY'] = ""
|
os.environ['CEREBRAS_API_KEY'] = ""
|
||||||
response = completion(
|
response = completion(
|
||||||
model="cerebras/meta/llama3-70b-instruct",
|
model="cerebras/llama3-70b-instruct",
|
||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": "What's the weather like in Boston today in Fahrenheit?",
|
"content": "What's the weather like in Boston today in Fahrenheit? (Write in JSON)",
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
stream=True,
|
stream=True,
|
||||||
max_tokens=10,
|
max_tokens=10,
|
||||||
|
|
||||||
|
# The prompt should include JSON if 'json_object' is selected; otherwise, you will get error code 400.
|
||||||
response_format={ "type": "json_object" },
|
response_format={ "type": "json_object" },
|
||||||
seed=123,
|
seed=123,
|
||||||
stop=["\n\n"],
|
stop=["\n\n"],
|
||||||
|
|
|
@ -108,7 +108,7 @@ response = embedding(
|
||||||
|
|
||||||
### Usage
|
### Usage
|
||||||
|
|
||||||
|
LiteLLM supports the v1 and v2 clients for Cohere rerank. By default, the `rerank` endpoint uses the v2 client, but you can specify the v1 client by explicitly calling `v1/rerank`
|
||||||
|
|
||||||
<Tabs>
|
<Tabs>
|
||||||
<TabItem value="sdk" label="LiteLLM SDK Usage">
|
<TabItem value="sdk" label="LiteLLM SDK Usage">
|
||||||
|
|
|
@ -1,3 +1,6 @@
|
||||||
|
import Tabs from '@theme/Tabs';
|
||||||
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# Infinity
|
# Infinity
|
||||||
|
|
||||||
| Property | Details |
|
| Property | Details |
|
||||||
|
@ -12,6 +15,9 @@
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from litellm import rerank
|
from litellm import rerank
|
||||||
|
import os
|
||||||
|
|
||||||
|
os.environ["INFINITY_API_BASE"] = "http://localhost:8080"
|
||||||
|
|
||||||
response = rerank(
|
response = rerank(
|
||||||
model="infinity/rerank",
|
model="infinity/rerank",
|
||||||
|
@ -65,3 +71,114 @@ curl http://0.0.0.0:4000/rerank \
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Supported Cohere Rerank API Params
|
||||||
|
|
||||||
|
| Param | Type | Description |
|
||||||
|
|-------|-------|-------|
|
||||||
|
| `query` | `str` | The query to rerank the documents against |
|
||||||
|
| `documents` | `list[str]` | The documents to rerank |
|
||||||
|
| `top_n` | `int` | The number of documents to return |
|
||||||
|
| `return_documents` | `bool` | Whether to return the documents in the response |
|
||||||
|
|
||||||
|
### Usage - Return Documents
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
response = rerank(
|
||||||
|
model="infinity/rerank",
|
||||||
|
query="What is the capital of France?",
|
||||||
|
documents=["Paris", "London", "Berlin", "Madrid"],
|
||||||
|
return_documents=True,
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://0.0.0.0:4000/rerank \
|
||||||
|
-H "Authorization: Bearer sk-1234" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "custom-infinity-rerank",
|
||||||
|
"query": "What is the capital of France?",
|
||||||
|
"documents": [
|
||||||
|
"Paris",
|
||||||
|
"London",
|
||||||
|
"Berlin",
|
||||||
|
"Madrid"
|
||||||
|
],
|
||||||
|
"return_documents": true
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
## Pass Provider-specific Params
|
||||||
|
|
||||||
|
Any unmapped params will be passed to the provider as-is.
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import rerank
|
||||||
|
import os
|
||||||
|
|
||||||
|
os.environ["INFINITY_API_BASE"] = "http://localhost:8080"
|
||||||
|
|
||||||
|
response = rerank(
|
||||||
|
model="infinity/rerank",
|
||||||
|
query="What is the capital of France?",
|
||||||
|
documents=["Paris", "London", "Berlin", "Madrid"],
|
||||||
|
raw_scores=True, # 👈 PROVIDER-SPECIFIC PARAM
|
||||||
|
)
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: custom-infinity-rerank
|
||||||
|
litellm_params:
|
||||||
|
model: infinity/rerank
|
||||||
|
api_base: http://localhost:8080
|
||||||
|
raw_scores: True # 👈 EITHER SET PROVIDER-SPECIFIC PARAMS HERE OR IN REQUEST BODY
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start litellm
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config /path/to/config.yaml
|
||||||
|
|
||||||
|
# RUNNING on http://0.0.0.0:4000
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://0.0.0.0:4000/rerank \
|
||||||
|
-H "Authorization: Bearer sk-1234" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "custom-infinity-rerank",
|
||||||
|
"query": "What is the capital of the United States?",
|
||||||
|
"documents": [
|
||||||
|
"Carson City is the capital city of the American state of Nevada.",
|
||||||
|
"The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean. Its capital is Saipan.",
|
||||||
|
"Washington, D.C. is the capital of the United States.",
|
||||||
|
"Capital punishment has existed in the United States since before it was a country."
|
||||||
|
],
|
||||||
|
"raw_scores": true
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
</Tabs>
|
||||||
|
|
|
@ -3,13 +3,15 @@ import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# LiteLLM Proxy (LLM Gateway)
|
# LiteLLM Proxy (LLM Gateway)
|
||||||
|
|
||||||
:::tip
|
|
||||||
|
|
||||||
[LiteLLM Providers a **self hosted** proxy server (AI Gateway)](../simple_proxy) to call all the LLMs in the OpenAI format
|
| Property | Details |
|
||||||
|
|-------|-------|
|
||||||
|
| Description | LiteLLM Proxy is an OpenAI-compatible gateway that allows you to interact with multiple LLM providers through a unified API. Simply use the `litellm_proxy/` prefix before the model name to route your requests through the proxy. |
|
||||||
|
| Provider Route on LiteLLM | `litellm_proxy/` (add this prefix to the model name, to route any requests to litellm_proxy - e.g. `litellm_proxy/your-model-name`) |
|
||||||
|
| Setup LiteLLM Gateway | [LiteLLM Gateway ↗](../simple_proxy) |
|
||||||
|
| Supported Endpoints |`/chat/completions`, `/completions`, `/embeddings`, `/audio/speech`, `/audio/transcriptions`, `/images`, `/rerank` |
|
||||||
|
|
||||||
:::
|
|
||||||
|
|
||||||
**[LiteLLM Proxy](../simple_proxy) is OpenAI compatible**, you just need the `litellm_proxy/` prefix before the model
|
|
||||||
|
|
||||||
## Required Variables
|
## Required Variables
|
||||||
|
|
||||||
|
@ -83,7 +85,76 @@ for chunk in response:
|
||||||
print(chunk)
|
print(chunk)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Embeddings
|
||||||
|
|
||||||
|
```python
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
response = litellm.embedding(
|
||||||
|
model="litellm_proxy/your-embedding-model",
|
||||||
|
input="Hello world",
|
||||||
|
api_base="your-litellm-proxy-url",
|
||||||
|
api_key="your-litellm-proxy-api-key"
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Image Generation
|
||||||
|
|
||||||
|
```python
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
response = litellm.image_generation(
|
||||||
|
model="litellm_proxy/dall-e-3",
|
||||||
|
prompt="A beautiful sunset over mountains",
|
||||||
|
api_base="your-litellm-proxy-url",
|
||||||
|
api_key="your-litellm-proxy-api-key"
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Audio Transcription
|
||||||
|
|
||||||
|
```python
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
response = litellm.transcription(
|
||||||
|
model="litellm_proxy/whisper-1",
|
||||||
|
file="your-audio-file",
|
||||||
|
api_base="your-litellm-proxy-url",
|
||||||
|
api_key="your-litellm-proxy-api-key"
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Text to Speech
|
||||||
|
|
||||||
|
```python
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
response = litellm.speech(
|
||||||
|
model="litellm_proxy/tts-1",
|
||||||
|
input="Hello world",
|
||||||
|
api_base="your-litellm-proxy-url",
|
||||||
|
api_key="your-litellm-proxy-api-key"
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Rerank
|
||||||
|
|
||||||
|
```python
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
# Rerank documents through the LiteLLM proxy
|
||||||
|
|
||||||
|
response = litellm.rerank(
|
||||||
|
model="litellm_proxy/rerank-english-v2.0",
|
||||||
|
query="What is machine learning?",
|
||||||
|
documents=[
|
||||||
|
"Machine learning is a field of study in artificial intelligence",
|
||||||
|
"Biology is the study of living organisms"
|
||||||
|
],
|
||||||
|
api_base="your-litellm-proxy-url",
|
||||||
|
api_key="your-litellm-proxy-api-key"
|
||||||
|
)
|
||||||
|
```
|
||||||
## **Usage with Langchain, LLamaindex, OpenAI Js, Anthropic SDK, Instructor**
|
## **Usage with Langchain, LLamaindex, OpenAI Js, Anthropic SDK, Instructor**
|
||||||
|
|
||||||
#### [Follow this doc to see how to use litellm proxy with langchain, llamaindex, anthropic etc](../proxy/user_keys)
|
#### [Follow this doc to see how to use litellm proxy with langchain, llamaindex, anthropic etc](../proxy/user_keys)
|
|
@ -64,71 +64,7 @@ All models listed here https://docs.perplexity.ai/docs/model-cards are supported
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Return citations
|
|
||||||
|
|
||||||
Perplexity supports returning citations via `return_citations=True`. [Perplexity Docs](https://docs.perplexity.ai/reference/post_chat_completions). Note: Perplexity has this feature in **closed beta**, so you need them to grant you access to get citations from their API.
|
|
||||||
|
|
||||||
If perplexity returns citations, LiteLLM will pass it straight through.
|
|
||||||
|
|
||||||
:::info
|
:::info
|
||||||
|
|
||||||
For passing more provider-specific, [go here](../completion/provider_specific_params.md)
|
For more information about passing provider-specific parameters, [go here](../completion/provider_specific_params.md)
|
||||||
:::
|
:::
|
||||||
|
|
||||||
<Tabs>
|
|
||||||
<TabItem value="sdk" label="SDK">
|
|
||||||
|
|
||||||
```python
|
|
||||||
from litellm import completion
|
|
||||||
import os
|
|
||||||
|
|
||||||
os.environ['PERPLEXITYAI_API_KEY'] = ""
|
|
||||||
response = completion(
|
|
||||||
model="perplexity/mistral-7b-instruct",
|
|
||||||
messages=messages,
|
|
||||||
return_citations=True
|
|
||||||
)
|
|
||||||
print(response)
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
<TabItem value="proxy" label="PROXY">
|
|
||||||
|
|
||||||
1. Add perplexity to config.yaml
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
model_list:
|
|
||||||
- model_name: "perplexity-model"
|
|
||||||
litellm_params:
|
|
||||||
model: "llama-3.1-sonar-small-128k-online"
|
|
||||||
api_key: os.environ/PERPLEXITY_API_KEY
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Start proxy
|
|
||||||
|
|
||||||
```bash
|
|
||||||
litellm --config /path/to/config.yaml
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Test it!
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl -L -X POST 'http://0.0.0.0:4000/chat/completions' \
|
|
||||||
-H 'Content-Type: application/json' \
|
|
||||||
-H 'Authorization: Bearer sk-1234' \
|
|
||||||
-d '{
|
|
||||||
"model": "perplexity-model",
|
|
||||||
"messages": [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "Who won the world cup in 2022?"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"return_citations": true
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
[**Call w/ OpenAI SDK, Langchain, Instructor, etc.**](../proxy/user_keys.md#chatcompletions)
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
</Tabs>
|
|
||||||
|
|
|
@ -2,11 +2,11 @@ import Tabs from '@theme/Tabs';
|
||||||
import TabItem from '@theme/TabItem';
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# Sambanova
|
# Sambanova
|
||||||
https://community.sambanova.ai/t/create-chat-completion-api/
|
https://cloud.sambanova.ai/
|
||||||
|
|
||||||
:::tip
|
:::tip
|
||||||
|
|
||||||
**We support ALL Sambanova models, just set `model=sambanova/<any-model-on-sambanova>` as a prefix when sending litellm requests. For the complete supported model list, visit https://sambanova.ai/technology/models **
|
**We support ALL Sambanova models, just set `model=sambanova/<any-model-on-sambanova>` as a prefix when sending litellm requests.** For the complete supported model list, visit https://docs.sambanova.ai/cloud/docs/get-started/supported-models
|
||||||
|
|
||||||
:::
|
:::
|
||||||
|
|
||||||
|
@ -27,12 +27,11 @@ response = completion(
|
||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": "What do you know about sambanova.ai",
|
"content": "What do you know about sambanova.ai. Give your response in json format",
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
max_tokens=10,
|
max_tokens=10,
|
||||||
response_format={ "type": "json_object" },
|
response_format={ "type": "json_object" },
|
||||||
seed=123,
|
|
||||||
stop=["\n\n"],
|
stop=["\n\n"],
|
||||||
temperature=0.2,
|
temperature=0.2,
|
||||||
top_p=0.9,
|
top_p=0.9,
|
||||||
|
@ -54,13 +53,12 @@ response = completion(
|
||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": "What do you know about sambanova.ai",
|
"content": "What do you know about sambanova.ai. Give your response in json format",
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
stream=True,
|
stream=True,
|
||||||
max_tokens=10,
|
max_tokens=10,
|
||||||
response_format={ "type": "json_object" },
|
response_format={ "type": "json_object" },
|
||||||
seed=123,
|
|
||||||
stop=["\n\n"],
|
stop=["\n\n"],
|
||||||
temperature=0.2,
|
temperature=0.2,
|
||||||
top_p=0.9,
|
top_p=0.9,
|
||||||
|
|
90
docs/my-website/docs/providers/snowflake.md
Normal file
|
@ -0,0 +1,90 @@
|
||||||
|
import Tabs from '@theme/Tabs';
|
||||||
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
|
|
||||||
|
# Snowflake
|
||||||
|
| Property | Details |
|
||||||
|
|-------|-------|
|
||||||
|
| Description | The Snowflake Cortex LLM REST API lets you access the COMPLETE function via HTTP POST requests|
|
||||||
|
| Provider Route on LiteLLM | `snowflake/` |
|
||||||
|
| Link to Provider Doc | [Snowflake ↗](https://docs.snowflake.com/en/user-guide/snowflake-cortex/cortex-llm-rest-api) |
|
||||||
|
| Base URL | [https://{account-id}.snowflakecomputing.com/api/v2/cortex/inference:complete/](https://{account-id}.snowflakecomputing.com/api/v2/cortex/inference:complete) |
|
||||||
|
| Supported OpenAI Endpoints | `/chat/completions`, `/completions` |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Currently, Snowflake's REST API does not have an endpoint for `snowflake-arctic-embed` embedding models. If you want to use these embedding models with Litellm, you can call them through our Hugging Face provider.
|
||||||
|
|
||||||
|
Find the Arctic Embed models [here](https://huggingface.co/collections/Snowflake/arctic-embed-661fd57d50fab5fc314e4c18) on Hugging Face.
|
||||||
|
|
||||||
|
## Supported OpenAI Parameters
|
||||||
|
```
|
||||||
|
"temperature",
|
||||||
|
"max_tokens",
|
||||||
|
"top_p",
|
||||||
|
"response_format"
|
||||||
|
```
|
||||||
|
|
||||||
|
## API KEYS
|
||||||
|
|
||||||
|
Snowflake does not have API keys. Instead, you access the Snowflake API with your JWT token and account identifier.
|
||||||
|
|
||||||
|
```python
|
||||||
|
import os
|
||||||
|
os.environ["SNOWFLAKE_JWT"] = "YOUR JWT"
|
||||||
|
os.environ["SNOWFLAKE_ACCOUNT_ID"] = "YOUR ACCOUNT IDENTIFIER"
|
||||||
|
```
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
```python
|
||||||
|
import os
from litellm import completion
|
||||||
|
|
||||||
|
## set ENV variables
|
||||||
|
os.environ["SNOWFLAKE_JWT"] = "YOUR JWT"
|
||||||
|
os.environ["SNOWFLAKE_ACCOUNT_ID"] = "YOUR ACCOUNT IDENTIFIER"
|
||||||
|
|
||||||
|
# Snowflake call
|
||||||
|
response = completion(
|
||||||
|
model="snowflake/mistral-7b",
|
||||||
|
messages = [{ "content": "Hello, how are you?","role": "user"}]
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage with LiteLLM Proxy
|
||||||
|
|
||||||
|
#### 1. Required env variables
|
||||||
|
```bash
|
||||||
|
export SNOWFLAKE_JWT=""
|
||||||
|
export SNOWFLAKE_ACCOUNT_ID=""
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 2. Start the proxy
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: mistral-7b
|
||||||
|
litellm_params:
|
||||||
|
model: snowflake/mistral-7b
|
||||||
|
api_key: YOUR_API_KEY
|
||||||
|
api_base: https://YOUR-ACCOUNT-ID.snowflakecomputing.com/api/v2/cortex/inference:complete
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config /path/to/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 3. Test it
|
||||||
|
```shell
|
||||||
|
curl --location 'http://0.0.0.0:4000/chat/completions' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--data ' {
|
||||||
|
"model": "mistral-7b",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Hello, how are you?"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
|
@ -405,13 +405,15 @@ If this was your initial VertexAI Grounding code,
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import vertexai
|
import vertexai
|
||||||
|
from vertexai.generative_models import GenerativeModel, GenerationConfig, Tool, grounding
|
||||||
|
|
||||||
|
|
||||||
vertexai.init(project=project_id, location="us-central1")
|
vertexai.init(project=project_id, location="us-central1")
|
||||||
|
|
||||||
model = GenerativeModel("gemini-1.5-flash-001")
|
model = GenerativeModel("gemini-1.5-flash-001")
|
||||||
|
|
||||||
# Use Google Search for grounding
|
# Use Google Search for grounding
|
||||||
tool = Tool.from_google_search_retrieval(grounding.GoogleSearchRetrieval(disable_attributon=False))
|
tool = Tool.from_google_search_retrieval(grounding.GoogleSearchRetrieval())
|
||||||
|
|
||||||
prompt = "When is the next total solar eclipse in US?"
|
prompt = "When is the next total solar eclipse in US?"
|
||||||
response = model.generate_content(
|
response = model.generate_content(
|
||||||
|
@ -852,6 +854,7 @@ litellm.vertex_location = "us-central1 # Your Location
|
||||||
| claude-3-5-sonnet@20240620 | `completion('vertex_ai/claude-3-5-sonnet@20240620', messages)` |
|
| claude-3-5-sonnet@20240620 | `completion('vertex_ai/claude-3-5-sonnet@20240620', messages)` |
|
||||||
| claude-3-sonnet@20240229 | `completion('vertex_ai/claude-3-sonnet@20240229', messages)` |
|
| claude-3-sonnet@20240229 | `completion('vertex_ai/claude-3-sonnet@20240229', messages)` |
|
||||||
| claude-3-haiku@20240307 | `completion('vertex_ai/claude-3-haiku@20240307', messages)` |
|
| claude-3-haiku@20240307 | `completion('vertex_ai/claude-3-haiku@20240307', messages)` |
|
||||||
|
| claude-3-7-sonnet@20250219 | `completion('vertex_ai/claude-3-7-sonnet@20250219', messages)` |
|
||||||
|
|
||||||
### Usage
|
### Usage
|
||||||
|
|
||||||
|
@ -926,6 +929,119 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### Usage - `thinking` / `reasoning_content`
|
||||||
|
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
resp = completion(
|
||||||
|
model="vertex_ai/claude-3-7-sonnet@20250219",
|
||||||
|
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
thinking={"type": "enabled", "budget_tokens": 1024},
|
||||||
|
)
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
- model_name: claude-3-7-sonnet-20250219
|
||||||
|
litellm_params:
|
||||||
|
model: vertex_ai/claude-3-7-sonnet@20250219
|
||||||
|
vertex_ai_project: "my-test-project"
|
||||||
|
vertex_ai_location: "us-west-1"
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config /path/to/config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer <YOUR-LITELLM-KEY>" \
|
||||||
|
-d '{
|
||||||
|
"model": "claude-3-7-sonnet-20250219",
|
||||||
|
"messages": [{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
"thinking": {"type": "enabled", "budget_tokens": 1024}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
**Expected Response**
|
||||||
|
|
||||||
|
```python
|
||||||
|
ModelResponse(
|
||||||
|
id='chatcmpl-c542d76d-f675-4e87-8e5f-05855f5d0f5e',
|
||||||
|
created=1740470510,
|
||||||
|
model='claude-3-7-sonnet-20250219',
|
||||||
|
object='chat.completion',
|
||||||
|
system_fingerprint=None,
|
||||||
|
choices=[
|
||||||
|
Choices(
|
||||||
|
finish_reason='stop',
|
||||||
|
index=0,
|
||||||
|
message=Message(
|
||||||
|
content="The capital of France is Paris.",
|
||||||
|
role='assistant',
|
||||||
|
tool_calls=None,
|
||||||
|
function_call=None,
|
||||||
|
provider_specific_fields={
|
||||||
|
'citations': None,
|
||||||
|
'thinking_blocks': [
|
||||||
|
{
|
||||||
|
'type': 'thinking',
|
||||||
|
'thinking': 'The capital of France is Paris. This is a very straightforward factual question.',
|
||||||
|
'signature': 'EuYBCkQYAiJAy6...'
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
),
|
||||||
|
thinking_blocks=[
|
||||||
|
{
|
||||||
|
'type': 'thinking',
|
||||||
|
'thinking': 'The capital of France is Paris. This is a very straightforward factual question.',
|
||||||
|
'signature': 'EuYBCkQYAiJAy6AGB...'
|
||||||
|
}
|
||||||
|
],
|
||||||
|
reasoning_content='The capital of France is Paris. This is a very straightforward factual question.'
|
||||||
|
)
|
||||||
|
],
|
||||||
|
usage=Usage(
|
||||||
|
completion_tokens=68,
|
||||||
|
prompt_tokens=42,
|
||||||
|
total_tokens=110,
|
||||||
|
completion_tokens_details=None,
|
||||||
|
prompt_tokens_details=PromptTokensDetailsWrapper(
|
||||||
|
audio_tokens=None,
|
||||||
|
cached_tokens=0,
|
||||||
|
text_tokens=None,
|
||||||
|
image_tokens=None
|
||||||
|
),
|
||||||
|
cache_creation_input_tokens=0,
|
||||||
|
cache_read_input_tokens=0
|
||||||
|
)
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Llama 3 API
|
## Llama 3 API
|
||||||
|
|
||||||
| Model Name | Function Call |
|
| Model Name | Function Call |
|
||||||
|
@ -1572,6 +1688,14 @@ assert isinstance(
|
||||||
|
|
||||||
Pass any file supported by Vertex AI, through LiteLLM.
|
Pass any file supported by Vertex AI, through LiteLLM.
|
||||||
|
|
||||||
|
LiteLLM supports the following image and video inputs passed as a URL:
|
||||||
|
|
||||||
|
```
|
||||||
|
Images with Cloud Storage URIs - gs://cloud-samples-data/generative-ai/image/boats.jpeg
|
||||||
|
Images with direct links - https://storage.googleapis.com/github-repo/img/gemini/intro/landmark3.jpg
|
||||||
|
Videos with Cloud Storage URIs - https://storage.googleapis.com/github-repo/img/gemini/multimodality_usecases_overview/pixel8.mp4
|
||||||
|
Base64 Encoded Local Images
|
||||||
|
```
|
||||||
|
|
||||||
<Tabs>
|
<Tabs>
|
||||||
<TabItem value="sdk" label="SDK">
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
|
@ -157,6 +157,98 @@ curl -L -X POST 'http://0.0.0.0:4000/embeddings' \
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
## Send Video URL to VLLM
|
||||||
|
|
||||||
|
Example Implementation from VLLM [here](https://github.com/vllm-project/vllm/pull/10020)
|
||||||
|
|
||||||
|
There are two ways to send a video url to VLLM:
|
||||||
|
|
||||||
|
1. Pass the video url directly
|
||||||
|
|
||||||
|
```
|
||||||
|
{"type": "video_url", "video_url": {"url": video_url}},
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Pass the video data as base64
|
||||||
|
|
||||||
|
```
|
||||||
|
{"type": "video_url", "video_url": {"url": f"data:video/mp4;base64,{video_data_base64}"}}
|
||||||
|
```
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="hosted_vllm/qwen", # pass the vllm model name
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "text",
|
||||||
|
"text": "Summarize the following video"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "video_url",
|
||||||
|
"video_url": {
|
||||||
|
"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
api_base="https://hosted-vllm-api.co")
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: my-model
|
||||||
|
litellm_params:
|
||||||
|
model: hosted_vllm/qwen # add hosted_vllm/ prefix to route as OpenAI provider
|
||||||
|
api_base: https://hosted-vllm-api.co # add api base for OpenAI compatible provider
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start the proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ litellm --config /path/to/config.yaml
|
||||||
|
|
||||||
|
# RUNNING on http://0.0.0.0:4000
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST http://0.0.0.0:4000/chat/completions \
|
||||||
|
-H "Authorization: Bearer sk-1234" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model": "my-model",
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content":
|
||||||
|
[
|
||||||
|
{"type": "text", "text": "Summarize the following video"},
|
||||||
|
{"type": "video_url", "video_url": {"url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ"}}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
## (Deprecated) for `vllm pip package`
|
## (Deprecated) for `vllm pip package`
|
||||||
### Using - `litellm.completion`
|
### Using - `litellm.completion`
|
||||||
|
|
||||||
|
|
|
@ -10,17 +10,13 @@ Role-based access control (RBAC) is based on Organizations, Teams and Internal U
|
||||||
|
|
||||||
## Roles
|
## Roles
|
||||||
|
|
||||||
**Admin Roles**
|
| Role Type | Role Name | Permissions |
|
||||||
- `proxy_admin`: admin over the platform
|
|-----------|-----------|-------------|
|
||||||
- `proxy_admin_viewer`: can login, view all keys, view all spend. **Cannot** create keys/delete keys/add new users
|
| **Admin** | `proxy_admin` | Admin over the platform |
|
||||||
|
| | `proxy_admin_viewer` | Can login, view all keys, view all spend. **Cannot** create keys/delete keys/add new users |
|
||||||
**Organization Roles**
|
| **Organization** | `org_admin` | Admin over the organization. Can create teams and users within their organization |
|
||||||
- `org_admin`: admin over the organization. Can create teams and users within their organization
|
| **Internal User** | `internal_user` | Can login, view/create/delete their own keys, view their spend. **Cannot** add new users |
|
||||||
|
| | `internal_user_viewer` | Can login, view their own keys, view their own spend. **Cannot** create/delete keys, add new users |
|
||||||
**Internal User Roles**
|
|
||||||
- `internal_user`: can login, view/create/delete their own keys, view their spend. **Cannot** add new users.
|
|
||||||
- `internal_user_viewer`: can login, view their own keys, view their own spend. **Cannot** create/delete keys, add new users.
|
|
||||||
|
|
||||||
|
|
||||||
## Onboarding Organizations
|
## Onboarding Organizations
|
||||||
|
|
||||||
|
|
|
@ -36,7 +36,7 @@ import TabItem from '@theme/TabItem';
|
||||||
- Virtual Key Rate Limit
|
- Virtual Key Rate Limit
|
||||||
- User Rate Limit
|
- User Rate Limit
|
||||||
- Team Limit
|
- Team Limit
|
||||||
- The `_PROXY_track_cost_callback` updates spend / usage in the LiteLLM database. [Here is everything tracked in the DB per request](https://github.com/BerriAI/litellm/blob/ba41a72f92a9abf1d659a87ec880e8e319f87481/schema.prisma#L172)
|
- The `_ProxyDBLogger` updates spend / usage in the LiteLLM database. [Here is everything tracked in the DB per request](https://github.com/BerriAI/litellm/blob/ba41a72f92a9abf1d659a87ec880e8e319f87481/schema.prisma#L172)
|
||||||
|
|
||||||
## Frequently Asked Questions
|
## Frequently Asked Questions
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,6 @@ import Tabs from '@theme/Tabs';
|
||||||
import TabItem from '@theme/TabItem';
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# Caching
|
# Caching
|
||||||
Cache LLM Responses
|
|
||||||
|
|
||||||
:::note
|
:::note
|
||||||
|
|
||||||
|
@ -10,14 +9,19 @@ For OpenAI/Anthropic Prompt Caching, go [here](../completion/prompt_caching.md)
|
||||||
|
|
||||||
:::
|
:::
|
||||||
|
|
||||||
LiteLLM supports:
|
Cache LLM Responses. LiteLLM's caching system stores and reuses LLM responses to save costs and reduce latency. When you make the same request twice, the cached response is returned instead of calling the LLM API again.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### Supported Caches
|
||||||
|
|
||||||
- In Memory Cache
|
- In Memory Cache
|
||||||
- Redis Cache
|
- Redis Cache
|
||||||
- Qdrant Semantic Cache
|
- Qdrant Semantic Cache
|
||||||
- Redis Semantic Cache
|
- Redis Semantic Cache
|
||||||
- s3 Bucket Cache
|
- s3 Bucket Cache
|
||||||
|
|
||||||
## Quick Start - Redis, s3 Cache, Semantic Cache
|
## Quick Start
|
||||||
<Tabs>
|
<Tabs>
|
||||||
|
|
||||||
<TabItem value="redis" label="redis cache">
|
<TabItem value="redis" label="redis cache">
|
||||||
|
@ -369,9 +373,9 @@ $ litellm --config /path/to/config.yaml
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
### Basic
|
||||||
## Using Caching - /chat/completions
|
|
||||||
|
|
||||||
<Tabs>
|
<Tabs>
|
||||||
<TabItem value="chat_completions" label="/chat/completions">
|
<TabItem value="chat_completions" label="/chat/completions">
|
||||||
|
@ -416,6 +420,239 @@ curl --location 'http://0.0.0.0:4000/embeddings' \
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
### Dynamic Cache Controls
|
||||||
|
|
||||||
|
| Parameter | Type | Description |
|
||||||
|
|-----------|------|-------------|
|
||||||
|
| `ttl` | *Optional(int)* | Will cache the response for the user-defined amount of time (in seconds) |
|
||||||
|
| `s-maxage` | *Optional(int)* | Will only accept cached responses that are within user-defined range (in seconds) |
|
||||||
|
| `no-cache` | *Optional(bool)* | Will not return a cached response; the actual endpoint is called instead |
|
||||||
|
| `no-store` | *Optional(bool)* | Will not cache the response |
|
||||||
|
| `namespace` | *Optional(str)* | Will cache the response under a user-defined namespace |
|
||||||
|
|
||||||
|
Each cache parameter can be controlled on a per-request basis. Here are examples for each parameter:
|
||||||
|
|
||||||
|
### `ttl`
|
||||||
|
|
||||||
|
Set how long (in seconds) to cache a response.
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="openai" label="OpenAI Python SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
client = OpenAI(
|
||||||
|
api_key="your-api-key",
|
||||||
|
base_url="http://0.0.0.0:4000"
|
||||||
|
)
|
||||||
|
|
||||||
|
chat_completion = client.chat.completions.create(
|
||||||
|
messages=[{"role": "user", "content": "Hello"}],
|
||||||
|
model="gpt-3.5-turbo",
|
||||||
|
extra_body={
|
||||||
|
"cache": {
|
||||||
|
"ttl": 300 # Cache response for 5 minutes
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="curl" label="curl">
|
||||||
|
|
||||||
|
```shell
|
||||||
|
curl http://localhost:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer sk-1234" \
|
||||||
|
-d '{
|
||||||
|
"model": "gpt-3.5-turbo",
|
||||||
|
"cache": {"ttl": 300},
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": "Hello"}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
### `s-maxage`
|
||||||
|
|
||||||
|
Only accept cached responses that are within the specified age (in seconds).
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="openai" label="OpenAI Python SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
client = OpenAI(
|
||||||
|
api_key="your-api-key",
|
||||||
|
base_url="http://0.0.0.0:4000"
|
||||||
|
)
|
||||||
|
|
||||||
|
chat_completion = client.chat.completions.create(
|
||||||
|
messages=[{"role": "user", "content": "Hello"}],
|
||||||
|
model="gpt-3.5-turbo",
|
||||||
|
extra_body={
|
||||||
|
"cache": {
|
||||||
|
"s-maxage": 600 # Only use cache if less than 10 minutes old
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="curl" label="curl">
|
||||||
|
|
||||||
|
```shell
|
||||||
|
curl http://localhost:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer sk-1234" \
|
||||||
|
-d '{
|
||||||
|
"model": "gpt-3.5-turbo",
|
||||||
|
"cache": {"s-maxage": 600},
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": "Hello"}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
### `no-cache`
|
||||||
|
Force a fresh response, bypassing the cache.
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="openai" label="OpenAI Python SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
client = OpenAI(
|
||||||
|
api_key="your-api-key",
|
||||||
|
base_url="http://0.0.0.0:4000"
|
||||||
|
)
|
||||||
|
|
||||||
|
chat_completion = client.chat.completions.create(
|
||||||
|
messages=[{"role": "user", "content": "Hello"}],
|
||||||
|
model="gpt-3.5-turbo",
|
||||||
|
extra_body={
|
||||||
|
"cache": {
|
||||||
|
"no-cache": True # Skip cache check, get fresh response
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="curl" label="curl">
|
||||||
|
|
||||||
|
```shell
|
||||||
|
curl http://localhost:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer sk-1234" \
|
||||||
|
-d '{
|
||||||
|
"model": "gpt-3.5-turbo",
|
||||||
|
"cache": {"no-cache": true},
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": "Hello"}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
### `no-store`
|
||||||
|
|
||||||
|
Will not store the response in cache.
|
||||||
|
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="openai" label="OpenAI Python SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
client = OpenAI(
|
||||||
|
api_key="your-api-key",
|
||||||
|
base_url="http://0.0.0.0:4000"
|
||||||
|
)
|
||||||
|
|
||||||
|
chat_completion = client.chat.completions.create(
|
||||||
|
messages=[{"role": "user", "content": "Hello"}],
|
||||||
|
model="gpt-3.5-turbo",
|
||||||
|
extra_body={
|
||||||
|
"cache": {
|
||||||
|
"no-store": True # Don't cache this response
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="curl" label="curl">
|
||||||
|
|
||||||
|
```shell
|
||||||
|
curl http://localhost:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer sk-1234" \
|
||||||
|
-d '{
|
||||||
|
"model": "gpt-3.5-turbo",
|
||||||
|
"cache": {"no-store": true},
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": "Hello"}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
### `namespace`
|
||||||
|
Store the response under a specific cache namespace.
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="openai" label="OpenAI Python SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
client = OpenAI(
|
||||||
|
api_key="your-api-key",
|
||||||
|
base_url="http://0.0.0.0:4000"
|
||||||
|
)
|
||||||
|
|
||||||
|
chat_completion = client.chat.completions.create(
|
||||||
|
messages=[{"role": "user", "content": "Hello"}],
|
||||||
|
model="gpt-3.5-turbo",
|
||||||
|
extra_body={
|
||||||
|
"cache": {
|
||||||
|
"namespace": "my-custom-namespace" # Store in custom namespace
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
|
||||||
|
<TabItem value="curl" label="curl">
|
||||||
|
|
||||||
|
```shell
|
||||||
|
curl http://localhost:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer sk-1234" \
|
||||||
|
-d '{
|
||||||
|
"model": "gpt-3.5-turbo",
|
||||||
|
"cache": {"namespace": "my-custom-namespace"},
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": "Hello"}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Set cache for proxy, but not on the actual llm api call
|
## Set cache for proxy, but not on the actual llm api call
|
||||||
|
|
||||||
Use this if you just want to enable features like rate limiting and load balancing across multiple instances.
|
Use this if you just want to enable features like rate limiting and load balancing across multiple instances.
|
||||||
|
@ -501,253 +738,6 @@ litellm_settings:
|
||||||
# /chat/completions, /completions, /embeddings, /audio/transcriptions
|
# /chat/completions, /completions, /embeddings, /audio/transcriptions
|
||||||
```
|
```
|
||||||
|
|
||||||
### **Turn on / off caching per request. **
|
|
||||||
|
|
||||||
The proxy support 4 cache-controls:
|
|
||||||
|
|
||||||
- `ttl`: *Optional(int)* - Will cache the response for the user-defined amount of time (in seconds).
|
|
||||||
- `s-maxage`: *Optional(int)* Will only accept cached responses that are within user-defined range (in seconds).
|
|
||||||
- `no-cache`: *Optional(bool)* Will not return a cached response, but instead call the actual endpoint.
|
|
||||||
- `no-store`: *Optional(bool)* Will not cache the response.
|
|
||||||
|
|
||||||
[Let us know if you need more](https://github.com/BerriAI/litellm/issues/1218)
|
|
||||||
|
|
||||||
**Turn off caching**
|
|
||||||
|
|
||||||
Set `no-cache=True`, this will not return a cached response
|
|
||||||
|
|
||||||
<Tabs>
|
|
||||||
<TabItem value="openai" label="OpenAI Python SDK">
|
|
||||||
|
|
||||||
```python
|
|
||||||
import os
|
|
||||||
from openai import OpenAI
|
|
||||||
|
|
||||||
client = OpenAI(
|
|
||||||
# This is the default and can be omitted
|
|
||||||
api_key=os.environ.get("OPENAI_API_KEY"),
|
|
||||||
base_url="http://0.0.0.0:4000"
|
|
||||||
)
|
|
||||||
|
|
||||||
chat_completion = client.chat.completions.create(
|
|
||||||
messages=[
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "Say this is a test",
|
|
||||||
}
|
|
||||||
],
|
|
||||||
model="gpt-3.5-turbo",
|
|
||||||
extra_body = { # OpenAI python accepts extra args in extra_body
|
|
||||||
cache: {
|
|
||||||
"no-cache": True # will not return a cached response
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
```
|
|
||||||
</TabItem>
|
|
||||||
|
|
||||||
<TabItem value="curl" label="curl">
|
|
||||||
|
|
||||||
```shell
|
|
||||||
curl http://localhost:4000/v1/chat/completions \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-H "Authorization: Bearer sk-1234" \
|
|
||||||
-d '{
|
|
||||||
"model": "gpt-3.5-turbo",
|
|
||||||
"cache": {"no-cache": True},
|
|
||||||
"messages": [
|
|
||||||
{"role": "user", "content": "Say this is a test"}
|
|
||||||
]
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
|
|
||||||
</Tabs>
|
|
||||||
|
|
||||||
**Turn on caching**
|
|
||||||
|
|
||||||
By default cache is always on
|
|
||||||
|
|
||||||
<Tabs>
|
|
||||||
<TabItem value="openai" label="OpenAI Python SDK">
|
|
||||||
|
|
||||||
```python
|
|
||||||
import os
|
|
||||||
from openai import OpenAI
|
|
||||||
|
|
||||||
client = OpenAI(
|
|
||||||
# This is the default and can be omitted
|
|
||||||
api_key=os.environ.get("OPENAI_API_KEY"),
|
|
||||||
base_url="http://0.0.0.0:4000"
|
|
||||||
)
|
|
||||||
|
|
||||||
chat_completion = client.chat.completions.create(
|
|
||||||
messages=[
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "Say this is a test",
|
|
||||||
}
|
|
||||||
],
|
|
||||||
model="gpt-3.5-turbo"
|
|
||||||
)
|
|
||||||
```
|
|
||||||
</TabItem>
|
|
||||||
|
|
||||||
<TabItem value="curl on" label="curl">
|
|
||||||
|
|
||||||
```shell
|
|
||||||
curl http://localhost:4000/v1/chat/completions \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-H "Authorization: Bearer sk-1234" \
|
|
||||||
-d '{
|
|
||||||
"model": "gpt-3.5-turbo",
|
|
||||||
"messages": [
|
|
||||||
{"role": "user", "content": "Say this is a test"}
|
|
||||||
]
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
|
|
||||||
</Tabs>
|
|
||||||
|
|
||||||
**Set `ttl`**
|
|
||||||
|
|
||||||
Set `ttl=600`, this will caches response for 10 minutes (600 seconds)
|
|
||||||
|
|
||||||
<Tabs>
|
|
||||||
<TabItem value="openai" label="OpenAI Python SDK">
|
|
||||||
|
|
||||||
```python
|
|
||||||
import os
|
|
||||||
from openai import OpenAI
|
|
||||||
|
|
||||||
client = OpenAI(
|
|
||||||
# This is the default and can be omitted
|
|
||||||
api_key=os.environ.get("OPENAI_API_KEY"),
|
|
||||||
base_url="http://0.0.0.0:4000"
|
|
||||||
)
|
|
||||||
|
|
||||||
chat_completion = client.chat.completions.create(
|
|
||||||
messages=[
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "Say this is a test",
|
|
||||||
}
|
|
||||||
],
|
|
||||||
model="gpt-3.5-turbo",
|
|
||||||
extra_body = { # OpenAI python accepts extra args in extra_body
|
|
||||||
cache: {
|
|
||||||
"ttl": 600 # caches response for 10 minutes
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
```
|
|
||||||
</TabItem>
|
|
||||||
|
|
||||||
<TabItem value="curl on" label="curl">
|
|
||||||
|
|
||||||
```shell
|
|
||||||
curl http://localhost:4000/v1/chat/completions \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-H "Authorization: Bearer sk-1234" \
|
|
||||||
-d '{
|
|
||||||
"model": "gpt-3.5-turbo",
|
|
||||||
"cache": {"ttl": 600},
|
|
||||||
"messages": [
|
|
||||||
{"role": "user", "content": "Say this is a test"}
|
|
||||||
]
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
|
|
||||||
</Tabs>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
**Set `s-maxage`**
|
|
||||||
|
|
||||||
Set `s-maxage`, this will only get responses cached within last 10 minutes
|
|
||||||
|
|
||||||
<Tabs>
|
|
||||||
<TabItem value="openai" label="OpenAI Python SDK">
|
|
||||||
|
|
||||||
```python
|
|
||||||
import os
|
|
||||||
from openai import OpenAI
|
|
||||||
|
|
||||||
client = OpenAI(
|
|
||||||
# This is the default and can be omitted
|
|
||||||
api_key=os.environ.get("OPENAI_API_KEY"),
|
|
||||||
base_url="http://0.0.0.0:4000"
|
|
||||||
)
|
|
||||||
|
|
||||||
chat_completion = client.chat.completions.create(
|
|
||||||
messages=[
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "Say this is a test",
|
|
||||||
}
|
|
||||||
],
|
|
||||||
model="gpt-3.5-turbo",
|
|
||||||
extra_body = { # OpenAI python accepts extra args in extra_body
|
|
||||||
cache: {
|
|
||||||
"s-maxage": 600 # only get responses cached within last 10 minutes
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
```
|
|
||||||
</TabItem>
|
|
||||||
|
|
||||||
<TabItem value="curl on" label="curl">
|
|
||||||
|
|
||||||
```shell
|
|
||||||
curl http://localhost:4000/v1/chat/completions \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-H "Authorization: Bearer sk-1234" \
|
|
||||||
-d '{
|
|
||||||
"model": "gpt-3.5-turbo",
|
|
||||||
"cache": {"s-maxage": 600},
|
|
||||||
"messages": [
|
|
||||||
{"role": "user", "content": "Say this is a test"}
|
|
||||||
]
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
|
|
||||||
</Tabs>
|
|
||||||
|
|
||||||
|
|
||||||
### Turn on / off caching per Key.
|
|
||||||
|
|
||||||
1. Add cache params when creating a key [full list](#turn-on--off-caching-per-key)
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl -X POST 'http://0.0.0.0:4000/key/generate' \
|
|
||||||
-H 'Authorization: Bearer sk-1234' \
|
|
||||||
-H 'Content-Type: application/json' \
|
|
||||||
-d '{
|
|
||||||
"user_id": "222",
|
|
||||||
"metadata": {
|
|
||||||
"cache": {
|
|
||||||
"no-cache": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Test it!
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl -X POST 'http://localhost:4000/chat/completions' \
|
|
||||||
-H 'Content-Type: application/json' \
|
|
||||||
-H 'Authorization: Bearer <YOUR_NEW_KEY>' \
|
|
||||||
-d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "bom dia"}]}'
|
|
||||||
```
|
|
||||||
|
|
||||||
### Deleting Cache Keys - `/cache/delete`
|
### Deleting Cache Keys - `/cache/delete`
|
||||||
In order to delete a cache key, send a request to `/cache/delete` with the `keys` you want to delete
|
In order to delete a cache key, send a request to `/cache/delete` with the `keys` you want to delete
|
||||||
|
|
||||||
|
|
|
@ -466,6 +466,9 @@ router_settings:
|
||||||
| OTEL_SERVICE_NAME | Service name identifier for OpenTelemetry
|
| OTEL_SERVICE_NAME | Service name identifier for OpenTelemetry
|
||||||
| OTEL_TRACER_NAME | Tracer name for OpenTelemetry tracing
|
| OTEL_TRACER_NAME | Tracer name for OpenTelemetry tracing
|
||||||
| PAGERDUTY_API_KEY | API key for PagerDuty Alerting
|
| PAGERDUTY_API_KEY | API key for PagerDuty Alerting
|
||||||
|
| PHOENIX_API_KEY | API key for Arize Phoenix
|
||||||
|
| PHOENIX_COLLECTOR_ENDPOINT | API endpoint for Arize Phoenix
|
||||||
|
| PHOENIX_COLLECTOR_HTTP_ENDPOINT | API HTTP endpoint for Arize Phoenix
|
||||||
| POD_NAME | Pod name for the server, this will be [emitted to `datadog` logs](https://docs.litellm.ai/docs/proxy/logging#datadog) as `POD_NAME`
|
| POD_NAME | Pod name for the server, this will be [emitted to `datadog` logs](https://docs.litellm.ai/docs/proxy/logging#datadog) as `POD_NAME`
|
||||||
| PREDIBASE_API_BASE | Base URL for Predibase API
|
| PREDIBASE_API_BASE | Base URL for Predibase API
|
||||||
| PRESIDIO_ANALYZER_API_BASE | Base URL for Presidio Analyzer service
|
| PRESIDIO_ANALYZER_API_BASE | Base URL for Presidio Analyzer service
|
||||||
|
@ -488,14 +491,15 @@ router_settings:
|
||||||
| SLACK_DAILY_REPORT_FREQUENCY | Frequency of daily Slack reports (e.g., daily, weekly)
|
| SLACK_DAILY_REPORT_FREQUENCY | Frequency of daily Slack reports (e.g., daily, weekly)
|
||||||
| SLACK_WEBHOOK_URL | Webhook URL for Slack integration
|
| SLACK_WEBHOOK_URL | Webhook URL for Slack integration
|
||||||
| SMTP_HOST | Hostname for the SMTP server
|
| SMTP_HOST | Hostname for the SMTP server
|
||||||
| SMTP_PASSWORD | Password for SMTP authentication
|
| SMTP_PASSWORD | Password for SMTP authentication (do not set if SMTP does not require auth)
|
||||||
| SMTP_PORT | Port number for SMTP server
|
| SMTP_PORT | Port number for SMTP server
|
||||||
| SMTP_SENDER_EMAIL | Email address used as the sender in SMTP transactions
|
| SMTP_SENDER_EMAIL | Email address used as the sender in SMTP transactions
|
||||||
| SMTP_SENDER_LOGO | Logo used in emails sent via SMTP
|
| SMTP_SENDER_LOGO | Logo used in emails sent via SMTP
|
||||||
| SMTP_TLS | Flag to enable or disable TLS for SMTP connections
|
| SMTP_TLS | Flag to enable or disable TLS for SMTP connections
|
||||||
| SMTP_USERNAME | Username for SMTP authentication
|
| SMTP_USERNAME | Username for SMTP authentication (do not set if SMTP does not require auth)
|
||||||
| SPEND_LOGS_URL | URL for retrieving spend logs
|
| SPEND_LOGS_URL | URL for retrieving spend logs
|
||||||
| SSL_CERTIFICATE | Path to the SSL certificate file
|
| SSL_CERTIFICATE | Path to the SSL certificate file
|
||||||
|
| SSL_SECURITY_LEVEL | [BETA] Security level for SSL/TLS connections. E.g. `DEFAULT@SECLEVEL=1`
|
||||||
| SSL_VERIFY | Flag to enable or disable SSL certificate verification
|
| SSL_VERIFY | Flag to enable or disable SSL certificate verification
|
||||||
| SUPABASE_KEY | API key for Supabase service
|
| SUPABASE_KEY | API key for Supabase service
|
||||||
| SUPABASE_URL | Base URL for Supabase instance
|
| SUPABASE_URL | Base URL for Supabase instance
|
||||||
|
|
|
@ -448,6 +448,34 @@ model_list:
|
||||||
|
|
||||||
s/o to [@David Manouchehri](https://www.linkedin.com/in/davidmanouchehri/) for helping with this.
|
s/o to [@David Manouchehri](https://www.linkedin.com/in/davidmanouchehri/) for helping with this.
|
||||||
|
|
||||||
|
### Centralized Credential Management
|
||||||
|
|
||||||
|
Define credentials once and reuse them across multiple models. This helps with:
|
||||||
|
- Secret rotation
|
||||||
|
- Reducing config duplication
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: gpt-4o
|
||||||
|
litellm_params:
|
||||||
|
model: azure/gpt-4o
|
||||||
|
litellm_credential_name: default_azure_credential # Reference credential below
|
||||||
|
|
||||||
|
credential_list:
|
||||||
|
- credential_name: default_azure_credential
|
||||||
|
credential_values:
|
||||||
|
api_key: os.environ/AZURE_API_KEY # Load from environment
|
||||||
|
api_base: os.environ/AZURE_API_BASE
|
||||||
|
api_version: "2023-05-15"
|
||||||
|
credential_info:
|
||||||
|
description: "Production credentials for EU region"
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Key Parameters
|
||||||
|
- `credential_name`: Unique identifier for the credential set
|
||||||
|
- `credential_values`: Key-value pairs of credentials/secrets (supports `os.environ/` syntax)
|
||||||
|
- `credential_info`: Key-value pairs of user-provided credential information. No key-value pairs are required, but the dictionary must exist.
|
||||||
|
|
||||||
### Load API Keys from Secret Managers (Azure Vault, etc)
|
### Load API Keys from Secret Managers (Azure Vault, etc)
|
||||||
|
|
||||||
[**Using Secret Managers with LiteLLM Proxy**](../secret)
|
[**Using Secret Managers with LiteLLM Proxy**](../secret)
|
||||||
|
|
|
@ -46,18 +46,17 @@ You can see the full DB Schema [here](https://github.com/BerriAI/litellm/blob/ma
|
||||||
|
|
||||||
| Table Name | Description | Row Insert Frequency |
|
| Table Name | Description | Row Insert Frequency |
|
||||||
|------------|-------------|---------------------|
|
|------------|-------------|---------------------|
|
||||||
| LiteLLM_SpendLogs | Detailed logs of all API requests. Records token usage, spend, and timing information. Tracks which models and keys were used. | **High - every LLM API request** |
|
| LiteLLM_SpendLogs | Detailed logs of all API requests. Records token usage, spend, and timing information. Tracks which models and keys were used. | **High - every LLM API request - Success or Failure** |
|
||||||
| LiteLLM_ErrorLogs | Captures failed requests and errors. Stores exception details and request information. Helps with debugging and monitoring. | **Medium - on errors only** |
|
|
||||||
| LiteLLM_AuditLog | Tracks changes to system configuration. Records who made changes and what was modified. Maintains history of updates to teams, users, and models. | **Off by default**, **High - when enabled** |
|
| LiteLLM_AuditLog | Tracks changes to system configuration. Records who made changes and what was modified. Maintains history of updates to teams, users, and models. | **Off by default**, **High - when enabled** |
|
||||||
|
|
||||||
## Disable `LiteLLM_SpendLogs` & `LiteLLM_ErrorLogs`
|
## Disable `LiteLLM_SpendLogs`
|
||||||
|
|
||||||
You can disable spend_logs and error_logs by setting `disable_spend_logs` and `disable_error_logs` to `True` on the `general_settings` section of your proxy_config.yaml file.
|
You can disable spend_logs and error_logs by setting `disable_spend_logs` and `disable_error_logs` to `True` on the `general_settings` section of your proxy_config.yaml file.
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
general_settings:
|
general_settings:
|
||||||
disable_spend_logs: True # Disable writing spend logs to DB
|
disable_spend_logs: True # Disable writing spend logs to DB
|
||||||
disable_error_logs: True # Disable writing error logs to DB
|
disable_error_logs: True # Only disable writing error logs to DB, regular spend logs will still be written unless `disable_spend_logs: True`
|
||||||
```
|
```
|
||||||
|
|
||||||
### What is the impact of disabling these logs?
|
### What is the impact of disabling these logs?
|
||||||
|
|
|
@ -37,7 +37,7 @@ guardrails:
|
||||||
- guardrail_name: aim-protected-app
|
- guardrail_name: aim-protected-app
|
||||||
litellm_params:
|
litellm_params:
|
||||||
guardrail: aim
|
guardrail: aim
|
||||||
mode: pre_call # 'during_call' is also available
|
mode: [pre_call, post_call] # "during_call" is also available
|
||||||
api_key: os.environ/AIM_API_KEY
|
api_key: os.environ/AIM_API_KEY
|
||||||
api_base: os.environ/AIM_API_BASE # Optional, use only when using a self-hosted Aim Outpost
|
api_base: os.environ/AIM_API_BASE # Optional, use only when using a self-hosted Aim Outpost
|
||||||
```
|
```
|
||||||
|
|
|
@ -78,6 +78,7 @@ Inherits from `StandardLoggingUserAPIKeyMetadata` and adds:
|
||||||
| `api_base` | `Optional[str]` | Optional API base URL |
|
| `api_base` | `Optional[str]` | Optional API base URL |
|
||||||
| `response_cost` | `Optional[str]` | Optional response cost |
|
| `response_cost` | `Optional[str]` | Optional response cost |
|
||||||
| `additional_headers` | `Optional[StandardLoggingAdditionalHeaders]` | Additional headers |
|
| `additional_headers` | `Optional[StandardLoggingAdditionalHeaders]` | Additional headers |
|
||||||
|
| `batch_models` | `Optional[List[str]]` | Only set for Batches API. Lists the models used for cost calculation |
|
||||||
|
|
||||||
## StandardLoggingModelInformation
|
## StandardLoggingModelInformation
|
||||||
|
|
||||||
|
|
53
docs/my-website/docs/proxy/master_key_rotations.md
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
# Rotating Master Key
|
||||||
|
|
||||||
|
Here are our recommended steps for rotating your master key.
|
||||||
|
|
||||||
|
|
||||||
|
**1. Backup your DB**
|
||||||
|
In case of any errors during the encryption/decryption process, this will allow you to revert to the current state without issues.
|
||||||
|
|
||||||
|
**2. Call `/key/regenerate` with the new master key**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://localhost:4000/key/regenerate' \
|
||||||
|
-H 'Authorization: Bearer sk-1234' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-d '{
|
||||||
|
"key": "sk-1234",
|
||||||
|
"new_master_key": "sk-PIp1h0RekR"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
This will re-encrypt any models in your Proxy_ModelTable with the new master key.
|
||||||
|
|
||||||
|
Expect to start seeing decryption errors in logs, as your old master key is no longer able to decrypt the new values.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
raise Exception("Unable to decrypt value={}".format(v))
|
||||||
|
Exception: Unable to decrypt value=<new-encrypted-value>
|
||||||
|
```
|
||||||
|
|
||||||
|
**3. Update LITELLM_MASTER_KEY**
|
||||||
|
|
||||||
|
In your environment variables update the value of LITELLM_MASTER_KEY to the new_master_key from Step 2.
|
||||||
|
|
||||||
|
This ensures the key used for decryption from db is the new key.
|
||||||
|
|
||||||
|
**4. Test it**
|
||||||
|
|
||||||
|
Make a test request to a model stored on proxy with a litellm key (new master key or virtual key) and see if it works
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer sk-1234' \
|
||||||
|
-d '{
|
||||||
|
"model": "gpt-4o-mini", # 👈 REPLACE with 'public model name' for any db-model
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"content": "Hey, how's it going",
|
||||||
|
"role": "user"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
}'
|
||||||
|
```
|
|
@ -107,9 +107,9 @@ general_settings:
|
||||||
|
|
||||||
By default, LiteLLM writes several types of logs to the database:
|
By default, LiteLLM writes several types of logs to the database:
|
||||||
- Every LLM API request to the `LiteLLM_SpendLogs` table
|
- Every LLM API request to the `LiteLLM_SpendLogs` table
|
||||||
- LLM Exceptions to the `LiteLLM_LogsErrors` table
|
- LLM Exceptions to the `LiteLLM_SpendLogs` table
|
||||||
|
|
||||||
If you're not viewing these logs on the LiteLLM UI (most users use Prometheus for monitoring), you can disable them by setting the following flags to `True`:
|
If you're not viewing these logs on the LiteLLM UI, you can disable them by setting the following flags to `True`:
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
general_settings:
|
general_settings:
|
||||||
|
|
12
docs/my-website/docs/proxy/release_cycle.md
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
# Release Cycle
|
||||||
|
|
||||||
|
LiteLLM Proxy has the following release cycle:
|
||||||
|
|
||||||
|
- `v1.x.x-nightly`: These are releases which pass ci/cd.
|
||||||
|
- `v1.x.x.rc`: These are releases which pass ci/cd + [manual review](https://github.com/BerriAI/litellm/discussions/8495#discussioncomment-12180711).
|
||||||
|
- `v1.x.x` OR `v1.x.x-stable`: These are releases which pass ci/cd + manual review + 3 days of production testing.
|
||||||
|
|
||||||
|
In production, we recommend using the latest `v1.x.x` release.
|
||||||
|
|
||||||
|
|
||||||
|
Follow our release notes [here](https://github.com/BerriAI/litellm/releases).
|
|
@ -8,7 +8,16 @@ Special headers that are supported by LiteLLM.
|
||||||
|
|
||||||
`x-litellm-enable-message-redaction`: Optional[bool]: Don't log the message content to logging integrations. Just track spend. [Learn More](./logging#redact-messages-response-content)
|
`x-litellm-enable-message-redaction`: Optional[bool]: Don't log the message content to logging integrations. Just track spend. [Learn More](./logging#redact-messages-response-content)
|
||||||
|
|
||||||
|
`x-litellm-tags`: Optional[str]: A comma separated list (e.g. `tag1,tag2,tag3`) of tags to use for [tag-based routing](./tag_routing) **OR** [spend-tracking](./enterprise.md#tracking-spend-for-custom-tags).
|
||||||
|
|
||||||
## Anthropic Headers
|
## Anthropic Headers
|
||||||
|
|
||||||
`anthropic-version` Optional[str]: The version of the Anthropic API to use.
|
`anthropic-version` Optional[str]: The version of the Anthropic API to use.
|
||||||
`anthropic-beta` Optional[str]: The beta version of the Anthropic API to use.
|
`anthropic-beta` Optional[str]: The beta version of the Anthropic API to use.
|
||||||
|
|
||||||
|
## OpenAI Headers
|
||||||
|
|
||||||
|
`openai-organization` Optional[str]: The organization to use for the OpenAI API. (currently needs to be enabled via `general_settings::forward_openai_org_id: true`)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,17 +1,20 @@
|
||||||
# Rate Limit Headers
|
# Response Headers
|
||||||
|
|
||||||
When you make a request to the proxy, the proxy will return the following [OpenAI-compatible headers](https://platform.openai.com/docs/guides/rate-limits/rate-limits-in-headers):
|
When you make a request to the proxy, the proxy will return the following headers:
|
||||||
|
|
||||||
- `x-ratelimit-remaining-requests` - Optional[int]: The remaining number of requests that are permitted before exhausting the rate limit.
|
## Rate Limit Headers
|
||||||
- `x-ratelimit-remaining-tokens` - Optional[int]: The remaining number of tokens that are permitted before exhausting the rate limit.
|
[OpenAI-compatible headers](https://platform.openai.com/docs/guides/rate-limits/rate-limits-in-headers):
|
||||||
- `x-ratelimit-limit-requests` - Optional[int]: The maximum number of requests that are permitted before exhausting the rate limit.
|
|
||||||
- `x-ratelimit-limit-tokens` - Optional[int]: The maximum number of tokens that are permitted before exhausting the rate limit.
|
|
||||||
- `x-ratelimit-reset-requests` - Optional[int]: The time at which the rate limit will reset.
|
|
||||||
- `x-ratelimit-reset-tokens` - Optional[int]: The time at which the rate limit will reset.
|
|
||||||
|
|
||||||
These headers are useful for clients to understand the current rate limit status and adjust their request rate accordingly.
|
| Header | Type | Description |
|
||||||
|
|--------|------|-------------|
|
||||||
|
| `x-ratelimit-remaining-requests` | Optional[int] | The remaining number of requests that are permitted before exhausting the rate limit |
|
||||||
|
| `x-ratelimit-remaining-tokens` | Optional[int] | The remaining number of tokens that are permitted before exhausting the rate limit |
|
||||||
|
| `x-ratelimit-limit-requests` | Optional[int] | The maximum number of requests that are permitted before exhausting the rate limit |
|
||||||
|
| `x-ratelimit-limit-tokens` | Optional[int] | The maximum number of tokens that are permitted before exhausting the rate limit |
|
||||||
|
| `x-ratelimit-reset-requests` | Optional[int] | The time at which the request rate limit will reset |
|
||||||
|
| `x-ratelimit-reset-tokens` | Optional[int] | The time at which the token rate limit will reset |
|
||||||
|
|
||||||
## How are these headers calculated?
|
### How Rate Limit Headers work
|
||||||
|
|
||||||
**If key has rate limits set**
|
**If key has rate limits set**
|
||||||
|
|
||||||
|
@ -19,6 +22,50 @@ The proxy will return the [remaining rate limits for that key](https://github.co
|
||||||
|
|
||||||
**If key does not have rate limits set**
|
**If key does not have rate limits set**
|
||||||
|
|
||||||
The proxy returns the remaining requests/tokens returned by the backend provider.
|
The proxy returns the remaining requests/tokens returned by the backend provider. (LiteLLM will standardize the backend provider's response headers to match the OpenAI format)
|
||||||
|
|
||||||
If the backend provider does not return these headers, the value will be `None`.
|
If the backend provider does not return these headers, the value will be `None`.
|
||||||
|
|
||||||
|
These headers are useful for clients to understand the current rate limit status and adjust their request rate accordingly.
|
||||||
|
|
||||||
|
|
||||||
|
## Latency Headers
|
||||||
|
| Header | Type | Description |
|
||||||
|
|--------|------|-------------|
|
||||||
|
| `x-litellm-response-duration-ms` | float | Total duration of the API response in milliseconds |
|
||||||
|
| `x-litellm-overhead-duration-ms` | float | LiteLLM processing overhead in milliseconds |
|
||||||
|
|
||||||
|
## Retry, Fallback Headers
|
||||||
|
| Header | Type | Description |
|
||||||
|
|--------|------|-------------|
|
||||||
|
| `x-litellm-attempted-retries` | int | Number of retry attempts made |
|
||||||
|
| `x-litellm-attempted-fallbacks` | int | Number of fallback attempts made |
|
||||||
|
| `x-litellm-max-fallbacks` | int | Maximum number of fallback attempts allowed |
|
||||||
|
|
||||||
|
## Cost Tracking Headers
|
||||||
|
| Header | Type | Description |
|
||||||
|
|--------|------|-------------|
|
||||||
|
| `x-litellm-response-cost` | float | Cost of the API call |
|
||||||
|
| `x-litellm-key-spend` | float | Total spend for the API key |
|
||||||
|
|
||||||
|
## LiteLLM Specific Headers
|
||||||
|
| Header | Type | Description |
|
||||||
|
|--------|------|-------------|
|
||||||
|
| `x-litellm-call-id` | string | Unique identifier for the API call |
|
||||||
|
| `x-litellm-model-id` | string | Unique identifier for the model used |
|
||||||
|
| `x-litellm-model-api-base` | string | Base URL of the API endpoint |
|
||||||
|
| `x-litellm-version` | string | Version of LiteLLM being used |
|
||||||
|
| `x-litellm-model-group` | string | Model group identifier |
|
||||||
|
|
||||||
|
## Response headers from LLM providers
|
||||||
|
|
||||||
|
LiteLLM also returns the original response headers from the LLM provider. These headers are prefixed with `llm_provider-` to distinguish them from LiteLLM's headers.
|
||||||
|
|
||||||
|
Example response headers:
|
||||||
|
```
|
||||||
|
llm_provider-openai-processing-ms: 256
|
||||||
|
llm_provider-openai-version: 2020-10-01
|
||||||
|
llm_provider-x-ratelimit-limit-requests: 30000
|
||||||
|
llm_provider-x-ratelimit-limit-tokens: 150000000
|
||||||
|
```
|
||||||
|
|
||||||
|
|
|
@ -143,6 +143,26 @@ Response
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Calling via Request Header
|
||||||
|
|
||||||
|
You can also call via request header `x-litellm-tags`
|
||||||
|
|
||||||
|
```shell
|
||||||
|
curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer sk-1234' \
|
||||||
|
-H 'x-litellm-tags: free,my-custom-tag' \
|
||||||
|
-d '{
|
||||||
|
"model": "gpt-4",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "Hey, how'\''s it going 123456?"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
## Setting Default Tags
|
## Setting Default Tags
|
||||||
|
|
||||||
Use this if you want all untagged requests to be routed to specific deployments
|
Use this if you want all untagged requests to be routed to specific deployments
|
||||||
|
|
|
@ -102,7 +102,19 @@ curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
## Advanced - Set Accepted JWT Scope Names
|
## Advanced
|
||||||
|
|
||||||
|
### Multiple OIDC providers
|
||||||
|
|
||||||
|
Use this if you want LiteLLM to validate your JWT against multiple OIDC providers (e.g. Google Cloud, GitHub Auth)
|
||||||
|
|
||||||
|
Set `JWT_PUBLIC_KEY_URL` in your environment to a comma-separated list of URLs for your OIDC providers.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export JWT_PUBLIC_KEY_URL="https://demo.duendesoftware.com/.well-known/openid-configuration/jwks,https://accounts.google.com/.well-known/openid-configuration/jwks"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Set Accepted JWT Scope Names
|
||||||
|
|
||||||
Change the string in JWT 'scopes', that litellm evaluates to see if a user has admin access.
|
Change the string in JWT 'scopes', that litellm evaluates to see if a user has admin access.
|
||||||
|
|
||||||
|
@ -114,7 +126,7 @@ general_settings:
|
||||||
admin_jwt_scope: "litellm-proxy-admin"
|
admin_jwt_scope: "litellm-proxy-admin"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Tracking End-Users / Internal Users / Team / Org
|
### Tracking End-Users / Internal Users / Team / Org
|
||||||
|
|
||||||
Set the field in the jwt token, which corresponds to a litellm user / team / org.
|
Set the field in the jwt token, which corresponds to a litellm user / team / org.
|
||||||
|
|
||||||
|
@ -156,7 +168,7 @@ scope: ["litellm-proxy-admin",...]
|
||||||
scope: "litellm-proxy-admin ..."
|
scope: "litellm-proxy-admin ..."
|
||||||
```
|
```
|
||||||
|
|
||||||
## Control model access with Teams
|
### Control model access with Teams
|
||||||
|
|
||||||
|
|
||||||
1. Specify the JWT field that contains the team ids, that the user belongs to.
|
1. Specify the JWT field that contains the team ids, that the user belongs to.
|
||||||
|
@ -207,7 +219,65 @@ OIDC Auth for API: [**See Walkthrough**](https://www.loom.com/share/00fe2deab59a
|
||||||
- If all checks pass, allow the request
|
- If all checks pass, allow the request
|
||||||
|
|
||||||
|
|
||||||
## Advanced - Allowed Routes
|
### Custom JWT Validate
|
||||||
|
|
||||||
|
Validate a JWT Token using custom logic, if you need an extra way to verify if tokens are valid for LiteLLM Proxy.
|
||||||
|
|
||||||
|
#### 1. Setup custom validate function
|
||||||
|
|
||||||
|
```python
|
||||||
|
from typing import Literal
|
||||||
|
|
||||||
|
def my_custom_validate(token: dict) -> Literal[True]:
|
||||||
|
"""
|
||||||
|
Only allow tokens with tenant-id == "my-unique-tenant", and claims == ["proxy-admin"]
|
||||||
|
"""
|
||||||
|
allowed_tenants = ["my-unique-tenant"]
|
||||||
|
allowed_claims = ["proxy-admin"]
|
||||||
|
|
||||||
|
if token["tenant_id"] not in allowed_tenants:
|
||||||
|
raise Exception("Invalid JWT token")
|
||||||
|
if token["claims"] not in allowed_claims:
|
||||||
|
raise Exception("Invalid JWT token")
|
||||||
|
return True
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 2. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
general_settings:
|
||||||
|
master_key: sk-1234
|
||||||
|
enable_jwt_auth: True
|
||||||
|
litellm_jwtauth:
|
||||||
|
user_id_jwt_field: "sub"
|
||||||
|
team_id_jwt_field: "tenant_id"
|
||||||
|
user_id_upsert: True
|
||||||
|
custom_validate: custom_validate.my_custom_validate # 👈 custom validate function
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 3. Test the flow
|
||||||
|
|
||||||
|
**Expected JWT**
|
||||||
|
|
||||||
|
```
|
||||||
|
{
|
||||||
|
"sub": "my-unique-user",
|
||||||
|
"tenant_id": "INVALID_TENANT",
|
||||||
|
"claims": ["proxy-admin"]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Expected Response**
|
||||||
|
|
||||||
|
```
|
||||||
|
{
|
||||||
|
"error": "Invalid JWT token"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### Allowed Routes
|
||||||
|
|
||||||
Configure which routes a JWT can access via the config.
|
Configure which routes a JWT can access via the config.
|
||||||
|
|
||||||
|
@ -239,7 +309,7 @@ general_settings:
|
||||||
team_allowed_routes: ["/v1/chat/completions"] # 👈 Set accepted routes
|
team_allowed_routes: ["/v1/chat/completions"] # 👈 Set accepted routes
|
||||||
```
|
```
|
||||||
|
|
||||||
## Advanced - Caching Public Keys
|
### Caching Public Keys
|
||||||
|
|
||||||
Control how long public keys are cached for (in seconds).
|
Control how long public keys are cached for (in seconds).
|
||||||
|
|
||||||
|
@ -253,7 +323,7 @@ general_settings:
|
||||||
public_key_ttl: 600 # 👈 KEY CHANGE
|
public_key_ttl: 600 # 👈 KEY CHANGE
|
||||||
```
|
```
|
||||||
|
|
||||||
## Advanced - Custom JWT Field
|
### Custom JWT Field
|
||||||
|
|
||||||
Set a custom field in which the team_id exists. By default, the 'client_id' field is checked.
|
Set a custom field in which the team_id exists. By default, the 'client_id' field is checked.
|
||||||
|
|
||||||
|
@ -265,14 +335,7 @@ general_settings:
|
||||||
team_id_jwt_field: "client_id" # 👈 KEY CHANGE
|
team_id_jwt_field: "client_id" # 👈 KEY CHANGE
|
||||||
```
|
```
|
||||||
|
|
||||||
## All Params
|
### Block Teams
|
||||||
|
|
||||||
[**See Code**](https://github.com/BerriAI/litellm/blob/b204f0c01c703317d812a1553363ab0cb989d5b6/litellm/proxy/_types.py#L95)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Advanced - Block Teams
|
|
||||||
|
|
||||||
To block all requests for a certain team id, use `/team/block`
|
To block all requests for a certain team id, use `/team/block`
|
||||||
|
|
||||||
|
@ -299,7 +362,7 @@ curl --location 'http://0.0.0.0:4000/team/unblock' \
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
## Advanced - Upsert Users + Allowed Email Domains
|
### Upsert Users + Allowed Email Domains
|
||||||
|
|
||||||
Allow users who belong to a specific email domain, automatic access to the proxy.
|
Allow users who belong to a specific email domain, automatic access to the proxy.
|
||||||
|
|
||||||
|
@ -354,7 +417,7 @@ environment_variables:
|
||||||
|
|
||||||
### Example Token
|
### Example Token
|
||||||
|
|
||||||
```
|
```bash
|
||||||
{
|
{
|
||||||
"aud": "api://LiteLLM_Proxy",
|
"aud": "api://LiteLLM_Proxy",
|
||||||
"oid": "eec236bd-0135-4b28-9354-8fc4032d543e",
|
"oid": "eec236bd-0135-4b28-9354-8fc4032d543e",
|
||||||
|
@ -415,9 +478,9 @@ general_settings:
|
||||||
|
|
||||||
Expected Token:
|
Expected Token:
|
||||||
|
|
||||||
```
|
```bash
|
||||||
{
|
{
|
||||||
"scope": ["litellm.api.consumer", "litellm.api.gpt_3_5_turbo"]
|
"scope": ["litellm.api.consumer", "litellm.api.gpt_3_5_turbo"] # can be a list or a space-separated string
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -437,3 +500,9 @@ curl -L -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
|
||||||
]
|
]
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## All JWT Params
|
||||||
|
|
||||||
|
[**See Code**](https://github.com/BerriAI/litellm/blob/b204f0c01c703317d812a1553363ab0cb989d5b6/litellm/proxy/_types.py#L95)
|
||||||
|
|
||||||
|
|
||||||
|
|
55
docs/my-website/docs/proxy/ui_credentials.md
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
import Image from '@theme/IdealImage';
|
||||||
|
import Tabs from '@theme/Tabs';
|
||||||
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
|
# Adding LLM Credentials
|
||||||
|
|
||||||
|
You can add LLM provider credentials on the UI. Once you add credentials you can re-use them when adding new models
|
||||||
|
|
||||||
|
## Add a credential + model
|
||||||
|
|
||||||
|
### 1. Navigate to LLM Credentials page
|
||||||
|
|
||||||
|
Go to Models -> LLM Credentials -> Add Credential
|
||||||
|
|
||||||
|
<Image img={require('../../img/ui_cred_add.png')} />
|
||||||
|
|
||||||
|
### 2. Add credentials
|
||||||
|
|
||||||
|
Select your LLM provider, enter your API Key and click "Add Credential"
|
||||||
|
|
||||||
|
**Note: The credential fields depend on the provider. For example, if you select Vertex AI, you will see the `Vertex Project`, `Vertex Location` and `Vertex Credentials` fields.**
|
||||||
|
|
||||||
|
<Image img={require('../../img/ui_add_cred_2.png')} />
|
||||||
|
|
||||||
|
|
||||||
|
### 3. Use credentials when adding a model
|
||||||
|
|
||||||
|
Go to Add Model -> Existing Credentials -> Select your credential in the dropdown
|
||||||
|
|
||||||
|
<Image img={require('../../img/ui_cred_3.png')} />
|
||||||
|
|
||||||
|
|
||||||
|
## Create a Credential from an existing model
|
||||||
|
|
||||||
|
Use this if you have already created a model and want to store the model credentials for future use
|
||||||
|
|
||||||
|
### 1. Select model to create a credential from
|
||||||
|
|
||||||
|
Go to Models -> Select your model -> Credential -> Create Credential
|
||||||
|
|
||||||
|
<Image img={require('../../img/ui_cred_4.png')} />
|
||||||
|
|
||||||
|
### 2. Use new credential when adding a model
|
||||||
|
|
||||||
|
Go to Add Model -> Existing Credentials -> Select your credential in the dropdown
|
||||||
|
|
||||||
|
<Image img={require('../../img/use_model_cred.png')} />
|
||||||
|
|
||||||
|
## Frequently Asked Questions
|
||||||
|
|
||||||
|
|
||||||
|
**How are credentials stored?**
|
||||||
|
Credentials in the DB are encrypted/decrypted using `LITELLM_SALT_KEY`, if set. If not, then they are encrypted using `LITELLM_MASTER_KEY`. These keys should be kept secret and not shared with others.
|
||||||
|
|
||||||
|
|
55
docs/my-website/docs/proxy/ui_logs.md
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
|
||||||
|
import Image from '@theme/IdealImage';
|
||||||
|
import Tabs from '@theme/Tabs';
|
||||||
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
|
# UI Logs Page
|
||||||
|
|
||||||
|
View Spend, Token Usage, Key, Team Name for Each Request to LiteLLM
|
||||||
|
|
||||||
|
|
||||||
|
<Image img={require('../../img/ui_request_logs.png')}/>
|
||||||
|
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
| Log Type | Tracked by Default |
|
||||||
|
|----------|-------------------|
|
||||||
|
| Success Logs | ✅ Yes |
|
||||||
|
| Error Logs | ✅ Yes |
|
||||||
|
| Request/Response Content Stored | ❌ No by Default, **opt in with `store_prompts_in_spend_logs`** |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
**By default LiteLLM does not track the request and response content.**
|
||||||
|
|
||||||
|
## Tracking - Request / Response Content in Logs Page
|
||||||
|
|
||||||
|
If you want to view request and response content on LiteLLM Logs, you need to opt in with this setting
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
general_settings:
|
||||||
|
store_prompts_in_spend_logs: true
|
||||||
|
```
|
||||||
|
|
||||||
|
<Image img={require('../../img/ui_request_logs_content.png')}/>
|
||||||
|
|
||||||
|
|
||||||
|
## Stop storing Error Logs in DB
|
||||||
|
|
||||||
|
If you do not want to store error logs in DB, you can opt out with this setting
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
general_settings:
|
||||||
|
disable_error_logs: True # Only disable writing error logs to DB, regular spend logs will still be written unless `disable_spend_logs: True`
|
||||||
|
```
|
||||||
|
|
||||||
|
## Stop storing Spend Logs in DB
|
||||||
|
|
||||||
|
If you do not want to store spend logs in DB, you can opt out with this setting
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
general_settings:
|
||||||
|
disable_spend_logs: True # Disable writing spend logs to DB
|
||||||
|
```
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
import Image from '@theme/IdealImage';
|
import Image from '@theme/IdealImage';
|
||||||
|
|
||||||
|
|
||||||
# User Management Heirarchy
|
# User Management Hierarchy
|
||||||
|
|
||||||
<Image img={require('../../img/litellm_user_heirarchy.png')} style={{ width: '100%', maxWidth: '4000px' }} />
|
<Image img={require('../../img/litellm_user_heirarchy.png')} style={{ width: '100%', maxWidth: '4000px' }} />
|
||||||
|
|
||||||
LiteLLM supports a heirarchy of users, teams, organizations, and budgets.
|
LiteLLM supports a hierarchy of users, teams, organizations, and budgets.
|
||||||
|
|
||||||
- Organizations can have multiple teams. [API Reference](https://litellm-api.up.railway.app/#/organization%20management)
|
- Organizations can have multiple teams. [API Reference](https://litellm-api.up.railway.app/#/organization%20management)
|
||||||
- Teams can have multiple users. [API Reference](https://litellm-api.up.railway.app/#/team%20management)
|
- Teams can have multiple users. [API Reference](https://litellm-api.up.railway.app/#/team%20management)
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
import Tabs from '@theme/Tabs';
|
import Tabs from '@theme/Tabs';
|
||||||
import TabItem from '@theme/TabItem';
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# Realtime Endpoints
|
# /realtime
|
||||||
|
|
||||||
Use this to loadbalance across Azure + OpenAI.
|
Use this to loadbalance across Azure + OpenAI.
|
||||||
|
|
||||||
|
|
366
docs/my-website/docs/reasoning_content.md
Normal file
|
@ -0,0 +1,366 @@
|
||||||
|
import Tabs from '@theme/Tabs';
|
||||||
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
|
# 'Thinking' / 'Reasoning Content'
|
||||||
|
|
||||||
|
:::info
|
||||||
|
|
||||||
|
Requires LiteLLM v1.63.0+
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
Supported Providers:
|
||||||
|
- Deepseek (`deepseek/`)
|
||||||
|
- Anthropic API (`anthropic/`)
|
||||||
|
- Bedrock (Anthropic + Deepseek) (`bedrock/`)
|
||||||
|
- Vertex AI (Anthropic) (`vertex_ai/`)
|
||||||
|
- OpenRouter (`openrouter/`)
|
||||||
|
|
||||||
|
LiteLLM will standardize the `reasoning_content` in the response and `thinking_blocks` in the assistant message.
|
||||||
|
|
||||||
|
```python
|
||||||
|
"message": {
|
||||||
|
...
|
||||||
|
"reasoning_content": "The capital of France is Paris.",
|
||||||
|
"thinking_blocks": [
|
||||||
|
{
|
||||||
|
"type": "thinking",
|
||||||
|
"thinking": "The capital of France is Paris.",
|
||||||
|
"signature": "EqoBCkgIARABGAIiQL2UoU0b1OHYi+..."
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import completion
|
||||||
|
import os
|
||||||
|
|
||||||
|
os.environ["ANTHROPIC_API_KEY"] = ""
|
||||||
|
|
||||||
|
response = completion(
|
||||||
|
model="anthropic/claude-3-7-sonnet-20250219",
|
||||||
|
messages=[
|
||||||
|
{"role": "user", "content": "What is the capital of France?"},
|
||||||
|
],
|
||||||
|
thinking={"type": "enabled", "budget_tokens": 1024} # 👈 REQUIRED FOR ANTHROPIC models (on `anthropic/`, `bedrock/`, `vertexai/`)
|
||||||
|
)
|
||||||
|
print(response.choices[0].message.content)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer $LITELLM_KEY" \
|
||||||
|
-d '{
|
||||||
|
"model": "anthropic/claude-3-7-sonnet-20250219",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "What is the capital of France?"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thinking": {"type": "enabled", "budget_tokens": 1024}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
**Expected Response**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
{
|
||||||
|
"id": "3b66124d79a708e10c603496b363574c",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": "stop",
|
||||||
|
"index": 0,
|
||||||
|
"message": {
|
||||||
|
"content": " won the FIFA World Cup in 2022.",
|
||||||
|
"role": "assistant",
|
||||||
|
"tool_calls": null,
|
||||||
|
"function_call": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1723323084,
|
||||||
|
"model": "deepseek/deepseek-chat",
|
||||||
|
"object": "chat.completion",
|
||||||
|
"system_fingerprint": "fp_7e0991cad4",
|
||||||
|
"usage": {
|
||||||
|
"completion_tokens": 12,
|
||||||
|
"prompt_tokens": 16,
|
||||||
|
"total_tokens": 28,
|
||||||
|
},
|
||||||
|
"service_tier": null
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Tool Calling with `thinking`
|
||||||
|
|
||||||
|
Here's how to use `thinking` blocks by Anthropic with tool calling.
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
litellm._turn_on_debug()
|
||||||
|
litellm.modify_params = True
|
||||||
|
model = "anthropic/claude-3-7-sonnet-20250219" # works across Anthropic, Bedrock, Vertex AI
|
||||||
|
# Step 1: send the conversation and available functions to the model
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
tools = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "get_current_weather",
|
||||||
|
"description": "Get the current weather in a given location",
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"location": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The city and state",
|
||||||
|
},
|
||||||
|
"unit": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["celsius", "fahrenheit"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"required": ["location"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
response = litellm.completion(
|
||||||
|
model=model,
|
||||||
|
messages=messages,
|
||||||
|
tools=tools,
|
||||||
|
tool_choice="auto", # auto is default, but we'll be explicit
|
||||||
|
thinking={"type": "enabled", "budget_tokens": 1024},
|
||||||
|
)
|
||||||
|
print("Response\n", response)
|
||||||
|
response_message = response.choices[0].message
|
||||||
|
tool_calls = response_message.tool_calls
|
||||||
|
|
||||||
|
print("Expecting there to be 3 tool calls")
|
||||||
|
assert (
|
||||||
|
len(tool_calls) > 0
|
||||||
|
) # this has to call the function for SF, Tokyo and paris
|
||||||
|
|
||||||
|
# Step 2: check if the model wanted to call a function
|
||||||
|
print(f"tool_calls: {tool_calls}")
|
||||||
|
if tool_calls:
|
||||||
|
# Step 3: call the function
|
||||||
|
# Note: the JSON response may not always be valid; be sure to handle errors
|
||||||
|
available_functions = {
|
||||||
|
"get_current_weather": get_current_weather,
|
||||||
|
} # only one function in this example, but you can have multiple
|
||||||
|
messages.append(
|
||||||
|
response_message
|
||||||
|
) # extend conversation with assistant's reply
|
||||||
|
print("Response message\n", response_message)
|
||||||
|
# Step 4: send the info for each function call and function response to the model
|
||||||
|
for tool_call in tool_calls:
|
||||||
|
function_name = tool_call.function.name
|
||||||
|
if function_name not in available_functions:
|
||||||
|
# the model called a function that does not exist in available_functions - don't try calling anything
|
||||||
|
continue
|
||||||
|
function_to_call = available_functions[function_name]
|
||||||
|
function_args = json.loads(tool_call.function.arguments)
|
||||||
|
function_response = function_to_call(
|
||||||
|
location=function_args.get("location"),
|
||||||
|
unit=function_args.get("unit"),
|
||||||
|
)
|
||||||
|
messages.append(
|
||||||
|
{
|
||||||
|
"tool_call_id": tool_call.id,
|
||||||
|
"role": "tool",
|
||||||
|
"name": function_name,
|
||||||
|
"content": function_response,
|
||||||
|
}
|
||||||
|
) # extend conversation with function response
|
||||||
|
print(f"messages: {messages}")
|
||||||
|
second_response = litellm.completion(
|
||||||
|
model=model,
|
||||||
|
messages=messages,
|
||||||
|
seed=22,
|
||||||
|
# tools=tools,
|
||||||
|
drop_params=True,
|
||||||
|
thinking={"type": "enabled", "budget_tokens": 1024},
|
||||||
|
) # get a new response from the model where it can see the function response
|
||||||
|
print("second response\n", second_response)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: claude-3-7-sonnet-thinking
|
||||||
|
litellm_params:
|
||||||
|
model: anthropic/claude-3-7-sonnet-20250219
|
||||||
|
api_key: os.environ/ANTHROPIC_API_KEY
|
||||||
|
thinking: {
|
||||||
|
"type": "enabled",
|
||||||
|
"budget_tokens": 1024
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Run proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config config.yaml
|
||||||
|
|
||||||
|
# RUNNING on http://0.0.0.0:4000
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Make 1st call
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer $LITELLM_KEY" \
|
||||||
|
-d '{
|
||||||
|
"model": "claude-3-7-sonnet-thinking",
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": "What's the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"},
|
||||||
|
],
|
||||||
|
"tools": [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "get_current_weather",
|
||||||
|
"description": "Get the current weather in a given location",
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"location": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The city and state",
|
||||||
|
},
|
||||||
|
"unit": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["celsius", "fahrenheit"],
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["location"],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"tool_choice": "auto"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Make 2nd call with tool call results
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://0.0.0.0:4000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer $LITELLM_KEY" \
|
||||||
|
-d '{
|
||||||
|
"model": "claude-3-7-sonnet-thinking",
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "What\'s the weather like in San Francisco, Tokyo, and Paris? - give me 3 responses"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": "I\'ll check the current weather for these three cities for you:",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"index": 2,
|
||||||
|
"function": {
|
||||||
|
"arguments": "{\"location\": \"San Francisco\"}",
|
||||||
|
"name": "get_current_weather"
|
||||||
|
},
|
||||||
|
"id": "tooluse_mnqzmtWYRjCxUInuAdK7-w",
|
||||||
|
"type": "function"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"function_call": null,
|
||||||
|
"reasoning_content": "The user is asking for the current weather in three different locations: San Francisco, Tokyo, and Paris. I have access to the `get_current_weather` function that can provide this information.\n\nThe function requires a `location` parameter, and has an optional `unit` parameter. The user hasn't specified which unit they prefer (celsius or fahrenheit), so I'll use the default provided by the function.\n\nI need to make three separate function calls, one for each location:\n1. San Francisco\n2. Tokyo\n3. Paris\n\nThen I'll compile the results into a response with three distinct weather reports as requested by the user.",
|
||||||
|
"thinking_blocks": [
|
||||||
|
{
|
||||||
|
"type": "thinking",
|
||||||
|
"thinking": "The user is asking for the current weather in three different locations: San Francisco, Tokyo, and Paris. I have access to the `get_current_weather` function that can provide this information.\n\nThe function requires a `location` parameter, and has an optional `unit` parameter. The user hasn't specified which unit they prefer (celsius or fahrenheit), so I'll use the default provided by the function.\n\nI need to make three separate function calls, one for each location:\n1. San Francisco\n2. Tokyo\n3. Paris\n\nThen I'll compile the results into a response with three distinct weather reports as requested by the user.",
|
||||||
|
"signature": "EqoBCkgIARABGAIiQCkBXENoyB+HstUOs/iGjG+bvDbIQRrxPsPpOSt5yDxX6iulZ/4K/w9Rt4J5Nb2+3XUYsyOH+CpZMfADYvItFR4SDPb7CmzoGKoolCMAJRoM62p1ZRASZhrD3swqIjAVY7vOAFWKZyPEJglfX/60+bJphN9W1wXR6rWrqn3MwUbQ5Mb/pnpeb10HMploRgUqEGKOd6fRKTkUoNDuAnPb55c="
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"provider_specific_fields": {
|
||||||
|
"reasoningContentBlocks": [
|
||||||
|
{
|
||||||
|
"reasoningText": {
|
||||||
|
"signature": "EqoBCkgIARABGAIiQCkBXENoyB+HstUOs/iGjG+bvDbIQRrxPsPpOSt5yDxX6iulZ/4K/w9Rt4J5Nb2+3XUYsyOH+CpZMfADYvItFR4SDPb7CmzoGKoolCMAJRoM62p1ZRASZhrD3swqIjAVY7vOAFWKZyPEJglfX/60+bJphN9W1wXR6rWrqn3MwUbQ5Mb/pnpeb10HMploRgUqEGKOd6fRKTkUoNDuAnPb55c=",
|
||||||
|
"text": "The user is asking for the current weather in three different locations: San Francisco, Tokyo, and Paris. I have access to the `get_current_weather` function that can provide this information.\n\nThe function requires a `location` parameter, and has an optional `unit` parameter. The user hasn't specified which unit they prefer (celsius or fahrenheit), so I'll use the default provided by the function.\n\nI need to make three separate function calls, one for each location:\n1. San Francisco\n2. Tokyo\n3. Paris\n\nThen I'll compile the results into a response with three distinct weather reports as requested by the user."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"tool_call_id": "tooluse_mnqzmtWYRjCxUInuAdK7-w",
|
||||||
|
"role": "tool",
|
||||||
|
"name": "get_current_weather",
|
||||||
|
"content": "{\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": \"fahrenheit\"}"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
## Switching between Anthropic + Deepseek models
|
||||||
|
|
||||||
|
Set `drop_params=True` to drop the 'thinking' blocks when swapping from Anthropic to Deepseek models. Suggest improvements to this approach [here](https://github.com/BerriAI/litellm/discussions/8927).
|
||||||
|
|
||||||
|
```python
|
||||||
|
litellm.drop_params = True # 👈 EITHER GLOBALLY or per request
|
||||||
|
|
||||||
|
# or per request
|
||||||
|
## Anthropic
|
||||||
|
response = litellm.completion(
|
||||||
|
model="anthropic/claude-3-7-sonnet-20250219",
|
||||||
|
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
thinking={"type": "enabled", "budget_tokens": 1024},
|
||||||
|
drop_params=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
## Deepseek
|
||||||
|
response = litellm.completion(
|
||||||
|
model="deepseek/deepseek-chat",
|
||||||
|
messages=[{"role": "user", "content": "What is the capital of France?"}],
|
||||||
|
thinking={"type": "enabled", "budget_tokens": 1024},
|
||||||
|
drop_params=True,
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Spec
|
||||||
|
|
||||||
|
|
||||||
|
These fields can be accessed via `response.choices[0].message.reasoning_content` and `response.choices[0].message.thinking_blocks`.
|
||||||
|
|
||||||
|
- `reasoning_content` - str: The reasoning content from the model. Returned across all providers.
|
||||||
|
- `thinking_blocks` - Optional[List[Dict[str, str]]]: A list of thinking blocks from the model. Only returned for Anthropic models.
|
||||||
|
- `type` - str: The type of thinking block.
|
||||||
|
- `thinking` - str: The thinking from the model.
|
||||||
|
- `signature` - str: The signature delta from the model.
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# Rerank
|
# /rerank
|
||||||
|
|
||||||
:::tip
|
:::tip
|
||||||
|
|
||||||
|
@ -111,7 +111,7 @@ curl http://0.0.0.0:4000/rerank \
|
||||||
|
|
||||||
| Provider | Link to Usage |
|
| Provider | Link to Usage |
|
||||||
|-------------|--------------------|
|
|-------------|--------------------|
|
||||||
| Cohere | [Usage](#quick-start) |
|
| Cohere (v1 + v2 clients) | [Usage](#quick-start) |
|
||||||
| Together AI| [Usage](../docs/providers/togetherai) |
|
| Together AI| [Usage](../docs/providers/togetherai) |
|
||||||
| Azure AI| [Usage](../docs/providers/azure_ai) |
|
| Azure AI| [Usage](../docs/providers/azure_ai) |
|
||||||
| Jina AI| [Usage](../docs/providers/jina_ai) |
|
| Jina AI| [Usage](../docs/providers/jina_ai) |
|
||||||
|
|
117
docs/my-website/docs/response_api.md
Normal file
|
@ -0,0 +1,117 @@
|
||||||
|
import Tabs from '@theme/Tabs';
|
||||||
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
|
# /responses [Beta]
|
||||||
|
|
||||||
|
LiteLLM provides a BETA endpoint that follows the spec of [OpenAI's `/responses` API](https://platform.openai.com/docs/api-reference/responses).
|
||||||
|
|
||||||
|
| Feature | Supported | Notes |
|
||||||
|
|---------|-----------|--------|
|
||||||
|
| Cost Tracking | ✅ | Works with all supported models |
|
||||||
|
| Logging | ✅ | Works across all integrations |
|
||||||
|
| End-user Tracking | ✅ | |
|
||||||
|
| Streaming | ✅ | |
|
||||||
|
| Fallbacks | ✅ | Works between supported models |
|
||||||
|
| Loadbalancing | ✅ | Works between supported models |
|
||||||
|
| Supported LiteLLM Versions | 1.63.8+ | |
|
||||||
|
| Supported LLM providers | `openai` | |
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
## Create a model response
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="litellm-sdk" label="LiteLLM SDK">
|
||||||
|
|
||||||
|
#### Non-streaming
|
||||||
|
```python
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
# Non-streaming response
|
||||||
|
response = litellm.responses(
|
||||||
|
model="gpt-4o",
|
||||||
|
input="Tell me a three sentence bedtime story about a unicorn.",
|
||||||
|
max_output_tokens=100
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Streaming
|
||||||
|
```python
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
# Streaming response
|
||||||
|
response = litellm.responses(
|
||||||
|
model="gpt-4o",
|
||||||
|
input="Tell me a three sentence bedtime story about a unicorn.",
|
||||||
|
stream=True
|
||||||
|
)
|
||||||
|
|
||||||
|
for event in response:
|
||||||
|
print(event)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="OpenAI SDK with LiteLLM Proxy">
|
||||||
|
|
||||||
|
First, add this to your litellm proxy config.yaml:
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: gpt-4o
|
||||||
|
litellm_params:
|
||||||
|
model: openai/gpt-4o
|
||||||
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
|
```
|
||||||
|
|
||||||
|
Start your LiteLLM proxy:
|
||||||
|
```bash
|
||||||
|
litellm --config /path/to/config.yaml
|
||||||
|
|
||||||
|
# RUNNING on http://0.0.0.0:4000
|
||||||
|
```
|
||||||
|
|
||||||
|
Then use the OpenAI SDK pointed to your proxy:
|
||||||
|
|
||||||
|
#### Non-streaming
|
||||||
|
```python
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
# Initialize client with your proxy URL
|
||||||
|
client = OpenAI(
|
||||||
|
base_url="http://localhost:4000", # Your proxy URL
|
||||||
|
api_key="your-api-key" # Your proxy API key
|
||||||
|
)
|
||||||
|
|
||||||
|
# Non-streaming response
|
||||||
|
response = client.responses.create(
|
||||||
|
model="gpt-4o",
|
||||||
|
input="Tell me a three sentence bedtime story about a unicorn."
|
||||||
|
)
|
||||||
|
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Streaming
|
||||||
|
```python
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
# Initialize client with your proxy URL
|
||||||
|
client = OpenAI(
|
||||||
|
base_url="http://localhost:4000", # Your proxy URL
|
||||||
|
api_key="your-api-key" # Your proxy API key
|
||||||
|
)
|
||||||
|
|
||||||
|
# Streaming response
|
||||||
|
response = client.responses.create(
|
||||||
|
model="gpt-4o",
|
||||||
|
input="Tell me a three sentence bedtime story about a unicorn.",
|
||||||
|
stream=True
|
||||||
|
)
|
||||||
|
|
||||||
|
for event in response:
|
||||||
|
print(event)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
|
@ -826,6 +826,65 @@ asyncio.run(router_acompletion())
|
||||||
|
|
||||||
## Basic Reliability
|
## Basic Reliability
|
||||||
|
|
||||||
|
### Weighted Deployments
|
||||||
|
|
||||||
|
Set `weight` on a deployment to pick one deployment more often than others.
|
||||||
|
|
||||||
|
This works with the **simple-shuffle** routing strategy (the default when no routing strategy is selected).
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import Router
|
||||||
|
|
||||||
|
model_list = [
|
||||||
|
{
|
||||||
|
"model_name": "o1",
|
||||||
|
"litellm_params": {
|
||||||
|
"model": "o1-preview",
|
||||||
|
"api_key": os.getenv("OPENAI_API_KEY"),
|
||||||
|
"weight": 1
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_name": "o1",
|
||||||
|
"litellm_params": {
|
||||||
|
"model": "o1-preview",
|
||||||
|
"api_key": os.getenv("OPENAI_API_KEY"),
|
||||||
|
"weight": 2 # 👈 PICK THIS DEPLOYMENT 2x MORE OFTEN THAN THE DEPLOYMENT ABOVE
|
||||||
|
},
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
router = Router(model_list=model_list, routing_strategy="simple-shuffle")
|
||||||
|
|
||||||
|
response = await router.acompletion(
|
||||||
|
model="o1",
|
||||||
|
messages=[{"role": "user", "content": "Hey, how's it going?"}]
|
||||||
|
)
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: o1
|
||||||
|
litellm_params:
|
||||||
|
model: o1
|
||||||
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
|
weight: 1
|
||||||
|
- model_name: o1
|
||||||
|
litellm_params:
|
||||||
|
model: o1-preview
|
||||||
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
|
weight: 2 # 👈 PICK THIS DEPLOYMENT 2x MORE OFTEN THAN o1
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
### Max Parallel Requests (ASYNC)
|
### Max Parallel Requests (ASYNC)
|
||||||
|
|
||||||
Used in semaphore for async requests on router. Limit the max concurrent calls made to a deployment. Useful in high-traffic scenarios.
|
Used in semaphore for async requests on router. Limit the max concurrent calls made to a deployment. Useful in high-traffic scenarios.
|
||||||
|
@ -893,8 +952,8 @@ router_settings:
|
||||||
```
|
```
|
||||||
|
|
||||||
Defaults:
|
Defaults:
|
||||||
- allowed_fails: 0
|
- allowed_fails: 3
|
||||||
- cooldown_time: 60s
|
- cooldown_time: 5s (`DEFAULT_COOLDOWN_TIME_SECONDS` in constants.py)
|
||||||
|
|
||||||
**Set Per Model**
|
**Set Per Model**
|
||||||
|
|
||||||
|
|
|
@ -96,6 +96,33 @@ litellm --config /path/to/config.yaml
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
#### Using K/V pairs in 1 AWS Secret
|
||||||
|
|
||||||
|
You can read multiple keys from a single AWS Secret using the `primary_secret_name` parameter:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
general_settings:
|
||||||
|
key_management_system: "aws_secret_manager"
|
||||||
|
key_management_settings:
|
||||||
|
hosted_keys: [
|
||||||
|
"OPENAI_API_KEY_MODEL_1",
|
||||||
|
"OPENAI_API_KEY_MODEL_2",
|
||||||
|
]
|
||||||
|
primary_secret_name: "litellm_secrets" # 👈 Read multiple keys from one JSON secret
|
||||||
|
```
|
||||||
|
|
||||||
|
The `primary_secret_name` setting allows you to read multiple keys from a single AWS Secret stored as a JSON object. For example, the `litellm_secrets` secret would contain:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"OPENAI_API_KEY_MODEL_1": "sk-key1...",
|
||||||
|
"OPENAI_API_KEY_MODEL_2": "sk-key2..."
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
This reduces the number of AWS Secrets you need to manage.
|
||||||
|
|
||||||
|
|
||||||
## Hashicorp Vault
|
## Hashicorp Vault
|
||||||
|
|
||||||
|
|
||||||
|
@ -353,4 +380,7 @@ general_settings:
|
||||||
|
|
||||||
# Hosted Keys Settings
|
# Hosted Keys Settings
|
||||||
hosted_keys: ["litellm_master_key"] # OPTIONAL. Specify which env keys you stored on AWS
|
hosted_keys: ["litellm_master_key"] # OPTIONAL. Specify which env keys you stored on AWS
|
||||||
|
|
||||||
|
# K/V pairs in 1 AWS Secret Settings
|
||||||
|
primary_secret_name: "litellm_secrets" # OPTIONAL. Read multiple keys from one JSON secret on AWS Secret Manager
|
||||||
```
|
```
|
|
@ -1,7 +1,7 @@
|
||||||
import Tabs from '@theme/Tabs';
|
import Tabs from '@theme/Tabs';
|
||||||
import TabItem from '@theme/TabItem';
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# Text Completion
|
# /completions
|
||||||
|
|
||||||
### Usage
|
### Usage
|
||||||
<Tabs>
|
<Tabs>
|
||||||
|
|
|
@ -2,9 +2,9 @@ import Image from '@theme/IdealImage';
|
||||||
import Tabs from '@theme/Tabs';
|
import Tabs from '@theme/Tabs';
|
||||||
import TabItem from '@theme/TabItem';
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# Use LiteLLM AI Gateway with Aporia Guardrails
|
# Aporia Guardrails with LiteLLM Gateway
|
||||||
|
|
||||||
In this tutorial we will use LiteLLM Proxy with Aporia to detect PII in requests and profanity in responses
|
In this tutorial we will use LiteLLM AI Gateway with Aporia to detect PII in requests and profanity in responses
|
||||||
|
|
||||||
## 1. Setup guardrails on Aporia
|
## 1. Setup guardrails on Aporia
|
||||||
|
|
||||||
|
|
103
docs/my-website/docs/tutorials/openweb_ui.md
Normal file
|
@ -0,0 +1,103 @@
|
||||||
|
import Image from '@theme/IdealImage';
|
||||||
|
import Tabs from '@theme/Tabs';
|
||||||
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
|
# OpenWeb UI with LiteLLM
|
||||||
|
|
||||||
|
This guide walks you through connecting OpenWeb UI to LiteLLM. Using LiteLLM with OpenWeb UI allows teams to:
|
||||||
|
- Access 100+ LLMs on OpenWeb UI
|
||||||
|
- Track Spend / Usage, Set Budget Limits
|
||||||
|
- Send Request/Response Logs to logging destinations like langfuse, s3, gcs buckets, etc.
|
||||||
|
- Set access controls, e.g. control which models OpenWebUI can access.
|
||||||
|
|
||||||
|
## Quickstart
|
||||||
|
|
||||||
|
- Make sure to set up LiteLLM with the [LiteLLM Getting Started Guide](https://docs.litellm.ai/docs/proxy/docker_quick_start)
|
||||||
|
|
||||||
|
|
||||||
|
## 1. Start LiteLLM & OpenWebUI
|
||||||
|
|
||||||
|
- OpenWebUI starts running on [http://localhost:3000](http://localhost:3000)
|
||||||
|
- LiteLLM starts running on [http://localhost:4000](http://localhost:4000)
|
||||||
|
|
||||||
|
|
||||||
|
## 2. Create a Virtual Key on LiteLLM
|
||||||
|
|
||||||
|
Virtual Keys are API Keys that allow you to authenticate to LiteLLM Proxy. We will create a Virtual Key that will allow OpenWebUI to access LiteLLM.
|
||||||
|
|
||||||
|
### 2.1 LiteLLM User Management Hierarchy
|
||||||
|
|
||||||
|
On LiteLLM, you can create Organizations, Teams, Users and Virtual Keys. For this tutorial, we will create a Team and a Virtual Key.
|
||||||
|
|
||||||
|
- `Organization` - An Organization is a group of Teams. (US Engineering, EU Developer Tools)
|
||||||
|
- `Team` - A Team is a group of Users. (OpenWeb UI Team, Data Science Team, etc.)
|
||||||
|
- `User` - A User is an individual user (employee, developer, e.g. `krrish@litellm.ai`)
|
||||||
|
- `Virtual Key` - A Virtual Key is an API Key that allows you to authenticate to LiteLLM Proxy. A Virtual Key is associated with a User or Team.
|
||||||
|
|
||||||
|
Once the Team is created, you can invite Users to the Team. You can read more about LiteLLM's User Management [here](https://docs.litellm.ai/docs/proxy/user_management_heirarchy).
|
||||||
|
|
||||||
|
### 2.2 Create a Team on LiteLLM
|
||||||
|
|
||||||
|
Navigate to [http://localhost:4000/ui](http://localhost:4000/ui) and create a new team.
|
||||||
|
|
||||||
|
<Image img={require('../../img/litellm_create_team.gif')} />
|
||||||
|
|
||||||
|
### 2.3 Create a Virtual Key on LiteLLM
|
||||||
|
|
||||||
|
Navigate to [http://localhost:4000/ui](http://localhost:4000/ui) and create a new Virtual Key.
|
||||||
|
|
||||||
|
LiteLLM allows you to specify what models are available on OpenWeb UI (by specifying the models the key will have access to).
|
||||||
|
|
||||||
|
<Image img={require('../../img/create_key_in_team_oweb.gif')} />
|
||||||
|
|
||||||
|
## 3. Connect OpenWeb UI to LiteLLM
|
||||||
|
|
||||||
|
On OpenWeb UI, navigate to Settings -> Connections and create a new connection to LiteLLM.
|
||||||
|
|
||||||
|
Enter the following details:
|
||||||
|
- URL: `http://localhost:4000` (your litellm proxy base url)
|
||||||
|
- Key: `your-virtual-key` (the key you created in the previous step)
|
||||||
|
|
||||||
|
<Image img={require('../../img/litellm_setup_openweb.gif')} />
|
||||||
|
|
||||||
|
### 3.1 Test Request
|
||||||
|
|
||||||
|
In the top left corner, open the model selector. You should only see the models you gave the key access to in Step 2.
|
||||||
|
|
||||||
|
Once you have selected a model, enter your message content and click on `Submit`.
|
||||||
|
|
||||||
|
<Image img={require('../../img/basic_litellm.gif')} />
|
||||||
|
|
||||||
|
### 3.2 Tracking Spend / Usage
|
||||||
|
|
||||||
|
After your request is made, navigate to `Logs` on the LiteLLM UI. There you can see the Team, Key, Model, Usage and Cost for the request.
|
||||||
|
|
||||||
|
<!-- <Image img={require('../../img/litellm_logs_openweb.gif')} /> -->
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Render `thinking` content on OpenWeb UI
|
||||||
|
|
||||||
|
OpenWebUI requires reasoning/thinking content to be rendered with `<think></think>` tags. In order to render this for specific models, you can use the `merge_reasoning_content_in_choices` litellm parameter.
|
||||||
|
|
||||||
|
Example litellm config.yaml:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: thinking-anthropic-claude-3-7-sonnet
|
||||||
|
litellm_params:
|
||||||
|
model: bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0
|
||||||
|
thinking: {"type": "enabled", "budget_tokens": 1024}
|
||||||
|
max_tokens: 1080
|
||||||
|
merge_reasoning_content_in_choices: true
|
||||||
|
```
|
||||||
|
|
||||||
|
### Test it on OpenWeb UI
|
||||||
|
|
||||||
|
In the models dropdown, select `thinking-anthropic-claude-3-7-sonnet`.
|
||||||
|
|
||||||
|
<Image img={require('../../img/litellm_thinking_openweb.gif')} />
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -44,7 +44,7 @@ const config = {
|
||||||
path: './release_notes',
|
path: './release_notes',
|
||||||
routeBasePath: 'release_notes',
|
routeBasePath: 'release_notes',
|
||||||
blogTitle: 'Release Notes',
|
blogTitle: 'Release Notes',
|
||||||
blogSidebarTitle: 'All Releases',
|
blogSidebarTitle: 'Releases',
|
||||||
blogSidebarCount: 'ALL',
|
blogSidebarCount: 'ALL',
|
||||||
postsPerPage: 'ALL',
|
postsPerPage: 'ALL',
|
||||||
showReadingTime: false,
|
showReadingTime: false,
|
||||||
|
|
BIN
docs/my-website/img/basic_litellm.gif
Normal file
After Width: | Height: | Size: 2.6 MiB |
BIN
docs/my-website/img/create_key_in_team_oweb.gif
Normal file
After Width: | Height: | Size: 13 MiB |
BIN
docs/my-website/img/litellm_create_team.gif
Normal file
After Width: | Height: | Size: 5.4 MiB |
BIN
docs/my-website/img/litellm_setup_openweb.gif
Normal file
After Width: | Height: | Size: 2.7 MiB |
BIN
docs/my-website/img/litellm_thinking_openweb.gif
Normal file
After Width: | Height: | Size: 5.1 MiB |
Before Width: | Height: | Size: 27 KiB After Width: | Height: | Size: 195 KiB |
BIN
docs/my-website/img/release_notes/anthropic_thinking.jpg
Normal file
After Width: | Height: | Size: 470 KiB |
BIN
docs/my-website/img/release_notes/credentials.jpg
Normal file
After Width: | Height: | Size: 371 KiB |
BIN
docs/my-website/img/release_notes/error_logs.jpg
Normal file
After Width: | Height: | Size: 918 KiB |
BIN
docs/my-website/img/release_notes/litellm_test_connection.gif
Normal file
After Width: | Height: | Size: 16 MiB |
BIN
docs/my-website/img/release_notes/responses_api.png
Normal file
After Width: | Height: | Size: 67 KiB |
BIN
docs/my-website/img/release_notes/v1632_release.jpg
Normal file
After Width: | Height: | Size: 386 KiB |
BIN
docs/my-website/img/ui_add_cred_2.png
Normal file
After Width: | Height: | Size: 255 KiB |
BIN
docs/my-website/img/ui_cred_3.png
Normal file
After Width: | Height: | Size: 283 KiB |
BIN
docs/my-website/img/ui_cred_4.png
Normal file
After Width: | Height: | Size: 255 KiB |
BIN
docs/my-website/img/ui_cred_add.png
Normal file
After Width: | Height: | Size: 204 KiB |
BIN
docs/my-website/img/ui_request_logs.png
Normal file
After Width: | Height: | Size: 567 KiB |
BIN
docs/my-website/img/ui_request_logs_content.png
Normal file
After Width: | Height: | Size: 344 KiB |
BIN
docs/my-website/img/use_model_cred.png
Normal file
After Width: | Height: | Size: 282 KiB |