Merge branch 'main' into litellm_8864-feature-vertex-anyOf-support

@@ -3,6 +3,18 @@ orbs:
   codecov: codecov/codecov@4.0.1
   node: circleci/node@5.1.0 # Add this line to declare the node orb
 
+commands:
+  setup_google_dns:
+    steps:
+      - run:
+          name: "Configure Google DNS"
+          command: |
+            # Backup original resolv.conf
+            sudo cp /etc/resolv.conf /etc/resolv.conf.backup
+            # Add both local and Google DNS servers
+            echo "nameserver 127.0.0.11" | sudo tee /etc/resolv.conf
+            echo "nameserver 8.8.8.8" | sudo tee -a /etc/resolv.conf
+            echo "nameserver 8.8.4.4" | sudo tee -a /etc/resolv.conf
+
 jobs:
   local_testing:
@@ -15,7 +27,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Show git commit hash
           command: |
@@ -66,7 +78,7 @@ jobs:
           pip install python-multipart
           pip install google-cloud-aiplatform
           pip install prometheus-client==0.20.0
-          pip install "pydantic==2.7.1"
+          pip install "pydantic==2.10.2"
           pip install "diskcache==5.6.1"
           pip install "Pillow==10.3.0"
           pip install "jsonschema==4.22.0"
@@ -134,7 +146,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Show git commit hash
           command: |
@@ -185,7 +197,7 @@ jobs:
           pip install python-multipart
           pip install google-cloud-aiplatform
           pip install prometheus-client==0.20.0
-          pip install "pydantic==2.7.1"
+          pip install "pydantic==2.10.2"
           pip install "diskcache==5.6.1"
           pip install "Pillow==10.3.0"
           pip install "jsonschema==4.22.0"
@@ -234,7 +246,13 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
+      - run:
+          name: DNS lookup for Redis host
+          command: |
+            sudo apt-get update
+            sudo apt-get install -y dnsutils
+            dig redis-19899.c239.us-east-1-2.ec2.redns.redis-cloud.com +short
       - run:
           name: Show git commit hash
           command: |
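The `setup_google_dns` command and the `dig` step above exist to rule out DNS flakiness against the Redis Cloud host used in CI. A minimal Python sketch of the same check is below; the hostname is copied from the diff, while the helper name and error handling are illustrative assumptions, not part of the PR.

```python
import socket

# Mirror of the `dig ... +short` CI step: resolve the Redis Cloud hostname and
# fail fast with a clear message if DNS is broken, instead of timing out in a test.
def check_dns(hostname: str = "redis-19899.c239.us-east-1-2.ec2.redns.redis-cloud.com") -> str:
    try:
        ip_address = socket.gethostbyname(hostname)
    except socket.gaierror as err:
        raise RuntimeError(f"DNS lookup failed for {hostname}: {err}") from err
    print(f"{hostname} resolved to {ip_address}")
    return ip_address

if __name__ == "__main__":
    check_dns()
```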
@@ -285,7 +303,7 @@ jobs:
           pip install python-multipart
           pip install google-cloud-aiplatform
           pip install prometheus-client==0.20.0
-          pip install "pydantic==2.7.1"
+          pip install "pydantic==2.10.2"
           pip install "diskcache==5.6.1"
           pip install "Pillow==10.3.0"
           pip install "jsonschema==4.22.0"
@@ -334,6 +352,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -388,6 +407,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -429,6 +449,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Show git commit hash
           command: |
@@ -479,7 +500,13 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - run:
+          name: Install PostgreSQL
+          command: |
+            sudo apt-get update
+            sudo apt-get install postgresql postgresql-contrib
+            echo 'export PATH=/usr/lib/postgresql/*/bin:$PATH' >> $BASH_ENV
+      - setup_google_dns
       - run:
           name: Show git commit hash
           command: |
@@ -530,10 +557,11 @@ jobs:
           pip install python-multipart
           pip install google-cloud-aiplatform
           pip install prometheus-client==0.20.0
-          pip install "pydantic==2.7.1"
+          pip install "pydantic==2.10.2"
           pip install "diskcache==5.6.1"
           pip install "Pillow==10.3.0"
           pip install "jsonschema==4.22.0"
+          pip install "pytest-postgresql==7.0.1"
       - save_cache:
           paths:
             - ./venv
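The new `pytest-postgresql==7.0.1` pin suggests the proxy unit tests can run against a throwaway Postgres instance. A hedged sketch of how such a test might use the plugin's built-in `postgresql` connection fixture follows; the table and assertions are invented for illustration, and the fixture behaviour is an assumption about the pinned plugin version rather than something shown in this diff.

```python
# Hypothetical test sketch using pytest-postgresql's `postgresql` fixture,
# assumed to yield an open connection to a temporary database.
def test_spend_roundtrip(postgresql):
    with postgresql.cursor() as cur:
        cur.execute("CREATE TABLE spend (id SERIAL PRIMARY KEY, amount FLOAT)")
        cur.execute("INSERT INTO spend (amount) VALUES (1.5)")
        cur.execute("SELECT COUNT(*) FROM spend")
        assert cur.fetchone()[0] == 1
```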
@@ -569,7 +597,7 @@ jobs:
       - litellm_proxy_unit_tests_coverage
   litellm_assistants_api_testing: # Runs all tests with the "assistants" keyword
     docker:
-      - image: cimg/python:3.11
+      - image: cimg/python:3.13.1
         auth:
           username: ${DOCKERHUB_USERNAME}
           password: ${DOCKERHUB_PASSWORD}
@@ -577,6 +605,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -618,6 +647,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -625,7 +655,13 @@ jobs:
           python -m pip install -r requirements.txt
           pip install "pytest==7.3.1"
           pip install "pytest-retry==1.6.3"
+          pip install "pytest-cov==5.0.0"
           pip install "pytest-asyncio==0.21.1"
+          pip install "respx==0.21.1"
+      - run:
+          name: Show current pydantic version
+          command: |
+            python -m pip show pydantic
       # Run pytest and generate JUnit XML report
       - run:
           name: Run tests
@@ -648,6 +684,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -690,6 +727,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -700,8 +738,8 @@ jobs:
           pip install "pytest-cov==5.0.0"
           pip install "pytest-asyncio==0.21.1"
           pip install "respx==0.21.1"
-          pip install "pydantic==2.7.2"
+          pip install "pydantic==2.10.2"
-          pip install "mcp==1.4.1"
+          pip install "mcp==1.5.0"
       # Run pytest and generate JUnit XML report
       - run:
           name: Run tests
@@ -734,6 +772,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -776,6 +815,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -788,8 +828,8 @@ jobs:
           pip install "pytest-asyncio==0.21.1"
           pip install "respx==0.21.1"
           pip install "hypercorn==0.17.3"
-          pip install "pydantic==2.7.2"
+          pip install "pydantic==2.10.2"
-          pip install "mcp==1.4.1"
+          pip install "mcp==1.5.0"
       # Run pytest and generate JUnit XML report
       - run:
           name: Run tests
@@ -822,6 +862,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -866,10 +907,12 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
            python -m pip install --upgrade pip
+            pip install numpydoc
            python -m pip install -r requirements.txt
            pip install "respx==0.21.1"
            pip install "pytest==7.3.1"
@@ -878,7 +921,6 @@ jobs:
            pip install "pytest-cov==5.0.0"
            pip install "google-generativeai==0.3.2"
            pip install "google-cloud-aiplatform==1.43.0"
-            pip install numpydoc
       # Run pytest and generate JUnit XML report
       - run:
           name: Run tests
@@ -912,6 +954,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -954,6 +997,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -996,6 +1040,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -1042,6 +1087,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -1054,8 +1100,8 @@ jobs:
            pip install click
            pip install "boto3==1.34.34"
            pip install jinja2
-            pip install tokenizers=="0.20.0"
+            pip install "tokenizers==0.20.0"
-            pip install uvloop==0.21.0
+            pip install "uvloop==0.21.0"
            pip install jsonschema
       - run:
           name: Run tests
@@ -1074,6 +1120,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -1098,6 +1145,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       # Install Helm
       - run:
           name: Install Helm
@@ -1167,6 +1215,7 @@ jobs:
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Dependencies
           command: |
@@ -1203,6 +1252,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Python 3.9
           command: |
@@ -1277,6 +1327,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Docker CLI (In case it's not already installed)
           command: |
@@ -1412,6 +1463,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Docker CLI (In case it's not already installed)
           command: |
@@ -1448,6 +1500,7 @@ jobs:
           pip install "boto3==1.34.34"
           pip install "aioboto3==12.3.0"
           pip install langchain
+          pip install "langchain_mcp_adapters==0.0.5"
           pip install "langfuse>=2.0.0"
           pip install "logfire==0.29.0"
           pip install numpydoc
@@ -1535,6 +1588,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Docker CLI (In case it's not already installed)
           command: |
@@ -1697,6 +1751,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Docker CLI (In case it's not already installed)
           command: |
@@ -1808,6 +1863,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Docker CLI (In case it's not already installed)
           command: |
@@ -1847,7 +1903,7 @@ jobs:
          command: |
            docker run -d \
              -p 4000:4000 \
-              -e DATABASE_URL=$PROXY_DATABASE_URL \
+              -e DATABASE_URL=$CLEAN_STORE_MODEL_IN_DB_DATABASE_URL \
              -e STORE_MODEL_IN_DB="True" \
              -e LITELLM_MASTER_KEY="sk-1234" \
              -e LITELLM_LICENSE=$LITELLM_LICENSE \
@@ -1890,6 +1946,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       # Remove Docker CLI installation since it's already available in machine executor
       - run:
           name: Install Python 3.13
@@ -1987,6 +2044,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Install Docker CLI (In case it's not already installed)
           command: |
@@ -2014,7 +2072,7 @@ jobs:
            pip install "openai==1.68.2"
            pip install "assemblyai==0.37.0"
            python -m pip install --upgrade pip
-            pip install "pydantic==2.7.1"
+            pip install "pydantic==2.10.2"
            pip install "pytest==7.3.1"
            pip install "pytest-mock==3.12.0"
            pip install "pytest-asyncio==0.21.1"
@@ -2031,6 +2089,9 @@ jobs:
            pip install "PyGithub==1.59.1"
            pip install "google-cloud-aiplatform==1.59.0"
            pip install "anthropic==0.49.0"
+            pip install "langchain_mcp_adapters==0.0.5"
+            pip install "langchain_openai==0.2.1"
+            pip install "langgraph==0.3.18"
       # Run pytest and generate JUnit XML report
       - run:
           name: Build Docker image
@@ -2243,6 +2304,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Build UI
           command: |
@@ -2289,7 +2351,7 @@ jobs:
            pip install aiohttp
            pip install "openai==1.68.2"
            python -m pip install --upgrade pip
-            pip install "pydantic==2.7.1"
+            pip install "pydantic==2.10.2"
            pip install "pytest==7.3.1"
            pip install "pytest-mock==3.12.0"
            pip install "pytest-asyncio==0.21.1"
@@ -2357,6 +2419,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Build Docker image
           command: |
@@ -2379,6 +2442,7 @@ jobs:
     working_directory: ~/project
     steps:
       - checkout
+      - setup_google_dns
       - run:
           name: Build Docker image
           command: |
@@ -4,10 +4,12 @@ python-dotenv
 tiktoken
 importlib_metadata
 cohere
-redis
+redis==5.2.1
+redisvl==0.4.1
 anthropic
 orjson==3.9.15
-pydantic==2.7.1
+pydantic==2.10.2
 google-cloud-aiplatform==1.43.0
 fastapi-sso==0.10.0
 uvloop==0.21.0
+mcp==1.5.0 # for MCP server
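Several pins move together in this hunk (redis, redisvl, pydantic, mcp). Below is a small sanity-check sketch, standard library only, that compares installed versions against these pins; the expected values are copied from the hunk above and the script itself is illustrative, not part of the repository.

```python
from importlib.metadata import PackageNotFoundError, version

# Pins taken from the requirements.txt hunk above; the check itself is illustrative.
EXPECTED = {
    "redis": "5.2.1",
    "redisvl": "0.4.1",
    "pydantic": "2.10.2",
    "mcp": "1.5.0",
}

for package, expected in EXPECTED.items():
    try:
        installed = version(package)
    except PackageNotFoundError:
        print(f"{package}: not installed (expected {expected})")
        continue
    status = "OK" if installed == expected else f"MISMATCH (expected {expected})"
    print(f"{package}: {installed} {status}")
```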
.github/workflows/publish-migrations.yml (new file, 206 lines)
@@ -0,0 +1,206 @@
|
||||||
|
name: Publish Prisma Migrations
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: write
|
||||||
|
pull-requests: write
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
paths:
|
||||||
|
- 'schema.prisma' # Check root schema.prisma
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
publish-migrations:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
services:
|
||||||
|
postgres:
|
||||||
|
image: postgres:14
|
||||||
|
env:
|
||||||
|
POSTGRES_DB: temp_db
|
||||||
|
POSTGRES_USER: postgres
|
||||||
|
POSTGRES_PASSWORD: postgres
|
||||||
|
ports:
|
||||||
|
- 5432:5432
|
||||||
|
options: >-
|
||||||
|
--health-cmd pg_isready
|
||||||
|
--health-interval 10s
|
||||||
|
--health-timeout 5s
|
||||||
|
--health-retries 5
|
||||||
|
|
||||||
|
# Add shadow database service
|
||||||
|
postgres_shadow:
|
||||||
|
image: postgres:14
|
||||||
|
env:
|
||||||
|
POSTGRES_DB: shadow_db
|
||||||
|
POSTGRES_USER: postgres
|
||||||
|
POSTGRES_PASSWORD: postgres
|
||||||
|
ports:
|
||||||
|
- 5433:5432
|
||||||
|
options: >-
|
||||||
|
--health-cmd pg_isready
|
||||||
|
--health-interval 10s
|
||||||
|
--health-timeout 5s
|
||||||
|
--health-retries 5
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: Set up Python
|
||||||
|
uses: actions/setup-python@v4
|
||||||
|
with:
|
||||||
|
python-version: '3.x'
|
||||||
|
|
||||||
|
- name: Install Dependencies
|
||||||
|
run: |
|
||||||
|
pip install prisma
|
||||||
|
pip install python-dotenv
|
||||||
|
|
||||||
|
- name: Generate Initial Migration if None Exists
|
||||||
|
env:
|
||||||
|
DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
|
||||||
|
DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
|
||||||
|
SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
|
||||||
|
run: |
|
||||||
|
mkdir -p deploy/migrations
|
||||||
|
echo 'provider = "postgresql"' > deploy/migrations/migration_lock.toml
|
||||||
|
|
||||||
|
if [ -z "$(ls -A deploy/migrations/2* 2>/dev/null)" ]; then
|
||||||
|
echo "No existing migrations found, creating baseline..."
|
||||||
|
VERSION=$(date +%Y%m%d%H%M%S)
|
||||||
|
mkdir -p deploy/migrations/${VERSION}_initial
|
||||||
|
|
||||||
|
echo "Generating initial migration..."
|
||||||
|
# Save raw output for debugging
|
||||||
|
prisma migrate diff \
|
||||||
|
--from-empty \
|
||||||
|
--to-schema-datamodel schema.prisma \
|
||||||
|
--shadow-database-url "${SHADOW_DATABASE_URL}" \
|
||||||
|
--script > deploy/migrations/${VERSION}_initial/raw_migration.sql
|
||||||
|
|
||||||
|
echo "Raw migration file content:"
|
||||||
|
cat deploy/migrations/${VERSION}_initial/raw_migration.sql
|
||||||
|
|
||||||
|
echo "Cleaning migration file..."
|
||||||
|
# Clean the file
|
||||||
|
sed '/^Installing/d' deploy/migrations/${VERSION}_initial/raw_migration.sql > deploy/migrations/${VERSION}_initial/migration.sql
|
||||||
|
|
||||||
|
# Verify the migration file
|
||||||
|
if [ ! -s deploy/migrations/${VERSION}_initial/migration.sql ]; then
|
||||||
|
echo "ERROR: Migration file is empty after cleaning"
|
||||||
|
echo "Original content was:"
|
||||||
|
cat deploy/migrations/${VERSION}_initial/raw_migration.sql
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Final migration file content:"
|
||||||
|
cat deploy/migrations/${VERSION}_initial/migration.sql
|
||||||
|
|
||||||
|
# Verify it starts with SQL
|
||||||
|
if ! head -n 1 deploy/migrations/${VERSION}_initial/migration.sql | grep -q "^--\|^CREATE\|^ALTER"; then
|
||||||
|
echo "ERROR: Migration file does not start with SQL command or comment"
|
||||||
|
echo "First line is:"
|
||||||
|
head -n 1 deploy/migrations/${VERSION}_initial/migration.sql
|
||||||
|
echo "Full content is:"
|
||||||
|
cat deploy/migrations/${VERSION}_initial/migration.sql
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "Initial migration generated at $(date -u)" > deploy/migrations/${VERSION}_initial/README.md
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Compare and Generate Migration
|
||||||
|
if: success()
|
||||||
|
env:
|
||||||
|
DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
|
||||||
|
DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
|
||||||
|
SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
|
||||||
|
run: |
|
||||||
|
# Create temporary migration workspace
|
||||||
|
mkdir -p temp_migrations
|
||||||
|
|
||||||
|
# Copy existing migrations (will not fail if directory is empty)
|
||||||
|
cp -r deploy/migrations/* temp_migrations/ 2>/dev/null || true
|
||||||
|
|
||||||
|
VERSION=$(date +%Y%m%d%H%M%S)
|
||||||
|
|
||||||
|
# Generate diff against existing migrations or empty state
|
||||||
|
prisma migrate diff \
|
||||||
|
--from-migrations temp_migrations \
|
||||||
|
--to-schema-datamodel schema.prisma \
|
||||||
|
--shadow-database-url "${SHADOW_DATABASE_URL}" \
|
||||||
|
--script > temp_migrations/migration_${VERSION}.sql
|
||||||
|
|
||||||
|
# Check if there are actual changes
|
||||||
|
if [ -s temp_migrations/migration_${VERSION}.sql ]; then
|
||||||
|
echo "Changes detected, creating new migration"
|
||||||
|
mkdir -p deploy/migrations/${VERSION}_schema_update
|
||||||
|
mv temp_migrations/migration_${VERSION}.sql deploy/migrations/${VERSION}_schema_update/migration.sql
|
||||||
|
echo "Migration generated at $(date -u)" > deploy/migrations/${VERSION}_schema_update/README.md
|
||||||
|
else
|
||||||
|
echo "No schema changes detected"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Verify Migration
|
||||||
|
if: success()
|
||||||
|
env:
|
||||||
|
DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
|
||||||
|
DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
|
||||||
|
SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
|
||||||
|
run: |
|
||||||
|
# Create test database
|
||||||
|
psql "${SHADOW_DATABASE_URL}" -c 'CREATE DATABASE migration_test;'
|
||||||
|
|
||||||
|
# Apply all migrations in order to verify
|
||||||
|
for migration in deploy/migrations/*/migration.sql; do
|
||||||
|
echo "Applying migration: $migration"
|
||||||
|
psql "${SHADOW_DATABASE_URL}" -f $migration
|
||||||
|
done
|
||||||
|
|
||||||
|
# Add this step before create-pull-request to debug permissions
|
||||||
|
- name: Check Token Permissions
|
||||||
|
run: |
|
||||||
|
echo "Checking token permissions..."
|
||||||
|
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
|
||||||
|
-H "Accept: application/vnd.github.v3+json" \
|
||||||
|
https://api.github.com/repos/BerriAI/litellm/collaborators
|
||||||
|
|
||||||
|
echo "\nChecking if token can create PRs..."
|
||||||
|
curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
|
||||||
|
-H "Accept: application/vnd.github.v3+json" \
|
||||||
|
https://api.github.com/repos/BerriAI/litellm
|
||||||
|
|
||||||
|
# Add this debug step before git push
|
||||||
|
- name: Debug Changed Files
|
||||||
|
run: |
|
||||||
|
echo "Files staged for commit:"
|
||||||
|
git diff --name-status --staged
|
||||||
|
|
||||||
|
echo "\nAll changed files:"
|
||||||
|
git status
|
||||||
|
|
||||||
|
- name: Create Pull Request
|
||||||
|
if: success()
|
||||||
|
uses: peter-evans/create-pull-request@v5
|
||||||
|
with:
|
||||||
|
token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
commit-message: "chore: update prisma migrations"
|
||||||
|
title: "Update Prisma Migrations"
|
||||||
|
body: |
|
||||||
|
Auto-generated migration based on schema.prisma changes.
|
||||||
|
|
||||||
|
Generated files:
|
||||||
|
- deploy/migrations/${VERSION}_schema_update/migration.sql
|
||||||
|
- deploy/migrations/${VERSION}_schema_update/README.md
|
||||||
|
branch: feat/prisma-migration-${{ env.VERSION }}
|
||||||
|
base: main
|
||||||
|
delete-branch: true
|
||||||
|
|
||||||
|
- name: Generate and Save Migrations
|
||||||
|
run: |
|
||||||
|
# Only add migration files
|
||||||
|
git add deploy/migrations/
|
||||||
|
git status # Debug what's being committed
|
||||||
|
git commit -m "chore: update prisma migrations"
|
.gitignore
@@ -1,3 +1,4 @@
+.python-version
 .venv
 .env
 .newenv
@@ -37,9 +37,6 @@ RUN pip install dist/*.whl
 # install dependencies as wheels
 RUN pip wheel --no-cache-dir --wheel-dir=/wheels/ -r requirements.txt
 
-# install semantic-cache [Experimental]- we need this here and not in requirements.txt because redisvl pins to pydantic 1.0
-RUN pip install redisvl==0.0.7 --no-deps
-
 # ensure pyjwt is used, not jwt
 RUN pip uninstall jwt -y
 RUN pip uninstall PyJWT -y
@@ -16,9 +16,6 @@
 <a href="https://pypi.org/project/litellm/" target="_blank">
     <img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version">
 </a>
-<a href="https://dl.circleci.com/status-badge/redirect/gh/BerriAI/litellm/tree/main" target="_blank">
-    <img src="https://dl.circleci.com/status-badge/img/gh/BerriAI/litellm/tree/main.svg?style=svg" alt="CircleCI">
-</a>
 <a href="https://www.ycombinator.com/companies/berriai">
     <img src="https://img.shields.io/badge/Y%20Combinator-W23-orange?style=flat-square" alt="Y Combinator W23">
 </a>
ci_cd/baseline_db.py (new file, 60 lines)
@@ -0,0 +1,60 @@
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
def create_baseline():
|
||||||
|
"""Create baseline migration in deploy/migrations"""
|
||||||
|
try:
|
||||||
|
# Get paths
|
||||||
|
root_dir = Path(__file__).parent.parent
|
||||||
|
deploy_dir = root_dir / "deploy"
|
||||||
|
migrations_dir = deploy_dir / "migrations"
|
||||||
|
schema_path = root_dir / "schema.prisma"
|
||||||
|
|
||||||
|
# Create migrations directory
|
||||||
|
migrations_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Create migration_lock.toml if it doesn't exist
|
||||||
|
lock_file = migrations_dir / "migration_lock.toml"
|
||||||
|
if not lock_file.exists():
|
||||||
|
lock_file.write_text('provider = "postgresql"\n')
|
||||||
|
|
||||||
|
# Create timestamp-based migration directory
|
||||||
|
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
|
||||||
|
migration_dir = migrations_dir / f"{timestamp}_baseline"
|
||||||
|
migration_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Generate migration SQL
|
||||||
|
result = subprocess.run(
|
||||||
|
[
|
||||||
|
"prisma",
|
||||||
|
"migrate",
|
||||||
|
"diff",
|
||||||
|
"--from-empty",
|
||||||
|
"--to-schema-datamodel",
|
||||||
|
str(schema_path),
|
||||||
|
"--script",
|
||||||
|
],
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
check=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Write the SQL to migration.sql
|
||||||
|
migration_file = migration_dir / "migration.sql"
|
||||||
|
migration_file.write_text(result.stdout)
|
||||||
|
|
||||||
|
print(f"Created baseline migration in {migration_dir}")
|
||||||
|
return True
|
||||||
|
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
print(f"Error running prisma command: {e.stderr}")
|
||||||
|
return False
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error creating baseline migration: {str(e)}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
create_baseline()
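`ci_cd/baseline_db.py` shells out to `prisma migrate diff --from-empty --script` to capture a baseline, and the companion `ci_cd/run_migration.py` below diffs the current schema against already-applied migrations. A hedged usage sketch follows, assuming both are run from the repository root with the Prisma CLI installed; the migration name is a placeholder.

```python
# Illustrative driver for the ci_cd helpers; assumes the repository root as the
# working directory and the `prisma` CLI on PATH, as the scripts themselves do.
from ci_cd.baseline_db import create_baseline
from ci_cd.run_migration import create_migration

if create_baseline():
    # "add_spend_indexes" is a placeholder name, not a migration from this PR.
    create_migration("add_spend_indexes")
```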
|
ci_cd/run_migration.py (new file, 96 lines)
@@ -0,0 +1,96 @@
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
import testing.postgresql
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
|
||||||
|
def create_migration(migration_name: str = None):
|
||||||
|
"""
|
||||||
|
Create a new migration SQL file in deploy/migrations directory by comparing
|
||||||
|
current database state with schema
|
||||||
|
|
||||||
|
Args:
|
||||||
|
migration_name (str): Name for the migration
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Get paths
|
||||||
|
root_dir = Path(__file__).parent.parent
|
||||||
|
deploy_dir = root_dir / "deploy"
|
||||||
|
migrations_dir = deploy_dir / "migrations"
|
||||||
|
schema_path = root_dir / "schema.prisma"
|
||||||
|
|
||||||
|
# Create temporary PostgreSQL database
|
||||||
|
with testing.postgresql.Postgresql() as postgresql:
|
||||||
|
db_url = postgresql.url()
|
||||||
|
|
||||||
|
# Create temporary migrations directory next to schema.prisma
|
||||||
|
temp_migrations_dir = schema_path.parent / "migrations"
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Copy existing migrations to temp directory
|
||||||
|
if temp_migrations_dir.exists():
|
||||||
|
shutil.rmtree(temp_migrations_dir)
|
||||||
|
shutil.copytree(migrations_dir, temp_migrations_dir)
|
||||||
|
|
||||||
|
# Apply existing migrations to temp database
|
||||||
|
os.environ["DATABASE_URL"] = db_url
|
||||||
|
subprocess.run(
|
||||||
|
["prisma", "migrate", "deploy", "--schema", str(schema_path)],
|
||||||
|
check=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Generate diff between current database and schema
|
||||||
|
result = subprocess.run(
|
||||||
|
[
|
||||||
|
"prisma",
|
||||||
|
"migrate",
|
||||||
|
"diff",
|
||||||
|
"--from-url",
|
||||||
|
db_url,
|
||||||
|
"--to-schema-datamodel",
|
||||||
|
str(schema_path),
|
||||||
|
"--script",
|
||||||
|
],
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
check=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
if result.stdout.strip():
|
||||||
|
# Generate timestamp and create migration directory
|
||||||
|
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
|
||||||
|
migration_name = migration_name or "unnamed_migration"
|
||||||
|
migration_dir = migrations_dir / f"{timestamp}_{migration_name}"
|
||||||
|
migration_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Write the SQL to migration.sql
|
||||||
|
migration_file = migration_dir / "migration.sql"
|
||||||
|
migration_file.write_text(result.stdout)
|
||||||
|
|
||||||
|
print(f"Created migration in {migration_dir}")
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
print("No schema changes detected. Migration not needed.")
|
||||||
|
return False
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# Clean up: remove temporary migrations directory
|
||||||
|
if temp_migrations_dir.exists():
|
||||||
|
shutil.rmtree(temp_migrations_dir)
|
||||||
|
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
print(f"Error generating migration: {e.stderr}")
|
||||||
|
return False
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error creating migration: {str(e)}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
# If running directly, can optionally pass migration name as argument
|
||||||
|
import sys
|
||||||
|
|
||||||
|
migration_name = sys.argv[1] if len(sys.argv) > 1 else None
|
||||||
|
create_migration(migration_name)
|
360
deploy/migrations/20250326162113_baseline/migration.sql
Normal file
|
@ -0,0 +1,360 @@
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_BudgetTable" (
|
||||||
|
"budget_id" TEXT NOT NULL,
|
||||||
|
"max_budget" DOUBLE PRECISION,
|
||||||
|
"soft_budget" DOUBLE PRECISION,
|
||||||
|
"max_parallel_requests" INTEGER,
|
||||||
|
"tpm_limit" BIGINT,
|
||||||
|
"rpm_limit" BIGINT,
|
||||||
|
"model_max_budget" JSONB,
|
||||||
|
"budget_duration" TEXT,
|
||||||
|
"budget_reset_at" TIMESTAMP(3),
|
||||||
|
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"created_by" TEXT NOT NULL,
|
||||||
|
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_by" TEXT NOT NULL,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_BudgetTable_pkey" PRIMARY KEY ("budget_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_CredentialsTable" (
|
||||||
|
"credential_id" TEXT NOT NULL,
|
||||||
|
"credential_name" TEXT NOT NULL,
|
||||||
|
"credential_values" JSONB NOT NULL,
|
||||||
|
"credential_info" JSONB,
|
||||||
|
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"created_by" TEXT NOT NULL,
|
||||||
|
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_by" TEXT NOT NULL,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_CredentialsTable_pkey" PRIMARY KEY ("credential_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_ProxyModelTable" (
|
||||||
|
"model_id" TEXT NOT NULL,
|
||||||
|
"model_name" TEXT NOT NULL,
|
||||||
|
"litellm_params" JSONB NOT NULL,
|
||||||
|
"model_info" JSONB,
|
||||||
|
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"created_by" TEXT NOT NULL,
|
||||||
|
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_by" TEXT NOT NULL,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_ProxyModelTable_pkey" PRIMARY KEY ("model_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_OrganizationTable" (
|
||||||
|
"organization_id" TEXT NOT NULL,
|
||||||
|
"organization_alias" TEXT NOT NULL,
|
||||||
|
"budget_id" TEXT NOT NULL,
|
||||||
|
"metadata" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"models" TEXT[],
|
||||||
|
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
|
||||||
|
"model_spend" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"created_by" TEXT NOT NULL,
|
||||||
|
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_by" TEXT NOT NULL,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_OrganizationTable_pkey" PRIMARY KEY ("organization_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_ModelTable" (
|
||||||
|
"id" SERIAL NOT NULL,
|
||||||
|
"aliases" JSONB,
|
||||||
|
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"created_by" TEXT NOT NULL,
|
||||||
|
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_by" TEXT NOT NULL,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_ModelTable_pkey" PRIMARY KEY ("id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_TeamTable" (
|
||||||
|
"team_id" TEXT NOT NULL,
|
||||||
|
"team_alias" TEXT,
|
||||||
|
"organization_id" TEXT,
|
||||||
|
"admins" TEXT[],
|
||||||
|
"members" TEXT[],
|
||||||
|
"members_with_roles" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"metadata" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"max_budget" DOUBLE PRECISION,
|
||||||
|
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
|
||||||
|
"models" TEXT[],
|
||||||
|
"max_parallel_requests" INTEGER,
|
||||||
|
"tpm_limit" BIGINT,
|
||||||
|
"rpm_limit" BIGINT,
|
||||||
|
"budget_duration" TEXT,
|
||||||
|
"budget_reset_at" TIMESTAMP(3),
|
||||||
|
"blocked" BOOLEAN NOT NULL DEFAULT false,
|
||||||
|
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"model_spend" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"model_max_budget" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"model_id" INTEGER,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_TeamTable_pkey" PRIMARY KEY ("team_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_UserTable" (
|
||||||
|
"user_id" TEXT NOT NULL,
|
||||||
|
"user_alias" TEXT,
|
||||||
|
"team_id" TEXT,
|
||||||
|
"sso_user_id" TEXT,
|
||||||
|
"organization_id" TEXT,
|
||||||
|
"password" TEXT,
|
||||||
|
"teams" TEXT[] DEFAULT ARRAY[]::TEXT[],
|
||||||
|
"user_role" TEXT,
|
||||||
|
"max_budget" DOUBLE PRECISION,
|
||||||
|
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
|
||||||
|
"user_email" TEXT,
|
||||||
|
"models" TEXT[],
|
||||||
|
"metadata" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"max_parallel_requests" INTEGER,
|
||||||
|
"tpm_limit" BIGINT,
|
||||||
|
"rpm_limit" BIGINT,
|
||||||
|
"budget_duration" TEXT,
|
||||||
|
"budget_reset_at" TIMESTAMP(3),
|
||||||
|
"allowed_cache_controls" TEXT[] DEFAULT ARRAY[]::TEXT[],
|
||||||
|
"model_spend" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"model_max_budget" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_UserTable_pkey" PRIMARY KEY ("user_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_VerificationToken" (
|
||||||
|
"token" TEXT NOT NULL,
|
||||||
|
"key_name" TEXT,
|
||||||
|
"key_alias" TEXT,
|
||||||
|
"soft_budget_cooldown" BOOLEAN NOT NULL DEFAULT false,
|
||||||
|
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
|
||||||
|
"expires" TIMESTAMP(3),
|
||||||
|
"models" TEXT[],
|
||||||
|
"aliases" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"config" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"user_id" TEXT,
|
||||||
|
"team_id" TEXT,
|
||||||
|
"permissions" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"max_parallel_requests" INTEGER,
|
||||||
|
"metadata" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"blocked" BOOLEAN,
|
||||||
|
"tpm_limit" BIGINT,
|
||||||
|
"rpm_limit" BIGINT,
|
||||||
|
"max_budget" DOUBLE PRECISION,
|
||||||
|
"budget_duration" TEXT,
|
||||||
|
"budget_reset_at" TIMESTAMP(3),
|
||||||
|
"allowed_cache_controls" TEXT[] DEFAULT ARRAY[]::TEXT[],
|
||||||
|
"model_spend" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"model_max_budget" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"budget_id" TEXT,
|
||||||
|
"organization_id" TEXT,
|
||||||
|
"created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"created_by" TEXT,
|
||||||
|
"updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_by" TEXT,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_VerificationToken_pkey" PRIMARY KEY ("token")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_EndUserTable" (
|
||||||
|
"user_id" TEXT NOT NULL,
|
||||||
|
"alias" TEXT,
|
||||||
|
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
|
||||||
|
"allowed_model_region" TEXT,
|
||||||
|
"default_model" TEXT,
|
||||||
|
"budget_id" TEXT,
|
||||||
|
"blocked" BOOLEAN NOT NULL DEFAULT false,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_EndUserTable_pkey" PRIMARY KEY ("user_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_Config" (
|
||||||
|
"param_name" TEXT NOT NULL,
|
||||||
|
"param_value" JSONB,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_Config_pkey" PRIMARY KEY ("param_name")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_SpendLogs" (
|
||||||
|
"request_id" TEXT NOT NULL,
|
||||||
|
"call_type" TEXT NOT NULL,
|
||||||
|
"api_key" TEXT NOT NULL DEFAULT '',
|
||||||
|
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
|
||||||
|
"total_tokens" INTEGER NOT NULL DEFAULT 0,
|
||||||
|
"prompt_tokens" INTEGER NOT NULL DEFAULT 0,
|
||||||
|
"completion_tokens" INTEGER NOT NULL DEFAULT 0,
|
||||||
|
"startTime" TIMESTAMP(3) NOT NULL,
|
||||||
|
"endTime" TIMESTAMP(3) NOT NULL,
|
||||||
|
"completionStartTime" TIMESTAMP(3),
|
||||||
|
"model" TEXT NOT NULL DEFAULT '',
|
||||||
|
"model_id" TEXT DEFAULT '',
|
||||||
|
"model_group" TEXT DEFAULT '',
|
||||||
|
"custom_llm_provider" TEXT DEFAULT '',
|
||||||
|
"api_base" TEXT DEFAULT '',
|
||||||
|
"user" TEXT DEFAULT '',
|
||||||
|
"metadata" JSONB DEFAULT '{}',
|
||||||
|
"cache_hit" TEXT DEFAULT '',
|
||||||
|
"cache_key" TEXT DEFAULT '',
|
||||||
|
"request_tags" JSONB DEFAULT '[]',
|
||||||
|
"team_id" TEXT,
|
||||||
|
"end_user" TEXT,
|
||||||
|
"requester_ip_address" TEXT,
|
||||||
|
"messages" JSONB DEFAULT '{}',
|
||||||
|
"response" JSONB DEFAULT '{}',
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_SpendLogs_pkey" PRIMARY KEY ("request_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_ErrorLogs" (
|
||||||
|
"request_id" TEXT NOT NULL,
|
||||||
|
"startTime" TIMESTAMP(3) NOT NULL,
|
||||||
|
"endTime" TIMESTAMP(3) NOT NULL,
|
||||||
|
"api_base" TEXT NOT NULL DEFAULT '',
|
||||||
|
"model_group" TEXT NOT NULL DEFAULT '',
|
||||||
|
"litellm_model_name" TEXT NOT NULL DEFAULT '',
|
||||||
|
"model_id" TEXT NOT NULL DEFAULT '',
|
||||||
|
"request_kwargs" JSONB NOT NULL DEFAULT '{}',
|
||||||
|
"exception_type" TEXT NOT NULL DEFAULT '',
|
||||||
|
"exception_string" TEXT NOT NULL DEFAULT '',
|
||||||
|
"status_code" TEXT NOT NULL DEFAULT '',
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_ErrorLogs_pkey" PRIMARY KEY ("request_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_UserNotifications" (
|
||||||
|
"request_id" TEXT NOT NULL,
|
||||||
|
"user_id" TEXT NOT NULL,
|
||||||
|
"models" TEXT[],
|
||||||
|
"justification" TEXT NOT NULL,
|
||||||
|
"status" TEXT NOT NULL,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_UserNotifications_pkey" PRIMARY KEY ("request_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_TeamMembership" (
|
||||||
|
"user_id" TEXT NOT NULL,
|
||||||
|
"team_id" TEXT NOT NULL,
|
||||||
|
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
|
||||||
|
"budget_id" TEXT,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_TeamMembership_pkey" PRIMARY KEY ("user_id","team_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_OrganizationMembership" (
|
||||||
|
"user_id" TEXT NOT NULL,
|
||||||
|
"organization_id" TEXT NOT NULL,
|
||||||
|
"user_role" TEXT,
|
||||||
|
"spend" DOUBLE PRECISION DEFAULT 0.0,
|
||||||
|
"budget_id" TEXT,
|
||||||
|
"created_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_at" TIMESTAMP(3) DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_OrganizationMembership_pkey" PRIMARY KEY ("user_id","organization_id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_InvitationLink" (
|
||||||
|
"id" TEXT NOT NULL,
|
||||||
|
"user_id" TEXT NOT NULL,
|
||||||
|
"is_accepted" BOOLEAN NOT NULL DEFAULT false,
|
||||||
|
"accepted_at" TIMESTAMP(3),
|
||||||
|
"expires_at" TIMESTAMP(3) NOT NULL,
|
||||||
|
"created_at" TIMESTAMP(3) NOT NULL,
|
||||||
|
"created_by" TEXT NOT NULL,
|
||||||
|
"updated_at" TIMESTAMP(3) NOT NULL,
|
||||||
|
"updated_by" TEXT NOT NULL,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_InvitationLink_pkey" PRIMARY KEY ("id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_AuditLog" (
|
||||||
|
"id" TEXT NOT NULL,
|
||||||
|
"updated_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"changed_by" TEXT NOT NULL DEFAULT '',
|
||||||
|
"changed_by_api_key" TEXT NOT NULL DEFAULT '',
|
||||||
|
"action" TEXT NOT NULL,
|
||||||
|
"table_name" TEXT NOT NULL,
|
||||||
|
"object_id" TEXT NOT NULL,
|
||||||
|
"before_value" JSONB,
|
||||||
|
"updated_values" JSONB,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_AuditLog_pkey" PRIMARY KEY ("id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE UNIQUE INDEX "LiteLLM_CredentialsTable_credential_name_key" ON "LiteLLM_CredentialsTable"("credential_name");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE UNIQUE INDEX "LiteLLM_TeamTable_model_id_key" ON "LiteLLM_TeamTable"("model_id");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE UNIQUE INDEX "LiteLLM_UserTable_sso_user_id_key" ON "LiteLLM_UserTable"("sso_user_id");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE INDEX "LiteLLM_SpendLogs_startTime_idx" ON "LiteLLM_SpendLogs"("startTime");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE INDEX "LiteLLM_SpendLogs_end_user_idx" ON "LiteLLM_SpendLogs"("end_user");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE UNIQUE INDEX "LiteLLM_OrganizationMembership_user_id_organization_id_key" ON "LiteLLM_OrganizationMembership"("user_id", "organization_id");
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_OrganizationTable" ADD CONSTRAINT "LiteLLM_OrganizationTable_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE RESTRICT ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_TeamTable" ADD CONSTRAINT "LiteLLM_TeamTable_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_TeamTable" ADD CONSTRAINT "LiteLLM_TeamTable_model_id_fkey" FOREIGN KEY ("model_id") REFERENCES "LiteLLM_ModelTable"("id") ON DELETE SET NULL ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_UserTable" ADD CONSTRAINT "LiteLLM_UserTable_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_VerificationToken" ADD CONSTRAINT "LiteLLM_VerificationToken_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_VerificationToken" ADD CONSTRAINT "LiteLLM_VerificationToken_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE SET NULL ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_EndUserTable" ADD CONSTRAINT "LiteLLM_EndUserTable_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_TeamMembership" ADD CONSTRAINT "LiteLLM_TeamMembership_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_user_id_fkey" FOREIGN KEY ("user_id") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_organization_id_fkey" FOREIGN KEY ("organization_id") REFERENCES "LiteLLM_OrganizationTable"("organization_id") ON DELETE RESTRICT ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_OrganizationMembership" ADD CONSTRAINT "LiteLLM_OrganizationMembership_budget_id_fkey" FOREIGN KEY ("budget_id") REFERENCES "LiteLLM_BudgetTable"("budget_id") ON DELETE SET NULL ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_user_id_fkey" FOREIGN KEY ("user_id") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_created_by_fkey" FOREIGN KEY ("created_by") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "LiteLLM_InvitationLink" ADD CONSTRAINT "LiteLLM_InvitationLink_updated_by_fkey" FOREIGN KEY ("updated_by") REFERENCES "LiteLLM_UserTable"("user_id") ON DELETE RESTRICT ON UPDATE CASCADE;
|
||||||
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "LiteLLM_DailyUserSpend" (
|
||||||
|
"id" TEXT NOT NULL,
|
||||||
|
"user_id" TEXT NOT NULL,
|
||||||
|
"date" TEXT NOT NULL,
|
||||||
|
"api_key" TEXT NOT NULL,
|
||||||
|
"model" TEXT NOT NULL,
|
||||||
|
"model_group" TEXT,
|
||||||
|
"custom_llm_provider" TEXT,
|
||||||
|
"prompt_tokens" INTEGER NOT NULL DEFAULT 0,
|
||||||
|
"completion_tokens" INTEGER NOT NULL DEFAULT 0,
|
||||||
|
"spend" DOUBLE PRECISION NOT NULL DEFAULT 0.0,
|
||||||
|
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_at" TIMESTAMP(3) NOT NULL,
|
||||||
|
|
||||||
|
CONSTRAINT "LiteLLM_DailyUserSpend_pkey" PRIMARY KEY ("id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE INDEX "LiteLLM_DailyUserSpend_date_idx" ON "LiteLLM_DailyUserSpend"("date");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE INDEX "LiteLLM_DailyUserSpend_user_id_idx" ON "LiteLLM_DailyUserSpend"("user_id");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE INDEX "LiteLLM_DailyUserSpend_api_key_idx" ON "LiteLLM_DailyUserSpend"("api_key");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE INDEX "LiteLLM_DailyUserSpend_model_idx" ON "LiteLLM_DailyUserSpend"("model");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE UNIQUE INDEX "LiteLLM_DailyUserSpend_user_id_date_api_key_model_custom_ll_key" ON "LiteLLM_DailyUserSpend"("user_id", "date", "api_key", "model", "custom_llm_provider");
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
-- AlterTable
|
||||||
|
ALTER TABLE "LiteLLM_DailyUserSpend" ADD COLUMN "api_requests" INTEGER NOT NULL DEFAULT 0;
|
||||||
|
|
deploy/migrations/migration_lock.toml (new file)
@@ -0,0 +1 @@
+provider = "postgresql"
@@ -66,5 +66,3 @@ volumes:
   postgres_data:
     name: litellm_postgres_data # Named volume for Postgres data persistence
-
-# ...rest of your docker-compose config if any
@@ -59,9 +59,6 @@ COPY --from=builder /wheels/ /wheels/
 # Install the built wheel using pip; again using a wildcard if it's the only file
 RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
 
-# install semantic-cache [Experimental]- we need this here and not in requirements.txt because redisvl pins to pydantic 1.0
-RUN pip install redisvl==0.0.7 --no-deps
-
 # ensure pyjwt is used, not jwt
 RUN pip uninstall jwt -y
 RUN pip uninstall PyJWT -y
@@ -14,7 +14,7 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 
 # Install build dependencies
 RUN apt-get clean && apt-get update && \
-    apt-get install -y gcc python3-dev && \
+    apt-get install -y gcc g++ python3-dev && \
     rm -rf /var/lib/apt/lists/*
 
 RUN pip install --no-cache-dir --upgrade pip && \
@@ -56,10 +56,8 @@ COPY --from=builder /wheels/ /wheels/
 # Install the built wheel using pip; again using a wildcard if it's the only file
 RUN pip install *.whl /wheels/* --no-index --find-links=/wheels/ && rm -f *.whl && rm -rf /wheels
 
-# install semantic-cache [Experimental]- we need this here and not in requirements.txt because redisvl pins to pydantic 1.0
 # ensure pyjwt is used, not jwt
-RUN pip install redisvl==0.0.7 --no-deps --no-cache-dir && \
-    pip uninstall jwt -y && \
+RUN pip uninstall jwt -y && \
     pip uninstall PyJWT -y && \
     pip install PyJWT==2.9.0 --no-cache-dir
@ -26,7 +26,7 @@ Install redis
|
||||||
pip install redis
|
pip install redis
|
||||||
```
|
```
|
||||||
|
|
||||||
For the hosted version you can setup your own Redis DB here: https://app.redislabs.com/
|
For the hosted version you can set up your own Redis DB here: https://redis.io/try-free/
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import litellm
|
import litellm
|
||||||
|
@ -91,12 +91,12 @@ response2 = completion(
|
||||||
|
|
||||||
<TabItem value="redis-sem" label="redis-semantic cache">
|
<TabItem value="redis-sem" label="redis-semantic cache">
|
||||||
|
|
||||||
Install redis
|
Install redisvl client
|
||||||
```shell
|
```shell
|
||||||
pip install redisvl==0.0.7
|
pip install redisvl==0.4.1
|
||||||
```
|
```
|
||||||
|
|
||||||
For the hosted version you can setup your own Redis DB here: https://app.redislabs.com/
|
For the hosted version you can set up your own Redis DB here: https://redis.io/try-free/
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import litellm
|
import litellm
|
||||||
|
@ -114,6 +114,7 @@ litellm.cache = Cache(
|
||||||
port=os.environ["REDIS_PORT"],
|
port=os.environ["REDIS_PORT"],
|
||||||
password=os.environ["REDIS_PASSWORD"],
|
password=os.environ["REDIS_PASSWORD"],
|
||||||
similarity_threshold=0.8, # similarity threshold for cache hits, 0 == no similarity, 1 = exact matches, 0.5 == 50% similarity
|
similarity_threshold=0.8, # similarity threshold for cache hits, 0 == no similarity, 1 = exact matches, 0.5 == 50% similarity
|
||||||
|
ttl=120,
|
||||||
redis_semantic_cache_embedding_model="text-embedding-ada-002", # this model is passed to litellm.embedding(), any litellm.embedding() model is supported here
|
redis_semantic_cache_embedding_model="text-embedding-ada-002", # this model is passed to litellm.embedding(), any litellm.embedding() model is supported here
|
||||||
)
|
)
|
||||||
response1 = completion(
|
response1 = completion(
|
||||||
|
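For readers skimming this doc diff, here is a compact end-to-end sketch assembled from the updated example above. The `similarity_threshold`, `ttl`, and `redis_semantic_cache_embedding_model` parameters come from the snippet; the `litellm.Cache` entry point, model names, and environment variables are assumptions:

```python
import os
import litellm
from litellm import completion

# Assumes REDIS_HOST / REDIS_PORT / REDIS_PASSWORD and OPENAI_API_KEY are set in the environment
litellm.cache = litellm.Cache(  # top-level Cache export is an assumption
    type="redis-semantic",
    host=os.environ["REDIS_HOST"],
    port=os.environ["REDIS_PORT"],
    password=os.environ["REDIS_PASSWORD"],
    similarity_threshold=0.8,  # 1 = exact matches only; lower values accept looser matches
    ttl=120,                   # cache entries expire after 120 seconds
    redis_semantic_cache_embedding_model="text-embedding-ada-002",
)

# Two semantically similar prompts; the second call should be served from the cache
response1 = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "What's the weather like in SF?"}],
)
response2 = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "How is the weather in San Francisco?"}],
)
print(response1.choices[0].message.content)
```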
@ -471,11 +472,13 @@ def __init__(
|
||||||
password: Optional[str] = None,
|
password: Optional[str] = None,
|
||||||
namespace: Optional[str] = None,
|
namespace: Optional[str] = None,
|
||||||
default_in_redis_ttl: Optional[float] = None,
|
default_in_redis_ttl: Optional[float] = None,
|
||||||
similarity_threshold: Optional[float] = None,
|
|
||||||
redis_semantic_cache_use_async=False,
|
|
||||||
redis_semantic_cache_embedding_model="text-embedding-ada-002",
|
|
||||||
redis_flush_size=None,
|
redis_flush_size=None,
|
||||||
|
|
||||||
|
# redis semantic cache params
|
||||||
|
similarity_threshold: Optional[float] = None,
|
||||||
|
redis_semantic_cache_embedding_model: str = "text-embedding-ada-002",
|
||||||
|
redis_semantic_cache_index_name: Optional[str] = None,
|
||||||
|
|
||||||
# s3 Bucket, boto3 configuration
|
# s3 Bucket, boto3 configuration
|
||||||
s3_bucket_name: Optional[str] = None,
|
s3_bucket_name: Optional[str] = None,
|
||||||
s3_region_name: Optional[str] = None,
|
s3_region_name: Optional[str] = None,
|
||||||
|
|
|
@ -272,14 +272,7 @@ async with stdio_client(server_params) as (read, write):
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
## Upcoming Features
|
## Advanced Usage
|
||||||
|
|
||||||
:::info
|
|
||||||
|
|
||||||
**This feature is not live as yet** this is a beta interface. Expect this to be live on litellm `v1.63.15` and above.
|
|
||||||
|
|
||||||
:::
|
|
||||||
|
|
||||||
|
|
||||||
### Expose MCP tools on LiteLLM Proxy Server
|
### Expose MCP tools on LiteLLM Proxy Server
|
||||||
|
|
||||||
|
|
|
@ -1776,6 +1776,7 @@ response = completion(
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
</TabItem>
|
</TabItem>
|
||||||
|
|
||||||
<TabItem value="proxy" label="PROXY">
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
1. Setup config.yaml
|
1. Setup config.yaml
|
||||||
|
@ -1820,11 +1821,13 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||||
```
|
```
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
|
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
### SSO Login (AWS Profile)
|
### SSO Login (AWS Profile)
|
||||||
- Set `AWS_PROFILE` environment variable
|
- Set `AWS_PROFILE` environment variable
|
||||||
- Make bedrock completion call
|
- Make bedrock completion call
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import os
|
import os
|
||||||
from litellm import completion
|
from litellm import completion
|
||||||
|
@ -1917,12 +1920,46 @@ model_list:
|
||||||
|
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
Text to Image:
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://0.0.0.0:4000/v1/images/generations' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer $LITELLM_VIRTUAL_KEY' \
|
||||||
|
-d '{
|
||||||
|
"model": "amazon.nova-canvas-v1:0",
|
||||||
|
"prompt": "A cute baby sea otter"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Color Guided Generation:
|
||||||
|
```bash
|
||||||
|
curl -L -X POST 'http://0.0.0.0:4000/v1/images/generations' \
|
||||||
|
-H 'Content-Type: application/json' \
|
||||||
|
-H 'Authorization: Bearer $LITELLM_VIRTUAL_KEY' \
|
||||||
|
-d '{
|
||||||
|
"model": "amazon.nova-canvas-v1:0",
|
||||||
|
"prompt": "A cute baby sea otter",
|
||||||
|
"taskType": "COLOR_GUIDED_GENERATION",
|
||||||
|
"colorGuidedGenerationParams":{"colors":["#FFFFFF"]}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
| Model Name | Function Call |
|
||||||
|
|-------------------------|---------------------------------------------|
|
||||||
|
| Stable Diffusion 3 - v0 | `image_generation(model="bedrock/stability.stability.sd3-large-v1:0", prompt=prompt)` |
|
||||||
|
| Stable Diffusion - v0 | `image_generation(model="bedrock/stability.stable-diffusion-xl-v0", prompt=prompt)` |
|
||||||
|
| Stable Diffusion - v1 | `image_generation(model="bedrock/stability.stable-diffusion-xl-v1", prompt=prompt)` |
|
||||||
|
| Amazon Nova Canvas - v0 | `image_generation(model="bedrock/amazon.nova-canvas-v1:0", prompt=prompt)` |
|
||||||
|
|
||||||
|
|
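A hedged SDK-level counterpart to the curl examples above; the AWS credentials and region below are placeholders, while the model string matches the table:

```python
import os
from litellm import image_generation

# Assumes valid AWS credentials; the values below are placeholders
os.environ["AWS_ACCESS_KEY_ID"] = "your-access-key-id"
os.environ["AWS_SECRET_ACCESS_KEY"] = "your-secret-access-key"
os.environ["AWS_REGION_NAME"] = "us-east-1"

# Text-to-image with Amazon Nova Canvas on Bedrock
response = image_generation(
    model="bedrock/amazon.nova-canvas-v1:0",
    prompt="A cute baby sea otter",
)
print(response)
```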
||||||
### Passing an external BedrockRuntime.Client as a parameter - Completion()
|
### Passing an external BedrockRuntime.Client as a parameter - Completion()
|
||||||
|
|
||||||
|
This is a deprecated flow. Boto3 is not async. And boto3.client does not let us make the http call through httpx. Pass in your aws params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues)
|
||||||
|
|
||||||
:::warning
|
:::warning
|
||||||
|
|
||||||
This is a deprecated flow. Boto3 is not async. And boto3.client does not let us make the http call through httpx. Pass in your aws params through the method above 👆. [See Auth Code](https://github.com/BerriAI/litellm/blob/55a20c7cce99a93d36a82bf3ae90ba3baf9a7f89/litellm/llms/bedrock_httpx.py#L284) [Add new auth flow](https://github.com/BerriAI/litellm/issues)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Experimental - 2024-Jun-23:
|
Experimental - 2024-Jun-23:
|
||||||
|
|
|
@ -325,6 +325,74 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
|
||||||
| fine tuned `gpt-3.5-turbo-0613` | `response = completion(model="ft:gpt-3.5-turbo-0613", messages=messages)` |
|
| fine tuned `gpt-3.5-turbo-0613` | `response = completion(model="ft:gpt-3.5-turbo-0613", messages=messages)` |
|
||||||
|
|
||||||
|
|
||||||
|
## OpenAI Audio Transcription
|
||||||
|
|
||||||
|
LiteLLM supports the OpenAI Audio Transcription endpoint.
|
||||||
|
|
||||||
|
Supported models:
|
||||||
|
|
||||||
|
| Model Name | Function Call |
|
||||||
|
|---------------------------|-----------------------------------------------------------------|
|
||||||
|
| `whisper-1`              | `response = transcription(model="whisper-1", file=audio_file)` |
|
||||||
|
| `gpt-4o-transcribe`      | `response = transcription(model="gpt-4o-transcribe", file=audio_file)` |
|
||||||
|
| `gpt-4o-mini-transcribe` | `response = transcription(model="gpt-4o-mini-transcribe", file=audio_file)` |
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="SDK">
|
||||||
|
|
||||||
|
```python
|
||||||
|
from litellm import transcription
|
||||||
|
import os
|
||||||
|
|
||||||
|
# set api keys
|
||||||
|
os.environ["OPENAI_API_KEY"] = ""
|
||||||
|
audio_file = open("/path/to/audio.mp3", "rb")
|
||||||
|
|
||||||
|
response = transcription(model="gpt-4o-transcribe", file=audio_file)
|
||||||
|
|
||||||
|
print(f"response: {response}")
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="PROXY">
|
||||||
|
|
||||||
|
1. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
model_list:
|
||||||
|
- model_name: gpt-4o-transcribe
|
||||||
|
litellm_params:
|
||||||
|
model: gpt-4o-transcribe
|
||||||
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
|
model_info:
|
||||||
|
mode: audio_transcription
|
||||||
|
|
||||||
|
general_settings:
|
||||||
|
master_key: sk-1234
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Start the proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
litellm --config config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl --location 'http://0.0.0.0:8000/v1/audio/transcriptions' \
|
||||||
|
--header 'Authorization: Bearer sk-1234' \
|
||||||
|
--form 'file=@"/Users/krrishdholakia/Downloads/gettysburg.wav"' \
|
||||||
|
--form 'model="gpt-4o-transcribe"'
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
|
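The same proxy endpoint can also be called with the OpenAI Python SDK. The base URL and key mirror the curl example above; the local file path is an assumption:

```python
from openai import OpenAI

# Point the OpenAI client at the LiteLLM proxy started above
client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:8000/v1")

with open("gettysburg.wav", "rb") as audio_file:  # assumed local audio file
    transcript = client.audio.transcriptions.create(
        model="gpt-4o-transcribe",
        file=audio_file,
    )

print(transcript.text)
```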
||||||
## Advanced
|
## Advanced
|
||||||
|
|
||||||
### Getting OpenAI API Response Headers
|
### Getting OpenAI API Response Headers
|
||||||
|
|
|
@ -1369,6 +1369,103 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
## Gemini Pro
|
||||||
|
| Model Name | Function Call |
|
||||||
|
|------------------|--------------------------------------|
|
||||||
|
| gemini-pro | `completion('gemini-pro', messages)`, `completion('vertex_ai/gemini-pro', messages)` |
|
||||||
|
|
||||||
|
## Fine-tuned Models
|
||||||
|
|
||||||
|
You can call fine-tuned Vertex AI Gemini models through LiteLLM.
|
||||||
|
|
||||||
|
| Property | Details |
|
||||||
|
|----------|---------|
|
||||||
|
| Provider Route | `vertex_ai/gemini/{MODEL_ID}` |
|
||||||
|
| Vertex Documentation | [Vertex AI - Fine-tuned Gemini Models](https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini-use-supervised-tuning#test_the_tuned_model_with_a_prompt)|
|
||||||
|
| Supported Operations | `/chat/completions`, `/completions`, `/embeddings`, `/images` |
|
||||||
|
|
||||||
|
To use a model that follows the `/gemini` request/response format, simply set the model parameter as
|
||||||
|
|
||||||
|
```python title="Model parameter for calling fine-tuned gemini models"
|
||||||
|
model="vertex_ai/gemini/<your-finetuned-model>"
|
||||||
|
```
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="sdk" label="LiteLLM Python SDK">
|
||||||
|
|
||||||
|
```python showLineNumbers title="Example"
|
||||||
|
import litellm
|
||||||
|
import os
|
||||||
|
|
||||||
|
## set ENV variables
|
||||||
|
os.environ["VERTEXAI_PROJECT"] = "hardy-device-38811"
|
||||||
|
os.environ["VERTEXAI_LOCATION"] = "us-central1"
|
||||||
|
|
||||||
|
response = litellm.completion(
|
||||||
|
model="vertex_ai/gemini/<your-finetuned-model>", # e.g. vertex_ai/gemini/4965075652664360960
|
||||||
|
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="proxy" label="LiteLLM Proxy">
|
||||||
|
|
||||||
|
1. Add Vertex Credentials to your env
|
||||||
|
|
||||||
|
```bash title="Authenticate to Vertex AI"
|
||||||
|
!gcloud auth application-default login
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Setup config.yaml
|
||||||
|
|
||||||
|
```yaml showLineNumbers title="Add to litellm config"
|
||||||
|
- model_name: finetuned-gemini
|
||||||
|
litellm_params:
|
||||||
|
model: vertex_ai/gemini/<ENDPOINT_ID>
|
||||||
|
vertex_project: <PROJECT_ID>
|
||||||
|
vertex_location: <LOCATION>
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Test it!
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="openai" label="OpenAI Python SDK">
|
||||||
|
|
||||||
|
```python showLineNumbers title="Example request"
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
client = OpenAI(
|
||||||
|
api_key="your-litellm-key",
|
||||||
|
base_url="http://0.0.0.0:4000"
|
||||||
|
)
|
||||||
|
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model="finetuned-gemini",
|
||||||
|
messages=[
|
||||||
|
{"role": "user", "content": "hi"}
|
||||||
|
]
|
||||||
|
)
|
||||||
|
print(response)
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="curl" label="curl">
|
||||||
|
|
||||||
|
```bash showLineNumbers title="Example request"
|
||||||
|
curl --location 'https://0.0.0.0:4000/v1/chat/completions' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--header 'Authorization: <LITELLM_KEY>' \
|
||||||
|
--data '{"model": "finetuned-gemini" ,"messages":[{"role": "user", "content":[{"type": "text", "text": "hi"}]}]}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Model Garden
|
## Model Garden
|
||||||
|
|
||||||
:::tip
|
:::tip
|
||||||
|
@ -1479,67 +1576,6 @@ response = completion(
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
## Gemini Pro
|
|
||||||
| Model Name | Function Call |
|
|
||||||
|------------------|--------------------------------------|
|
|
||||||
| gemini-pro | `completion('gemini-pro', messages)`, `completion('vertex_ai/gemini-pro', messages)` |
|
|
||||||
|
|
||||||
## Fine-tuned Models
|
|
||||||
|
|
||||||
Fine tuned models on vertex have a numerical model/endpoint id.
|
|
||||||
|
|
||||||
<Tabs>
|
|
||||||
<TabItem value="sdk" label="SDK">
|
|
||||||
|
|
||||||
```python
|
|
||||||
from litellm import completion
|
|
||||||
import os
|
|
||||||
|
|
||||||
## set ENV variables
|
|
||||||
os.environ["VERTEXAI_PROJECT"] = "hardy-device-38811"
|
|
||||||
os.environ["VERTEXAI_LOCATION"] = "us-central1"
|
|
||||||
|
|
||||||
response = completion(
|
|
||||||
model="vertex_ai/<your-finetuned-model>", # e.g. vertex_ai/4965075652664360960
|
|
||||||
messages=[{ "content": "Hello, how are you?","role": "user"}],
|
|
||||||
base_model="vertex_ai/gemini-1.5-pro" # the base model - used for routing
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
<TabItem value="proxy" label="PROXY">
|
|
||||||
|
|
||||||
1. Add Vertex Credentials to your env
|
|
||||||
|
|
||||||
```bash
|
|
||||||
!gcloud auth application-default login
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Setup config.yaml
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
- model_name: finetuned-gemini
|
|
||||||
litellm_params:
|
|
||||||
model: vertex_ai/<ENDPOINT_ID>
|
|
||||||
vertex_project: <PROJECT_ID>
|
|
||||||
vertex_location: <LOCATION>
|
|
||||||
model_info:
|
|
||||||
base_model: vertex_ai/gemini-1.5-pro # IMPORTANT
|
|
||||||
```
|
|
||||||
|
|
||||||
3. Test it!
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl --location 'https://0.0.0.0:4000/v1/chat/completions' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--header 'Authorization: <LITELLM_KEY>' \
|
|
||||||
--data '{"model": "finetuned-gemini" ,"messages":[{"role": "user", "content":[{"type": "text", "text": "hi"}]}]}'
|
|
||||||
```
|
|
||||||
|
|
||||||
</TabItem>
|
|
||||||
</Tabs>
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Gemini Pro Vision
|
## Gemini Pro Vision
|
||||||
| Model Name | Function Call |
|
| Model Name | Function Call |
|
||||||
|
|
|
@ -147,6 +147,11 @@ Some SSO providers require a specific redirect url for login and logout. You can
|
||||||
- Login: `<your-proxy-base-url>/sso/key/generate`
|
- Login: `<your-proxy-base-url>/sso/key/generate`
|
||||||
- Logout: `<your-proxy-base-url>`
|
- Logout: `<your-proxy-base-url>`
|
||||||
|
|
||||||
|
Here's the env var to set the logout url on the proxy
|
||||||
|
```bash
|
||||||
|
PROXY_LOGOUT_URL="https://www.google.com"
|
||||||
|
```
|
||||||
|
|
||||||
#### Step 3. Set `PROXY_BASE_URL` in your .env
|
#### Step 3. Set `PROXY_BASE_URL` in your .env
|
||||||
|
|
||||||
Set this in your .env (so the proxy can set the correct redirect url)
|
Set this in your .env (so the proxy can set the correct redirect url)
|
||||||
|
|
|
@ -160,7 +160,7 @@ general_settings:
|
||||||
| database_url | string | The URL for the database connection [Set up Virtual Keys](virtual_keys) |
|
| database_url | string | The URL for the database connection [Set up Virtual Keys](virtual_keys) |
|
||||||
| database_connection_pool_limit | integer | The limit for database connection pool [Setting DB Connection Pool limit](#configure-db-pool-limits--connection-timeouts) |
|
| database_connection_pool_limit | integer | The limit for database connection pool [Setting DB Connection Pool limit](#configure-db-pool-limits--connection-timeouts) |
|
||||||
| database_connection_timeout | integer | The timeout for database connections in seconds [Setting DB Connection Pool limit, timeout](#configure-db-pool-limits--connection-timeouts) |
|
| database_connection_timeout | integer | The timeout for database connections in seconds [Setting DB Connection Pool limit, timeout](#configure-db-pool-limits--connection-timeouts) |
|
||||||
| allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if DB is unreachable. **Only use this if running LiteLLM in your VPC** This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key |
|
| allow_requests_on_db_unavailable | boolean | If true, allows requests to succeed even if DB is unreachable. **Only use this if running LiteLLM in your VPC** This will allow requests to work even when LiteLLM cannot connect to the DB to verify a Virtual Key [Doc on graceful db unavailability](prod#5-if-running-litellm-on-vpc-gracefully-handle-db-unavailability) |
|
||||||
| custom_auth | string | Write your own custom authentication logic [Doc Custom Auth](virtual_keys#custom-auth) |
|
| custom_auth | string | Write your own custom authentication logic [Doc Custom Auth](virtual_keys#custom-auth) |
|
||||||
| max_parallel_requests | integer | The max parallel requests allowed per deployment |
|
| max_parallel_requests | integer | The max parallel requests allowed per deployment |
|
||||||
| global_max_parallel_requests | integer | The max parallel requests allowed on the proxy overall |
|
| global_max_parallel_requests | integer | The max parallel requests allowed on the proxy overall |
|
||||||
|
@ -479,7 +479,7 @@ router_settings:
|
||||||
| PROXY_ADMIN_ID | Admin identifier for proxy server
|
| PROXY_ADMIN_ID | Admin identifier for proxy server
|
||||||
| PROXY_BASE_URL | Base URL for proxy service
|
| PROXY_BASE_URL | Base URL for proxy service
|
||||||
| PROXY_LOGOUT_URL | URL for logging out of the proxy service
|
| PROXY_LOGOUT_URL | URL for logging out of the proxy service
|
||||||
| PROXY_MASTER_KEY | Master key for proxy authentication
|
| LITELLM_MASTER_KEY | Master key for proxy authentication
|
||||||
| QDRANT_API_BASE | Base URL for Qdrant API
|
| QDRANT_API_BASE | Base URL for Qdrant API
|
||||||
| QDRANT_API_KEY | API key for Qdrant service
|
| QDRANT_API_KEY | API key for Qdrant service
|
||||||
| QDRANT_URL | Connection URL for Qdrant database
|
| QDRANT_URL | Connection URL for Qdrant database
|
||||||
|
|
|
@ -94,15 +94,31 @@ This disables the load_dotenv() functionality, which will automatically load you
|
||||||
|
|
||||||
## 5. If running LiteLLM on VPC, gracefully handle DB unavailability
|
## 5. If running LiteLLM on VPC, gracefully handle DB unavailability
|
||||||
|
|
||||||
This will allow LiteLLM to continue to process requests even if the DB is unavailable. This is better handling for DB unavailability.
|
When running LiteLLM on a VPC (and inaccessible from the public internet), you can enable graceful degradation so that request processing continues even if the database is temporarily unavailable.
|
||||||
|
|
||||||
|
|
||||||
**WARNING: Only do this if you're running LiteLLM on a VPC that cannot be accessed from the public internet.**
|
**WARNING: Only do this if you're running LiteLLM on a VPC that cannot be accessed from the public internet.**
|
||||||
|
|
||||||
```yaml
|
#### Configuration
|
||||||
|
|
||||||
|
```yaml showLineNumbers title="litellm config.yaml"
|
||||||
general_settings:
|
general_settings:
|
||||||
allow_requests_on_db_unavailable: True
|
allow_requests_on_db_unavailable: True
|
||||||
```
|
```
|
||||||
|
|
||||||
|
#### Expected Behavior
|
||||||
|
|
||||||
|
When `allow_requests_on_db_unavailable` is set to `true`, LiteLLM will handle errors as follows:
|
||||||
|
|
||||||
|
| Type of Error | Expected Behavior | Details |
|
||||||
|
|---------------|-------------------|----------------|
|
||||||
|
| Prisma Errors | ✅ Request will be allowed | Covers issues like DB connection resets or rejections from the DB via Prisma, the ORM used by LiteLLM. |
|
||||||
|
| Httpx Errors | ✅ Request will be allowed | Occurs when the database is unreachable, allowing the request to proceed despite the DB outage. |
|
||||||
|
| Pod Startup Behavior | ✅ Pods start regardless | LiteLLM Pods will start even if the database is down or unreachable, ensuring higher uptime guarantees for deployments. |
|
||||||
|
| Health/Readiness Check | ✅ Always returns 200 OK | The /health/readiness endpoint returns a 200 OK status to ensure that pods remain operational even when the database is unavailable. |
|
||||||
|
| LiteLLM Budget Errors or Model Errors | ❌ Request will be blocked | Triggered when the DB is reachable but the authentication token is invalid, lacks access, or exceeds budget limits. |
|
||||||
|
|
||||||
|
|
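To observe the readiness behavior described in the table above, a small sketch; the proxy URL and port are assumptions based on the default setup, and `requests` is assumed installed:

```python
import requests

# With allow_requests_on_db_unavailable enabled, /health/readiness should keep
# returning 200 OK even while the database is unreachable.
resp = requests.get("http://0.0.0.0:4000/health/readiness", timeout=5)
print(resp.status_code)
print(resp.text)
```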
||||||
## 6. Disable spend_logs & error_logs if not using the LiteLLM UI
|
## 6. Disable spend_logs & error_logs if not using the LiteLLM UI
|
||||||
|
|
||||||
By default, LiteLLM writes several types of logs to the database:
|
By default, LiteLLM writes several types of logs to the database:
|
||||||
|
@ -183,93 +199,3 @@ You should only see the following level of details in logs on the proxy server
|
||||||
# INFO: 192.168.2.205:34717 - "POST /chat/completions HTTP/1.1" 200 OK
|
# INFO: 192.168.2.205:34717 - "POST /chat/completions HTTP/1.1" 200 OK
|
||||||
# INFO: 192.168.2.205:29734 - "POST /chat/completions HTTP/1.1" 200 OK
|
# INFO: 192.168.2.205:29734 - "POST /chat/completions HTTP/1.1" 200 OK
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
### Machine Specifications to Deploy LiteLLM
|
|
||||||
|
|
||||||
| Service | Spec | CPUs | Memory | Architecture | Version|
|
|
||||||
| --- | --- | --- | --- | --- | --- |
|
|
||||||
| Server | `t2.small`. | `1vCPUs` | `8GB` | `x86` |
|
|
||||||
| Redis Cache | - | - | - | - | 7.0+ Redis Engine|
|
|
||||||
|
|
||||||
|
|
||||||
### Reference Kubernetes Deployment YAML
|
|
||||||
|
|
||||||
Reference Kubernetes `deployment.yaml` that was load tested by us
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: litellm-deployment
|
|
||||||
spec:
|
|
||||||
replicas: 3
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: litellm
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: litellm
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: litellm-container
|
|
||||||
image: ghcr.io/berriai/litellm:main-latest
|
|
||||||
imagePullPolicy: Always
|
|
||||||
env:
|
|
||||||
- name: AZURE_API_KEY
|
|
||||||
value: "d6******"
|
|
||||||
- name: AZURE_API_BASE
|
|
||||||
value: "https://ope******"
|
|
||||||
- name: LITELLM_MASTER_KEY
|
|
||||||
value: "sk-1234"
|
|
||||||
- name: DATABASE_URL
|
|
||||||
value: "po**********"
|
|
||||||
args:
|
|
||||||
- "--config"
|
|
||||||
- "/app/proxy_config.yaml" # Update the path to mount the config file
|
|
||||||
volumeMounts: # Define volume mount for proxy_config.yaml
|
|
||||||
- name: config-volume
|
|
||||||
mountPath: /app
|
|
||||||
readOnly: true
|
|
||||||
livenessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /health/liveliness
|
|
||||||
port: 4000
|
|
||||||
initialDelaySeconds: 120
|
|
||||||
periodSeconds: 15
|
|
||||||
successThreshold: 1
|
|
||||||
failureThreshold: 3
|
|
||||||
timeoutSeconds: 10
|
|
||||||
readinessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /health/readiness
|
|
||||||
port: 4000
|
|
||||||
initialDelaySeconds: 120
|
|
||||||
periodSeconds: 15
|
|
||||||
successThreshold: 1
|
|
||||||
failureThreshold: 3
|
|
||||||
timeoutSeconds: 10
|
|
||||||
volumes: # Define volume to mount proxy_config.yaml
|
|
||||||
- name: config-volume
|
|
||||||
configMap:
|
|
||||||
name: litellm-config
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
Reference Kubernetes `service.yaml` that was load tested by us
|
|
||||||
```yaml
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: litellm-service
|
|
||||||
spec:
|
|
||||||
selector:
|
|
||||||
app: litellm
|
|
||||||
ports:
|
|
||||||
- protocol: TCP
|
|
||||||
port: 4000
|
|
||||||
targetPort: 4000
|
|
||||||
type: LoadBalancer
|
|
||||||
```
|
|
||||||
|
|
|
@ -188,7 +188,13 @@ Currently implemented for:
|
||||||
- OpenAI (if OPENAI_API_KEY is set)
|
- OpenAI (if OPENAI_API_KEY is set)
|
||||||
- Fireworks AI (if FIREWORKS_AI_API_KEY is set)
|
- Fireworks AI (if FIREWORKS_AI_API_KEY is set)
|
||||||
- LiteLLM Proxy (if LITELLM_PROXY_API_KEY is set)
|
- LiteLLM Proxy (if LITELLM_PROXY_API_KEY is set)
|
||||||
|
- Gemini (if GEMINI_API_KEY is set)
|
||||||
|
- XAI (if XAI_API_KEY is set)
|
||||||
|
- Anthropic (if ANTHROPIC_API_KEY is set)
|
||||||
|
|
||||||
|
You can also specify a custom provider to check:
|
||||||
|
|
||||||
|
**All providers**:
|
||||||
```python
|
```python
|
||||||
from litellm import get_valid_models
|
from litellm import get_valid_models
|
||||||
|
|
||||||
|
@ -196,6 +202,14 @@ valid_models = get_valid_models(check_provider_endpoint=True)
|
||||||
print(valid_models)
|
print(valid_models)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
**Specific provider**:
|
||||||
|
```python
|
||||||
|
from litellm import get_valid_models
|
||||||
|
|
||||||
|
valid_models = get_valid_models(check_provider_endpoint=True, custom_llm_provider="openai")
|
||||||
|
print(valid_models)
|
||||||
|
```
|
||||||
|
|
||||||
### `validate_environment(model: str)`
|
### `validate_environment(model: str)`
|
||||||
|
|
||||||
This helper tells you if you have all the required environment variables for a model, and if not - what's missing.
|
This helper tells you if you have all the required environment variables for a model, and if not - what's missing.
|
||||||
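A brief sketch of this helper in use; the model name is arbitrary and the exact shape of the returned dict is an assumption based on typical usage:

```python
from litellm import validate_environment

# Check whether the environment has everything needed to call an Anthropic model
result = validate_environment(model="claude-3-5-sonnet-20240620")
print(result)
# expected shape (assumption): {"keys_in_environment": False, "missing_keys": ["ANTHROPIC_API_KEY"]}
```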
|
|
BIN
docs/my-website/img/release_notes/team_model_add.png
Normal file
After Width: | Height: | Size: 70 KiB |
|
@ -24,6 +24,7 @@ This release brings:
|
||||||
- LLM Translation Improvements (MCP Support and Bedrock Application Profiles)
|
- LLM Translation Improvements (MCP Support and Bedrock Application Profiles)
|
||||||
- Perf improvements for Usage-based Routing
|
- Perf improvements for Usage-based Routing
|
||||||
- Streaming guardrail support via websockets
|
- Streaming guardrail support via websockets
|
||||||
|
- Azure OpenAI client perf fix (from previous release)
|
||||||
|
|
||||||
## Docker Run LiteLLM Proxy
|
## Docker Run LiteLLM Proxy
|
||||||
|
|
||||||
|
@ -31,7 +32,7 @@ This release brings:
|
||||||
docker run
|
docker run
|
||||||
-e STORE_MODEL_IN_DB=True
|
-e STORE_MODEL_IN_DB=True
|
||||||
-p 4000:4000
|
-p 4000:4000
|
||||||
ghcr.io/berriai/litellm:main-v1.63.14-stable
|
ghcr.io/berriai/litellm:main-v1.63.14-stable.patch1
|
||||||
```
|
```
|
||||||
|
|
||||||
## Demo Instance
|
## Demo Instance
|
||||||
|
|
34
docs/my-website/release_notes/v1.65.0/index.md
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
---
|
||||||
|
title: v1.65.0 - Team Model Add - update
|
||||||
|
slug: v1.65.0
|
||||||
|
date: 2025-03-28T10:00:00
|
||||||
|
authors:
|
||||||
|
- name: Krrish Dholakia
|
||||||
|
title: CEO, LiteLLM
|
||||||
|
url: https://www.linkedin.com/in/krish-d/
|
||||||
|
image_url: https://media.licdn.com/dms/image/v2/D4D03AQGrlsJ3aqpHmQ/profile-displayphoto-shrink_400_400/B4DZSAzgP7HYAg-/0/1737327772964?e=1743638400&v=beta&t=39KOXMUFedvukiWWVPHf3qI45fuQD7lNglICwN31DrI
|
||||||
|
- name: Ishaan Jaffer
|
||||||
|
title: CTO, LiteLLM
|
||||||
|
url: https://www.linkedin.com/in/reffajnaahsi/
|
||||||
|
image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
|
||||||
|
tags: [management endpoints, team models, ui]
|
||||||
|
hide_table_of_contents: false
|
||||||
|
---
|
||||||
|
|
||||||
|
import Image from '@theme/IdealImage';
|
||||||
|
|
||||||
|
v1.65.0 updates the `/model/new` endpoint to prevent non-team admins from creating team models.
|
||||||
|
|
||||||
|
This means that only proxy admins or team admins can create team models.
|
||||||
|
|
||||||
|
## Additional Changes
|
||||||
|
|
||||||
|
- Allows team admins to call `/model/update` to update team models.
|
||||||
|
- Allows team admins to call `/model/delete` to delete team models.
|
||||||
|
- Introduces new `user_models_only` param to `/v2/model/info` - only return models added by this user.
|
||||||
|
|
||||||
|
|
||||||
|
These changes enable team admins to add and manage models for their team on the LiteLLM UI + API.
|
||||||
|
|
||||||
|
|
||||||
|
<Image img={require('../../img/release_notes/team_model_add.png')} />
|
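For illustration, a hedged sketch of the new `user_models_only` param on `/v2/model/info`; the proxy URL and key are assumptions, and `requests` is assumed installed:

```python
import requests

# Only return models added by the calling user
resp = requests.get(
    "http://0.0.0.0:4000/v2/model/info",
    params={"user_models_only": "true"},
    headers={"Authorization": "Bearer sk-1234"},
    timeout=10,
)
print(resp.status_code)
print(resp.json())
```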
|
@ -304,6 +304,7 @@ const sidebars = {
|
||||||
"image_variations",
|
"image_variations",
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"mcp",
|
||||||
{
|
{
|
||||||
type: "category",
|
type: "category",
|
||||||
label: "/audio",
|
label: "/audio",
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
warnings.filterwarnings("ignore", message=".*conflict with protected namespace.*")
|
warnings.filterwarnings("ignore", message=".*conflict with protected namespace.*")
|
||||||
### INIT VARIABLES ##########
|
### INIT VARIABLES ###########
|
||||||
import threading
|
import threading
|
||||||
import os
|
import os
|
||||||
from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
|
from typing import Callable, List, Optional, Dict, Union, Any, Literal, get_args
|
||||||
|
@ -122,6 +122,9 @@ langsmith_batch_size: Optional[int] = None
|
||||||
prometheus_initialize_budget_metrics: Optional[bool] = False
|
prometheus_initialize_budget_metrics: Optional[bool] = False
|
||||||
argilla_batch_size: Optional[int] = None
|
argilla_batch_size: Optional[int] = None
|
||||||
datadog_use_v1: Optional[bool] = False # if you want to use v1 datadog logged payload
|
datadog_use_v1: Optional[bool] = False # if you want to use v1 datadog logged payload
|
||||||
|
gcs_pub_sub_use_v1: Optional[bool] = (
|
||||||
|
False # if you want to use v1 gcs pubsub logged payload
|
||||||
|
)
|
||||||
argilla_transformation_object: Optional[Dict[str, Any]] = None
|
argilla_transformation_object: Optional[Dict[str, Any]] = None
|
||||||
_async_input_callback: List[Union[str, Callable, CustomLogger]] = (
|
_async_input_callback: List[Union[str, Callable, CustomLogger]] = (
|
||||||
[]
|
[]
|
||||||
|
@ -810,6 +813,7 @@ from .llms.oobabooga.chat.transformation import OobaboogaConfig
|
||||||
from .llms.maritalk import MaritalkConfig
|
from .llms.maritalk import MaritalkConfig
|
||||||
from .llms.openrouter.chat.transformation import OpenrouterConfig
|
from .llms.openrouter.chat.transformation import OpenrouterConfig
|
||||||
from .llms.anthropic.chat.transformation import AnthropicConfig
|
from .llms.anthropic.chat.transformation import AnthropicConfig
|
||||||
|
from .llms.anthropic.common_utils import AnthropicModelInfo
|
||||||
from .llms.groq.stt.transformation import GroqSTTConfig
|
from .llms.groq.stt.transformation import GroqSTTConfig
|
||||||
from .llms.anthropic.completion.transformation import AnthropicTextConfig
|
from .llms.anthropic.completion.transformation import AnthropicTextConfig
|
||||||
from .llms.triton.completion.transformation import TritonConfig
|
from .llms.triton.completion.transformation import TritonConfig
|
||||||
|
@ -845,6 +849,7 @@ from .llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import (
|
||||||
VertexGeminiConfig,
|
VertexGeminiConfig,
|
||||||
VertexGeminiConfig as VertexAIConfig,
|
VertexGeminiConfig as VertexAIConfig,
|
||||||
)
|
)
|
||||||
|
from .llms.gemini.common_utils import GeminiModelInfo
|
||||||
from .llms.gemini.chat.transformation import (
|
from .llms.gemini.chat.transformation import (
|
||||||
GoogleAIStudioGeminiConfig,
|
GoogleAIStudioGeminiConfig,
|
||||||
GoogleAIStudioGeminiConfig as GeminiConfig, # aliased to maintain backwards compatibility
|
GoogleAIStudioGeminiConfig as GeminiConfig, # aliased to maintain backwards compatibility
|
||||||
|
@ -947,6 +952,12 @@ openaiOSeriesConfig = OpenAIOSeriesConfig()
|
||||||
from .llms.openai.chat.gpt_transformation import (
|
from .llms.openai.chat.gpt_transformation import (
|
||||||
OpenAIGPTConfig,
|
OpenAIGPTConfig,
|
||||||
)
|
)
|
||||||
|
from .llms.openai.transcriptions.whisper_transformation import (
|
||||||
|
OpenAIWhisperAudioTranscriptionConfig,
|
||||||
|
)
|
||||||
|
from .llms.openai.transcriptions.gpt_transformation import (
|
||||||
|
OpenAIGPTAudioTranscriptionConfig,
|
||||||
|
)
|
||||||
|
|
||||||
openAIGPTConfig = OpenAIGPTConfig()
|
openAIGPTConfig = OpenAIGPTConfig()
|
||||||
from .llms.openai.chat.gpt_audio_transformation import (
|
from .llms.openai.chat.gpt_audio_transformation import (
|
||||||
|
@ -975,6 +986,7 @@ from .llms.fireworks_ai.embed.fireworks_ai_transformation import (
|
||||||
from .llms.friendliai.chat.transformation import FriendliaiChatConfig
|
from .llms.friendliai.chat.transformation import FriendliaiChatConfig
|
||||||
from .llms.jina_ai.embedding.transformation import JinaAIEmbeddingConfig
|
from .llms.jina_ai.embedding.transformation import JinaAIEmbeddingConfig
|
||||||
from .llms.xai.chat.transformation import XAIChatConfig
|
from .llms.xai.chat.transformation import XAIChatConfig
|
||||||
|
from .llms.xai.common_utils import XAIModelInfo
|
||||||
from .llms.volcengine import VolcEngineConfig
|
from .llms.volcengine import VolcEngineConfig
|
||||||
from .llms.codestral.completion.transformation import CodestralTextCompletionConfig
|
from .llms.codestral.completion.transformation import CodestralTextCompletionConfig
|
||||||
from .llms.azure.azure import (
|
from .llms.azure.azure import (
|
||||||
|
|
|
@ -88,16 +88,16 @@ class Cache:
|
||||||
s3_aws_session_token: Optional[str] = None,
|
s3_aws_session_token: Optional[str] = None,
|
||||||
s3_config: Optional[Any] = None,
|
s3_config: Optional[Any] = None,
|
||||||
s3_path: Optional[str] = None,
|
s3_path: Optional[str] = None,
|
||||||
redis_semantic_cache_use_async=False,
|
redis_semantic_cache_embedding_model: str = "text-embedding-ada-002",
|
||||||
redis_semantic_cache_embedding_model="text-embedding-ada-002",
|
redis_semantic_cache_index_name: Optional[str] = None,
|
||||||
redis_flush_size: Optional[int] = None,
|
redis_flush_size: Optional[int] = None,
|
||||||
redis_startup_nodes: Optional[List] = None,
|
redis_startup_nodes: Optional[List] = None,
|
||||||
disk_cache_dir=None,
|
disk_cache_dir: Optional[str] = None,
|
||||||
qdrant_api_base: Optional[str] = None,
|
qdrant_api_base: Optional[str] = None,
|
||||||
qdrant_api_key: Optional[str] = None,
|
qdrant_api_key: Optional[str] = None,
|
||||||
qdrant_collection_name: Optional[str] = None,
|
qdrant_collection_name: Optional[str] = None,
|
||||||
qdrant_quantization_config: Optional[str] = None,
|
qdrant_quantization_config: Optional[str] = None,
|
||||||
qdrant_semantic_cache_embedding_model="text-embedding-ada-002",
|
qdrant_semantic_cache_embedding_model: str = "text-embedding-ada-002",
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
|
@ -170,8 +170,8 @@ class Cache:
|
||||||
port=port,
|
port=port,
|
||||||
password=password,
|
password=password,
|
||||||
similarity_threshold=similarity_threshold,
|
similarity_threshold=similarity_threshold,
|
||||||
use_async=redis_semantic_cache_use_async,
|
|
||||||
embedding_model=redis_semantic_cache_embedding_model,
|
embedding_model=redis_semantic_cache_embedding_model,
|
||||||
|
index_name=redis_semantic_cache_index_name,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
elif type == LiteLLMCacheType.QDRANT_SEMANTIC:
|
elif type == LiteLLMCacheType.QDRANT_SEMANTIC:
|
||||||
|
|
|
@ -1,271 +1,284 @@
|
||||||
"""
|
"""
|
||||||
Redis Semantic Cache implementation
|
Redis Semantic Cache implementation for LiteLLM
|
||||||
|
|
||||||
Has 4 methods:
|
The RedisSemanticCache provides semantic caching functionality using Redis as a backend.
|
||||||
- set_cache
|
This cache stores responses based on the semantic similarity of prompts rather than
|
||||||
- get_cache
|
exact matching, allowing for more flexible caching of LLM responses.
|
||||||
- async_set_cache
|
|
||||||
- async_get_cache
|
This implementation uses RedisVL's SemanticCache to find semantically similar prompts
|
||||||
|
and their cached responses.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import ast
|
import ast
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
from typing import Any
|
import os
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
import litellm
|
import litellm
|
||||||
from litellm._logging import print_verbose
|
from litellm._logging import print_verbose
|
||||||
|
from litellm.litellm_core_utils.prompt_templates.common_utils import get_str_from_messages
|
||||||
from .base_cache import BaseCache
|
from .base_cache import BaseCache
|
||||||
|
|
||||||
|
|
||||||
class RedisSemanticCache(BaseCache):
|
class RedisSemanticCache(BaseCache):
|
||||||
|
"""
|
||||||
|
Redis-backed semantic cache for LLM responses.
|
||||||
|
|
||||||
|
This cache uses vector similarity to find semantically similar prompts that have been
|
||||||
|
previously sent to the LLM, allowing for cache hits even when prompts are not identical
|
||||||
|
but carry similar meaning.
|
||||||
|
"""
|
||||||
|
|
||||||
|
DEFAULT_REDIS_INDEX_NAME: str = "litellm_semantic_cache_index"
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
host=None,
|
host: Optional[str] = None,
|
||||||
port=None,
|
port: Optional[str] = None,
|
||||||
password=None,
|
password: Optional[str] = None,
|
||||||
redis_url=None,
|
redis_url: Optional[str] = None,
|
||||||
similarity_threshold=None,
|
similarity_threshold: Optional[float] = None,
|
||||||
use_async=False,
|
embedding_model: str = "text-embedding-ada-002",
|
||||||
embedding_model="text-embedding-ada-002",
|
index_name: Optional[str] = None,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
from redisvl.index import SearchIndex
|
|
||||||
|
|
||||||
print_verbose(
|
|
||||||
"redis semantic-cache initializing INDEX - litellm_semantic_cache_index"
|
|
||||||
)
|
|
||||||
if similarity_threshold is None:
|
|
||||||
raise Exception("similarity_threshold must be provided, passed None")
|
|
||||||
self.similarity_threshold = similarity_threshold
|
|
||||||
self.embedding_model = embedding_model
|
|
||||||
schema = {
|
|
||||||
"index": {
|
|
||||||
"name": "litellm_semantic_cache_index",
|
|
||||||
"prefix": "litellm",
|
|
||||||
"storage_type": "hash",
|
|
||||||
},
|
|
||||||
"fields": {
|
|
||||||
"text": [{"name": "response"}],
|
|
||||||
"vector": [
|
|
||||||
{
|
|
||||||
"name": "litellm_embedding",
|
|
||||||
"dims": 1536,
|
|
||||||
"distance_metric": "cosine",
|
|
||||||
"algorithm": "flat",
|
|
||||||
"datatype": "float32",
|
|
||||||
}
|
|
||||||
],
|
|
||||||
},
|
|
||||||
}
|
|
||||||
if redis_url is None:
|
|
||||||
# if no url passed, check if host, port and password are passed, if not raise an Exception
|
|
||||||
if host is None or port is None or password is None:
|
|
||||||
# try checking env for host, port and password
|
|
||||||
import os
|
|
||||||
|
|
||||||
host = os.getenv("REDIS_HOST")
|
|
||||||
port = os.getenv("REDIS_PORT")
|
|
||||||
password = os.getenv("REDIS_PASSWORD")
|
|
||||||
if host is None or port is None or password is None:
|
|
||||||
raise Exception("Redis host, port, and password must be provided")
|
|
||||||
|
|
||||||
redis_url = "redis://:" + password + "@" + host + ":" + port
|
|
||||||
print_verbose(f"redis semantic-cache redis_url: {redis_url}")
|
|
||||||
if use_async is False:
|
|
||||||
self.index = SearchIndex.from_dict(schema)
|
|
||||||
self.index.connect(redis_url=redis_url)
|
|
||||||
try:
|
|
||||||
self.index.create(overwrite=False) # don't overwrite existing index
|
|
||||||
except Exception as e:
|
|
||||||
print_verbose(f"Got exception creating semantic cache index: {str(e)}")
|
|
||||||
elif use_async is True:
|
|
||||||
schema["index"]["name"] = "litellm_semantic_cache_index_async"
|
|
||||||
self.index = SearchIndex.from_dict(schema)
|
|
||||||
self.index.connect(redis_url=redis_url, use_async=True)
|
|
||||||
|
|
||||||
#
|
|
||||||
def _get_cache_logic(self, cached_response: Any):
|
|
||||||
"""
|
"""
|
||||||
Common 'get_cache_logic' across sync + async redis client implementations
|
Initialize the Redis Semantic Cache.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
host: Redis host address
|
||||||
|
port: Redis port
|
||||||
|
password: Redis password
|
||||||
|
redis_url: Full Redis URL (alternative to separate host/port/password)
|
||||||
|
similarity_threshold: Threshold for semantic similarity (0.0 to 1.0)
|
||||||
|
where 1.0 requires exact matches and 0.0 accepts any match
|
||||||
|
embedding_model: Model to use for generating embeddings
|
||||||
|
index_name: Name for the Redis index
|
||||||
|
ttl: Default time-to-live for cache entries in seconds
|
||||||
|
**kwargs: Additional arguments passed to the Redis client
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
Exception: If similarity_threshold is not provided or required Redis
|
||||||
|
connection information is missing
|
||||||
|
"""
|
||||||
|
from redisvl.extensions.llmcache import SemanticCache
|
||||||
|
from redisvl.utils.vectorize import CustomTextVectorizer
|
||||||
|
|
||||||
|
if index_name is None:
|
||||||
|
index_name = self.DEFAULT_REDIS_INDEX_NAME
|
||||||
|
|
||||||
|
print_verbose(f"Redis semantic-cache initializing index - {index_name}")
|
||||||
|
|
||||||
|
# Validate similarity threshold
|
||||||
|
if similarity_threshold is None:
|
||||||
|
raise ValueError("similarity_threshold must be provided, passed None")
|
||||||
|
|
||||||
|
# Store configuration
|
||||||
|
self.similarity_threshold = similarity_threshold
|
||||||
|
|
||||||
|
# Convert similarity threshold [0,1] to distance threshold [0,2]
|
||||||
|
# For cosine distance: 0 = most similar, 2 = least similar
|
||||||
|
# While similarity: 1 = most similar, 0 = least similar
|
||||||
|
self.distance_threshold = 1 - similarity_threshold
|
||||||
|
self.embedding_model = embedding_model
|
||||||
|
|
||||||
|
# Set up Redis connection
|
||||||
|
if redis_url is None:
|
||||||
|
try:
|
||||||
|
# Attempt to use provided parameters or fallback to environment variables
|
||||||
|
host = host or os.environ['REDIS_HOST']
|
||||||
|
port = port or os.environ['REDIS_PORT']
|
||||||
|
password = password or os.environ['REDIS_PASSWORD']
|
||||||
|
except KeyError as e:
|
||||||
|
# Raise a more informative exception if any of the required keys are missing
|
||||||
|
missing_var = e.args[0]
|
||||||
|
raise ValueError(f"Missing required Redis configuration: {missing_var}. "
|
||||||
|
f"Provide {missing_var} or redis_url.") from e
|
||||||
|
|
||||||
|
redis_url = f"redis://:{password}@{host}:{port}"
|
||||||
|
|
||||||
|
print_verbose(f"Redis semantic-cache redis_url: {redis_url}")
|
||||||
|
|
||||||
|
# Initialize the Redis vectorizer and cache
|
||||||
|
cache_vectorizer = CustomTextVectorizer(self._get_embedding)
|
||||||
|
|
||||||
|
self.llmcache = SemanticCache(
|
||||||
|
name=index_name,
|
||||||
|
redis_url=redis_url,
|
||||||
|
vectorizer=cache_vectorizer,
|
||||||
|
distance_threshold=self.distance_threshold,
|
||||||
|
overwrite=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
def _get_ttl(self, **kwargs) -> Optional[int]:
|
||||||
|
"""
|
||||||
|
Get the TTL (time-to-live) value for cache entries.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
**kwargs: Keyword arguments that may contain a custom TTL
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Optional[int]: The TTL value in seconds, or None if no TTL should be applied
|
||||||
|
"""
|
||||||
|
ttl = kwargs.get("ttl")
|
||||||
|
if ttl is not None:
|
||||||
|
ttl = int(ttl)
|
||||||
|
return ttl
|
||||||
|
|
||||||
|
def _get_embedding(self, prompt: str) -> List[float]:
|
||||||
|
"""
|
||||||
|
Generate an embedding vector for the given prompt using the configured embedding model.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prompt: The text to generate an embedding for
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List[float]: The embedding vector
|
||||||
|
"""
|
||||||
|
# Create an embedding from prompt
|
||||||
|
embedding_response = litellm.embedding(
|
||||||
|
model=self.embedding_model,
|
||||||
|
input=prompt,
|
||||||
|
cache={"no-store": True, "no-cache": True},
|
||||||
|
)
|
||||||
|
embedding = embedding_response["data"][0]["embedding"]
|
||||||
|
return embedding
|
||||||
|
|
||||||
|
def _get_cache_logic(self, cached_response: Any) -> Any:
|
||||||
|
"""
|
||||||
|
Process the cached response to prepare it for use.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
cached_response: The raw cached response
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
The processed cache response, or None if input was None
|
||||||
"""
|
"""
|
||||||
if cached_response is None:
|
if cached_response is None:
|
||||||
return cached_response
|
return cached_response
|
||||||
|
|
||||||
# check if cached_response is bytes
|
# Convert bytes to string if needed
|
||||||
if isinstance(cached_response, bytes):
|
if isinstance(cached_response, bytes):
|
||||||
cached_response = cached_response.decode("utf-8")
|
cached_response = cached_response.decode("utf-8")
|
||||||
|
|
||||||
|
# Convert string representation to Python object
|
||||||
|
try:
|
||||||
|
cached_response = json.loads(cached_response)
|
||||||
|
except json.JSONDecodeError:
|
||||||
try:
|
try:
|
||||||
cached_response = json.loads(
|
|
||||||
cached_response
|
|
||||||
) # Convert string to dictionary
|
|
||||||
except Exception:
|
|
||||||
cached_response = ast.literal_eval(cached_response)
|
cached_response = ast.literal_eval(cached_response)
|
||||||
|
except (ValueError, SyntaxError) as e:
|
||||||
|
print_verbose(f"Error parsing cached response: {str(e)}")
|
||||||
|
return None
|
||||||
|
|
||||||
return cached_response
|
return cached_response
|
||||||
|
|
||||||
def set_cache(self, key, value, **kwargs):
|
def set_cache(self, key: str, value: Any, **kwargs) -> None:
|
||||||
import numpy as np
|
"""
|
||||||
|
Store a value in the semantic cache.
|
||||||
|
|
||||||
print_verbose(f"redis semantic-cache set_cache, kwargs: {kwargs}")
|
Args:
|
||||||
|
key: The cache key (not directly used in semantic caching)
|
||||||
# get the prompt
|
value: The response value to cache
|
||||||
messages = kwargs["messages"]
|
**kwargs: Additional arguments including 'messages' for the prompt
|
||||||
prompt = "".join(message["content"] for message in messages)
|
and optional 'ttl' for time-to-live
|
||||||
|
"""
|
||||||
# create an embedding for prompt
|
print_verbose(f"Redis semantic-cache set_cache, kwargs: {kwargs}")
|
||||||
embedding_response = litellm.embedding(
|
|
||||||
model=self.embedding_model,
|
|
||||||
input=prompt,
|
|
||||||
cache={"no-store": True, "no-cache": True},
|
|
||||||
)
|
|
||||||
|
|
||||||
# get the embedding
|
|
||||||
embedding = embedding_response["data"][0]["embedding"]
|
|
||||||
|
|
||||||
# make the embedding a numpy array, convert to bytes
|
|
||||||
embedding_bytes = np.array(embedding, dtype=np.float32).tobytes()
|
|
||||||
value = str(value)
|
|
||||||
assert isinstance(value, str)
|
|
||||||
|
|
||||||
new_data = [
|
|
||||||
{"response": value, "prompt": prompt, "litellm_embedding": embedding_bytes}
|
|
||||||
]
|
|
||||||
|
|
||||||
# Add more data
|
|
||||||
self.index.load(new_data)
|
|
||||||
|
|
||||||
return
|
|
||||||
|
|
||||||
def get_cache(self, key, **kwargs):
|
|
||||||
print_verbose(f"sync redis semantic-cache get_cache, kwargs: {kwargs}")
|
|
||||||
from redisvl.query import VectorQuery
|
|
||||||
|
|
||||||
# query
|
|
||||||
# get the messages
|
|
||||||
messages = kwargs["messages"]
|
|
||||||
prompt = "".join(message["content"] for message in messages)
|
|
||||||
|
|
||||||
# convert to embedding
|
|
||||||
embedding_response = litellm.embedding(
|
|
||||||
model=self.embedding_model,
|
|
||||||
input=prompt,
|
|
||||||
cache={"no-store": True, "no-cache": True},
|
|
||||||
)
|
|
||||||
|
|
||||||
# get the embedding
|
|
||||||
embedding = embedding_response["data"][0]["embedding"]
|
|
||||||
|
|
||||||
query = VectorQuery(
|
|
||||||
vector=embedding,
|
|
||||||
vector_field_name="litellm_embedding",
|
|
||||||
return_fields=["response", "prompt", "vector_distance"],
|
|
||||||
num_results=1,
|
|
||||||
)
|
|
||||||
|
|
||||||
results = self.index.query(query)
|
|
||||||
if results is None:
|
|
||||||
return None
|
|
||||||
if isinstance(results, list):
|
|
||||||
if len(results) == 0:
|
|
||||||
return None
|
|
||||||
|
|
||||||
vector_distance = results[0]["vector_distance"]
|
|
||||||
vector_distance = float(vector_distance)
|
|
||||||
similarity = 1 - vector_distance
|
|
||||||
cached_prompt = results[0]["prompt"]
|
|
||||||
|
|
||||||
# check similarity, if more than self.similarity_threshold, return results
|
|
||||||
print_verbose(
|
|
||||||
f"semantic cache: similarity threshold: {self.similarity_threshold}, similarity: {similarity}, prompt: {prompt}, closest_cached_prompt: {cached_prompt}"
|
|
||||||
)
|
|
||||||
if similarity > self.similarity_threshold:
|
|
||||||
# cache hit !
|
|
||||||
cached_value = results[0]["response"]
|
|
||||||
print_verbose(
|
|
||||||
f"got a cache hit, similarity: {similarity}, Current prompt: {prompt}, cached_prompt: {cached_prompt}"
|
|
||||||
)
|
|
||||||
return self._get_cache_logic(cached_response=cached_value)
|
|
||||||
else:
|
|
||||||
# cache miss !
|
|
||||||
return None
|
|
||||||
|
|
||||||
pass
|
|
||||||
|
|
||||||
async def async_set_cache(self, key, value, **kwargs):
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
from litellm.proxy.proxy_server import llm_model_list, llm_router
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await self.index.acreate(overwrite=False) # don't overwrite existing index
|
# Extract the prompt from messages
|
||||||
except Exception as e:
|
messages = kwargs.get("messages", [])
|
||||||
print_verbose(f"Got exception creating semantic cache index: {str(e)}")
|
if not messages:
|
||||||
print_verbose(f"async redis semantic-cache set_cache, kwargs: {kwargs}")
|
print_verbose("No messages provided for semantic caching")
|
||||||
|
|
||||||
# get the prompt
|
|
||||||
messages = kwargs["messages"]
|
|
||||||
prompt = "".join(message["content"] for message in messages)
|
|
||||||
# create an embedding for prompt
|
|
||||||
router_model_names = (
|
|
||||||
[m["model_name"] for m in llm_model_list]
|
|
||||||
if llm_model_list is not None
|
|
||||||
else []
|
|
||||||
)
|
|
||||||
if llm_router is not None and self.embedding_model in router_model_names:
|
|
||||||
user_api_key = kwargs.get("metadata", {}).get("user_api_key", "")
|
|
||||||
embedding_response = await llm_router.aembedding(
|
|
||||||
model=self.embedding_model,
|
|
||||||
input=prompt,
|
|
||||||
cache={"no-store": True, "no-cache": True},
|
|
||||||
metadata={
|
|
||||||
"user_api_key": user_api_key,
|
|
||||||
"semantic-cache-embedding": True,
|
|
||||||
"trace_id": kwargs.get("metadata", {}).get("trace_id", None),
|
|
||||||
},
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# convert to embedding
|
|
||||||
embedding_response = await litellm.aembedding(
|
|
||||||
model=self.embedding_model,
|
|
||||||
input=prompt,
|
|
||||||
cache={"no-store": True, "no-cache": True},
|
|
||||||
)
|
|
||||||
|
|
||||||
# get the embedding
|
|
||||||
embedding = embedding_response["data"][0]["embedding"]
|
|
||||||
|
|
||||||
# make the embedding a numpy array, convert to bytes
|
|
||||||
embedding_bytes = np.array(embedding, dtype=np.float32).tobytes()
|
|
||||||
value = str(value)
|
|
||||||
            prompt = get_str_from_messages(messages)
            value_str = str(value)

            # Get TTL and store in Redis semantic cache
            ttl = self._get_ttl(**kwargs)
            if ttl is not None:
                self.llmcache.store(prompt, value_str, ttl=int(ttl))
            else:
                self.llmcache.store(prompt, value_str)
        except Exception as e:
            print_verbose(f"Error setting {value_str} in the Redis semantic cache: {str(e)}")

    def get_cache(self, key: str, **kwargs) -> Any:
        """
        Retrieve a semantically similar cached response.

        Args:
            key: The cache key (not directly used in semantic caching)
            **kwargs: Additional arguments including 'messages' for the prompt

        Returns:
            The cached response if a semantically similar prompt is found, else None
        """
        print_verbose(f"Redis semantic-cache get_cache, kwargs: {kwargs}")

        try:
            # Extract the prompt from messages
            messages = kwargs.get("messages", [])
            if not messages:
                print_verbose("No messages provided for semantic cache lookup")
                return None

            prompt = get_str_from_messages(messages)
            # Check the cache for semantically similar prompts
            results = self.llmcache.check(prompt=prompt)

            # Return None if no similar prompts found
            if not results:
                return None

            # Process the best matching result
            cache_hit = results[0]
            vector_distance = float(cache_hit["vector_distance"])

            # Convert vector distance back to similarity score
            # For cosine distance: 0 = most similar, 2 = least similar
            # While similarity: 1 = most similar, 0 = least similar
            similarity = 1 - vector_distance

            cached_prompt = cache_hit["prompt"]
            cached_response = cache_hit["response"]

            print_verbose(
                f"Cache hit: similarity threshold: {self.similarity_threshold}, "
                f"actual similarity: {similarity}, "
                f"current prompt: {prompt}, "
                f"cached prompt: {cached_prompt}"
            )

            return self._get_cache_logic(cached_response=cached_response)
        except Exception as e:
            print_verbose(f"Error retrieving from Redis semantic cache: {str(e)}")

    async def _get_async_embedding(self, prompt: str, **kwargs) -> List[float]:
        """
        Asynchronously generate an embedding for the given prompt.

        Args:
            prompt: The text to generate an embedding for
            **kwargs: Additional arguments that may contain metadata

        Returns:
            List[float]: The embedding vector
        """
        from litellm.proxy.proxy_server import llm_model_list, llm_router

        # Route the embedding request through the proxy if appropriate
        router_model_names = (
            [m["model_name"] for m in llm_model_list]
            if llm_model_list is not None
            else []
        )

        try:
            if llm_router is not None and self.embedding_model in router_model_names:
                # Use the router for embedding generation
                user_api_key = kwargs.get("metadata", {}).get("user_api_key", "")
                embedding_response = await llm_router.aembedding(
                    model=self.embedding_model,
@ -278,60 +291,147 @@ class RedisSemanticCache(BaseCache):
                    },
                )
            else:
                # Generate embedding directly
                embedding_response = await litellm.aembedding(
                    model=self.embedding_model,
                    input=prompt,
                    cache={"no-store": True, "no-cache": True},
                )

            # Extract and return the embedding vector
            return embedding_response["data"][0]["embedding"]
        except Exception as e:
            print_verbose(f"Error generating async embedding: {str(e)}")
            raise ValueError(f"Failed to generate embedding: {str(e)}") from e

    async def async_set_cache(self, key: str, value: Any, **kwargs) -> None:
        """
        Asynchronously store a value in the semantic cache.

        Args:
            key: The cache key (not directly used in semantic caching)
            value: The response value to cache
            **kwargs: Additional arguments including 'messages' for the prompt
                and optional 'ttl' for time-to-live
        """
        print_verbose(f"Async Redis semantic-cache set_cache, kwargs: {kwargs}")

        try:
            # Extract the prompt from messages
            messages = kwargs.get("messages", [])
            if not messages:
                print_verbose("No messages provided for semantic caching")
                return

            prompt = get_str_from_messages(messages)
            value_str = str(value)

            # Generate embedding for the value (response) to cache
            prompt_embedding = await self._get_async_embedding(prompt, **kwargs)

            # Get TTL and store in Redis semantic cache
            ttl = self._get_ttl(**kwargs)
            if ttl is not None:
                await self.llmcache.astore(
                    prompt,
                    value_str,
                    vector=prompt_embedding,  # Pass through custom embedding
                    ttl=ttl
                )
            else:
                await self.llmcache.astore(
                    prompt,
                    value_str,
                    vector=prompt_embedding  # Pass through custom embedding
                )
        except Exception as e:
            print_verbose(f"Error in async_set_cache: {str(e)}")

    async def async_get_cache(self, key: str, **kwargs) -> Any:
        """
        Asynchronously retrieve a semantically similar cached response.

        Args:
            key: The cache key (not directly used in semantic caching)
            **kwargs: Additional arguments including 'messages' for the prompt

        Returns:
            The cached response if a semantically similar prompt is found, else None
        """
        print_verbose(f"Async Redis semantic-cache get_cache, kwargs: {kwargs}")

        try:
            # Extract the prompt from messages
            messages = kwargs.get("messages", [])
            if not messages:
                print_verbose("No messages provided for semantic cache lookup")
                kwargs.setdefault("metadata", {})["semantic-similarity"] = 0.0
                return None

            prompt = get_str_from_messages(messages)
            # Generate embedding for the prompt
            prompt_embedding = await self._get_async_embedding(prompt, **kwargs)

            # Check the cache for semantically similar prompts
            results = await self.llmcache.acheck(
                prompt=prompt,
                vector=prompt_embedding
            )

            # handle results / cache hit
            if not results:
                kwargs.setdefault("metadata", {})["semantic-similarity"] = 0.0  # TODO why here but not above??
                return None

            cache_hit = results[0]
            vector_distance = float(cache_hit["vector_distance"])

            # Convert vector distance back to similarity
            # For cosine distance: 0 = most similar, 2 = least similar
            # While similarity: 1 = most similar, 0 = least similar
            similarity = 1 - vector_distance

            cached_prompt = cache_hit["prompt"]
            cached_response = cache_hit["response"]

            # update kwargs["metadata"] with similarity, don't rewrite the original metadata
            kwargs.setdefault("metadata", {})["semantic-similarity"] = similarity

            print_verbose(
                f"Cache hit: similarity threshold: {self.similarity_threshold}, "
                f"actual similarity: {similarity}, "
                f"current prompt: {prompt}, "
                f"cached prompt: {cached_prompt}"
            )

            return self._get_cache_logic(cached_response=cached_response)
        except Exception as e:
            print_verbose(f"Error in async_get_cache: {str(e)}")
            kwargs.setdefault("metadata", {})["semantic-similarity"] = 0.0

    async def _index_info(self) -> Dict[str, Any]:
        """
        Get information about the Redis index.

        Returns:
            Dict[str, Any]: Information about the Redis index
        """
        aindex = await self.llmcache._get_async_index()
        return await aindex.info()

    async def async_set_cache_pipeline(self, cache_list: List[Tuple[str, Any]], **kwargs) -> None:
        """
        Asynchronously store multiple values in the semantic cache.

        Args:
            cache_list: List of (key, value) tuples to cache
            **kwargs: Additional arguments
        """
        try:
            tasks = []
            for val in cache_list:
                tasks.append(self.async_set_cache(val[0], val[1], **kwargs))
            await asyncio.gather(*tasks)
        except Exception as e:
            print_verbose(f"Error in async_set_cache_pipeline: {str(e)}")
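
For reference, a minimal sketch (not part of the diff) of the distance-to-similarity conversion used in the cache lookups above, assuming cosine vector distance in [0, 2] and an illustrative threshold of 0.8; the underlying semantic cache applies its own configured threshold when checking:

    # Illustrative only: mirrors the conversion done in get_cache/async_get_cache.
    def is_semantic_hit(vector_distance: float, similarity_threshold: float = 0.8) -> bool:
        # cosine distance: 0 = most similar, 2 = least similar
        similarity = 1 - vector_distance
        return similarity >= similarity_threshold

    print(is_semantic_hit(0.15))  # True  -> treated as a cache hit
    print(is_semantic_hit(0.90))  # False -> cache miss
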
@ -275,15 +275,13 @@ def cost_per_token(  # noqa: PLR0915
            custom_llm_provider=custom_llm_provider,
            prompt_characters=prompt_characters,
            completion_characters=completion_characters,
-           prompt_tokens=prompt_tokens,
-           completion_tokens=completion_tokens,
+           usage=usage_block,
        )
    elif cost_router == "cost_per_token":
        return google_cost_per_token(
            model=model_without_prefix,
            custom_llm_provider=custom_llm_provider,
-           prompt_tokens=prompt_tokens,
-           completion_tokens=completion_tokens,
+           usage=usage_block,
        )
    elif custom_llm_provider == "anthropic":
        return anthropic_cost_per_token(model=model, usage=usage_block)
@ -828,11 +826,14 @@ def get_response_cost_from_hidden_params(
        _hidden_params_dict = hidden_params

    additional_headers = _hidden_params_dict.get("additional_headers", {})
-   if additional_headers and "x-litellm-response-cost" in additional_headers:
-       response_cost = additional_headers["x-litellm-response-cost"]
+   if (
+       additional_headers
+       and "llm_provider-x-litellm-response-cost" in additional_headers
+   ):
+       response_cost = additional_headers["llm_provider-x-litellm-response-cost"]
        if response_cost is None:
            return None
-       return float(additional_headers["x-litellm-response-cost"])
+       return float(additional_headers["llm_provider-x-litellm-response-cost"])
    return None
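
A minimal sketch (not from the diff) of how the renamed header is read back out, assuming the hidden params carry provider headers as shown above; the cost value is made up:

    hidden_params = {
        "additional_headers": {"llm_provider-x-litellm-response-cost": "0.00042"}
    }
    additional_headers = hidden_params.get("additional_headers", {})
    if additional_headers and "llm_provider-x-litellm-response-cost" in additional_headers:
        # Same lookup the helper performs before falling back to None.
        print(float(additional_headers["llm_provider-x-litellm-response-cost"]))  # 0.00042
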
@ -10,13 +10,16 @@ import asyncio
import json
import os
import traceback
-from typing import TYPE_CHECKING, Any, Dict, List, Optional
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
+
+from litellm.types.utils import StandardLoggingPayload

if TYPE_CHECKING:
    from litellm.proxy._types import SpendLogsPayload
else:
    SpendLogsPayload = Any

+import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.llms.custom_httpx.http_handler import (
@ -61,7 +64,7 @@ class GcsPubSubLogger(CustomBatchLogger):
        self.flush_lock = asyncio.Lock()
        super().__init__(**kwargs, flush_lock=self.flush_lock)
        asyncio.create_task(self.periodic_flush())
-       self.log_queue: List[SpendLogsPayload] = []
+       self.log_queue: List[Union[SpendLogsPayload, StandardLoggingPayload]] = []

    async def construct_request_headers(self) -> Dict[str, str]:
        """Construct authorization headers using Vertex AI auth"""
@ -115,6 +118,10 @@ class GcsPubSubLogger(CustomBatchLogger):
        verbose_logger.debug(
            "PubSub: Logging - Enters logging function for model %s", kwargs
        )
+       standard_logging_payload = kwargs.get("standard_logging_object", None)
+
+       # Backwards compatibility with old logging payload
+       if litellm.gcs_pub_sub_use_v1 is True:
            spend_logs_payload = get_logging_payload(
                kwargs=kwargs,
                response_obj=response_obj,
@ -122,6 +129,9 @@ class GcsPubSubLogger(CustomBatchLogger):
                end_time=end_time,
            )
            self.log_queue.append(spend_logs_payload)
+       else:
+           # New logging payload, StandardLoggingPayload
+           self.log_queue.append(standard_logging_payload)

        if len(self.log_queue) >= self.batch_size:
            await self.async_send_batch()
@ -155,7 +165,7 @@ class GcsPubSubLogger(CustomBatchLogger):
        self.log_queue.clear()

    async def publish_message(
-       self, message: SpendLogsPayload
+       self, message: Union[SpendLogsPayload, StandardLoggingPayload]
    ) -> Optional[Dict[str, Any]]:
        """
        Publish message to Google Cloud Pub/Sub using REST API
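
A short sketch of the compatibility switch above; the assumption here is that leaving the flag unset routes logs through the new StandardLoggingPayload branch, while setting it restores the legacy format:

    import litellm

    # Opt back into the legacy SpendLogsPayload format for GCS Pub/Sub logging.
    litellm.gcs_pub_sub_use_v1 = True
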
@ -79,6 +79,22 @@ def get_supported_openai_params(  # noqa: PLR0915
    elif custom_llm_provider == "maritalk":
        return litellm.MaritalkConfig().get_supported_openai_params(model=model)
    elif custom_llm_provider == "openai":
+       if request_type == "transcription":
+           transcription_provider_config = (
+               litellm.ProviderConfigManager.get_provider_audio_transcription_config(
+                   model=model, provider=LlmProviders.OPENAI
+               )
+           )
+           if isinstance(
+               transcription_provider_config, litellm.OpenAIGPTAudioTranscriptionConfig
+           ):
+               return transcription_provider_config.get_supported_openai_params(
+                   model=model
+               )
+           else:
+               raise ValueError(
+                   f"Unsupported provider config: {transcription_provider_config} for model: {model}"
+               )
        return litellm.OpenAIConfig().get_supported_openai_params(model=model)
    elif custom_llm_provider == "azure":
        if litellm.AzureOpenAIO1Config().is_o_series_model(model=model):
@ -518,6 +518,16 @@ class Logging(LiteLLMLoggingBaseClass):
            }
            return data

+   def _get_masked_api_base(self, api_base: str) -> str:
+       if "key=" in api_base:
+           # Find the position of "key=" in the string
+           key_index = api_base.find("key=") + 4
+           # Mask the last 5 characters after "key="
+           masked_api_base = api_base[:key_index] + "*" * 5 + api_base[-4:]
+       else:
+           masked_api_base = api_base
+       return str(masked_api_base)

    def _pre_call(self, input, api_key, model=None, additional_args={}):
        """
        Common helper function across the sync + async pre-call function
@ -531,6 +541,9 @@ class Logging(LiteLLMLoggingBaseClass):
            model
        ):  # if model name was changes pre-call, overwrite the initial model call name with the new one
            self.model_call_details["model"] = model
+       self.model_call_details["litellm_params"]["api_base"] = (
+           self._get_masked_api_base(additional_args.get("api_base", ""))
+       )

    def pre_call(self, input, api_key, model=None, additional_args={}):  # noqa: PLR0915

@ -714,15 +727,6 @@ class Logging(LiteLLMLoggingBaseClass):
                headers = {}
            data = additional_args.get("complete_input_dict", {})
            api_base = str(additional_args.get("api_base", ""))
-           if "key=" in api_base:
-               # Find the position of "key=" in the string
-               key_index = api_base.find("key=") + 4
-               # Mask the last 5 characters after "key="
-               masked_api_base = api_base[:key_index] + "*" * 5 + api_base[-4:]
-           else:
-               masked_api_base = api_base
-           self.model_call_details["litellm_params"]["api_base"] = masked_api_base

            curl_command = self._get_request_curl_command(
                api_base=api_base,
                headers=headers,
@ -737,11 +741,12 @@ class Logging(LiteLLMLoggingBaseClass):
    def _get_request_curl_command(
        self, api_base: str, headers: Optional[dict], additional_args: dict, data: dict
    ) -> str:
+       masked_api_base = self._get_masked_api_base(api_base)
        if headers is None:
            headers = {}
        curl_command = "\n\nPOST Request Sent from LiteLLM:\n"
        curl_command += "curl -X POST \\\n"
-       curl_command += f"{api_base} \\\n"
+       curl_command += f"{masked_api_base} \\\n"
        masked_headers = self._get_masked_headers(headers)
        formatted_headers = " ".join(
            [f"-H '{k}: {v}'" for k, v in masked_headers.items()]
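
A quick sketch (not from the diff) of what the extracted _get_masked_api_base helper produces for a URL that carries the key as a query parameter; the sample key is made up:

    api_base = "https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent?key=abcd1234efgh"
    key_index = api_base.find("key=") + 4
    masked = api_base[:key_index] + "*" * 5 + api_base[-4:]
    print(masked)  # ...generateContent?key=*****efgh
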
@ -1,7 +1,7 @@
# What is this?
## Helper utilities for cost_per_token()

-from typing import Optional, Tuple
+from typing import Optional, Tuple, cast

import litellm
from litellm import verbose_logger

@ -143,26 +143,50 @@ def generic_cost_per_token(
    ### Cost of processing (non-cache hit + cache hit) + Cost of cache-writing (cache writing)
    prompt_cost = 0.0
    ### PROCESSING COST
-   non_cache_hit_tokens = usage.prompt_tokens
+   text_tokens = usage.prompt_tokens
    cache_hit_tokens = 0
-   if usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens:
-       cache_hit_tokens = usage.prompt_tokens_details.cached_tokens
-       non_cache_hit_tokens = non_cache_hit_tokens - cache_hit_tokens
+   audio_tokens = 0
+   if usage.prompt_tokens_details:
+       cache_hit_tokens = (
+           cast(
+               Optional[int], getattr(usage.prompt_tokens_details, "cached_tokens", 0)
+           )
+           or 0
+       )
+       text_tokens = (
+           cast(
+               Optional[int], getattr(usage.prompt_tokens_details, "text_tokens", None)
+           )
+           or 0  # default to prompt tokens, if this field is not set
+       )
+       audio_tokens = (
+           cast(Optional[int], getattr(usage.prompt_tokens_details, "audio_tokens", 0))
+           or 0
+       )
+
+   ## EDGE CASE - text tokens not set inside PromptTokensDetails
+   if text_tokens == 0:
+       text_tokens = usage.prompt_tokens - cache_hit_tokens - audio_tokens

    prompt_base_cost = _get_prompt_token_base_cost(model_info=model_info, usage=usage)

-   prompt_cost = float(non_cache_hit_tokens) * prompt_base_cost
+   prompt_cost = float(text_tokens) * prompt_base_cost

    _cache_read_input_token_cost = model_info.get("cache_read_input_token_cost")

+   ### CACHE READ COST
    if (
        _cache_read_input_token_cost is not None
-       and usage.prompt_tokens_details
-       and usage.prompt_tokens_details.cached_tokens
+       and cache_hit_tokens is not None
+       and cache_hit_tokens > 0
    ):
-       prompt_cost += (
-           float(usage.prompt_tokens_details.cached_tokens)
-           * _cache_read_input_token_cost
-       )
+       prompt_cost += float(cache_hit_tokens) * _cache_read_input_token_cost
+
+   ### AUDIO COST
+   audio_token_cost = model_info.get("input_cost_per_audio_token")
+   if audio_token_cost is not None and audio_tokens is not None and audio_tokens > 0:
+       prompt_cost += float(audio_tokens) * audio_token_cost

    ### CACHE WRITING COST
    _cache_creation_input_token_cost = model_info.get("cache_creation_input_token_cost")

@ -175,6 +199,37 @@ def generic_cost_per_token(
    completion_base_cost = _get_completion_token_base_cost(
        model_info=model_info, usage=usage
    )
-   completion_cost = usage["completion_tokens"] * completion_base_cost
+   text_tokens = usage.completion_tokens
+   audio_tokens = 0
+   if usage.completion_tokens_details is not None:
+       audio_tokens = (
+           cast(
+               Optional[int],
+               getattr(usage.completion_tokens_details, "audio_tokens", 0),
+           )
+           or 0
+       )
+       text_tokens = (
+           cast(
+               Optional[int],
+               getattr(usage.completion_tokens_details, "text_tokens", None),
+           )
+           or usage.completion_tokens  # default to completion tokens, if this field is not set
+       )
+
+   ## TEXT COST
+   completion_cost = float(text_tokens) * completion_base_cost
+
+   _output_cost_per_audio_token: Optional[float] = model_info.get(
+       "output_cost_per_audio_token"
+   )
+
+   ## AUDIO COST
+   if (
+       _output_cost_per_audio_token is not None
+       and audio_tokens is not None
+       and audio_tokens > 0
+   ):
+       completion_cost += float(audio_tokens) * _output_cost_per_audio_token

    return prompt_cost, completion_cost
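
A worked sketch of the prompt-side arithmetic above, using hypothetical per-token prices (not taken from the diff): text, cached, and audio prompt tokens are each billed at their own rate.

    # Hypothetical model_info values, for illustration only.
    input_cost_per_token = 1e-6
    cache_read_input_token_cost = 2.5e-7
    input_cost_per_audio_token = 4e-6

    prompt_tokens = 1000      # total prompt tokens
    cache_hit_tokens = 400    # prompt_tokens_details.cached_tokens
    audio_tokens = 100        # prompt_tokens_details.audio_tokens
    # Edge-case fallback from the code above when text_tokens is not reported:
    text_tokens = prompt_tokens - cache_hit_tokens - audio_tokens  # 500

    prompt_cost = (
        text_tokens * input_cost_per_token
        + cache_hit_tokens * cache_read_input_token_cost
        + audio_tokens * input_cost_per_audio_token
    )
    print(prompt_cost)  # 0.001
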
@ -138,13 +138,22 @@ class ModelParamHelper:
                TranscriptionCreateParamsNonStreaming,
                TranscriptionCreateParamsStreaming,
            )
-           non_streaming_kwargs = set(getattr(TranscriptionCreateParamsNonStreaming, "__annotations__", {}).keys())
-           streaming_kwargs = set(getattr(TranscriptionCreateParamsStreaming, "__annotations__", {}).keys())
+           non_streaming_kwargs = set(
+               getattr(
+                   TranscriptionCreateParamsNonStreaming, "__annotations__", {}
+               ).keys()
+           )
+           streaming_kwargs = set(
+               getattr(
+                   TranscriptionCreateParamsStreaming, "__annotations__", {}
+               ).keys()
+           )

            all_transcription_kwargs = non_streaming_kwargs.union(streaming_kwargs)
            return all_transcription_kwargs
        except Exception as e:
-           verbose_logger.warning("Error getting transcription kwargs %s", str(e))
+           verbose_logger.debug("Error getting transcription kwargs %s", str(e))
            return set()

    @staticmethod
@ -2,11 +2,14 @@
This file contains common utils for anthropic calls.
"""

-from typing import Optional, Union
+from typing import List, Optional, Union

import httpx

+import litellm
+from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
from litellm.llms.base_llm.chat.transformation import BaseLLMException
+from litellm.secret_managers.main import get_secret_str


class AnthropicError(BaseLLMException):
@ -19,6 +22,54 @@ class AnthropicError(BaseLLMException):
        super().__init__(status_code=status_code, message=message, headers=headers)


+class AnthropicModelInfo(BaseLLMModelInfo):
+    @staticmethod
+    def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
+        return (
+            api_base
+            or get_secret_str("ANTHROPIC_API_BASE")
+            or "https://api.anthropic.com"
+        )
+
+    @staticmethod
+    def get_api_key(api_key: Optional[str] = None) -> Optional[str]:
+        return api_key or get_secret_str("ANTHROPIC_API_KEY")
+
+    @staticmethod
+    def get_base_model(model: Optional[str] = None) -> Optional[str]:
+        return model.replace("anthropic/", "") if model else None
+
+    def get_models(
+        self, api_key: Optional[str] = None, api_base: Optional[str] = None
+    ) -> List[str]:
+        api_base = AnthropicModelInfo.get_api_base(api_base)
+        api_key = AnthropicModelInfo.get_api_key(api_key)
+        if api_base is None or api_key is None:
+            raise ValueError(
+                "ANTHROPIC_API_BASE or ANTHROPIC_API_KEY is not set. Please set the environment variable, to query Anthropic's `/models` endpoint."
+            )
+        response = litellm.module_level_client.get(
+            url=f"{api_base}/v1/models",
+            headers={"x-api-key": api_key, "anthropic-version": "2023-06-01"},
+        )
+
+        try:
+            response.raise_for_status()
+        except httpx.HTTPStatusError:
+            raise Exception(
+                f"Failed to fetch models from Anthropic. Status code: {response.status_code}, Response: {response.text}"
+            )
+
+        models = response.json()["data"]
+
+        litellm_model_names = []
+        for model in models:
+            stripped_model_name = model["id"]
+            litellm_model_name = "anthropic/" + stripped_model_name
+            litellm_model_names.append(litellm_model_name)
+        return litellm_model_names
+
+
def process_anthropic_headers(headers: Union[httpx.Headers, dict]) -> dict:
    openai_headers = {}
    if "anthropic-ratelimit-requests-limit" in headers:
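
A short usage sketch for the new AnthropicModelInfo helper, assuming ANTHROPIC_API_KEY is set in the environment and that the class lives in the anthropic common_utils module shown above; the printed model ids are illustrative:

    from litellm.llms.anthropic.common_utils import AnthropicModelInfo  # assumed module path

    anthropic_info = AnthropicModelInfo()
    models = anthropic_info.get_models()  # GET https://api.anthropic.com/v1/models
    print(models)  # e.g. ["anthropic/claude-3-5-sonnet-20241022", ...]
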
@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any, List, Optional
+from typing import TYPE_CHECKING, Any, List, Optional, Union

import httpx

@ -8,7 +8,7 @@ from litellm.types.llms.openai import (
    AllMessageValues,
    OpenAIAudioTranscriptionOptionalParams,
)
-from litellm.types.utils import ModelResponse
+from litellm.types.utils import FileTypes, ModelResponse

if TYPE_CHECKING:
    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj

@ -42,6 +42,18 @@ class BaseAudioTranscriptionConfig(BaseConfig, ABC):
        """
        return api_base or ""

+    @abstractmethod
+    def transform_audio_transcription_request(
+        self,
+        model: str,
+        audio_file: FileTypes,
+        optional_params: dict,
+        litellm_params: dict,
+    ) -> Union[dict, bytes]:
+        raise NotImplementedError(
+            "AudioTranscriptionConfig needs a request transformation for audio transcription models"
+        )
+
    def transform_request(
        self,
        model: str,
@ -19,11 +19,19 @@ class BaseLLMModelInfo(ABC):
        self,
        model: str,
    ) -> Optional[ProviderSpecificModelInfo]:
+        """
+        Default values all models of this provider support.
+        """
        return None

    @abstractmethod
-    def get_models(self) -> List[str]:
-        pass
+    def get_models(
+        self, api_key: Optional[str] = None, api_base: Optional[str] = None
+    ) -> List[str]:
+        """
+        Returns a list of models supported by this provider.
+        """
+        return []

    @staticmethod
    @abstractmethod
@ -1274,13 +1274,6 @@ class AWSEventStreamDecoder:
    def converse_chunk_parser(self, chunk_data: dict) -> ModelResponseStream:
        try:
            verbose_logger.debug("\n\nRaw Chunk: {}\n\n".format(chunk_data))
-            chunk_data["usage"] = {
-                "inputTokens": 3,
-                "outputTokens": 392,
-                "totalTokens": 2191,
-                "cacheReadInputTokens": 1796,
-                "cacheWriteInputTokens": 0,
-            }
            text = ""
            tool_use: Optional[ChatCompletionToolCallChunk] = None
            finish_reason = ""
@ -5,7 +5,8 @@ from openai.types.image import Image

from litellm.types.llms.bedrock import (
    AmazonNovaCanvasTextToImageRequest, AmazonNovaCanvasTextToImageResponse,
-    AmazonNovaCanvasTextToImageParams, AmazonNovaCanvasRequestBase,
+    AmazonNovaCanvasTextToImageParams, AmazonNovaCanvasRequestBase, AmazonNovaCanvasColorGuidedGenerationParams,
+    AmazonNovaCanvasColorGuidedRequest,
)
from litellm.types.utils import ImageResponse

@ -69,6 +70,13 @@ class AmazonNovaCanvasConfig:
            text_to_image_params = AmazonNovaCanvasTextToImageParams(**text_to_image_params)
            return AmazonNovaCanvasTextToImageRequest(textToImageParams=text_to_image_params, taskType=task_type,
                                                      imageGenerationConfig=image_generation_config)
+        if task_type == "COLOR_GUIDED_GENERATION":
+            color_guided_generation_params = image_generation_config.pop("colorGuidedGenerationParams", {})
+            color_guided_generation_params = {"text": text, **color_guided_generation_params}
+            color_guided_generation_params = AmazonNovaCanvasColorGuidedGenerationParams(**color_guided_generation_params)
+            return AmazonNovaCanvasColorGuidedRequest(taskType=task_type,
+                                                      colorGuidedGenerationParams=color_guided_generation_params,
+                                                      imageGenerationConfig=image_generation_config)
        raise NotImplementedError(f"Task type {task_type} is not supported")

    @classmethod
@ -1,4 +1,3 @@
-import io
import json
from typing import TYPE_CHECKING, Any, Coroutine, Dict, Optional, Tuple, Union

@ -8,6 +7,9 @@ import litellm
import litellm.litellm_core_utils
import litellm.types
import litellm.types.utils
+from litellm.llms.base_llm.audio_transcription.transformation import (
+    BaseAudioTranscriptionConfig,
+)
from litellm.llms.base_llm.chat.transformation import BaseConfig
from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig
from litellm.llms.base_llm.rerank.transformation import BaseRerankConfig

@ -852,54 +854,12 @@ class BaseLLMHTTPHandler:
            request_data=request_data,
        )

-    def handle_audio_file(self, audio_file: FileTypes) -> bytes:
-        """
-        Processes the audio file input based on its type and returns the binary data.
-
-        Args:
-            audio_file: Can be a file path (str), a tuple (filename, file_content), or binary data (bytes).
-
-        Returns:
-            The binary data of the audio file.
-        """
-        binary_data: bytes  # Explicitly declare the type
-
-        # Handle the audio file based on type
-        if isinstance(audio_file, str):
-            # If it's a file path
-            with open(audio_file, "rb") as f:
-                binary_data = f.read()  # `f.read()` always returns `bytes`
-        elif isinstance(audio_file, tuple):
-            # Handle tuple case
-            _, file_content = audio_file[:2]
-            if isinstance(file_content, str):
-                with open(file_content, "rb") as f:
-                    binary_data = f.read()  # `f.read()` always returns `bytes`
-            elif isinstance(file_content, bytes):
-                binary_data = file_content
-            else:
-                raise TypeError(
-                    f"Unexpected type in tuple: {type(file_content)}. Expected str or bytes."
-                )
-        elif isinstance(audio_file, bytes):
-            # Assume it's already binary data
-            binary_data = audio_file
-        elif isinstance(audio_file, io.BufferedReader) or isinstance(
-            audio_file, io.BytesIO
-        ):
-            # Handle file-like objects
-            binary_data = audio_file.read()
-
-        else:
-            raise TypeError(f"Unsupported type for audio_file: {type(audio_file)}")
-
-        return binary_data
-
    def audio_transcriptions(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
+        litellm_params: dict,
        model_response: TranscriptionResponse,
        timeout: float,
        max_retries: int,
@ -910,11 +870,8 @@ class BaseLLMHTTPHandler:
        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
        atranscription: bool = False,
        headers: dict = {},
-        litellm_params: dict = {},
+        provider_config: Optional[BaseAudioTranscriptionConfig] = None,
    ) -> TranscriptionResponse:
-        provider_config = ProviderConfigManager.get_provider_audio_transcription_config(
-            model=model, provider=litellm.LlmProviders(custom_llm_provider)
-        )
        if provider_config is None:
            raise ValueError(
                f"No provider config found for model: {model} and provider: {custom_llm_provider}"
@ -938,7 +895,18 @@ class BaseLLMHTTPHandler:
        )

        # Handle the audio file based on type
-        binary_data = self.handle_audio_file(audio_file)
+        data = provider_config.transform_audio_transcription_request(
+            model=model,
+            audio_file=audio_file,
+            optional_params=optional_params,
+            litellm_params=litellm_params,
+        )
+        binary_data: Optional[bytes] = None
+        json_data: Optional[dict] = None
+        if isinstance(data, bytes):
+            binary_data = data
+        else:
+            json_data = data

        try:
            # Make the POST request
@ -946,6 +914,7 @@ class BaseLLMHTTPHandler:
                url=complete_url,
                headers=headers,
                content=binary_data,
+                json=json_data,
                timeout=timeout,
            )
        except Exception as e:
@ -2,6 +2,7 @@
Translates from OpenAI's `/v1/audio/transcriptions` to Deepgram's `/v1/listen`
"""

+import io
from typing import List, Optional, Union

from httpx import Headers, Response

@ -12,7 +13,7 @@ from litellm.types.llms.openai import (
    AllMessageValues,
    OpenAIAudioTranscriptionOptionalParams,
)
-from litellm.types.utils import TranscriptionResponse
+from litellm.types.utils import FileTypes, TranscriptionResponse

from ...base_llm.audio_transcription.transformation import (
    BaseAudioTranscriptionConfig,

@ -47,6 +48,55 @@ class DeepgramAudioTranscriptionConfig(BaseAudioTranscriptionConfig):
            message=error_message, status_code=status_code, headers=headers
        )

+    def transform_audio_transcription_request(
+        self,
+        model: str,
+        audio_file: FileTypes,
+        optional_params: dict,
+        litellm_params: dict,
+    ) -> Union[dict, bytes]:
+        """
+        Processes the audio file input based on its type and returns the binary data.
+
+        Args:
+            audio_file: Can be a file path (str), a tuple (filename, file_content), or binary data (bytes).
+
+        Returns:
+            The binary data of the audio file.
+        """
+        binary_data: bytes  # Explicitly declare the type
+
+        # Handle the audio file based on type
+        if isinstance(audio_file, str):
+            # If it's a file path
+            with open(audio_file, "rb") as f:
+                binary_data = f.read()  # `f.read()` always returns `bytes`
+        elif isinstance(audio_file, tuple):
+            # Handle tuple case
+            _, file_content = audio_file[:2]
+            if isinstance(file_content, str):
+                with open(file_content, "rb") as f:
+                    binary_data = f.read()  # `f.read()` always returns `bytes`
+            elif isinstance(file_content, bytes):
+                binary_data = file_content
+            else:
+                raise TypeError(
+                    f"Unexpected type in tuple: {type(file_content)}. Expected str or bytes."
+                )
+        elif isinstance(audio_file, bytes):
+            # Assume it's already binary data
+            binary_data = audio_file
+        elif isinstance(audio_file, io.BufferedReader) or isinstance(
+            audio_file, io.BytesIO
+        ):
+            # Handle file-like objects
+            binary_data = audio_file.read()
+
+        else:
+            raise TypeError(f"Unsupported type for audio_file: {type(audio_file)}")
+
+        return binary_data
+
    def transform_audio_transcription_response(
        self,
        model: str,
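
A small sketch of what the Deepgram transformation above returns for a couple of audio_file shapes; the model name and file name are placeholders:

    cfg = DeepgramAudioTranscriptionConfig()
    raw = b"RIFF....WAVE"  # already-binary audio content

    # Raw bytes are passed through unchanged.
    assert cfg.transform_audio_transcription_request(
        model="deepgram/nova-2", audio_file=raw, optional_params={}, litellm_params={}
    ) == raw

    # A (filename, bytes) tuple is unpacked and its bytes are returned.
    assert cfg.transform_audio_transcription_request(
        model="deepgram/nova-2",
        audio_file=("speech.wav", raw),
        optional_params={},
        litellm_params={},
    ) == raw
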
@ -2,27 +2,16 @@ from typing import List

from litellm.types.llms.openai import OpenAIAudioTranscriptionOptionalParams

-from ...base_llm.audio_transcription.transformation import BaseAudioTranscriptionConfig
+from ...openai.transcriptions.whisper_transformation import (
+    OpenAIWhisperAudioTranscriptionConfig,
+)
from ..common_utils import FireworksAIMixin


class FireworksAIAudioTranscriptionConfig(
-    FireworksAIMixin, BaseAudioTranscriptionConfig
+    FireworksAIMixin, OpenAIWhisperAudioTranscriptionConfig
):
    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAIAudioTranscriptionOptionalParams]:
        return ["language", "prompt", "response_format", "timestamp_granularities"]
-
-    def map_openai_params(
-        self,
-        non_default_params: dict,
-        optional_params: dict,
-        model: str,
-        drop_params: bool,
-    ) -> dict:
-        supported_params = self.get_supported_openai_params(model)
-        for k, v in non_default_params.items():
-            if k in supported_params:
-                optional_params[k] = v
-        return optional_params
litellm/llms/gemini/common_utils.py (new file, 52 lines)
@ -0,0 +1,52 @@
from typing import List, Optional

import litellm
from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
from litellm.secret_managers.main import get_secret_str


class GeminiModelInfo(BaseLLMModelInfo):
    @staticmethod
    def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
        return (
            api_base
            or get_secret_str("GEMINI_API_BASE")
            or "https://generativelanguage.googleapis.com/v1beta"
        )

    @staticmethod
    def get_api_key(api_key: Optional[str] = None) -> Optional[str]:
        return api_key or (get_secret_str("GEMINI_API_KEY"))

    @staticmethod
    def get_base_model(model: str) -> Optional[str]:
        return model.replace("gemini/", "")

    def get_models(
        self, api_key: Optional[str] = None, api_base: Optional[str] = None
    ) -> List[str]:

        api_base = GeminiModelInfo.get_api_base(api_base)
        api_key = GeminiModelInfo.get_api_key(api_key)
        if api_base is None or api_key is None:
            raise ValueError(
                "GEMINI_API_BASE or GEMINI_API_KEY is not set. Please set the environment variable, to query Gemini's `/models` endpoint."
            )

        response = litellm.module_level_client.get(
            url=f"{api_base}/models?key={api_key}",
        )

        if response.status_code != 200:
            raise ValueError(
                f"Failed to fetch models from Gemini. Status code: {response.status_code}, Response: {response.json()}"
            )

        models = response.json()["models"]

        litellm_model_names = []
        for model in models:
            stripped_model_name = model["name"].strip("models/")
            litellm_model_name = "gemini/" + stripped_model_name
            litellm_model_names.append(litellm_model_name)
        return litellm_model_names
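
A usage sketch for the new GeminiModelInfo helper, assuming GEMINI_API_KEY is set in the environment; the printed model names are illustrative:

    from litellm.llms.gemini.common_utils import GeminiModelInfo

    gemini_info = GeminiModelInfo()
    models = gemini_info.get_models()  # GET {api_base}/models?key=...
    print(models)  # e.g. ["gemini/gemini-1.5-pro", "gemini/gemini-1.5-flash", ...]
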
@ -80,6 +80,7 @@ class MistralConfig(OpenAIGPTConfig):
            "temperature",
            "top_p",
            "max_tokens",
+            "max_completion_tokens",
            "tools",
            "tool_choice",
            "seed",
@ -105,6 +106,10 @@ class MistralConfig(OpenAIGPTConfig):
        for param, value in non_default_params.items():
            if param == "max_tokens":
                optional_params["max_tokens"] = value
+            if (
+                param == "max_completion_tokens"
+            ):  # max_completion_tokens should take priority
+                optional_params["max_tokens"] = value
            if param == "tools":
                optional_params["tools"] = value
            if param == "stream" and value is True:
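
A quick sketch of the mapping above: `max_completion_tokens` is translated to Mistral's `max_tokens` and wins over a plain `max_tokens` when both are supplied. This assumes map_openai_params keeps the usual (non_default_params, optional_params, model, drop_params) signature:

    import litellm

    cfg = litellm.MistralConfig()
    optional_params = cfg.map_openai_params(
        non_default_params={"max_tokens": 256, "max_completion_tokens": 512},
        optional_params={},
        model="mistral/mistral-large-latest",  # illustrative model name
        drop_params=False,
    )
    print(optional_params)  # {"max_tokens": 512}
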
@ -6,6 +6,7 @@ Helper util for handling openai-specific cost calculation
from typing import Literal, Optional, Tuple

from litellm._logging import verbose_logger
+from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token
from litellm.types.utils import CallTypes, Usage
from litellm.utils import get_model_info

@ -28,52 +29,53 @@ def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
    """
    ## CALCULATE INPUT COST
    return generic_cost_per_token(
        model=model, usage=usage, custom_llm_provider="openai"
    )
    # ### Non-cached text tokens
    # non_cached_text_tokens = usage.prompt_tokens
    # cached_tokens: Optional[int] = None
    # if usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens:
    #     cached_tokens = usage.prompt_tokens_details.cached_tokens
    #     non_cached_text_tokens = non_cached_text_tokens - cached_tokens
    # prompt_cost: float = non_cached_text_tokens * model_info["input_cost_per_token"]
    # ## Prompt Caching cost calculation
    # if model_info.get("cache_read_input_token_cost") is not None and cached_tokens:
    #     # Note: We read ._cache_read_input_tokens from the Usage - since cost_calculator.py standardizes the cache read tokens on usage._cache_read_input_tokens
    #     prompt_cost += cached_tokens * (
    #         model_info.get("cache_read_input_token_cost", 0) or 0
    #     )

    # _audio_tokens: Optional[int] = (
    #     usage.prompt_tokens_details.audio_tokens
    #     if usage.prompt_tokens_details is not None
    #     else None
    # )
    # _audio_cost_per_token: Optional[float] = model_info.get(
    #     "input_cost_per_audio_token"
    # )
    # if _audio_tokens is not None and _audio_cost_per_token is not None:
    #     audio_cost: float = _audio_tokens * _audio_cost_per_token
    #     prompt_cost += audio_cost

    # ## CALCULATE OUTPUT COST
    # completion_cost: float = (
    #     usage["completion_tokens"] * model_info["output_cost_per_token"]
    # )
    # _output_cost_per_audio_token: Optional[float] = model_info.get(
    #     "output_cost_per_audio_token"
    # )
    # _output_audio_tokens: Optional[int] = (
    #     usage.completion_tokens_details.audio_tokens
    #     if usage.completion_tokens_details is not None
    #     else None
    # )
    # if _output_cost_per_audio_token is not None and _output_audio_tokens is not None:
    #     audio_cost = _output_audio_tokens * _output_cost_per_audio_token
    #     completion_cost += audio_cost

    # return prompt_cost, completion_cost


def cost_per_second(
litellm/llms/openai/transcriptions/gpt_transformation.py (new file, 34 lines)
@ -0,0 +1,34 @@
from typing import List

from litellm.types.llms.openai import OpenAIAudioTranscriptionOptionalParams
from litellm.types.utils import FileTypes

from .whisper_transformation import OpenAIWhisperAudioTranscriptionConfig


class OpenAIGPTAudioTranscriptionConfig(OpenAIWhisperAudioTranscriptionConfig):
    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAIAudioTranscriptionOptionalParams]:
        """
        Get the supported OpenAI params for the `gpt-4o-transcribe` models
        """
        return [
            "language",
            "prompt",
            "response_format",
            "temperature",
            "include",
        ]

    def transform_audio_transcription_request(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
    ) -> dict:
        """
        Transform the audio transcription request
        """
        return {"model": model, "file": audio_file, **optional_params}
@ -7,6 +7,9 @@ from pydantic import BaseModel
import litellm
from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_name
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.llms.base_llm.audio_transcription.transformation import (
+    BaseAudioTranscriptionConfig,
+)
from litellm.types.utils import FileTypes
from litellm.utils import (
    TranscriptionResponse,
@ -75,6 +78,7 @@ class OpenAIAudioTranscription(OpenAIChatCompletion):
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
+        litellm_params: dict,
        model_response: TranscriptionResponse,
        timeout: float,
        max_retries: int,
@ -83,16 +87,24 @@ class OpenAIAudioTranscription(OpenAIChatCompletion):
        api_base: Optional[str],
        client=None,
        atranscription: bool = False,
+        provider_config: Optional[BaseAudioTranscriptionConfig] = None,
    ) -> TranscriptionResponse:
-        data = {"model": model, "file": audio_file, **optional_params}
-
-        if "response_format" not in data or (
-            data["response_format"] == "text" or data["response_format"] == "json"
-        ):
-            data["response_format"] = (
-                "verbose_json"  # ensures 'duration' is received - used for cost calculation
-            )
+        """
+        Handle audio transcription request
+        """
+        if provider_config is not None:
+            data = provider_config.transform_audio_transcription_request(
+                model=model,
+                audio_file=audio_file,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
+            )
+            if isinstance(data, bytes):
+                raise ValueError("OpenAI transformation route requires a dict")
+        else:
+            data = {"model": model, "file": audio_file, **optional_params}

        if atranscription is True:
            return self.async_audio_transcriptions(  # type: ignore
                audio_file=audio_file,
litellm/llms/openai/transcriptions/whisper_transformation.py (new file, 97 lines)
@@ -0,0 +1,97 @@
from typing import List, Optional, Union

from httpx import Headers

from litellm.llms.base_llm.audio_transcription.transformation import (
    BaseAudioTranscriptionConfig,
)
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import (
    AllMessageValues,
    OpenAIAudioTranscriptionOptionalParams,
)
from litellm.types.utils import FileTypes

from ..common_utils import OpenAIError


class OpenAIWhisperAudioTranscriptionConfig(BaseAudioTranscriptionConfig):
    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAIAudioTranscriptionOptionalParams]:
        """
        Get the supported OpenAI params for the `whisper-1` models
        """
        return [
            "language",
            "prompt",
            "response_format",
            "temperature",
            "timestamp_granularities",
        ]

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        """
        Map the OpenAI params to the Whisper params
        """
        supported_params = self.get_supported_openai_params(model)
        for k, v in non_default_params.items():
            if k in supported_params:
                optional_params[k] = v
        return optional_params

    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        api_key = api_key or get_secret_str("OPENAI_API_KEY")

        auth_header = {
            "Authorization": f"Bearer {api_key}",
        }

        headers.update(auth_header)
        return headers

    def transform_audio_transcription_request(
        self,
        model: str,
        audio_file: FileTypes,
        optional_params: dict,
        litellm_params: dict,
    ) -> dict:
        """
        Transform the audio transcription request
        """

        data = {"model": model, "file": audio_file, **optional_params}

        if "response_format" not in data or (
            data["response_format"] == "text" or data["response_format"] == "json"
        ):
            data["response_format"] = (
                "verbose_json"  # ensures 'duration' is received - used for cost calculation
            )

        return data

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, Headers]
    ) -> BaseLLMException:
        return OpenAIError(
            status_code=status_code,
            message=error_message,
            headers=headers,
        )
@@ -11,7 +11,9 @@ class TopazException(BaseLLMException):


 class TopazModelInfo(BaseLLMModelInfo):
-    def get_models(self) -> List[str]:
+    def get_models(
+        self, api_key: Optional[str] = None, api_base: Optional[str] = None
+    ) -> List[str]:
         return [
             "topaz/Standard V2",
             "topaz/Low Resolution V2",
@@ -3,6 +3,7 @@ from typing import Dict, List, Literal, Optional, Tuple, Union

 import httpx

+import litellm
 from litellm import supports_response_schema, supports_system_messages, verbose_logger
 from litellm.llms.base_llm.chat.transformation import BaseLLMException
 from litellm.types.llms.vertex_ai import PartType

@@ -28,6 +29,10 @@ def get_supports_system_message(
         supports_system_message = supports_system_messages(
             model=model, custom_llm_provider=_custom_llm_provider
         )
+
+        # Vertex Models called in the `/gemini` request/response format also support system messages
+        if litellm.VertexGeminiConfig._is_model_gemini_spec_model(model):
+            supports_system_message = True
     except Exception as e:
         verbose_logger.warning(
             "Unable to identify if system message supported. Defaulting to 'False'. Received error message - {}\nAdd it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json".format(

@@ -55,7 +60,9 @@ def get_supports_response_schema(

 from typing import Literal, Optional

-all_gemini_url_modes = Literal["chat", "embedding", "batch_embedding"]
+all_gemini_url_modes = Literal[
+    "chat", "embedding", "batch_embedding", "image_generation"
+]


 def _get_vertex_url(

@@ -68,6 +75,8 @@ def _get_vertex_url(
 ) -> Tuple[str, str]:
     url: Optional[str] = None
     endpoint: Optional[str] = None
+
+    model = litellm.VertexGeminiConfig.get_model_for_vertex_ai_url(model=model)
     if mode == "chat":
         ### SET RUNTIME ENDPOINT ###
         endpoint = "generateContent"

@@ -91,7 +100,11 @@ def _get_vertex_url(
         if model.isdigit():
             # https://us-central1-aiplatform.googleapis.com/v1/projects/$PROJECT_ID/locations/us-central1/endpoints/$ENDPOINT_ID:predict
             url = f"https://{vertex_location}-aiplatform.googleapis.com/{vertex_api_version}/projects/{vertex_project}/locations/{vertex_location}/endpoints/{model}:{endpoint}"
+    elif mode == "image_generation":
+        endpoint = "predict"
+        url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:{endpoint}"
+        if model.isdigit():
+            url = f"https://{vertex_location}-aiplatform.googleapis.com/{vertex_api_version}/projects/{vertex_project}/locations/{vertex_location}/endpoints/{model}:{endpoint}"
     if not url or not endpoint:
         raise ValueError(f"Unable to get vertex url/endpoint for mode: {mode}")
     return url, endpoint

@@ -127,6 +140,10 @@ def _get_gemini_url(
         url = "https://generativelanguage.googleapis.com/v1beta/{}:{}?key={}".format(
             _gemini_model_name, endpoint, gemini_api_key
         )
+    elif mode == "image_generation":
+        raise ValueError(
+            "LiteLLM's `gemini/` route does not support image generation yet. Let us know if you need this feature by opening an issue at https://github.com/BerriAI/litellm/issues"
+        )

     return url, endpoint
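Example (not part of the diff) — the URL shape the new "image_generation" branch of _get_vertex_url builds for a publisher model; all values below are placeholders:

# Illustrative only: reproduces the f-string from the hunk above with placeholder values.
vertex_location = "us-central1"
vertex_project = "my-project"      # hypothetical project id
model = "imagegeneration@006"      # hypothetical Imagen model name
endpoint = "predict"

url = (
    f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/"
    f"{vertex_project}/locations/{vertex_location}/publishers/google/models/"
    f"{model}:{endpoint}"
)
print(url)
# https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1/publishers/google/models/imagegeneration@006:predict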
@@ -4,7 +4,11 @@ from typing import Literal, Optional, Tuple, Union

 import litellm
 from litellm import verbose_logger
-from litellm.litellm_core_utils.llm_cost_calc.utils import _is_above_128k
+from litellm.litellm_core_utils.llm_cost_calc.utils import (
+    _is_above_128k,
+    generic_cost_per_token,
+)
+from litellm.types.utils import ModelInfo, Usage

 """
 Gemini pricing covers:

@@ -20,7 +24,7 @@ Vertex AI -> character based pricing
 Google AI Studio -> token based pricing
 """

-models_without_dynamic_pricing = ["gemini-1.0-pro", "gemini-pro"]
+models_without_dynamic_pricing = ["gemini-1.0-pro", "gemini-pro", "gemini-2"]


 def cost_router(

@@ -46,14 +50,15 @@ def cost_router(
         call_type == "embedding" or call_type == "aembedding"
     ):
         return "cost_per_token"
+    elif custom_llm_provider == "vertex_ai" and ("gemini-2" in model):
+        return "cost_per_token"
     return "cost_per_character"


 def cost_per_character(
     model: str,
     custom_llm_provider: str,
-    prompt_tokens: float,
-    completion_tokens: float,
+    usage: Usage,
     prompt_characters: Optional[float] = None,
     completion_characters: Optional[float] = None,
 ) -> Tuple[float, float]:

@@ -86,8 +91,7 @@ def cost_per_character(
             prompt_cost, _ = cost_per_token(
                 model=model,
                 custom_llm_provider=custom_llm_provider,
-                prompt_tokens=prompt_tokens,
-                completion_tokens=completion_tokens,
+                usage=usage,
             )
         else:
             try:

@@ -124,8 +128,7 @@ def cost_per_character(
             prompt_cost, _ = cost_per_token(
                 model=model,
                 custom_llm_provider=custom_llm_provider,
-                prompt_tokens=prompt_tokens,
-                completion_tokens=completion_tokens,
+                usage=usage,
             )

     ## CALCULATE OUTPUT COST

@@ -133,10 +136,10 @@ def cost_per_character(
         _, completion_cost = cost_per_token(
             model=model,
             custom_llm_provider=custom_llm_provider,
-            prompt_tokens=prompt_tokens,
-            completion_tokens=completion_tokens,
+            usage=usage,
         )
     else:
+        completion_tokens = usage.completion_tokens
         try:
             if (
                 _is_above_128k(tokens=completion_characters * 4)  # 1 token = 4 char

@@ -172,18 +175,54 @@ def cost_per_character(
             _, completion_cost = cost_per_token(
                 model=model,
                 custom_llm_provider=custom_llm_provider,
-                prompt_tokens=prompt_tokens,
-                completion_tokens=completion_tokens,
+                usage=usage,
             )

     return prompt_cost, completion_cost


+def _handle_128k_pricing(
+    model_info: ModelInfo,
+    usage: Usage,
+) -> Tuple[float, float]:
+    ## CALCULATE INPUT COST
+    input_cost_per_token_above_128k_tokens = model_info.get(
+        "input_cost_per_token_above_128k_tokens"
+    )
+    output_cost_per_token_above_128k_tokens = model_info.get(
+        "output_cost_per_token_above_128k_tokens"
+    )
+
+    prompt_tokens = usage.prompt_tokens
+    completion_tokens = usage.completion_tokens
+
+    if (
+        _is_above_128k(tokens=prompt_tokens)
+        and input_cost_per_token_above_128k_tokens is not None
+    ):
+        prompt_cost = prompt_tokens * input_cost_per_token_above_128k_tokens
+    else:
+        prompt_cost = prompt_tokens * model_info["input_cost_per_token"]
+
+    ## CALCULATE OUTPUT COST
+    output_cost_per_token_above_128k_tokens = model_info.get(
+        "output_cost_per_token_above_128k_tokens"
+    )
+    if (
+        _is_above_128k(tokens=completion_tokens)
+        and output_cost_per_token_above_128k_tokens is not None
+    ):
+        completion_cost = completion_tokens * output_cost_per_token_above_128k_tokens
+    else:
+        completion_cost = completion_tokens * model_info["output_cost_per_token"]
+
+    return prompt_cost, completion_cost
+
+
 def cost_per_token(
     model: str,
     custom_llm_provider: str,
-    prompt_tokens: float,
-    completion_tokens: float,
+    usage: Usage,
 ) -> Tuple[float, float]:
     """
     Calculates the cost per token for a given model, prompt tokens, and completion tokens.

@@ -205,38 +244,24 @@ def cost_per_token(
         model=model, custom_llm_provider=custom_llm_provider
     )

-    ## CALCULATE INPUT COST
+    ## HANDLE 128k+ PRICING
+    input_cost_per_token_above_128k_tokens = model_info.get(
+        "input_cost_per_token_above_128k_tokens"
+    )
+    output_cost_per_token_above_128k_tokens = model_info.get(
+        "output_cost_per_token_above_128k_tokens"
+    )
     if (
-        _is_above_128k(tokens=prompt_tokens)
-        and model not in models_without_dynamic_pricing
+        input_cost_per_token_above_128k_tokens is not None
+        or output_cost_per_token_above_128k_tokens is not None
     ):
-        assert (
-            "input_cost_per_token_above_128k_tokens" in model_info
-            and model_info["input_cost_per_token_above_128k_tokens"] is not None
-        ), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
-            model, model_info
-        )
-        prompt_cost = (
-            prompt_tokens * model_info["input_cost_per_token_above_128k_tokens"]
-        )
-    else:
-        prompt_cost = prompt_tokens * model_info["input_cost_per_token"]
+        return _handle_128k_pricing(
+            model_info=model_info,
+            usage=usage,
+        )

-    ## CALCULATE OUTPUT COST
-    if (
-        _is_above_128k(tokens=completion_tokens)
-        and model not in models_without_dynamic_pricing
-    ):
-        assert (
-            "output_cost_per_token_above_128k_tokens" in model_info
-            and model_info["output_cost_per_token_above_128k_tokens"] is not None
-        ), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
-            model, model_info
-        )
-        completion_cost = (
-            completion_tokens * model_info["output_cost_per_token_above_128k_tokens"]
-        )
-    else:
-        completion_cost = completion_tokens * model_info["output_cost_per_token"]
-
-    return prompt_cost, completion_cost
+    return generic_cost_per_token(
+        model=model,
+        custom_llm_provider=custom_llm_provider,
+        usage=usage,
+    )
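Example (not part of the diff) — a worked instance of the _handle_128k_pricing arithmetic introduced above; the per-token rates are made up, not real Gemini prices:

# Worked example: with 200k prompt tokens the above-128k input rate applies,
# while 1k completion tokens stay on the base output rate.
model_info = {
    "input_cost_per_token": 1e-6,
    "output_cost_per_token": 2e-6,
    "input_cost_per_token_above_128k_tokens": 2e-6,
    "output_cost_per_token_above_128k_tokens": 4e-6,
}
prompt_tokens, completion_tokens = 200_000, 1_000

prompt_cost = prompt_tokens * model_info["input_cost_per_token_above_128k_tokens"]
completion_cost = completion_tokens * model_info["output_cost_per_token"]
print(prompt_cost, completion_cost)  # 0.4 0.002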
@@ -207,6 +207,7 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
             "extra_headers",
             "seed",
             "logprobs",
+            "top_logprobs",  # Added this to list of supported openAI params
         ]

     def map_tool_choice_values(

@@ -365,6 +366,8 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
                 optional_params["presence_penalty"] = value
             if param == "logprobs":
                 optional_params["responseLogprobs"] = value
+            if param == "top_logprobs":
+                optional_params["logprobs"] = value
             if (param == "tools" or param == "functions") and isinstance(value, list):
                 optional_params["tools"] = self._map_function(value=value)
                 optional_params["litellm_param_is_function_call"] = (

@@ -416,6 +419,49 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
             "europe-west9",
         ]

+    @staticmethod
+    def get_model_for_vertex_ai_url(model: str) -> str:
+        """
+        Returns the model name to use in the request to Vertex AI
+
+        Handles 2 cases:
+        1. User passed `model="vertex_ai/gemini/ft-uuid"`, we need to return `ft-uuid` for the request to Vertex AI
+        2. User passed `model="vertex_ai/gemini-2.0-flash-001"`, we need to return `gemini-2.0-flash-001` for the request to Vertex AI
+
+        Args:
+            model (str): The model name to use in the request to Vertex AI
+
+        Returns:
+            str: The model name to use in the request to Vertex AI
+        """
+        if VertexGeminiConfig._is_model_gemini_spec_model(model):
+            return VertexGeminiConfig._get_model_name_from_gemini_spec_model(model)
+        return model
+
+    @staticmethod
+    def _is_model_gemini_spec_model(model: Optional[str]) -> bool:
+        """
+        Returns true if user is trying to call custom model in `/gemini` request/response format
+        """
+        if model is None:
+            return False
+        if "gemini/" in model:
+            return True
+        return False
+
+    @staticmethod
+    def _get_model_name_from_gemini_spec_model(model: str) -> str:
+        """
+        Returns the model name if model="vertex_ai/gemini/<unique_id>"
+
+        Example:
+        - model = "gemini/1234567890"
+        - returns "1234567890"
+        """
+        if "gemini/" in model:
+            return model.split("/")[-1]
+        return model
+
     def get_flagged_finish_reasons(self) -> Dict[str, str]:
         """
         Return Dictionary of finish reasons which indicate response was flagged

@@ -597,15 +643,24 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
         completion_response: GenerateContentResponseBody,
     ) -> Usage:
         cached_tokens: Optional[int] = None
+        audio_tokens: Optional[int] = None
+        text_tokens: Optional[int] = None
         prompt_tokens_details: Optional[PromptTokensDetailsWrapper] = None
         if "cachedContentTokenCount" in completion_response["usageMetadata"]:
             cached_tokens = completion_response["usageMetadata"][
                 "cachedContentTokenCount"
             ]
+        if "promptTokensDetails" in completion_response["usageMetadata"]:
+            for detail in completion_response["usageMetadata"]["promptTokensDetails"]:
+                if detail["modality"] == "AUDIO":
+                    audio_tokens = detail["tokenCount"]
+                elif detail["modality"] == "TEXT":
+                    text_tokens = detail["tokenCount"]

-        if cached_tokens is not None:
         prompt_tokens_details = PromptTokensDetailsWrapper(
             cached_tokens=cached_tokens,
+            audio_tokens=audio_tokens,
+            text_tokens=text_tokens,
         )
         ## GET USAGE ##
         usage = Usage(

@@ -745,6 +800,7 @@ class VertexGeminiConfig(VertexAIBaseConfig, BaseConfig):
             model_response.choices.append(choice)

         usage = self._calculate_usage(completion_response=completion_response)
+
         setattr(model_response, "usage", usage)

         ## ADD GROUNDING METADATA ##
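Example (not part of the diff) — expected behavior of the new static helpers, following their docstrings; uses litellm.VertexGeminiConfig, the same accessor the common-utils hunk above relies on:

# Sketch only: checks the "gemini/<id>" spec-model handling.
import litellm

assert litellm.VertexGeminiConfig._is_model_gemini_spec_model("gemini/ft-uuid") is True
assert litellm.VertexGeminiConfig.get_model_for_vertex_ai_url("gemini/ft-uuid") == "ft-uuid"
assert (
    litellm.VertexGeminiConfig.get_model_for_vertex_ai_url("gemini-2.0-flash-001")
    == "gemini-2.0-flash-001"
)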
@@ -43,22 +43,23 @@ class VertexImageGeneration(VertexLLM):
     def image_generation(
         self,
         prompt: str,
+        api_base: Optional[str],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
         vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         model_response: ImageResponse,
         logging_obj: Any,
-        model: Optional[
-            str
-        ] = "imagegeneration",  # vertex ai uses imagegeneration as the default model
+        model: str = "imagegeneration",  # vertex ai uses imagegeneration as the default model
         client: Optional[Any] = None,
         optional_params: Optional[dict] = None,
         timeout: Optional[int] = None,
         aimg_generation=False,
+        extra_headers: Optional[dict] = None,
     ) -> ImageResponse:
         if aimg_generation is True:
             return self.aimage_generation(  # type: ignore
                 prompt=prompt,
+                api_base=api_base,
                 vertex_project=vertex_project,
                 vertex_location=vertex_location,
                 vertex_credentials=vertex_credentials,

@@ -83,13 +84,27 @@ class VertexImageGeneration(VertexLLM):
         else:
             sync_handler = client  # type: ignore

-        url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:predict"
+        # url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:predict"

+        auth_header: Optional[str] = None
         auth_header, _ = self._ensure_access_token(
             credentials=vertex_credentials,
             project_id=vertex_project,
             custom_llm_provider="vertex_ai",
         )
+        auth_header, api_base = self._get_token_and_url(
+            model=model,
+            gemini_api_key=None,
+            auth_header=auth_header,
+            vertex_project=vertex_project,
+            vertex_location=vertex_location,
+            vertex_credentials=vertex_credentials,
+            stream=False,
+            custom_llm_provider="vertex_ai",
+            api_base=api_base,
+            should_use_v1beta1_features=False,
+            mode="image_generation",
+        )
         optional_params = optional_params or {
             "sampleCount": 1
         }  # default optional params

@@ -99,31 +114,21 @@ class VertexImageGeneration(VertexLLM):
             "parameters": optional_params,
         }

-        request_str = f"\n curl -X POST \\\n -H \"Authorization: Bearer {auth_header[:10] + 'XXXXXXXXXX'}\" \\\n -H \"Content-Type: application/json; charset=utf-8\" \\\n -d {request_data} \\\n \"{url}\""
-        logging_obj.pre_call(
-            input=prompt,
-            api_key=None,
-            additional_args={
-                "complete_input_dict": optional_params,
-                "request_str": request_str,
-            },
-        )
-
+        headers = self.set_headers(auth_header=auth_header, extra_headers=extra_headers)
         logging_obj.pre_call(
             input=prompt,
-            api_key=None,
+            api_key="",
             additional_args={
                 "complete_input_dict": optional_params,
-                "request_str": request_str,
+                "api_base": api_base,
+                "headers": headers,
             },
         )

         response = sync_handler.post(
-            url=url,
-            headers={
-                "Content-Type": "application/json; charset=utf-8",
-                "Authorization": f"Bearer {auth_header}",
-            },
+            url=api_base,
+            headers=headers,
             data=json.dumps(request_data),
         )

@@ -138,17 +143,17 @@ class VertexImageGeneration(VertexLLM):
     async def aimage_generation(
         self,
         prompt: str,
+        api_base: Optional[str],
         vertex_project: Optional[str],
         vertex_location: Optional[str],
         vertex_credentials: Optional[VERTEX_CREDENTIALS_TYPES],
         model_response: litellm.ImageResponse,
         logging_obj: Any,
-        model: Optional[
-            str
-        ] = "imagegeneration",  # vertex ai uses imagegeneration as the default model
+        model: str = "imagegeneration",  # vertex ai uses imagegeneration as the default model
         client: Optional[AsyncHTTPHandler] = None,
         optional_params: Optional[dict] = None,
         timeout: Optional[int] = None,
+        extra_headers: Optional[dict] = None,
     ):
         response = None
         if client is None:

@@ -169,7 +174,6 @@ class VertexImageGeneration(VertexLLM):

         # make POST request to
         # https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict
-        url = f"https://{vertex_location}-aiplatform.googleapis.com/v1/projects/{vertex_project}/locations/{vertex_location}/publishers/google/models/{model}:predict"

         """
         Docs link: https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/imagegeneration?project=adroit-crow-413218

@@ -188,11 +192,25 @@ class VertexImageGeneration(VertexLLM):
         } \
         "https://us-central1-aiplatform.googleapis.com/v1/projects/PROJECT_ID/locations/us-central1/publishers/google/models/imagegeneration:predict"
         """
+        auth_header: Optional[str] = None
         auth_header, _ = self._ensure_access_token(
             credentials=vertex_credentials,
             project_id=vertex_project,
             custom_llm_provider="vertex_ai",
         )
+        auth_header, api_base = self._get_token_and_url(
+            model=model,
+            gemini_api_key=None,
+            auth_header=auth_header,
+            vertex_project=vertex_project,
+            vertex_location=vertex_location,
+            vertex_credentials=vertex_credentials,
+            stream=False,
+            custom_llm_provider="vertex_ai",
+            api_base=api_base,
+            should_use_v1beta1_features=False,
+            mode="image_generation",
+        )
         optional_params = optional_params or {
             "sampleCount": 1
         }  # default optional params

@@ -202,22 +220,21 @@ class VertexImageGeneration(VertexLLM):
             "parameters": optional_params,
         }

-        request_str = f"\n curl -X POST \\\n -H \"Authorization: Bearer {auth_header[:10] + 'XXXXXXXXXX'}\" \\\n -H \"Content-Type: application/json; charset=utf-8\" \\\n -d {request_data} \\\n \"{url}\""
+        headers = self.set_headers(auth_header=auth_header, extra_headers=extra_headers)

         logging_obj.pre_call(
             input=prompt,
-            api_key=None,
+            api_key="",
             additional_args={
                 "complete_input_dict": optional_params,
-                "request_str": request_str,
+                "api_base": api_base,
+                "headers": headers,
             },
         )

         response = await self.async_handler.post(
-            url=url,
-            headers={
-                "Content-Type": "application/json; charset=utf-8",
-                "Authorization": f"Bearer {auth_header}",
-            },
+            url=api_base,
+            headers=headers,
             data=json.dumps(request_data),
         )
@@ -226,7 +226,15 @@ class VertexMultimodalEmbedding(VertexLLM):
             else:
                 return Instance(image=InstanceImage(gcsUri=input_element))
         elif is_base64_encoded(s=input_element):
-            return Instance(image=InstanceImage(bytesBase64Encoded=input_element))
+            return Instance(
+                image=InstanceImage(
+                    bytesBase64Encoded=(
+                        input_element.split(",")[1]
+                        if "," in input_element
+                        else input_element
+                    )
+                )
+            )
         else:
             return Instance(text=input_element)
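Example (not part of the diff) — what the new branch does with a data-URI style input; the input string is a placeholder:

# Illustrative only: the prefix before the comma is stripped so only the raw
# base64 payload is sent to Vertex multimodal embeddings.
input_element = "data:image/png;base64,iVBORw0KGgoAAA"  # hypothetical input
payload = input_element.split(",")[1] if "," in input_element else input_element
print(payload)  # iVBORw0KGgoAAA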
@@ -111,7 +111,7 @@ class VertexEmbedding(VertexBase):
         )

         try:
-            response = client.post(api_base, headers=headers, json=vertex_request)  # type: ignore
+            response = client.post(url=api_base, headers=headers, json=vertex_request)  # type: ignore
             response.raise_for_status()
         except httpx.HTTPStatusError as err:
             error_code = err.response.status_code
litellm/llms/xai/common_utils.py
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
import litellm
|
||||||
|
from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
|
||||||
|
from litellm.secret_managers.main import get_secret_str
|
||||||
|
|
||||||
|
|
||||||
|
class XAIModelInfo(BaseLLMModelInfo):
|
||||||
|
@staticmethod
|
||||||
|
def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
|
||||||
|
return api_base or get_secret_str("XAI_API_BASE") or "https://api.x.ai"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_api_key(api_key: Optional[str] = None) -> Optional[str]:
|
||||||
|
return api_key or get_secret_str("XAI_API_KEY")
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_base_model(model: str) -> Optional[str]:
|
||||||
|
return model.replace("xai/", "")
|
||||||
|
|
||||||
|
def get_models(
|
||||||
|
self, api_key: Optional[str] = None, api_base: Optional[str] = None
|
||||||
|
) -> list[str]:
|
||||||
|
api_base = self.get_api_base(api_base)
|
||||||
|
api_key = self.get_api_key(api_key)
|
||||||
|
if api_base is None or api_key is None:
|
||||||
|
raise ValueError(
|
||||||
|
"XAI_API_BASE or XAI_API_KEY is not set. Please set the environment variable, to query XAI's `/models` endpoint."
|
||||||
|
)
|
||||||
|
response = litellm.module_level_client.get(
|
||||||
|
url=f"{api_base}/v1/models",
|
||||||
|
headers={"Authorization": f"Bearer {api_key}"},
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
response.raise_for_status()
|
||||||
|
except httpx.HTTPStatusError:
|
||||||
|
raise Exception(
|
||||||
|
f"Failed to fetch models from XAI. Status code: {response.status_code}, Response: {response.text}"
|
||||||
|
)
|
||||||
|
|
||||||
|
models = response.json()["data"]
|
||||||
|
|
||||||
|
litellm_model_names = []
|
||||||
|
for model in models:
|
||||||
|
stripped_model_name = model["id"]
|
||||||
|
litellm_model_name = "xai/" + stripped_model_name
|
||||||
|
litellm_model_names.append(litellm_model_name)
|
||||||
|
return litellm_model_names
|
|
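Example (not part of the diff) — listing xAI models through the new helper; this needs a real XAI_API_KEY and network access, and the returned names shown are illustrative:

# Sketch only: the key below is a placeholder, and the printed list depends
# on what xAI's /v1/models endpoint actually returns.
import os

from litellm.llms.xai.common_utils import XAIModelInfo

os.environ.setdefault("XAI_API_KEY", "xai-placeholder-key")
model_info = XAIModelInfo()
print(model_info.get_models())
# e.g. ["xai/grok-2-latest", ...]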
@@ -2350,6 +2350,8 @@ def completion(  # type: ignore # noqa: PLR0915
             or litellm.api_key
         )

+        api_base = api_base or litellm.api_base or get_secret("GEMINI_API_BASE")
+
         new_params = deepcopy(optional_params)
         response = vertex_chat_completion.completion(  # type: ignore
             model=model,

@@ -2392,6 +2394,8 @@ def completion(  # type: ignore # noqa: PLR0915
             or get_secret("VERTEXAI_CREDENTIALS")
         )

+        api_base = api_base or litellm.api_base or get_secret("VERTEXAI_API_BASE")
+
         new_params = deepcopy(optional_params)
         if (
             model.startswith("meta/")

@@ -3657,6 +3661,8 @@ def embedding(  # noqa: PLR0915
             api_key or get_secret_str("GEMINI_API_KEY") or litellm.api_key
         )

+        api_base = api_base or litellm.api_base or get_secret_str("GEMINI_API_BASE")
+
         response = google_batch_embeddings.batch_embeddings(  # type: ignore
             model=model,
             input=input,

@@ -3671,6 +3677,8 @@ def embedding(  # noqa: PLR0915
             print_verbose=print_verbose,
             custom_llm_provider="gemini",
             api_key=gemini_api_key,
+            api_base=api_base,
+            client=client,
         )

     elif custom_llm_provider == "vertex_ai":

@@ -3695,6 +3703,13 @@ def embedding(  # noqa: PLR0915
             or get_secret_str("VERTEX_CREDENTIALS")
         )

+        api_base = (
+            api_base
+            or litellm.api_base
+            or get_secret_str("VERTEXAI_API_BASE")
+            or get_secret_str("VERTEX_API_BASE")
+        )
+
         if (
             "image" in optional_params
             or "video" in optional_params

@@ -3715,6 +3730,8 @@ def embedding(  # noqa: PLR0915
                 aembedding=aembedding,
                 print_verbose=print_verbose,
                 custom_llm_provider="vertex_ai",
+                client=client,
+                api_base=api_base,
             )
         else:
             response = vertex_embedding.embedding(

@@ -3732,6 +3749,8 @@ def embedding(  # noqa: PLR0915
                 aembedding=aembedding,
                 print_verbose=print_verbose,
                 api_key=api_key,
+                api_base=api_base,
+                client=client,
             )
     elif custom_llm_provider == "oobabooga":
         response = oobabooga.embedding(

@@ -4694,6 +4713,14 @@ def image_generation(  # noqa: PLR0915
                 or optional_params.pop("vertex_ai_credentials", None)
                 or get_secret_str("VERTEXAI_CREDENTIALS")
             )

+            api_base = (
+                api_base
+                or litellm.api_base
+                or get_secret_str("VERTEXAI_API_BASE")
+                or get_secret_str("VERTEX_API_BASE")
+            )
+
             model_response = vertex_image_generation.image_generation(
                 model=model,
                 prompt=prompt,

@@ -4705,6 +4732,8 @@ def image_generation(  # noqa: PLR0915
                 vertex_location=vertex_ai_location,
                 vertex_credentials=vertex_credentials,
                 aimg_generation=aimg_generation,
+                api_base=api_base,
+                client=client,
             )
         elif (
             custom_llm_provider in litellm._custom_providers

@@ -5066,6 +5095,12 @@ def transcription(
     response: Optional[
         Union[TranscriptionResponse, Coroutine[Any, Any, TranscriptionResponse]]
     ] = None

+    provider_config = ProviderConfigManager.get_provider_audio_transcription_config(
+        model=model,
+        provider=LlmProviders(custom_llm_provider),
+    )
+
     if custom_llm_provider == "azure":
         # azure configs
         api_base = api_base or litellm.api_base or get_secret_str("AZURE_API_BASE")

@@ -5132,12 +5167,15 @@ def transcription(
             max_retries=max_retries,
             api_base=api_base,
             api_key=api_key,
+            provider_config=provider_config,
+            litellm_params=litellm_params_dict,
         )
     elif custom_llm_provider == "deepgram":
         response = base_llm_http_handler.audio_transcriptions(
             model=model,
             audio_file=file,
             optional_params=optional_params,
+            litellm_params=litellm_params_dict,
             model_response=model_response,
             atranscription=atranscription,
             client=(

@@ -5156,6 +5194,7 @@ def transcription(
             api_key=api_key,
             custom_llm_provider="deepgram",
             headers={},
+            provider_config=provider_config,
         )
     if response is None:
         raise ValueError("Unmapped provider passed in. Unable to get the response.")
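Example (not part of the diff) — with the provider-config wiring above, a gpt-4o-transcribe request is routed through the new GPT transcription config; the file name is a hypothetical local path and an OPENAI_API_KEY is assumed to be set:

# Sketch only: standard litellm.transcription call, shown here to illustrate
# which model the new provider_config path applies to.
import litellm

with open("sample.wav", "rb") as audio_file:
    transcript = litellm.transcription(
        model="gpt-4o-transcribe",
        file=audio_file,
        language="en",
    )
print(transcript.text)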
@@ -1176,21 +1176,40 @@
         "output_cost_per_pixel": 0.0,
         "litellm_provider": "openai"
     },
+    "gpt-4o-transcribe": {
+        "mode": "audio_transcription",
+        "input_cost_per_token": 0.0000025,
+        "input_cost_per_audio_token": 0.000006,
+        "output_cost_per_token": 0.00001,
+        "litellm_provider": "openai",
+        "supported_endpoints": ["/v1/audio/transcriptions"]
+    },
+    "gpt-4o-mini-transcribe": {
+        "mode": "audio_transcription",
+        "input_cost_per_token": 0.00000125,
+        "input_cost_per_audio_token": 0.000003,
+        "output_cost_per_token": 0.000005,
+        "litellm_provider": "openai",
+        "supported_endpoints": ["/v1/audio/transcriptions"]
+    },
     "whisper-1": {
         "mode": "audio_transcription",
         "input_cost_per_second": 0.0001,
         "output_cost_per_second": 0.0001,
-        "litellm_provider": "openai"
+        "litellm_provider": "openai",
+        "supported_endpoints": ["/v1/audio/transcriptions"]
     },
     "tts-1": {
         "mode": "audio_speech",
         "input_cost_per_character": 0.000015,
-        "litellm_provider": "openai"
+        "litellm_provider": "openai",
+        "supported_endpoints": ["/v1/audio/speech"]
     },
     "tts-1-hd": {
         "mode": "audio_speech",
         "input_cost_per_character": 0.000030,
-        "litellm_provider": "openai"
+        "litellm_provider": "openai",
+        "supported_endpoints": ["/v1/audio/speech"]
     },
     "azure/gpt-4o-mini-realtime-preview-2024-12-17": {
         "max_tokens": 4096,

@@ -4595,6 +4614,28 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
         "supports_tool_choice": true
     },
+    "gemini-2.0-flash-lite": {
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 50,
+        "input_cost_per_audio_token": 0.000000075,
+        "input_cost_per_token": 0.000000075,
+        "output_cost_per_token": 0.0000003,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
+        "supports_tool_choice": true
+    },
     "gemini/gemini-2.0-pro-exp-02-05": {
         "max_tokens": 8192,
         "max_input_tokens": 2097152,

@@ -4658,6 +4699,30 @@
         "supports_tool_choice": true,
         "source": "https://ai.google.dev/pricing#2_0flash"
     },
+    "gemini/gemini-2.0-flash-lite": {
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 50,
+        "input_cost_per_audio_token": 0.000000075,
+        "input_cost_per_token": 0.000000075,
+        "output_cost_per_token": 0.0000003,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "tpm": 4000000,
+        "rpm": 4000,
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": true,
+        "supports_tool_choice": true,
+        "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite"
+    },
     "gemini/gemini-2.0-flash-001": {
         "max_tokens": 8192,
         "max_input_tokens": 1048576,

@@ -5153,6 +5218,29 @@
         "supports_function_calling": true,
         "supports_tool_choice": true
     },
+    "vertex_ai/mistral-small-2503@001": {
+        "max_tokens": 8191,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 8191,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000003,
+        "litellm_provider": "vertex_ai-mistral_models",
+        "supports_function_calling": true,
+        "mode": "chat",
+        "supports_tool_choice": true
+    },
+    "vertex_ai/mistral-small-2503": {
+        "max_tokens": 128000,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 128000,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000003,
+        "litellm_provider": "vertex_ai-mistral_models",
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_tool_choice": true
+    },
     "vertex_ai/jamba-1.5-mini@001": {
         "max_tokens": 256000,
         "max_input_tokens": 256000,

@@ -5304,6 +5392,23 @@
         "mode": "embedding",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
     },
+    "multimodalembedding": {
+        "max_tokens": 2048,
+        "max_input_tokens": 2048,
+        "output_vector_size": 768,
+        "input_cost_per_character": 0.0000002,
+        "input_cost_per_image": 0.0001,
+        "input_cost_per_video_per_second": 0.0005,
+        "input_cost_per_video_per_second_above_8s_interval": 0.0010,
+        "input_cost_per_video_per_second_above_15s_interval": 0.0020,
+        "input_cost_per_token": 0.0000008,
+        "output_cost_per_token": 0,
+        "litellm_provider": "vertex_ai-embedding-models",
+        "mode": "embedding",
+        "supported_endpoints": ["/v1/embeddings"],
+        "supported_modalities": ["text", "image", "video"],
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models"
+    },
     "text-embedding-large-exp-03-07": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
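Example (not part of the diff) — a worked cost calculation using the new gpt-4o-transcribe prices above; the token counts are hypothetical:

# Worked example: text input at 0.0000025/token, audio input at 0.000006/token,
# output at 0.00001/token, per the JSON entry added above.
text_in, audio_in, out = 20, 1_500, 200  # hypothetical token counts
cost = text_in * 0.0000025 + audio_in * 0.000006 + out * 0.00001
print(f"${cost:.6f}")  # $0.011050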
litellm/proxy/_experimental/mcp_server/server.py (new file, 123 lines)
@@ -0,0 +1,123 @@
"""
LiteLLM MCP Server Routes
"""

import asyncio
from typing import Any, Dict, List, Union

from anyio import BrokenResourceError
from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import StreamingResponse
from pydantic import ValidationError

from litellm._logging import verbose_logger

# Check if MCP is available
# "mcp" requires python 3.10 or higher, but several litellm users use python 3.8
# We're making this conditional import to avoid breaking users who use python 3.8.
try:
    from mcp.server import Server

    MCP_AVAILABLE = True
except ImportError as e:
    verbose_logger.debug(f"MCP module not found: {e}")
    MCP_AVAILABLE = False
    router = APIRouter(
        prefix="/mcp",
        tags=["mcp"],
    )


if MCP_AVAILABLE:
    from mcp.server import NotificationOptions, Server
    from mcp.server.models import InitializationOptions
    from mcp.types import EmbeddedResource as MCPEmbeddedResource
    from mcp.types import ImageContent as MCPImageContent
    from mcp.types import TextContent as MCPTextContent
    from mcp.types import Tool as MCPTool

    from .sse_transport import SseServerTransport
    from .tool_registry import global_mcp_tool_registry

    ########################################################
    ############ Initialize the MCP Server #################
    ########################################################
    router = APIRouter(
        prefix="/mcp",
        tags=["mcp"],
    )
    server: Server = Server("litellm-mcp-server")
    sse: SseServerTransport = SseServerTransport("/mcp/sse/messages")

    ########################################################
    ############### MCP Server Routes #######################
    ########################################################

    @server.list_tools()
    async def list_tools() -> list[MCPTool]:
        """
        List all available tools
        """
        tools = []
        for tool in global_mcp_tool_registry.list_tools():
            tools.append(
                MCPTool(
                    name=tool.name,
                    description=tool.description,
                    inputSchema=tool.input_schema,
                )
            )

        return tools

    @server.call_tool()
    async def handle_call_tool(
        name: str, arguments: Dict[str, Any] | None
    ) -> List[Union[MCPTextContent, MCPImageContent, MCPEmbeddedResource]]:
        """
        Call a specific tool with the provided arguments
        """
        tool = global_mcp_tool_registry.get_tool(name)
        if not tool:
            raise HTTPException(status_code=404, detail=f"Tool '{name}' not found")
        if arguments is None:
            raise HTTPException(
                status_code=400, detail="Request arguments are required"
            )

        try:
            result = tool.handler(**arguments)
            return [MCPTextContent(text=str(result), type="text")]
        except Exception as e:
            return [MCPTextContent(text=f"Error: {str(e)}", type="text")]

    @router.get("/", response_class=StreamingResponse)
    async def handle_sse(request: Request):
        verbose_logger.info("new incoming SSE connection established")
        async with sse.connect_sse(request) as streams:
            try:
                await server.run(streams[0], streams[1], options)
            except BrokenResourceError:
                pass
            except asyncio.CancelledError:
                pass
            except ValidationError:
                pass
            except Exception:
                raise
        await request.close()

    @router.post("/sse/messages")
    async def handle_messages(request: Request):
        verbose_logger.info("incoming SSE message received")
        await sse.handle_post_message(request.scope, request.receive, request._send)
        await request.close()

    options = InitializationOptions(
        server_name="litellm-mcp-server",
        server_version="0.1.0",
        capabilities=server.get_capabilities(
            notification_options=NotificationOptions(),
            experimental_capabilities={},
        ),
    )
150  litellm/proxy/_experimental/mcp_server/sse_transport.py  Normal file
@ -0,0 +1,150 @@
"""
This is a modification of code from: https://github.com/SecretiveShell/MCP-Bridge/blob/master/mcp_bridge/mcp_server/sse_transport.py

Credit to the maintainers of SecretiveShell for their SSE Transport implementation

"""

from contextlib import asynccontextmanager
from typing import Any
from urllib.parse import quote
from uuid import UUID, uuid4

import anyio
import mcp.types as types
from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
from fastapi.requests import Request
from fastapi.responses import Response
from pydantic import ValidationError
from sse_starlette import EventSourceResponse
from starlette.types import Receive, Scope, Send

from litellm._logging import verbose_logger


class SseServerTransport:
    """
    SSE server transport for MCP. This class provides _two_ ASGI applications,
    suitable to be used with a framework like Starlette and a server like Hypercorn:

    1. connect_sse() is an ASGI application which receives incoming GET requests,
       and sets up a new SSE stream to send server messages to the client.
    2. handle_post_message() is an ASGI application which receives incoming POST
       requests, which should contain client messages that link to a
       previously-established SSE session.
    """

    _endpoint: str
    _read_stream_writers: dict[
        UUID, MemoryObjectSendStream[types.JSONRPCMessage | Exception]
    ]

    def __init__(self, endpoint: str) -> None:
        """
        Creates a new SSE server transport, which will direct the client to POST
        messages to the relative or absolute URL given.
        """

        super().__init__()
        self._endpoint = endpoint
        self._read_stream_writers = {}
        verbose_logger.debug(
            f"SseServerTransport initialized with endpoint: {endpoint}"
        )

    @asynccontextmanager
    async def connect_sse(self, request: Request):
        if request.scope["type"] != "http":
            verbose_logger.error("connect_sse received non-HTTP request")
            raise ValueError("connect_sse can only handle HTTP requests")

        verbose_logger.debug("Setting up SSE connection")
        read_stream: MemoryObjectReceiveStream[types.JSONRPCMessage | Exception]
        read_stream_writer: MemoryObjectSendStream[types.JSONRPCMessage | Exception]

        write_stream: MemoryObjectSendStream[types.JSONRPCMessage]
        write_stream_reader: MemoryObjectReceiveStream[types.JSONRPCMessage]

        read_stream_writer, read_stream = anyio.create_memory_object_stream(0)
        write_stream, write_stream_reader = anyio.create_memory_object_stream(0)

        session_id = uuid4()
        session_uri = f"{quote(self._endpoint)}?session_id={session_id.hex}"
        self._read_stream_writers[session_id] = read_stream_writer
        verbose_logger.debug(f"Created new session with ID: {session_id}")

        sse_stream_writer: MemoryObjectSendStream[dict[str, Any]]
        sse_stream_reader: MemoryObjectReceiveStream[dict[str, Any]]
        sse_stream_writer, sse_stream_reader = anyio.create_memory_object_stream(
            0, dict[str, Any]
        )

        async def sse_writer():
            verbose_logger.debug("Starting SSE writer")
            async with sse_stream_writer, write_stream_reader:
                await sse_stream_writer.send({"event": "endpoint", "data": session_uri})
                verbose_logger.debug(f"Sent endpoint event: {session_uri}")

                async for message in write_stream_reader:
                    verbose_logger.debug(f"Sending message via SSE: {message}")
                    await sse_stream_writer.send(
                        {
                            "event": "message",
                            "data": message.model_dump_json(
                                by_alias=True, exclude_none=True
                            ),
                        }
                    )

        async with anyio.create_task_group() as tg:
            response = EventSourceResponse(
                content=sse_stream_reader, data_sender_callable=sse_writer
            )
            verbose_logger.debug("Starting SSE response task")
            tg.start_soon(response, request.scope, request.receive, request._send)

            verbose_logger.debug("Yielding read and write streams")
            yield (read_stream, write_stream)

    async def handle_post_message(
        self, scope: Scope, receive: Receive, send: Send
    ) -> Response:
        verbose_logger.debug("Handling POST message")
        request = Request(scope, receive)

        session_id_param = request.query_params.get("session_id")
        if session_id_param is None:
            verbose_logger.warning("Received request without session_id")
            response = Response("session_id is required", status_code=400)
            return response

        try:
            session_id = UUID(hex=session_id_param)
            verbose_logger.debug(f"Parsed session ID: {session_id}")
        except ValueError:
            verbose_logger.warning(f"Received invalid session ID: {session_id_param}")
            response = Response("Invalid session ID", status_code=400)
            return response

        writer = self._read_stream_writers.get(session_id)
        if not writer:
            verbose_logger.warning(f"Could not find session for ID: {session_id}")
            response = Response("Could not find session", status_code=404)
            return response

        json = await request.json()
        verbose_logger.debug(f"Received JSON: {json}")

        try:
            message = types.JSONRPCMessage.model_validate(json)
            verbose_logger.debug(f"Validated client message: {message}")
        except ValidationError as err:
            verbose_logger.error(f"Failed to parse message: {err}")
            response = Response("Could not parse message", status_code=400)
            await writer.send(err)
            return response

        verbose_logger.debug(f"Sending message to writer: {message}")
        response = Response("Accepted", status_code=202)
        await writer.send(message)
        return response
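
A minimal sketch (not part of this diff) of how this transport is meant to be mounted; it mirrors the route handlers earlier in the diff, but the endpoint path, router prefix, and the placeholder for a real MCP Server loop are assumptions.

# Sketch only -- paths and the stand-in for `server.run(...)` are assumptions.
from fastapi import APIRouter, Request
from fastapi.responses import StreamingResponse

from litellm._logging import verbose_logger
from litellm.proxy._experimental.mcp_server.sse_transport import SseServerTransport

router = APIRouter(prefix="/mcp")
sse = SseServerTransport("/mcp/sse/messages")  # URL clients POST messages back to


@router.get("/", response_class=StreamingResponse)
async def sse_endpoint(request: Request):
    # connect_sse() registers a session, starts the EventSourceResponse,
    # and yields (read_stream, write_stream) for an MCP server loop,
    # e.g. `await server.run(read_stream, write_stream, options)`.
    async with sse.connect_sse(request) as (read_stream, write_stream):
        async for incoming in read_stream:  # placeholder for server.run(...)
            verbose_logger.debug(f"client message: {incoming}")


@router.post("/sse/messages")
async def post_endpoint(request: Request):
    # Forwards the POSTed JSON-RPC message to the SSE session identified
    # by the ?session_id= query parameter.
    return await sse.handle_post_message(request.scope, request.receive, request._send)
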
103  litellm/proxy/_experimental/mcp_server/tool_registry.py  Normal file
@ -0,0 +1,103 @@
import json
from typing import Any, Callable, Dict, List, Optional

from litellm._logging import verbose_logger
from litellm.proxy.types_utils.utils import get_instance_fn
from litellm.types.mcp_server.tool_registry import MCPTool


class MCPToolRegistry:
    """
    A registry for managing MCP tools
    """

    def __init__(self):
        # Registry to store all registered tools
        self.tools: Dict[str, MCPTool] = {}

    def register_tool(
        self,
        name: str,
        description: str,
        input_schema: Dict[str, Any],
        handler: Callable,
    ) -> None:
        """
        Register a new tool in the registry
        """
        self.tools[name] = MCPTool(
            name=name,
            description=description,
            input_schema=input_schema,
            handler=handler,
        )
        verbose_logger.debug(f"Registered tool: {name}")

    def get_tool(self, name: str) -> Optional[MCPTool]:
        """
        Get a tool from the registry by name
        """
        return self.tools.get(name)

    def list_tools(self) -> List[MCPTool]:
        """
        List all registered tools
        """
        return list(self.tools.values())

    def load_tools_from_config(
        self, mcp_tools_config: Optional[Dict[str, Any]] = None
    ) -> None:
        """
        Load and register tools from the proxy config

        Args:
            mcp_tools_config: The mcp_tools config from the proxy config
        """
        if mcp_tools_config is None:
            raise ValueError(
                "mcp_tools_config is required, please set `mcp_tools` in your proxy config"
            )

        for tool_config in mcp_tools_config:
            if not isinstance(tool_config, dict):
                raise ValueError("mcp_tools_config must be a list of dictionaries")

            name = tool_config.get("name")
            description = tool_config.get("description")
            input_schema = tool_config.get("input_schema", {})
            handler_name = tool_config.get("handler")

            if not all([name, description, handler_name]):
                continue

            # Try to resolve the handler
            # First check if it's a module path (e.g., "module.submodule.function")
            if handler_name is None:
                raise ValueError(f"handler is required for tool {name}")
            handler = get_instance_fn(handler_name)

            if handler is None:
                verbose_logger.warning(
                    f"Warning: Could not find handler {handler_name} for tool {name}"
                )
                continue

            # Register the tool
            if name is None:
                raise ValueError(f"name is required for tool {name}")
            if description is None:
                raise ValueError(f"description is required for tool {name}")

            self.register_tool(
                name=name,
                description=description,
                input_schema=input_schema,
                handler=handler,
            )
        verbose_logger.debug(
            "all registered tools: %s", json.dumps(self.tools, indent=4, default=str)
        )


global_mcp_tool_registry = MCPToolRegistry()
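
A minimal sketch (not part of this diff) of a config shaped like what load_tools_from_config consumes; note the parameter is annotated Optional[Dict[str, Any]] but is iterated as a list of dicts, so a list is what the example passes. The handler string must be an importable path that get_instance_fn can resolve; "custom_tools.get_weather" is hypothetical.

# Illustrative config; "custom_tools.get_weather" is a hypothetical handler path.
from litellm.proxy._experimental.mcp_server.tool_registry import MCPToolRegistry

mcp_tools_config = [
    {
        "name": "get_weather",
        "description": "Return the current weather for a city",
        "input_schema": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
        "handler": "custom_tools.get_weather",
    }
]

registry = MCPToolRegistry()
registry.load_tools_from_config(mcp_tools_config)
# Entries missing name/description/handler are skipped; handlers that cannot be
# imported are skipped with a warning, per the loop above.
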
|
@ -0,0 +1 @@
|
||||||
|
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{96443:function(n,e,t){Promise.resolve().then(t.t.bind(t,39974,23)),Promise.resolve().then(t.t.bind(t,2778,23))},2778:function(){},39974:function(n){n.exports={style:{fontFamily:"'__Inter_cf7686', '__Inter_Fallback_cf7686'",fontStyle:"normal"},className:"__className_cf7686"}}},function(n){n.O(0,[919,986,971,117,744],function(){return n(n.s=96443)}),_N_E=n.O()}]);
|
|
@ -1 +0,0 @@
|
||||||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{6580:function(n,e,t){Promise.resolve().then(t.t.bind(t,39974,23)),Promise.resolve().then(t.t.bind(t,2778,23))},2778:function(){},39974:function(n){n.exports={style:{fontFamily:"'__Inter_cf7686', '__Inter_Fallback_cf7686'",fontStyle:"normal"},className:"__className_cf7686"}}},function(n){n.O(0,[919,986,971,117,744],function(){return n(n.s=6580)}),_N_E=n.O()}]);
|
|
|
@ -1 +1 @@
|
||||||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[418],{11790:function(e,n,u){Promise.resolve().then(u.bind(u,52829))},52829:function(e,n,u){"use strict";u.r(n),u.d(n,{default:function(){return f}});var t=u(57437),s=u(2265),r=u(99376),c=u(92699);function f(){let e=(0,r.useSearchParams)().get("key"),[n,u]=(0,s.useState)(null);return(0,s.useEffect)(()=>{e&&u(e)},[e]),(0,t.jsx)(c.Z,{accessToken:n,publicPage:!0,premiumUser:!1})}}},function(e){e.O(0,[42,261,250,699,971,117,744],function(){return e(e.s=11790)}),_N_E=e.O()}]);
|
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[418],{21024:function(e,n,u){Promise.resolve().then(u.bind(u,52829))},52829:function(e,n,u){"use strict";u.r(n),u.d(n,{default:function(){return f}});var t=u(57437),s=u(2265),r=u(99376),c=u(92699);function f(){let e=(0,r.useSearchParams)().get("key"),[n,u]=(0,s.useState)(null);return(0,s.useEffect)(()=>{e&&u(e)},[e]),(0,t.jsx)(c.Z,{accessToken:n,publicPage:!0,premiumUser:!1})}}},function(e){e.O(0,[42,261,250,699,971,117,744],function(){return e(e.s=21024)}),_N_E=e.O()}]);
|
|
@ -1 +0,0 @@
|
||||||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{32922:function(e,t,n){Promise.resolve().then(n.bind(n,12011))},12011:function(e,t,n){"use strict";n.r(t),n.d(t,{default:function(){return S}});var s=n(57437),o=n(2265),a=n(99376),i=n(20831),c=n(94789),l=n(12514),r=n(49804),u=n(67101),d=n(84264),m=n(49566),h=n(96761),x=n(84566),p=n(19250),f=n(14474),k=n(13634),j=n(73002),g=n(3914);function S(){let[e]=k.Z.useForm(),t=(0,a.useSearchParams)();(0,g.e)("token");let n=t.get("invitation_id"),[S,_]=(0,o.useState)(null),[w,Z]=(0,o.useState)(""),[N,b]=(0,o.useState)(""),[T,v]=(0,o.useState)(null),[y,E]=(0,o.useState)(""),[C,U]=(0,o.useState)("");return(0,o.useEffect)(()=>{n&&(0,p.W_)(n).then(e=>{let t=e.login_url;console.log("login_url:",t),E(t);let n=e.token,s=(0,f.o)(n);U(n),console.log("decoded:",s),_(s.key),console.log("decoded user email:",s.user_email),b(s.user_email),v(s.user_id)})},[n]),(0,s.jsx)("div",{className:"mx-auto w-full max-w-md mt-10",children:(0,s.jsxs)(l.Z,{children:[(0,s.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,s.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,s.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,s.jsx)(c.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,s.jsxs)(u.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,s.jsx)(r.Z,{children:"SSO is under the Enterprise Tirer."}),(0,s.jsx)(r.Z,{children:(0,s.jsx)(i.Z,{variant:"primary",className:"mb-2",children:(0,s.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,s.jsxs)(k.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",S,"token:",C,"formValues:",e),S&&C&&(e.user_email=N,T&&n&&(0,p.m_)(S,n,T,e.password).then(e=>{var t;let n="/ui/";n+="?userID="+((null===(t=e.data)||void 0===t?void 0:t.user_id)||e.user_id),document.cookie="token="+C,console.log("redirecting to:",n),window.location.href=n}))},children:[(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)(k.Z.Item,{label:"Email Address",name:"user_email",children:(0,s.jsx)(m.Z,{type:"email",disabled:!0,value:N,defaultValue:N,className:"max-w-md"})}),(0,s.jsx)(k.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,s.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,s.jsx)("div",{className:"mt-10",children:(0,s.jsx)(j.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}},3914:function(e,t,n){"use strict";function s(){let e=window.location.hostname,t=["Lax","Strict","None"];["/","/ui"].forEach(n=>{document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=".concat(n,";"),document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=".concat(n,"; domain=").concat(e,";"),t.forEach(t=>{let s="None"===t?" Secure;":"";document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=".concat(n,"; SameSite=").concat(t,";").concat(s),document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=".concat(n,"; domain=").concat(e,"; SameSite=").concat(t,";").concat(s)})}),console.log("After clearing cookies:",document.cookie)}function o(e){let t=document.cookie.split("; ").find(t=>t.startsWith(e+"="));return t?t.split("=")[1]:null}n.d(t,{b:function(){return s},e:function(){return o}})}},function(e){e.O(0,[665,42,899,250,971,117,744],function(){return e(e.s=32922)}),_N_E=e.O()}]);
|
|
|
@ -0,0 +1 @@
|
||||||
|
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[461],{8672:function(e,t,n){Promise.resolve().then(n.bind(n,12011))},12011:function(e,t,n){"use strict";n.r(t),n.d(t,{default:function(){return S}});var s=n(57437),o=n(2265),a=n(99376),i=n(20831),c=n(94789),l=n(12514),r=n(49804),u=n(67101),d=n(84264),m=n(49566),h=n(96761),x=n(84566),p=n(19250),f=n(14474),k=n(13634),j=n(73002),g=n(3914);function S(){let[e]=k.Z.useForm(),t=(0,a.useSearchParams)();(0,g.e)("token");let n=t.get("invitation_id"),[S,_]=(0,o.useState)(null),[w,Z]=(0,o.useState)(""),[N,b]=(0,o.useState)(""),[T,v]=(0,o.useState)(null),[y,E]=(0,o.useState)(""),[C,U]=(0,o.useState)("");return(0,o.useEffect)(()=>{n&&(0,p.W_)(n).then(e=>{let t=e.login_url;console.log("login_url:",t),E(t);let n=e.token,s=(0,f.o)(n);U(n),console.log("decoded:",s),_(s.key),console.log("decoded user email:",s.user_email),b(s.user_email),v(s.user_id)})},[n]),(0,s.jsx)("div",{className:"mx-auto w-full max-w-md mt-10",children:(0,s.jsxs)(l.Z,{children:[(0,s.jsx)(h.Z,{className:"text-sm mb-5 text-center",children:"\uD83D\uDE85 LiteLLM"}),(0,s.jsx)(h.Z,{className:"text-xl",children:"Sign up"}),(0,s.jsx)(d.Z,{children:"Claim your user account to login to Admin UI."}),(0,s.jsx)(c.Z,{className:"mt-4",title:"SSO",icon:x.GH$,color:"sky",children:(0,s.jsxs)(u.Z,{numItems:2,className:"flex justify-between items-center",children:[(0,s.jsx)(r.Z,{children:"SSO is under the Enterprise Tirer."}),(0,s.jsx)(r.Z,{children:(0,s.jsx)(i.Z,{variant:"primary",className:"mb-2",children:(0,s.jsx)("a",{href:"https://forms.gle/W3U4PZpJGFHWtHyA9",target:"_blank",children:"Get Free Trial"})})})]})}),(0,s.jsxs)(k.Z,{className:"mt-10 mb-5 mx-auto",layout:"vertical",onFinish:e=>{console.log("in handle submit. accessToken:",S,"token:",C,"formValues:",e),S&&C&&(e.user_email=N,T&&n&&(0,p.m_)(S,n,T,e.password).then(e=>{var t;let n="/ui/";n+="?userID="+((null===(t=e.data)||void 0===t?void 0:t.user_id)||e.user_id),document.cookie="token="+C,console.log("redirecting to:",n),window.location.href=n}))},children:[(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)(k.Z.Item,{label:"Email Address",name:"user_email",children:(0,s.jsx)(m.Z,{type:"email",disabled:!0,value:N,defaultValue:N,className:"max-w-md"})}),(0,s.jsx)(k.Z.Item,{label:"Password",name:"password",rules:[{required:!0,message:"password required to sign up"}],help:"Create a password for your account",children:(0,s.jsx)(m.Z,{placeholder:"",type:"password",className:"max-w-md"})})]}),(0,s.jsx)("div",{className:"mt-10",children:(0,s.jsx)(j.ZP,{htmlType:"submit",children:"Sign Up"})})]})]})})}},3914:function(e,t,n){"use strict";function s(){let e=window.location.hostname,t=["Lax","Strict","None"];["/","/ui"].forEach(n=>{document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=".concat(n,";"),document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=".concat(n,"; domain=").concat(e,";"),t.forEach(t=>{let s="None"===t?" Secure;":"";document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=".concat(n,"; SameSite=").concat(t,";").concat(s),document.cookie="token=; expires=Thu, 01 Jan 1970 00:00:00 UTC; path=".concat(n,"; domain=").concat(e,"; SameSite=").concat(t,";").concat(s)})}),console.log("After clearing cookies:",document.cookie)}function o(e){let t=document.cookie.split("; ").find(t=>t.startsWith(e+"="));return t?t.split("=")[1]:null}n.d(t,{b:function(){return s},e:function(){return o}})}},function(e){e.O(0,[665,42,899,250,971,117,744],function(){return e(e.s=8672)}),_N_E=e.O()}]);
|
|
@ -1 +1 @@
|
||||||
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{20169:function(e,n,t){Promise.resolve().then(t.t.bind(t,12846,23)),Promise.resolve().then(t.t.bind(t,19107,23)),Promise.resolve().then(t.t.bind(t,61060,23)),Promise.resolve().then(t.t.bind(t,4707,23)),Promise.resolve().then(t.t.bind(t,80,23)),Promise.resolve().then(t.t.bind(t,36423,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,117],function(){return n(54278),n(20169)}),_N_E=e.O()}]);
|
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{10264:function(e,n,t){Promise.resolve().then(t.t.bind(t,12846,23)),Promise.resolve().then(t.t.bind(t,19107,23)),Promise.resolve().then(t.t.bind(t,61060,23)),Promise.resolve().then(t.t.bind(t,4707,23)),Promise.resolve().then(t.t.bind(t,80,23)),Promise.resolve().then(t.t.bind(t,36423,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,117],function(){return n(54278),n(10264)}),_N_E=e.O()}]);
|
|
@ -0,0 +1,5 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<svg width="46" height="46" viewBox="0 0 46 46" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<circle cx="23" cy="23" r="23" fill="white"/>
|
||||||
|
<path d="M32.73 7h-6.945L38.45 39h6.945L32.73 7ZM12.665 7 0 39h7.082l2.59-6.72h13.25l2.59 6.72h7.082L19.929 7h-7.264Zm-.702 19.337 4.334-11.246 4.334 11.246h-8.668Z" fill="#000000"></path>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 381 B |
After Width: | Height: | Size: 414 B |
34  litellm/proxy/_experimental/out/assets/logos/aws.svg  Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<!-- Generator: Adobe Illustrator 26.0.3, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
|
||||||
|
<svg version="1.0" id="katman_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
|
||||||
|
viewBox="0 0 600 450" style="enable-background:new 0 0 600 450;" xml:space="preserve">
|
||||||
|
<style type="text/css">
|
||||||
|
.st0{fill:none;}
|
||||||
|
.st1{fill-rule:evenodd;clip-rule:evenodd;fill:#343B45;}
|
||||||
|
.st2{fill-rule:evenodd;clip-rule:evenodd;fill:#F4981A;}
|
||||||
|
</style>
|
||||||
|
<g id="_x31__stroke">
|
||||||
|
<g id="Amazon_1_">
|
||||||
|
<rect x="161.2" y="86.5" class="st0" width="277.8" height="277.8"/>
|
||||||
|
<g id="Amazon">
|
||||||
|
<path class="st1" d="M315,163.7c-8,0.6-17.2,1.2-26.4,2.4c-14.1,1.9-28.2,4.3-39.8,9.8c-22.7,9.2-38,28.8-38,57.6
|
||||||
|
c0,36.2,23.3,54.6,52.7,54.6c9.8,0,17.8-1.2,25.1-3.1c11.7-3.7,21.5-10.4,33.1-22.7c6.7,9.2,8.6,13.5,20.2,23.3
|
||||||
|
c3.1,1.2,6.1,1.2,8.6-0.6c7.4-6.1,20.3-17.2,27-23.3c3.1-2.5,2.5-6.1,0.6-9.2c-6.7-8.6-13.5-16-13.5-32.5V165
|
||||||
|
c0-23.3,1.9-44.8-15.3-60.7c-14.1-12.9-36.2-17.8-53.4-17.8h-7.4c-31.2,1.8-64.3,15.3-71.7,54c-1.2,4.9,2.5,6.8,4.9,7.4l34.3,4.3
|
||||||
|
c3.7-0.6,5.5-3.7,6.1-6.7c3.1-13.5,14.1-20.2,26.3-21.5h2.5c7.4,0,15.3,3.1,19.6,9.2c4.9,7.4,4.3,17.2,4.3,25.8L315,163.7
|
||||||
|
L315,163.7z M308.2,236.7c-4.3,8.6-11.7,14.1-19.6,16c-1.2,0-3.1,0.6-4.9,0.6c-13.5,0-21.4-10.4-21.4-25.8
|
||||||
|
c0-19.6,11.6-28.8,26.3-33.1c8-1.8,17.2-2.5,26.4-2.5v7.4C315,213.4,315.6,224.4,308.2,236.7z"/>
|
||||||
|
<path class="st2" d="M398.8,311.4c-1.4,0-2.8,0.3-4.1,0.9c-1.5,0.6-3,1.3-4.4,1.9l-2.1,0.9l-2.7,1.1v0
|
||||||
|
c-29.8,12.1-61.1,19.2-90.1,19.8c-1.1,0-2.1,0-3.2,0c-45.6,0-82.8-21.1-120.3-42c-1.3-0.7-2.7-1-4-1c-1.7,0-3.4,0.6-4.7,1.8
|
||||||
|
c-1.3,1.2-2,2.9-2,4.7c0,2.3,1.2,4.4,2.9,5.7c35.2,30.6,73.8,59,125.7,59c1,0,2,0,3.1,0c33-0.7,70.3-11.9,99.3-30.1l0.2-0.1
|
||||||
|
c3.8-2.3,7.6-4.9,11.2-7.7c2.2-1.6,3.8-4.2,3.8-6.9C407.2,314.6,403.2,311.4,398.8,311.4z M439,294.5L439,294.5
|
||||||
|
c-0.1-2.9-0.7-5.1-1.9-6.9l-0.1-0.2l-0.1-0.2c-1.2-1.3-2.4-1.8-3.7-2.4c-3.8-1.5-9.3-2.3-16-2.3c-4.8,0-10.1,0.5-15.4,1.6l0-0.4
|
||||||
|
l-5.3,1.8l-0.1,0l-3,1v0.1c-3.5,1.5-6.8,3.3-9.8,5.5c-1.9,1.4-3.4,3.2-3.5,6.1c0,1.5,0.7,3.3,2,4.3c1.3,1,2.8,1.4,4.1,1.4
|
||||||
|
c0.3,0,0.6,0,0.9-0.1l0.3,0l0.2,0c2.6-0.6,6.4-0.9,10.9-1.6c3.8-0.4,7.9-0.7,11.4-0.7c2.5,0,4.7,0.2,6.3,0.5
|
||||||
|
c0.8,0.2,1.3,0.4,1.6,0.5c0.1,0,0.2,0.1,0.2,0.1c0.1,0.2,0.2,0.8,0.1,1.5c0,2.9-1.2,8.4-2.9,13.7c-1.7,5.3-3.7,10.7-5,14.2
|
||||||
|
c-0.3,0.8-0.5,1.7-0.5,2.7c0,1.4,0.6,3.2,1.8,4.3c1.2,1.1,2.8,1.6,4.1,1.6h0.1c2,0,3.6-0.8,5.1-1.9
|
||||||
|
c13.6-12.2,18.3-31.7,18.5-42.6L439,294.5z"/>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 2.5 KiB |
1  litellm/proxy/_experimental/out/assets/logos/bedrock.svg  Normal file
|
@ -0,0 +1 @@
|
||||||
|
<svg height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>Bedrock</title><defs><linearGradient id="lobe-icons-bedrock-fill" x1="80%" x2="20%" y1="20%" y2="80%"><stop offset="0%" stop-color="#6350FB"></stop><stop offset="50%" stop-color="#3D8FFF"></stop><stop offset="100%" stop-color="#9AD8F8"></stop></linearGradient></defs><path d="M13.05 15.513h3.08c.214 0 .389.177.389.394v1.82a1.704 1.704 0 011.296 1.661c0 .943-.755 1.708-1.685 1.708-.931 0-1.686-.765-1.686-1.708 0-.807.554-1.484 1.297-1.662v-1.425h-2.69v4.663a.395.395 0 01-.188.338l-2.69 1.641a.385.385 0 01-.405-.002l-4.926-3.086a.395.395 0 01-.185-.336V16.3L2.196 14.87A.395.395 0 012 14.555L2 14.528V9.406c0-.14.073-.27.192-.34l2.465-1.462V4.448c0-.129.062-.249.165-.322l.021-.014L9.77 1.058a.385.385 0 01.407 0l2.69 1.675a.395.395 0 01.185.336V7.6h3.856V5.683a1.704 1.704 0 01-1.296-1.662c0-.943.755-1.708 1.685-1.708.931 0 1.685.765 1.685 1.708 0 .807-.553 1.484-1.296 1.662v2.311a.391.391 0 01-.389.394h-4.245v1.806h6.624a1.69 1.69 0 011.64-1.313c.93 0 1.685.764 1.685 1.707 0 .943-.754 1.708-1.685 1.708a1.69 1.69 0 01-1.64-1.314H13.05v1.937h4.953l.915 1.18a1.66 1.66 0 01.84-.227c.931 0 1.685.764 1.685 1.707 0 .943-.754 1.708-1.685 1.708-.93 0-1.685-.765-1.685-1.708 0-.346.102-.668.276-.937l-.724-.935H13.05v1.806zM9.973 1.856L7.93 3.122V6.09h-.778V3.604L5.435 4.669v2.945l2.11 1.36L9.712 7.61V5.334h.778V7.83c0 .136-.07.263-.184.335L7.963 9.638v2.081l1.422 1.009-.446.646-1.406-.998-1.53 1.005-.423-.66 1.605-1.055v-1.99L5.038 8.29l-2.26 1.34v1.676l1.972-1.189.398.677-2.37 1.429V14.3l2.166 1.258 2.27-1.368.397.677-2.176 1.311V19.3l1.876 1.175 2.365-1.426.398.678-2.017 1.216 1.918 1.201 2.298-1.403v-5.78l-4.758 2.893-.4-.675 5.158-3.136V3.289L9.972 1.856zM16.13 18.47a.913.913 0 00-.908.92c0 .507.406.918.908.918a.913.913 0 00.907-.919.913.913 0 00-.907-.92zm3.63-3.81a.913.913 0 00-.908.92c0 .508.406.92.907.92a.913.913 0 00.908-.92.913.913 0 00-.908-.92zm1.555-4.99a.913.913 0 00-.908.92c0 .507.407.918.908.918a.913.913 0 00.907-.919.913.913 0 00-.907-.92zM17.296 3.1a.913.913 0 00-.907.92c0 .508.406.92.907.92a.913.913 0 00.908-.92.913.913 0 00-.908-.92z" fill="url(#lobe-icons-bedrock-fill)" fill-rule="nonzero"></path></svg>
|
After Width: | Height: | Size: 2.2 KiB |
89  litellm/proxy/_experimental/out/assets/logos/cerebras.svg  Normal file
|
@ -0,0 +1,89 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<!-- Generator: Adobe Illustrator 26.0.3, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
|
||||||
|
<svg version="1.0" id="katman_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
|
||||||
|
viewBox="0 0 800 600" style="enable-background:new 0 0 800 600;" xml:space="preserve">
|
||||||
|
<style type="text/css">
|
||||||
|
.st0{fill-rule:evenodd;clip-rule:evenodd;fill:#F05A28;}
|
||||||
|
.st1{fill-rule:evenodd;clip-rule:evenodd;fill:#231F20;}
|
||||||
|
</style>
|
||||||
|
<g id="Contact">
|
||||||
|
<g id="Contact-us" transform="translate(-234.000000, -1114.000000)">
|
||||||
|
<g id="map" transform="translate(-6.000000, 1027.000000)">
|
||||||
|
<g id="Contact-box" transform="translate(190.000000, 36.000000)">
|
||||||
|
<g id="Group-26" transform="translate(50.000000, 51.000000)">
|
||||||
|
<g id="Group-3">
|
||||||
|
<path id="Fill-1" class="st0" d="M220.9,421c-17,0-33.1-3.4-47.8-9.5c-22-9.2-40.8-24.6-54.1-44c-13.3-19.4-21-42.7-21-67.9
|
||||||
|
c0-16.8,3.4-32.7,9.7-47.3c9.3-21.8,24.9-40.3,44.5-53.4c19.6-13.1,43.2-20.7,68.7-20.7v-18.3c-19.5,0-38.1,3.9-55.1,11
|
||||||
|
c-25.4,10.6-47,28.3-62.2,50.6c-15.3,22.3-24.2,49.2-24.2,78.1c0,19.3,4,37.7,11.1,54.4c10.7,25.1,28.7,46.4,51.2,61.5
|
||||||
|
c22.6,15.1,49.8,23.9,79.1,23.9V421z"/>
|
||||||
|
<path id="Fill-4" class="st0" d="M157.9,374.1c-11.5-9.6-20.1-21.2-25.9-33.9c-5.8-12.7-8.8-26.4-8.8-40.2
|
||||||
|
c0-11,1.9-22,5.6-32.5c3.8-10.5,9.4-20.5,17.1-29.6c9.6-11.4,21.3-20,34-25.8c12.7-5.8,26.6-8.7,40.4-8.7
|
||||||
|
c11,0,22.1,1.9,32.6,5.6c10.6,3.8,20.6,9.4,29.7,17l11.9-14.1c-10.8-9-22.8-15.8-35.4-20.2c-12.6-4.5-25.7-6.7-38.8-6.7
|
||||||
|
c-16.5,0-32.9,3.5-48.1,10.4c-15.2,6.9-29.1,17.2-40.5,30.7c-9.1,10.8-15.8,22.7-20.3,35.2c-4.5,12.5-6.7,25.6-6.7,38.7
|
||||||
|
c0,16.4,3.5,32.8,10.4,47.9c6.9,15.1,17.3,29,30.9,40.3L157.9,374.1z"/>
|
||||||
|
<path id="Fill-6" class="st0" d="M186.4,362.2c-12.1-6.4-21.6-15.7-28.1-26.6c-6.5-10.9-9.9-23.5-9.9-36.2
|
||||||
|
c0-11.2,2.6-22.5,8.3-33c6.4-12.1,15.8-21.5,26.8-27.9c11-6.5,23.6-9.9,36.4-9.9c11.2,0,22.6,2.6,33.2,8.2l8.6-16.3
|
||||||
|
c-13.3-7-27.7-10.4-41.9-10.3c-16.1,0-32,4.3-45.8,12.4c-13.8,8.1-25.7,20.1-33.7,35.2c-7,13.3-10.4,27.6-10.4,41.6
|
||||||
|
c0,16,4.3,31.8,12.5,45.5c8.2,13.8,20.2,25.5,35.4,33.5L186.4,362.2z"/>
|
||||||
|
<path id="Fill-8" class="st0" d="M221,344.6c-6.3,0-12.3-1.3-17.7-3.6c-8.2-3.4-15.1-9.2-20-16.5c-4.9-7.3-7.8-16-7.8-25.4
|
||||||
|
c0-6.3,1.3-12.3,3.6-17.7c3.4-8.1,9.2-15.1,16.5-20c7.3-4.9,16-7.8,25.4-7.8v-18.4c-8.8,0-17.2,1.8-24.9,5
|
||||||
|
c-11.5,4.9-21.2,12.9-28.1,23.1C161,273.6,157,286,157,299.2c0,8.8,1.8,17.2,5,24.9c4.9,11.5,13,21.2,23.2,28.1
|
||||||
|
C195.4,359,207.7,363,221,363V344.6z"/>
|
||||||
|
</g>
|
||||||
|
<g id="Group" transform="translate(22.000000, 13.000000)">
|
||||||
|
<path id="Fill-10" class="st1" d="M214,271.6c-2.1-2.2-4.4-4-6.7-5.3c-2.3-1.3-4.7-2-7.2-2c-3.4,0-6.3,0.6-9,1.8
|
||||||
|
c-2.6,1.2-4.9,2.8-6.8,4.9c-1.9,2-3.3,4.4-4.3,7c-1,2.6-1.4,5.4-1.4,8.2c0,2.8,0.5,5.6,1.4,8.2c1,2.6,2.4,5,4.3,7
|
||||||
|
c1.9,2,4.1,3.7,6.8,4.9c2.6,1.2,5.6,1.8,9,1.8c2.8,0,5.5-0.6,7.9-1.7c2.4-1.2,4.5-2.9,6.2-5.1l12.2,13.1
|
||||||
|
c-1.8,1.8-3.9,3.4-6.3,4.7c-2.4,1.3-4.8,2.4-7.2,3.2s-4.8,1.4-7,1.7c-2.2,0.4-4.2,0.5-5.8,0.5c-5.5,0-10.7-0.9-15.5-2.7
|
||||||
|
c-4.9-1.8-9.1-4.4-12.6-7.8c-3.6-3.3-6.4-7.4-8.5-12.1c-2.1-4.7-3.1-10-3.1-15.7c0-5.8,1-11,3.1-15.7
|
||||||
|
c2.1-4.7,4.9-8.7,8.5-12.1c3.6-3.3,7.8-5.9,12.6-7.8c4.9-1.8,10.1-2.7,15.5-2.7c4.7,0,9.4,0.9,14.1,2.7
|
||||||
|
c4.7,1.8,8.9,4.6,12.4,8.4L214,271.6z"/>
|
||||||
|
<path id="Fill-12" class="st1" d="M280.4,278.9c-0.1-5.4-1.8-9.6-5-12.7c-3.3-3.1-7.8-4.6-13.6-4.6c-5.5,0-9.8,1.6-13,4.7
|
||||||
|
c-3.2,3.1-5.2,7.4-5.9,12.6H280.4z M243,292.6c0.6,5.5,2.7,9.7,6.4,12.8c3.7,3,8.1,4.6,13.3,4.6c4.6,0,8.4-0.9,11.5-2.8
|
||||||
|
c3.1-1.9,5.8-4.2,8.2-7.1l13.1,9.9c-4.3,5.3-9,9-14.3,11.3c-5.3,2.2-10.8,3.3-16.6,3.3c-5.5,0-10.7-0.9-15.5-2.7
|
||||||
|
c-4.9-1.8-9.1-4.4-12.6-7.8c-3.6-3.3-6.4-7.4-8.5-12.1c-2.1-4.7-3.1-10-3.1-15.7c0-5.8,1-11,3.1-15.7
|
||||||
|
c2.1-4.7,4.9-8.7,8.5-12.1c3.6-3.3,7.8-5.9,12.6-7.8c4.9-1.8,10.1-2.7,15.5-2.7c5.1,0,9.7,0.9,13.9,2.7
|
||||||
|
c4.2,1.8,7.8,4.3,10.8,7.7c3,3.3,5.3,7.5,7,12.4c1.7,4.9,2.5,10.6,2.5,17v5H243z"/>
|
||||||
|
<path id="Fill-14" class="st1" d="M306.5,249.7h18.3v11.5h0.3c2-4.3,4.9-7.5,8.7-9.9c3.8-2.3,8.1-3.5,12.9-3.5
|
||||||
|
c1.1,0,2.2,0.1,3.3,0.3c1.1,0.2,2.2,0.5,3.3,0.8v17.6c-1.5-0.4-3-0.7-4.5-1c-1.5-0.3-2.9-0.4-4.3-0.4c-4.3,0-7.7,0.8-10.3,2.4
|
||||||
|
c-2.6,1.6-4.6,3.4-5.9,5.4c-1.4,2-2.3,4.1-2.7,6.1c-0.5,2-0.7,3.5-0.7,4.6v39h-18.3V249.7z"/>
|
||||||
|
<path id="Fill-16" class="st1" d="M409,278.9c-0.1-5.4-1.8-9.6-5-12.7c-3.3-3.1-7.8-4.6-13.6-4.6c-5.5,0-9.8,1.6-13,4.7
|
||||||
|
c-3.2,3.1-5.2,7.4-5.9,12.6H409z M371.6,292.6c0.6,5.5,2.7,9.7,6.4,12.8c3.7,3,8.1,4.6,13.3,4.6c4.6,0,8.4-0.9,11.5-2.8
|
||||||
|
c3.1-1.9,5.8-4.2,8.2-7.1l13.1,9.9c-4.3,5.3-9,9-14.3,11.3c-5.3,2.2-10.8,3.3-16.6,3.3c-5.5,0-10.7-0.9-15.5-2.7
|
||||||
|
c-4.9-1.8-9.1-4.4-12.6-7.8c-3.6-3.3-6.4-7.4-8.5-12.1c-2.1-4.7-3.1-10-3.1-15.7c0-5.8,1-11,3.1-15.7
|
||||||
|
c2.1-4.7,4.9-8.7,8.5-12.1c3.6-3.3,7.8-5.9,12.6-7.8c4.9-1.8,10.1-2.7,15.5-2.7c5.1,0,9.7,0.9,13.9,2.7
|
||||||
|
c4.2,1.8,7.8,4.3,10.8,7.7c3,3.3,5.3,7.5,7,12.4c1.7,4.9,2.5,10.6,2.5,17v5H371.6z"/>
|
||||||
|
<path id="Fill-18" class="st1" d="M494.6,286.2c0-2.8-0.5-5.6-1.5-8.2c-1-2.6-2.4-5-4.3-7c-1.9-2-4.2-3.7-6.9-4.9
|
||||||
|
c-2.7-1.2-5.7-1.8-9.1-1.8c-3.4,0-6.4,0.6-9.1,1.8c-2.7,1.2-5,2.8-6.9,4.9c-1.9,2-3.3,4.4-4.3,7c-1,2.6-1.5,5.4-1.5,8.2
|
||||||
|
c0,2.8,0.5,5.6,1.5,8.2c1,2.6,2.4,5,4.3,7c1.9,2,4.2,3.7,6.9,4.9c2.7,1.2,5.7,1.8,9.1,1.8c3.4,0,6.4-0.6,9.1-1.8
|
||||||
|
c2.7-1.2,5-2.8,6.9-4.9c1.9-2,3.3-4.4,4.3-7C494.1,291.8,494.6,289,494.6,286.2L494.6,286.2z M433.2,207.6h18.5v51.3h0.5
|
||||||
|
c0.9-1.2,2.1-2.5,3.5-3.7c1.4-1.3,3.2-2.5,5.2-3.6c2.1-1.1,4.4-2,7.1-2.7c2.7-0.7,5.8-1.1,9.3-1.1c5.2,0,10.1,1,14.5,3
|
||||||
|
c4.4,2,8.2,4.7,11.3,8.1c3.1,3.5,5.6,7.5,7.3,12.2c1.7,4.7,2.6,9.7,2.6,15.1c0,5.4-0.8,10.4-2.5,15.1
|
||||||
|
c-1.6,4.7-4.1,8.7-7.2,12.2c-3.2,3.5-7,6.2-11.6,8.1c-4.5,2-9.6,3-15.3,3c-5.2,0-10.1-1-14.7-3c-4.5-2-8.1-5.3-10.8-9.7h-0.3
|
||||||
|
v11h-17.6V207.6z"/>
|
||||||
|
<path id="Fill-20" class="st1" d="M520.9,249.7h18.3v11.5h0.3c2-4.3,4.9-7.5,8.7-9.9c3.8-2.3,8.1-3.5,12.9-3.5
|
||||||
|
c1.1,0,2.2,0.1,3.3,0.3c1.1,0.2,2.2,0.5,3.3,0.8v17.6c-1.5-0.4-3-0.7-4.5-1c-1.5-0.3-2.9-0.4-4.3-0.4c-4.3,0-7.7,0.8-10.3,2.4
|
||||||
|
c-2.6,1.6-4.6,3.4-5.9,5.4c-1.4,2-2.3,4.1-2.7,6.1c-0.5,2-0.7,3.5-0.7,4.6v39h-18.3V249.7z"/>
|
||||||
|
<path id="Fill-22" class="st1" d="M616,290h-3.9c-2.6,0-5.5,0.1-8.7,0.3c-3.2,0.2-6.2,0.7-9.1,1.4c-2.8,0.8-5.2,1.9-7.2,3.3
|
||||||
|
c-2,1.5-2.9,3.5-2.9,6.2c0,1.7,0.4,3.2,1.2,4.3c0.8,1.2,1.8,2.2,3,3c1.2,0.8,2.6,1.4,4.2,1.8c1.5,0.4,3.1,0.5,4.6,0.5
|
||||||
|
c6.4,0,11.1-1.5,14.2-4.5c3-3,4.6-7.1,4.6-12.2V290z M617.1,312.7h-0.5c-2.7,4.2-6.1,7.2-10.2,9.1c-4.1,1.9-8.7,2.8-13.6,2.8
|
||||||
|
c-3.4,0-6.7-0.5-10-1.4s-6.1-2.3-8.7-4.1c-2.5-1.8-4.6-4.1-6.1-6.8s-2.3-5.9-2.3-9.6c0-4,0.7-7.3,2.2-10.1
|
||||||
|
c1.4-2.8,3.4-5.1,5.8-7c2.4-1.9,5.2-3.4,8.4-4.5c3.2-1.1,6.5-2,10-2.5c3.5-0.6,6.9-0.9,10.5-1.1c3.5-0.2,6.8-0.2,9.9-0.2h4.6
|
||||||
|
v-2c0-4.6-1.6-8-4.8-10.3c-3.2-2.3-7.3-3.4-12.2-3.4c-3.9,0-7.6,0.7-11,2.1c-3.4,1.4-6.4,3.2-8.8,5.6l-9.8-9.6
|
||||||
|
c4.1-4.2,9-7.1,14.5-9c5.5-1.8,11.2-2.7,17.1-2.7c5.3,0,9.7,0.6,13.3,1.7c3.6,1.2,6.6,2.7,9,4.5c2.4,1.8,4.2,3.9,5.5,6.3
|
||||||
|
c1.3,2.4,2.2,4.8,2.8,7.2c0.6,2.4,0.9,4.8,1,7.1c0.1,2.3,0.2,4.3,0.2,6v42h-16.7V312.7z"/>
|
||||||
|
<path id="Fill-24" class="st1" d="M683.6,269.9c-3.6-5-8.4-7.5-14.4-7.5c-2.5,0-4.9,0.6-7.2,1.8c-2.4,1.2-3.5,3.2-3.5,5.9
|
||||||
|
c0,2.2,1,3.9,2.9,4.9c1.9,1,4.4,1.9,7.4,2.6c3,0.7,6.2,1.4,9.6,2.2c3.4,0.8,6.6,1.9,9.6,3.5c3,1.6,5.4,3.7,7.4,6.5
|
||||||
|
c1.9,2.7,2.9,6.5,2.9,11.3c0,4.4-0.9,8-2.8,11c-1.9,3-4.3,5.4-7.4,7.2c-3,1.8-6.4,3.1-10.2,4c-3.8,0.8-7.6,1.2-11.3,1.2
|
||||||
|
c-5.7,0-11-0.8-15.8-2.4c-4.8-1.6-9.1-4.6-12.9-8.8l12.3-11.4c2.4,2.6,4.9,4.8,7.6,6.5c2.7,1.7,6,2.5,9.9,2.5
|
||||||
|
c1.3,0,2.7-0.2,4.1-0.5c1.4-0.3,2.8-0.8,4-1.5c1.2-0.7,2.2-1.6,3-2.7c0.8-1.1,1.1-2.3,1.1-3.7c0-2.5-1-4.4-2.9-5.6
|
||||||
|
c-1.9-1.2-4.4-2.2-7.4-3c-3-0.8-6.2-1.5-9.6-2.1c-3.4-0.7-6.6-1.7-9.6-3.2c-3-1.5-5.4-3.5-7.4-6.2c-1.9-2.6-2.9-6.3-2.9-11
|
||||||
|
c0-4.1,0.8-7.6,2.5-10.6c1.7-3,3.9-5.4,6.7-7.4c2.8-1.9,5.9-3.3,9.5-4.3c3.6-0.9,7.2-1.4,10.9-1.4c4.9,0,9.8,0.8,14.6,2.5
|
||||||
|
c4.8,1.7,8.7,4.5,11.7,8.6L683.6,269.9z"/>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 8 KiB |
1  litellm/proxy/_experimental/out/assets/logos/cohere.svg  Normal file
|
@ -0,0 +1 @@
|
||||||
|
<svg xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.w3.org/2000/svg" xml:space="preserve" style="enable-background:new 0 0 75 75" viewBox="0 0 75 75" width="75" height="75" ><path d="M24.3 44.7c2 0 6-.1 11.6-2.4 6.5-2.7 19.3-7.5 28.6-12.5 6.5-3.5 9.3-8.1 9.3-14.3C73.8 7 66.9 0 58.3 0h-36C10 0 0 10 0 22.3s9.4 22.4 24.3 22.4z" style="fill-rule:evenodd;clip-rule:evenodd;fill:#39594d"/><path d="M30.4 60c0-6 3.6-11.5 9.2-13.8l11.3-4.7C62.4 36.8 75 45.2 75 57.6 75 67.2 67.2 75 57.6 75H45.3c-8.2 0-14.9-6.7-14.9-15z" style="fill-rule:evenodd;clip-rule:evenodd;fill:#d18ee2"/><path d="M12.9 47.6C5.8 47.6 0 53.4 0 60.5v1.7C0 69.2 5.8 75 12.9 75c7.1 0 12.9-5.8 12.9-12.9v-1.7c-.1-7-5.8-12.8-12.9-12.8z" style="fill:#ff7759"/></svg>
|
After Width: | Height: | Size: 742 B |
|
@ -0,0 +1 @@
|
||||||
|
<svg height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>DBRX</title><path d="M21.821 9.894l-9.81 5.595L1.505 9.511 1 9.787v4.34l11.01 6.256 9.811-5.574v2.297l-9.81 5.596-10.506-5.979L1 17v.745L12.01 24 23 17.745v-4.34l-.505-.277-10.484 5.957-9.832-5.574v-2.298l9.832 5.574L23 10.532V6.255l-.547-.319-10.442 5.936-9.327-5.276 9.327-5.298 7.663 4.362.673-.383v-.532L12.011 0 1 6.255v.681l11.01 6.255 9.811-5.595z" fill="#EE3D2C" fill-rule="nonzero"></path></svg>
|
After Width: | Height: | Size: 528 B |
25  litellm/proxy/_experimental/out/assets/logos/deepseek.svg  Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<!-- Generator: Adobe Illustrator 25.4.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
|
||||||
|
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
|
||||||
|
viewBox="0 0 292.6 215.3" style="enable-background:new 0 0 292.6 215.3;" xml:space="preserve">
|
||||||
|
<style type="text/css">
|
||||||
|
.st0{fill:#566AB2;}
|
||||||
|
</style>
|
||||||
|
<path class="st0" d="M191.3,123.7c-2.4,1-4.9,1.8-7.2,1.9c-3.6,0.2-7.6-1.3-9.7-3.1c-3.3-2.8-5.7-4.4-6.7-9.2
|
||||||
|
c-0.4-2.1-0.2-5.3,0.2-7.2c0.9-4-0.1-6.5-2.9-8.9c-2.3-1.9-5.2-2.4-8.4-2.4s-2.3-0.5-3.1-1c-1.3-0.7-2.4-2.3-1.4-4.4
|
||||||
|
c0.3-0.7,2-2.3,2.3-2.5c4.3-2.5,9.4-1.7,14,0.2c4.3,1.7,7.5,5,12.2,9.5c4.8,5.5,5.6,7,8.4,11.1c2.1,3.2,4.1,6.6,5.4,10.4
|
||||||
|
C195.2,120.5,194.2,122.4,191.3,123.7L191.3,123.7z M153.4,104.3c0-2.1,1.7-3.7,3.8-3.7s0.9,0.1,1.3,0.2c0.5,0.2,1,0.5,1.4,0.9
|
||||||
|
c0.7,0.7,1.1,1.6,1.1,2.6c0,2.1-1.7,3.8-3.8,3.8s-3.7-1.7-3.7-3.8H153.4z M141.2,182.8c-25.5-20-37.8-26.6-42.9-26.3
|
||||||
|
c-4.8,0.3-3.9,5.7-2.8,9.3c1.1,3.5,2.5,5.9,4.5,9c1.4,2,2.3,5.1-1.4,7.3c-8.2,5.1-22.5-1.7-23.1-2c-16.6-9.8-30.5-22.7-40.2-40.3
|
||||||
|
c-9.5-17-14.9-35.2-15.8-54.6c-0.2-4.7,1.1-6.4,5.8-7.2c6.2-1.1,12.5-1.4,18.7-0.5c26,3.8,48.1,15.4,66.7,33.8
|
||||||
|
c10.6,10.5,18.6,23,26.8,35.2c8.8,13,18.2,25.4,30.2,35.5c4.3,3.6,7.6,6.3,10.9,8.2c-9.8,1.1-26.1,1.3-37.2-7.5L141.2,182.8z
|
||||||
|
M289.5,18c-3.1-1.5-4.4,1.4-6.3,2.8c-0.6,0.5-1.1,1.1-1.7,1.7c-4.5,4.8-9.8,8-16.8,7.6c-10.1-0.6-18.7,2.6-26.4,10.4
|
||||||
|
c-1.6-9.5-7-15.2-15.2-18.9c-4.3-1.9-8.6-3.8-11.6-7.9c-2.1-2.9-2.7-6.2-3.7-9.4c-0.7-2-1.3-3.9-3.6-4.3c-2.4-0.4-3.4,1.7-4.3,3.4
|
||||||
|
c-3.8,7-5.3,14.6-5.2,22.4c0.3,17.5,7.7,31.5,22.4,41.4c1.7,1.1,2.1,2.3,1.6,3.9c-1,3.4-2.2,6.7-3.3,10.1c-0.7,2.2-1.7,2.7-4,1.7
|
||||||
|
c-8.1-3.4-15-8.4-21.2-14.4c-10.4-10.1-19.9-21.2-31.6-30c-2.8-2.1-5.5-4-8.4-5.7c-12-11.7,1.6-21.3,4.7-22.4
|
||||||
|
c3.3-1.2,1.2-5.3-9.5-5.2c-10.6,0-20.3,3.6-32.8,8.4c-1.8,0.7-3.7,1.2-5.7,1.7c-11.3-2.1-22.9-2.6-35.1-1.2
|
||||||
|
c-23,2.5-41.4,13.4-54.8,32C1,68.3-2.8,93.6,1.9,120c4.9,27.8,19.1,50.9,41,68.9c22.6,18.7,48.7,27.8,78.5,26.1
|
||||||
|
c18.1-1,38.2-3.5,60.9-22.7c5.7,2.8,11.7,4,21.7,4.8c7.7,0.7,15.1-0.4,20.8-1.5c9-1.9,8.4-10.2,5.1-11.7
|
||||||
|
c-26.3-12.3-20.5-7.3-25.7-11.3c13.3-15.8,33.5-32.2,41.3-85.4c0.6-4.2,0.1-6.9,0-10.3c0-2.1,0.4-2.9,2.8-3.1
|
||||||
|
c6.6-0.8,13-2.6,18.8-5.8c17-9.3,23.9-24.6,25.5-42.9c0.2-2.8,0-5.7-3-7.2L289.5,18z"/>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 2.3 KiB |
|
@ -0,0 +1 @@
|
||||||
|
<svg height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>Fireworks</title><path clip-rule="evenodd" d="M14.8 5l-2.801 6.795L9.195 5H7.397l3.072 7.428a1.64 1.64 0 003.038.002L16.598 5H14.8zm1.196 10.352l5.124-5.244-.699-1.669-5.596 5.739a1.664 1.664 0 00-.343 1.807 1.642 1.642 0 001.516 1.012L16 17l8-.02-.699-1.669-7.303.041h-.002zM2.88 10.104l.699-1.669 5.596 5.739c.468.479.603 1.189.343 1.807a1.643 1.643 0 01-1.516 1.012l-8-.018-.002.002.699-1.669 7.303.042-5.122-5.246z" fill="#5019C5" fill-rule="evenodd"></path></svg>
|
After Width: | Height: | Size: 592 B |
2  litellm/proxy/_experimental/out/assets/logos/google.svg  Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<svg viewBox="0 0 16 16" xmlns="http://www.w3.org/2000/svg" fill="none"><path fill="#4285F4" d="M14.9 8.161c0-.476-.039-.954-.121-1.422h-6.64v2.695h3.802a3.24 3.24 0 01-1.407 2.127v1.75h2.269c1.332-1.22 2.097-3.02 2.097-5.15z"/><path fill="#34A853" d="M8.14 15c1.898 0 3.499-.62 4.665-1.69l-2.268-1.749c-.631.427-1.446.669-2.395.669-1.836 0-3.393-1.232-3.952-2.888H1.85v1.803A7.044 7.044 0 008.14 15z"/><path fill="#FBBC04" d="M4.187 9.342a4.17 4.17 0 010-2.68V4.859H1.849a6.97 6.97 0 000 6.286l2.338-1.803z"/><path fill="#EA4335" d="M8.14 3.77a3.837 3.837 0 012.7 1.05l2.01-1.999a6.786 6.786 0 00-4.71-1.82 7.042 7.042 0 00-6.29 3.858L4.186 6.66c.556-1.658 2.116-2.89 3.952-2.89z"/></svg>
|
After Width: | Height: | Size: 728 B |
3  litellm/proxy/_experimental/out/assets/logos/groq.svg  Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8" ?>
|
||||||
|
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 26.3 26.3"><defs><style>.cls-1{fill:#f05237;}.cls-2{fill:#fff;}</style></defs><g id="Layer_2" data-name="Layer 2"><g id="Content"><circle class="cls-1" cx="13.15" cy="13.15" r="13.15"/><path class="cls-2" d="M13.17,6.88a4.43,4.43,0,0,0,0,8.85h1.45V14.07H13.17a2.77,2.77,0,1,1,2.77-2.76v4.07a2.74,2.74,0,0,1-4.67,2L10.1,18.51a4.37,4.37,0,0,0,3.07,1.29h.06a4.42,4.42,0,0,0,4.36-4.4V11.2a4.43,4.43,0,0,0-4.42-4.32"/></g></g></svg>
|
After Width: | Height: | Size: 619 B |
After Width: | Height: | Size: 7.2 KiB |
1  litellm/proxy/_experimental/out/assets/logos/mistral.svg  Normal file
|
@ -0,0 +1 @@
|
||||||
|
<svg height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>Mistral</title><path d="M3.428 3.4h3.429v3.428H3.428V3.4zm13.714 0h3.43v3.428h-3.43V3.4z" fill="gold"></path><path d="M3.428 6.828h6.857v3.429H3.429V6.828zm10.286 0h6.857v3.429h-6.857V6.828z" fill="#FFAF00"></path><path d="M3.428 10.258h17.144v3.428H3.428v-3.428z" fill="#FF8205"></path><path d="M3.428 13.686h3.429v3.428H3.428v-3.428zm6.858 0h3.429v3.428h-3.429v-3.428zm6.856 0h3.43v3.428h-3.43v-3.428z" fill="#FA500F"></path><path d="M0 17.114h10.286v3.429H0v-3.429zm13.714 0H24v3.429H13.714v-3.429z" fill="#E10500"></path></svg>
|
After Width: | Height: | Size: 655 B |
7  litellm/proxy/_experimental/out/assets/logos/ollama.svg  Normal file
After Width: | Height: | Size: 8.4 KiB |
|
@ -0,0 +1,5 @@
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<svg fill="#000000" viewBox="-2 -2 28 28" role="img" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<circle cx="12" cy="12" r="14" fill="white" />
|
||||||
|
<path d="M22.2819 9.8211a5.9847 5.9847 0 0 0-.5157-4.9108 6.0462 6.0462 0 0 0-6.5098-2.9A6.0651 6.0651 0 0 0 4.9807 4.1818a5.9847 5.9847 0 0 0-3.9977 2.9 6.0462 6.0462 0 0 0 .7427 7.0966 5.98 5.98 0 0 0 .511 4.9107 6.051 6.051 0 0 0 6.5146 2.9001A5.9847 5.9847 0 0 0 13.2599 24a6.0557 6.0557 0 0 0 5.7718-4.2058 5.9894 5.9894 0 0 0 3.9977-2.9001 6.0557 6.0557 0 0 0-.7475-7.0729zm-9.022 12.6081a4.4755 4.4755 0 0 1-2.8764-1.0408l.1419-.0804 4.7783-2.7582a.7948.7948 0 0 0 .3927-.6813v-6.7369l2.02 1.1686a.071.071 0 0 1 .038.052v5.5826a4.504 4.504 0 0 1-4.4945 4.4944zm-9.6607-4.1254a4.4708 4.4708 0 0 1-.5346-3.0137l.142.0852 4.783 2.7582a.7712.7712 0 0 0 .7806 0l5.8428-3.3685v2.3324a.0804.0804 0 0 1-.0332.0615L9.74 19.9502a4.4992 4.4992 0 0 1-6.1408-1.6464zM2.3408 7.8956a4.485 4.485 0 0 1 2.3655-1.9728V11.6a.7664.7664 0 0 0 .3879.6765l5.8144 3.3543-2.0201 1.1685a.0757.0757 0 0 1-.071 0l-4.8303-2.7865A4.504 4.504 0 0 1 2.3408 7.872zm16.5963 3.8558L13.1038 8.364 15.1192 7.2a.0757.0757 0 0 1 .071 0l4.8303 2.7913a4.4944 4.4944 0 0 1-.6765 8.1042v-5.6772a.79.79 0 0 0-.407-.667zm2.0107-3.0231l-.142-.0852-4.7735-2.7818a.7759.7759 0 0 0-.7854 0L9.409 9.2297V6.8974a.0662.0662 0 0 1 .0284-.0615l4.8303-2.7866a4.4992 4.4992 0 0 1 6.6802 4.66zM8.3065 12.863l-2.02-1.1638a.0804.0804 0 0 1-.038-.0567V6.0742a4.4992 4.4992 0 0 1 7.3757-3.4537l-.142.0805L8.704 5.459a.7948.7948 0 0 0-.3927.6813zm1.0976-2.3654l2.602-1.4998 2.6069 1.4998v2.9994l-2.5974 1.4997-2.6067-1.4997Z"/>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 1.6 KiB |
39  litellm/proxy/_experimental/out/assets/logos/openrouter.svg  Normal file
|
@ -0,0 +1,39 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<svg id="Layer_1" xmlns="http://www.w3.org/2000/svg" version="1.1" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 300 300">
|
||||||
|
<!-- Generator: Adobe Illustrator 29.2.1, SVG Export Plug-In . SVG Version: 2.1.0 Build 116) -->
|
||||||
|
<defs>
|
||||||
|
<style>
|
||||||
|
.st0 {
|
||||||
|
fill: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.st1 {
|
||||||
|
stroke-width: 52.7px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.st1, .st2 {
|
||||||
|
stroke: #000;
|
||||||
|
stroke-miterlimit: 2.3;
|
||||||
|
}
|
||||||
|
|
||||||
|
.st2 {
|
||||||
|
stroke-width: .6px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.st3 {
|
||||||
|
clip-path: url(#clippath);
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
<clipPath id="clippath">
|
||||||
|
<rect class="st0" width="300" height="300"/>
|
||||||
|
</clipPath>
|
||||||
|
</defs>
|
||||||
|
<g class="st3">
|
||||||
|
<g>
|
||||||
|
<path class="st1" d="M1.8,145.9c8.8,0,42.8-7.6,60.4-17.5s17.6-10,53.9-35.7c46-32.6,78.5-21.7,131.8-21.7"/>
|
||||||
|
<path class="st2" d="M299.4,71.2l-90.1,52V19.2l90.1,52Z"/>
|
||||||
|
<path class="st1" d="M0,145.9c8.8,0,42.8,7.6,60.4,17.5s17.6,10,53.9,35.7c46,32.6,78.5,21.7,131.8,21.7"/>
|
||||||
|
<path class="st2" d="M297.7,220.6l-90.1-52v104l90.1-52Z"/>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 1.1 KiB |
|
@ -0,0 +1,16 @@
|
||||||
|
<?xml version="1.0" encoding="iso-8859-1"?>
|
||||||
|
<!-- Generator: Adobe Illustrator 26.1.0, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
|
||||||
|
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
|
||||||
|
viewBox="0 0 48 48" style="enable-background:new 0 0 48 48;" xml:space="preserve">
|
||||||
|
<linearGradient id="SVGID_1_" gradientUnits="userSpaceOnUse" x1="10.5862" y1="1.61" x2="36.0543" y2="44.1206">
|
||||||
|
<stop offset="0.002" style="stop-color:#9C55D4"/>
|
||||||
|
<stop offset="0.003" style="stop-color:#20808D"/>
|
||||||
|
<stop offset="0.3731" style="stop-color:#218F9B"/>
|
||||||
|
<stop offset="1" style="stop-color:#22B1BC"/>
|
||||||
|
</linearGradient>
|
||||||
|
<path style="fill-rule:evenodd;clip-rule:evenodd;fill:url(#SVGID_1_);" d="M11.469,4l11.39,10.494v-0.002V4.024h2.217v10.517
|
||||||
|
L36.518,4v11.965h4.697v17.258h-4.683v10.654L25.077,33.813v10.18h-2.217V33.979L11.482,44V33.224H6.785V15.965h4.685V4z
|
||||||
|
M21.188,18.155H9.002v12.878h2.477v-4.062L21.188,18.155z M13.699,27.943v11.17l9.16-8.068V19.623L13.699,27.943z M25.141,30.938
|
||||||
|
V19.612l9.163,8.321v5.291h0.012v5.775L25.141,30.938z M36.532,31.033h2.466V18.155H26.903l9.629,8.725V31.033z M34.301,15.965
|
||||||
|
V9.038l-7.519,6.927H34.301z M21.205,15.965h-7.519V9.038L21.205,15.965z"/>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 1.2 KiB |
|
@ -0,0 +1 @@
|
||||||
|
<svg height="1em" style="flex:none;line-height:1" viewBox="0 0 24 24" width="1em" xmlns="http://www.w3.org/2000/svg"><title>SambaNova</title><path d="M23 23h-1.223V8.028c0-3.118-2.568-5.806-5.744-5.806H8.027c-3.176 0-5.744 2.565-5.744 5.686 0 3.119 2.568 5.684 5.744 5.684h.794c1.346 0 2.445 1.1 2.445 2.444 0 1.346-1.1 2.446-2.445 2.446H1v-1.223h7.761c.671 0 1.223-.551 1.223-1.16 0-.67-.552-1.16-1.223-1.16h-.794C4.177 14.872 1 11.756 1 7.909 1 4.058 4.176 1 8.027 1h8.066C19.88 1 23 4.239 23 8.028V23z" fill="#EE7624"></path><path d="M8.884 12.672c1.71.06 3.361 1.588 3.361 3.422 0 1.833-1.528 3.421-3.421 3.421H1v1.223h7.761c2.568 0 4.705-2.077 4.705-4.644 0-.672-.123-1.283-.43-1.894-.245-.551-.67-1.1-1.099-1.528-.489-.429-1.039-.734-1.65-.977-.525-.175-1.048-.193-1.594-.212-.218-.008-.441-.016-.669-.034-.428 0-1.406-.245-1.956-.61a3.369 3.369 0 01-1.223-1.406c-.183-.489-.305-.977-.305-1.528A3.417 3.417 0 017.96 4.482h8.066c1.895 0 3.422 1.65 3.422 3.483v15.032h1.223V8.027c0-2.568-2.077-4.768-4.645-4.768h-8c-2.568 0-4.705 2.077-4.705 4.646 0 .67.123 1.282.43 1.894a4.45 4.45 0 001.099 1.528c.429.428 1.039.734 1.588.976.306.123.611.183.976.246.857.06 1.406.123 1.466.123h.003z" fill="#EE7624"></path><path d="M1 23h7.761v-.003c3.85 0 7.03-3.116 7.09-7.026 0-3.79-3.117-6.906-6.967-6.906H8.09c-.672 0-1.222-.552-1.222-1.16 0-.608.487-1.16 1.159-1.16h8.069c.608 0 1.159.611 1.159 1.283v14.97h1.223V8.024c0-1.345-1.1-2.505-2.445-2.505H7.967a2.451 2.451 0 00-2.445 2.445 2.45 2.45 0 002.445 2.445h.794c3.176 0 5.744 2.568 5.744 5.684s-2.568 5.684-5.744 5.684H1V23z" fill="#EE7624"></path></svg>
|
After Width: | Height: | Size: 1.6 KiB |
14  litellm/proxy/_experimental/out/assets/logos/togetherai.svg  Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
<svg width="32" height="32" viewBox="0 0 32 32" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||||
|
<g clip-path="url(#clip0_542_18748)">
|
||||||
|
<rect width="32" height="32" rx="5.64706" fill="#F1EFED"/>
|
||||||
|
<circle cx="22.8233" cy="9.64706" r="5.64706" fill="#D3D1D1"/>
|
||||||
|
<circle cx="22.8233" cy="22.8238" r="5.64706" fill="#D3D1D1"/>
|
||||||
|
<circle cx="9.64706" cy="22.8238" r="5.64706" fill="#D3D1D1"/>
|
||||||
|
<circle cx="9.64706" cy="9.64706" r="5.64706" fill="#0F6FFF"/>
|
||||||
|
</g>
|
||||||
|
<defs>
|
||||||
|
<clipPath id="clip0_542_18748">
|
||||||
|
<rect width="32" height="32" fill="white"/>
|
||||||
|
</clipPath>
|
||||||
|
</defs>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 560 B |
28  litellm/proxy/_experimental/out/assets/logos/xai.svg  Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1000 1000">
|
||||||
|
<defs>
|
||||||
|
<style>
|
||||||
|
.cls-1 {
|
||||||
|
fill: #000;
|
||||||
|
}
|
||||||
|
polygon {
|
||||||
|
fill: #fff;
|
||||||
|
}
|
||||||
|
@media ( prefers-color-scheme: dark ) {
|
||||||
|
.cls-1 {
|
||||||
|
fill: #fff;
|
||||||
|
}
|
||||||
|
polygon {
|
||||||
|
fill: #000;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</defs>
|
||||||
|
<rect class="cls-1" width="1000" height="1000"/>
|
||||||
|
<g>
|
||||||
|
<polygon points="226.83 411.15 501.31 803.15 623.31 803.15 348.82 411.15 226.83 411.15" />
|
||||||
|
<polygon points="348.72 628.87 226.69 803.15 348.77 803.15 409.76 716.05 348.72 628.87" />
|
||||||
|
<polygon points="651.23 196.85 440.28 498.12 501.32 585.29 773.31 196.85 651.23 196.85" />
|
||||||
|
<polygon points="673.31 383.25 673.31 803.15 773.31 803.15 773.31 240.44 673.31 383.25" />
|
||||||
|
</g>
|
||||||
|
</svg>
|
After Width: | Height: | Size: 937 B |
|
@ -1 +1 @@
|
||||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-475d6efe4080647d.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/169f9187db1ec37e.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[14164,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-1cbed529ecb084e0.js\",\"261\",\"static/chunks/261-57d48f76eec1e568.js\",\"899\",\"static/chunks/899-9af4feaf6f21839c.js\",\"394\",\"static/chunks/394-0222ddf4d701e0b4.js\",\"250\",\"static/chunks/250-a75ee9d79f1140b0.js\",\"699\",\"static/chunks/699-2a1c30f260f44c15.js\",\"931\",\"static/chunks/app/page-75d771fb848b47a8.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"9yIyUkG6nV2cO0gn7kJ-Q\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/169f9187db1ec37e.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
|
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-75a5453f51d60261.js"/><script src="/ui/_next/static/chunks/fd9d1056-524b80e1a6b8bb06.js" async=""></script><script src="/ui/_next/static/chunks/117-883150efc583d711.js" async=""></script><script src="/ui/_next/static/chunks/main-app-4f7318ae681a6d94.js" async=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-42372ed130431b0a.js" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-75a5453f51d60261.js" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/a34f9d1faa5f3315-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"style\"]\n3:HL[\"/ui/_next/static/css/169f9187db1ec37e.css\",\"style\"]\n"])</script><script>self.__next_f.push([1,"4:I[12846,[],\"\"]\n6:I[19107,[],\"ClientPageRoot\"]\n7:I[20314,[\"665\",\"static/chunks/3014691f-0b72c78cfebbd712.js\",\"990\",\"static/chunks/13b76428-ebdf3012af0e4489.js\",\"42\",\"static/chunks/42-1cbed529ecb084e0.js\",\"261\",\"static/chunks/261-57d48f76eec1e568.js\",\"899\",\"static/chunks/899-9af4feaf6f21839c.js\",\"394\",\"static/chunks/394-48a36e9c9b2cb488.js\",\"250\",\"static/chunks/250-601568e45a5ffece.js\",\"699\",\"static/chunks/699-2a1c30f260f44c15.js\",\"931\",\"static/chunks/app/page-e21d4be3d6c3c16e.js\"],\"default\",1]\n8:I[4707,[],\"\"]\n9:I[36423,[],\"\"]\nb:I[61060,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"0:[\"$\",\"$L4\",null,{\"buildId\":\"soi--ciJeUE6G2Fk4NMBG\",\"assetPrefix\":\"/ui\",\"urlParts\":[\"\",\"\"],\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[[\"$L5\",[\"$\",\"$L6\",null,{\"props\":{\"params\":{},\"searchParams\":{}},\"Component\":\"$7\"}],null],null],null]},[[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/86f6cc749f6b8493.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}],[\"$\",\"link\",\"1\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/169f9187db1ec37e.css\",\"precedence\":\"next\",\"crossOrigin\":\"$undefined\"}]],[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_cf7686\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[]}]}]}]],null],null],\"couldBeIntercepted\":false,\"initialHead\":[null,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script></body></html>
@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[14164,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","899","static/chunks/899-9af4feaf6f21839c.js","394","static/chunks/394-0222ddf4d701e0b4.js","250","static/chunks/250-a75ee9d79f1140b0.js","699","static/chunks/699-2a1c30f260f44c15.js","931","static/chunks/app/page-75d771fb848b47a8.js"],"default",1]
3:I[20314,["665","static/chunks/3014691f-0b72c78cfebbd712.js","990","static/chunks/13b76428-ebdf3012af0e4489.js","42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","899","static/chunks/899-9af4feaf6f21839c.js","394","static/chunks/394-48a36e9c9b2cb488.js","250","static/chunks/250-601568e45a5ffece.js","699","static/chunks/699-2a1c30f260f44c15.js","931","static/chunks/app/page-e21d4be3d6c3c16e.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["9yIyUkG6nV2cO0gn7kJ-Q",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/169f9187db1ec37e.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["soi--ciJeUE6G2Fk4NMBG",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/169f9187db1ec37e.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
@ -1,7 +1,7 @@
2:I[19107,[],"ClientPageRoot"]
3:I[52829,["42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","250","static/chunks/250-a75ee9d79f1140b0.js","699","static/chunks/699-2a1c30f260f44c15.js","418","static/chunks/app/model_hub/page-068a441595bd0fc3.js"],"default",1]
3:I[52829,["42","static/chunks/42-1cbed529ecb084e0.js","261","static/chunks/261-57d48f76eec1e568.js","250","static/chunks/250-601568e45a5ffece.js","699","static/chunks/699-2a1c30f260f44c15.js","418","static/chunks/app/model_hub/page-cde2fb783e81a6c1.js"],"default",1]
4:I[4707,[],""]
5:I[36423,[],""]
0:["9yIyUkG6nV2cO0gn7kJ-Q",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/169f9187db1ec37e.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
0:["soi--ciJeUE6G2Fk4NMBG",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},[["$L1",["$","$L2",null,{"props":{"params":{},"searchParams":{}},"Component":"$3"}],null],null],null]},[null,["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined"}]],null]},[[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/86f6cc749f6b8493.css","precedence":"next","crossOrigin":"$undefined"}],["$","link","1",{"rel":"stylesheet","href":"/ui/_next/static/css/169f9187db1ec37e.css","precedence":"next","crossOrigin":"$undefined"}]],["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_cf7686","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[]}]}]}]],null],null],["$L6",null]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null