Merge branch 'main' into litellm_auto_create_user_fix

Commit 7d3244c012 by Krish Dholakia, 2024-03-28 14:57:39 -07:00, committed via GitHub
60 changed files with 1237 additions and 215 deletions

@@ -28,8 +28,9 @@ jobs:
                pip install "pytest==7.3.1"
                pip install "pytest-asyncio==0.21.1"
                pip install mypy
-               pip install "google-generativeai>=0.3.2"
-               pip install "google-cloud-aiplatform>=1.38.0"
+               pip install "google-generativeai==0.3.2"
+               pip install "google-cloud-aiplatform==1.43.0"
+               pip install pyarrow
                pip install "boto3>=1.28.57"
                pip install "aioboto3>=12.3.0"
                pip install langchain
@@ -48,6 +49,7 @@ jobs:
                pip install argon2-cffi
                pip install "pytest-mock==3.12.0"
                pip install python-multipart
+               pip install google-cloud-aiplatform
      - save_cache:
          paths:
            - ./venv
@@ -152,8 +154,9 @@ jobs:
                pip install "pytest-mock==3.12.0"
                pip install "pytest-asyncio==0.21.1"
                pip install mypy
-               pip install "google-generativeai>=0.3.2"
-               pip install "google-cloud-aiplatform>=1.38.0"
+               pip install "google-generativeai==0.3.2"
+               pip install "google-cloud-aiplatform==1.43.0"
+               pip install pyarrow
                pip install "boto3>=1.28.57"
                pip install "aioboto3>=12.3.0"
                pip install langchain

@@ -10,5 +10,5 @@ anthropic
boto3
orjson
pydantic
-google-cloud-aiplatform
+google-cloud-aiplatform==1.43.0
redisvl==0.0.7 # semantic caching

@@ -43,6 +43,14 @@ jobs:
          push: true
          file: Dockerfile.database
          tags: litellm/litellm-database:${{ github.event.inputs.tag || 'latest' }}
+      -
+        name: Build and push litellm-database image
+        uses: docker/build-push-action@v5
+        with:
+          push: true
+          context: ./litellm-js/spend-logs
+          file: Dockerfile
+          tags: litellm/litellm-spend_logs:${{ github.event.inputs.tag || 'latest' }}

  build-and-push-image:
    runs-on: ubuntu-latest
@@ -120,6 +128,44 @@ jobs:
          tags: ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-database.outputs.tags }}-latest
          labels: ${{ steps.meta-database.outputs.labels }}
          platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
+
+  build-and-push-image-spend-logs:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Log in to the Container registry
+        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata (tags, labels) for database Dockerfile
+        id: meta-spend-logs
+        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-spend_logs
+      # Configure multi platform Docker builds
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345
+      - name: Build and push Database Docker image
+        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
+        with:
+          context: ./litellm-js/spend-logs
+          file: Dockerfile
+          push: true
+          tags: ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-spend-logs.outputs.tags }}-latest
+          labels: ${{ steps.meta-spend-logs.outputs.labels }}
+          platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
+
  build-and-push-helm-chart:
    runs-on: ubuntu-latest
    steps:

@@ -2,6 +2,7 @@

## Pre-requisites
* `pip install -q google-generativeai`
+* Get API Key - https://aistudio.google.com/

# Gemini-Pro
## Sample Usage

@@ -1,7 +1,10 @@
+import Image from '@theme/IdealImage';
+
# Modify / Reject Incoming Requests

- Modify data before making llm api calls on proxy
- Reject data before making llm api calls / before returning the response
+- Enforce 'user' param for all openai endpoint calls

See a complete example with our [parallel request rate limiter](https://github.com/BerriAI/litellm/blob/main/litellm/proxy/hooks/parallel_request_limiter.py)
@@ -173,3 +176,18 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
    ],
}'
```
+
+## Advanced - Enforce 'user' param
+
+Set `enforce_user_param` to true to require that all calls to the openai endpoints include the 'user' param.
+
+[**See Code**](https://github.com/BerriAI/litellm/blob/4777921a31c4c70e4d87b927cb233b6a09cd8b51/litellm/proxy/auth/auth_checks.py#L72)
+
+```yaml
+general_settings:
+  enforce_user_param: True
+```
+
+**Result**
+
+<Image img={require('../../img/end_user_enforcement.png')}/>
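
For example, with `enforce_user_param` enabled as above, a client call must include `user` or the proxy rejects it. A minimal sketch using the OpenAI Python SDK; the key, base url, and model name are placeholders:

```python
# Illustrative client call against a LiteLLM proxy with enforce_user_param on.
import openai

client = openai.OpenAI(
    api_key="sk-1234",               # your proxy key (placeholder)
    base_url="http://0.0.0.0:4000",  # your proxy base url (placeholder)
)

# Without user=..., the proxy raises:
#   "'user' param not passed in. 'enforce_user_param'=True"
response = client.chat.completions.create(
    model="gpt-3.5-turbo",  # any model configured on the proxy
    messages=[{"role": "user", "content": "Hello"}],
    user="my-customer-id",  # required once enforce_user_param is set
)
print(response.choices[0].message.content)
```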

@@ -602,6 +602,7 @@ general_settings:
  "disable_spend_logs": "boolean", # turn off writing each transaction to the db
  "disable_reset_budget": "boolean", # turn off reset budget scheduled task
  "enable_jwt_auth": "boolean", # allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims
+ "enforce_user_param": "boolean", # requires all openai endpoint requests to have a 'user' param
  "allowed_routes": "list", # list of allowed proxy API routes - a user can access. (currently JWT-Auth only)
  "key_management_system": "google_kms", # either google_kms or azure_kms
  "master_key": "string",

@@ -40,7 +40,13 @@ Use this Docker `CMD`. This will start the proxy with 1 Uvicorn Async Worker
CMD ["--port", "4000", "--config", "./proxy_server_config.yaml"]
```

-## 3. Switch off spend logging and resetting budgets
+## 3. Move spend logs to separate server
+
+Writing each spend log to the db can slow down your proxy. In testing we saw a 70% improvement in median response time by moving spend-log writes to a separate server.
+
+## 4. Switch off resetting budgets

Add this to your config.yaml. (Only spend per Key, User and Team will be tracked - spend per API Call will not be written to the LiteLLM Database)

```yaml
@@ -49,7 +55,7 @@ general_settings:
  disable_reset_budget: true
```

-## 4. Switch of `litellm.telemetry`
+## 5. Switch off `litellm.telemetry`

Switch off all telemetry tracking done by litellm

@@ -47,8 +47,9 @@ Your Proxy Swagger is available on the root of the Proxy: e.g.: `http://localhos

Set the following in your .env on the Proxy

```shell
-UI_USERNAME=ishaan-litellm
-UI_PASSWORD=langchain
+LITELLM_MASTER_KEY="sk-1234" # this is your master key for using the proxy server
+UI_USERNAME=ishaan-litellm # username to sign in on UI
+UI_PASSWORD=langchain # password to sign in on UI
```

On accessing the LiteLLM UI, you will be prompted to enter your username, password

Binary file (image) added, not shown. 180 KiB.

@@ -0,0 +1,26 @@
# Use the specific Node.js v20.11.0 image
FROM node:20.11.0

# Set the working directory inside the container
WORKDIR /usr/src/app

# Copy package.json and package-lock.json to the working directory
COPY package*.json ./

# Install dependencies
RUN npm install

# Install Prisma globally
RUN npm install -g prisma

# Copy the rest of the application code
COPY . .

# Generate Prisma client
RUN npx prisma generate

# Expose the port that the Node.js server will run on
EXPOSE 3000

# Command to run the Node.js app with npm run dev
CMD ["npm", "run", "dev"]

@@ -0,0 +1,8 @@
```
npm install
npm run dev
```
```
open http://localhost:3000
```
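
Once the service is running, clients batch spend rows to it over HTTP. Below is a minimal, hypothetical sketch of POSTing one entry to the `/spend/update` route defined in `src/index.ts` later in this diff; the field values are made up, shaped like the `LiteLLM_SpendLogs` type from this commit:

```python
# Hypothetical example: send one spend log to the local spend-logs service.
import httpx

log_entry = {
    "request_id": "req-123",
    "call_type": "acompletion",
    "api_key": "hashed-key",
    "spend": 0.00042,
    "total_tokens": 120,
    "prompt_tokens": 100,
    "completion_tokens": 20,
    "startTime": "2024-03-28T14:00:00Z",  # ISO strings; the service converts them to Date
    "endTime": "2024-03-28T14:00:01Z",
    "model": "gpt-3.5-turbo",
    "api_base": "",
    "user": "my-customer-id",
    "metadata": {},
    "cache_hit": "False",
    "cache_key": "",
    "request_tags": [],
}

# The route accepts an array of logs and buffers them in memory until the next flush.
resp = httpx.post("http://localhost:3000/spend/update", json=[log_entry])
print(resp.json())  # {"message": "Successfully stored 1 logs"}
```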

litellm-js/spend-logs/package-lock.json (generated, new file, 508 lines)

@@ -0,0 +1,508 @@
{
"name": "spend-logs",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"dependencies": {
"@hono/node-server": "^1.9.0",
"hono": "^4.1.5"
},
"devDependencies": {
"@types/node": "^20.11.17",
"tsx": "^4.7.1"
}
},
"node_modules/@esbuild/aix-ppc64": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.19.12.tgz",
"integrity": "sha512-bmoCYyWdEL3wDQIVbcyzRyeKLgk2WtWLTWz1ZIAZF/EGbNOwSA6ew3PftJ1PqMiOOGu0OyFMzG53L0zqIpPeNA==",
"cpu": [
"ppc64"
],
"dev": true,
"optional": true,
"os": [
"aix"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/android-arm": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.19.12.tgz",
"integrity": "sha512-qg/Lj1mu3CdQlDEEiWrlC4eaPZ1KztwGJ9B6J+/6G+/4ewxJg7gqj8eVYWvao1bXrqGiW2rsBZFSX3q2lcW05w==",
"cpu": [
"arm"
],
"dev": true,
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/android-arm64": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.19.12.tgz",
"integrity": "sha512-P0UVNGIienjZv3f5zq0DP3Nt2IE/3plFzuaS96vihvD0Hd6H/q4WXUGpCxD/E8YrSXfNyRPbpTq+T8ZQioSuPA==",
"cpu": [
"arm64"
],
"dev": true,
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/android-x64": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.19.12.tgz",
"integrity": "sha512-3k7ZoUW6Q6YqhdhIaq/WZ7HwBpnFBlW905Fa4s4qWJyiNOgT1dOqDiVAQFwBH7gBRZr17gLrlFCRzF6jFh7Kew==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/darwin-arm64": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.19.12.tgz",
"integrity": "sha512-B6IeSgZgtEzGC42jsI+YYu9Z3HKRxp8ZT3cqhvliEHovq8HSX2YX8lNocDn79gCKJXOSaEot9MVYky7AKjCs8g==",
"cpu": [
"arm64"
],
"dev": true,
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/darwin-x64": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.19.12.tgz",
"integrity": "sha512-hKoVkKzFiToTgn+41qGhsUJXFlIjxI/jSYeZf3ugemDYZldIXIxhvwN6erJGlX4t5h417iFuheZ7l+YVn05N3A==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/freebsd-arm64": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.19.12.tgz",
"integrity": "sha512-4aRvFIXmwAcDBw9AueDQ2YnGmz5L6obe5kmPT8Vd+/+x/JMVKCgdcRwH6APrbpNXsPz+K653Qg8HB/oXvXVukA==",
"cpu": [
"arm64"
],
"dev": true,
"optional": true,
"os": [
"freebsd"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/freebsd-x64": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.19.12.tgz",
"integrity": "sha512-EYoXZ4d8xtBoVN7CEwWY2IN4ho76xjYXqSXMNccFSx2lgqOG/1TBPW0yPx1bJZk94qu3tX0fycJeeQsKovA8gg==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"freebsd"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/linux-arm": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.19.12.tgz",
"integrity": "sha512-J5jPms//KhSNv+LO1S1TX1UWp1ucM6N6XuL6ITdKWElCu8wXP72l9MM0zDTzzeikVyqFE6U8YAV9/tFyj0ti+w==",
"cpu": [
"arm"
],
"dev": true,
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/linux-arm64": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.19.12.tgz",
"integrity": "sha512-EoTjyYyLuVPfdPLsGVVVC8a0p1BFFvtpQDB/YLEhaXyf/5bczaGeN15QkR+O4S5LeJ92Tqotve7i1jn35qwvdA==",
"cpu": [
"arm64"
],
"dev": true,
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/linux-ia32": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.19.12.tgz",
"integrity": "sha512-Thsa42rrP1+UIGaWz47uydHSBOgTUnwBwNq59khgIwktK6x60Hivfbux9iNR0eHCHzOLjLMLfUMLCypBkZXMHA==",
"cpu": [
"ia32"
],
"dev": true,
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/linux-loong64": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.19.12.tgz",
"integrity": "sha512-LiXdXA0s3IqRRjm6rV6XaWATScKAXjI4R4LoDlvO7+yQqFdlr1Bax62sRwkVvRIrwXxvtYEHHI4dm50jAXkuAA==",
"cpu": [
"loong64"
],
"dev": true,
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/linux-mips64el": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.19.12.tgz",
"integrity": "sha512-fEnAuj5VGTanfJ07ff0gOA6IPsvrVHLVb6Lyd1g2/ed67oU1eFzL0r9WL7ZzscD+/N6i3dWumGE1Un4f7Amf+w==",
"cpu": [
"mips64el"
],
"dev": true,
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/linux-ppc64": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.19.12.tgz",
"integrity": "sha512-nYJA2/QPimDQOh1rKWedNOe3Gfc8PabU7HT3iXWtNUbRzXS9+vgB0Fjaqr//XNbd82mCxHzik2qotuI89cfixg==",
"cpu": [
"ppc64"
],
"dev": true,
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/linux-riscv64": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.19.12.tgz",
"integrity": "sha512-2MueBrlPQCw5dVJJpQdUYgeqIzDQgw3QtiAHUC4RBz9FXPrskyyU3VI1hw7C0BSKB9OduwSJ79FTCqtGMWqJHg==",
"cpu": [
"riscv64"
],
"dev": true,
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/linux-s390x": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.19.12.tgz",
"integrity": "sha512-+Pil1Nv3Umes4m3AZKqA2anfhJiVmNCYkPchwFJNEJN5QxmTs1uzyy4TvmDrCRNT2ApwSari7ZIgrPeUx4UZDg==",
"cpu": [
"s390x"
],
"dev": true,
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/linux-x64": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.19.12.tgz",
"integrity": "sha512-B71g1QpxfwBvNrfyJdVDexenDIt1CiDN1TIXLbhOw0KhJzE78KIFGX6OJ9MrtC0oOqMWf+0xop4qEU8JrJTwCg==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/netbsd-x64": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.19.12.tgz",
"integrity": "sha512-3ltjQ7n1owJgFbuC61Oj++XhtzmymoCihNFgT84UAmJnxJfm4sYCiSLTXZtE00VWYpPMYc+ZQmB6xbSdVh0JWA==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"netbsd"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/openbsd-x64": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.19.12.tgz",
"integrity": "sha512-RbrfTB9SWsr0kWmb9srfF+L933uMDdu9BIzdA7os2t0TXhCRjrQyCeOt6wVxr79CKD4c+p+YhCj31HBkYcXebw==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"openbsd"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/sunos-x64": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.19.12.tgz",
"integrity": "sha512-HKjJwRrW8uWtCQnQOz9qcU3mUZhTUQvi56Q8DPTLLB+DawoiQdjsYq+j+D3s9I8VFtDr+F9CjgXKKC4ss89IeA==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"sunos"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/win32-arm64": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.19.12.tgz",
"integrity": "sha512-URgtR1dJnmGvX864pn1B2YUYNzjmXkuJOIqG2HdU62MVS4EHpU2946OZoTMnRUHklGtJdJZ33QfzdjGACXhn1A==",
"cpu": [
"arm64"
],
"dev": true,
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/win32-ia32": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.19.12.tgz",
"integrity": "sha512-+ZOE6pUkMOJfmxmBZElNOx72NKpIa/HFOMGzu8fqzQJ5kgf6aTGrcJaFsNiVMH4JKpMipyK+7k0n2UXN7a8YKQ==",
"cpu": [
"ia32"
],
"dev": true,
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/win32-x64": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.19.12.tgz",
"integrity": "sha512-T1QyPSDCyMXaO3pzBkF96E8xMkiRYbUEZADd29SyPGabqxMViNoii+NcK7eWJAEoU6RZyEm5lVSIjTmcdoB9HA==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@hono/node-server": {
"version": "1.9.0",
"resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.9.0.tgz",
"integrity": "sha512-oJjk7WXBlENeHhWiMqSyxPIZ3Kmf5ZYxqdlcSIXyN8Rn50bNJsPl99G4POBS03Jxh56FdfRJ0SEnC8mAVIiavQ==",
"engines": {
"node": ">=18.14.1"
}
},
"node_modules/@types/node": {
"version": "20.11.30",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.11.30.tgz",
"integrity": "sha512-dHM6ZxwlmuZaRmUPfv1p+KrdD1Dci04FbdEm/9wEMouFqxYoFl5aMkt0VMAUtYRQDyYvD41WJLukhq/ha3YuTw==",
"dev": true,
"dependencies": {
"undici-types": "~5.26.4"
}
},
"node_modules/esbuild": {
"version": "0.19.12",
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.19.12.tgz",
"integrity": "sha512-aARqgq8roFBj054KvQr5f1sFu0D65G+miZRCuJyJ0G13Zwx7vRar5Zhn2tkQNzIXcBrNVsv/8stehpj+GAjgbg==",
"dev": true,
"hasInstallScript": true,
"bin": {
"esbuild": "bin/esbuild"
},
"engines": {
"node": ">=12"
},
"optionalDependencies": {
"@esbuild/aix-ppc64": "0.19.12",
"@esbuild/android-arm": "0.19.12",
"@esbuild/android-arm64": "0.19.12",
"@esbuild/android-x64": "0.19.12",
"@esbuild/darwin-arm64": "0.19.12",
"@esbuild/darwin-x64": "0.19.12",
"@esbuild/freebsd-arm64": "0.19.12",
"@esbuild/freebsd-x64": "0.19.12",
"@esbuild/linux-arm": "0.19.12",
"@esbuild/linux-arm64": "0.19.12",
"@esbuild/linux-ia32": "0.19.12",
"@esbuild/linux-loong64": "0.19.12",
"@esbuild/linux-mips64el": "0.19.12",
"@esbuild/linux-ppc64": "0.19.12",
"@esbuild/linux-riscv64": "0.19.12",
"@esbuild/linux-s390x": "0.19.12",
"@esbuild/linux-x64": "0.19.12",
"@esbuild/netbsd-x64": "0.19.12",
"@esbuild/openbsd-x64": "0.19.12",
"@esbuild/sunos-x64": "0.19.12",
"@esbuild/win32-arm64": "0.19.12",
"@esbuild/win32-ia32": "0.19.12",
"@esbuild/win32-x64": "0.19.12"
}
},
"node_modules/fsevents": {
"version": "2.3.3",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
"integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
"dev": true,
"hasInstallScript": true,
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
}
},
"node_modules/get-tsconfig": {
"version": "4.7.3",
"resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.7.3.tgz",
"integrity": "sha512-ZvkrzoUA0PQZM6fy6+/Hce561s+faD1rsNwhnO5FelNjyy7EMGJ3Rz1AQ8GYDWjhRs/7dBLOEJvhK8MiEJOAFg==",
"dev": true,
"dependencies": {
"resolve-pkg-maps": "^1.0.0"
},
"funding": {
"url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
}
},
"node_modules/hono": {
"version": "4.1.5",
"resolved": "https://registry.npmjs.org/hono/-/hono-4.1.5.tgz",
"integrity": "sha512-3ChJiIoeCxvkt6vnkxJagplrt1YZg3NyNob7ssVeK2PUqEINp4q1F94HzFnvY9QE8asVmbW5kkTDlyWylfg2vg==",
"engines": {
"node": ">=16.0.0"
}
},
"node_modules/resolve-pkg-maps": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
"integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
"dev": true,
"funding": {
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
}
},
"node_modules/tsx": {
"version": "4.7.1",
"resolved": "https://registry.npmjs.org/tsx/-/tsx-4.7.1.tgz",
"integrity": "sha512-8d6VuibXHtlN5E3zFkgY8u4DX7Y3Z27zvvPKVmLon/D4AjuKzarkUBTLDBgj9iTQ0hg5xM7c/mYiRVM+HETf0g==",
"dev": true,
"dependencies": {
"esbuild": "~0.19.10",
"get-tsconfig": "^4.7.2"
},
"bin": {
"tsx": "dist/cli.mjs"
},
"engines": {
"node": ">=18.0.0"
},
"optionalDependencies": {
"fsevents": "~2.3.3"
}
},
"node_modules/undici-types": {
"version": "5.26.5",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
"dev": true
}
}
}

@@ -0,0 +1,13 @@
{
"scripts": {
"dev": "tsx watch src/index.ts"
},
"dependencies": {
"@hono/node-server": "^1.9.0",
"hono": "^4.1.5"
},
"devDependencies": {
"@types/node": "^20.11.17",
"tsx": "^4.7.1"
}
}

@@ -0,0 +1,29 @@
generator client {
  provider = "prisma-client-js"
}

datasource client {
  provider = "postgresql"
  url      = env("DATABASE_URL")
}

model LiteLLM_SpendLogs {
  request_id        String   @id
  call_type         String
  api_key           String   @default("")
  spend             Float    @default(0.0)
  total_tokens      Int      @default(0)
  prompt_tokens     Int      @default(0)
  completion_tokens Int      @default(0)
  startTime         DateTime
  endTime           DateTime
  model             String   @default("")
  api_base          String   @default("")
  user              String   @default("")
  metadata          Json     @default("{}")
  cache_hit         String   @default("")
  cache_key         String   @default("")
  request_tags      Json     @default("[]")
  team_id           String?
  end_user          String?
}
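
For reference, the table this schema defines can be read back from Python with Prisma Client Python, which the proxy already uses elsewhere. A minimal sketch, assuming `prisma generate` has been run against this schema (the lowercase `litellm_spendlogs` accessor follows prisma-client-py's naming convention; verify against your generated client):

```python
# Hedged sketch: query LiteLLM_SpendLogs rows with Prisma Client Python.
import asyncio
from prisma import Prisma

async def main() -> None:
    db = Prisma()
    await db.connect()
    # Fetch the ten most recent spend rows
    logs = await db.litellm_spendlogs.find_many(
        take=10,
        order={"startTime": "desc"},
    )
    for log in logs:
        print(log.request_id, log.model, log.spend)
    await db.disconnect()

asyncio.run(main())
```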

@@ -0,0 +1,32 @@
export type LiteLLM_IncrementSpend = {
key_transactions: Array<LiteLLM_IncrementObject>, // [{"key": spend},..]
user_transactions: Array<LiteLLM_IncrementObject>,
team_transactions: Array<LiteLLM_IncrementObject>,
spend_logs_transactions: Array<LiteLLM_SpendLogs>
}

export type LiteLLM_IncrementObject = {
key: string,
spend: number
}

export type LiteLLM_SpendLogs = {
request_id: string; // @id means it's a unique identifier
call_type: string;
api_key: string; // @default("") means it defaults to an empty string if not provided
spend: number; // Float in Prisma corresponds to number in TypeScript
total_tokens: number; // Int in Prisma corresponds to number in TypeScript
prompt_tokens: number;
completion_tokens: number;
startTime: Date; // DateTime in Prisma corresponds to Date in TypeScript
endTime: Date;
model: string; // @default("") means it defaults to an empty string if not provided
api_base: string;
user: string;
metadata: any; // Json type in Prisma is represented by any in TypeScript; could also use a more specific type if the structure of JSON is known
cache_hit: string;
cache_key: string;
request_tags: any; // Similarly, this could be an array or a more specific type depending on the expected structure
team_id?: string | null; // ? indicates it's optional and can be undefined, but could also be null if not provided
end_user?: string | null;
};

@@ -0,0 +1,84 @@
import { serve } from '@hono/node-server'
import { Hono } from 'hono'
import { PrismaClient } from '@prisma/client'
import {LiteLLM_SpendLogs, LiteLLM_IncrementSpend, LiteLLM_IncrementObject} from './_types'

const app = new Hono()
const prisma = new PrismaClient()

// In-memory storage for logs
let spend_logs: LiteLLM_SpendLogs[] = [];
const key_logs: LiteLLM_IncrementObject[] = [];
const user_logs: LiteLLM_IncrementObject[] = [];
const transaction_logs: LiteLLM_IncrementObject[] = [];

app.get('/', (c) => {
  return c.text('Hello Hono!')
})

const MIN_LOGS = 1; // Minimum number of logs needed to initiate a flush
const FLUSH_INTERVAL = 5000; // Time in ms to wait before trying to flush again
const BATCH_SIZE = 100; // Preferred size of each batch to write to the database
const MAX_LOGS_PER_INTERVAL = 1000; // Maximum number of logs to flush in a single interval

const flushLogsToDb = async () => {
  if (spend_logs.length >= MIN_LOGS) {
    // Limit the logs to process in this interval to MAX_LOGS_PER_INTERVAL or less
    const logsToProcess = spend_logs.slice(0, MAX_LOGS_PER_INTERVAL);

    for (let i = 0; i < logsToProcess.length; i += BATCH_SIZE) {
      // Create subarray for current batch, ensuring it doesn't exceed the BATCH_SIZE
      const batch = logsToProcess.slice(i, i + BATCH_SIZE);

      // Convert datetime strings to Date objects
      const batchWithDates = batch.map(entry => ({
        ...entry,
        startTime: new Date(entry.startTime),
        endTime: new Date(entry.endTime),
        // Repeat for any other DateTime fields you may have
      }));

      await prisma.liteLLM_SpendLogs.createMany({
        data: batchWithDates,
      });

      console.log(`Flushed ${batch.length} logs to the DB.`);
    }

    // Remove the processed logs from spend_logs
    spend_logs = spend_logs.slice(logsToProcess.length);

    console.log(`${logsToProcess.length} logs processed. Remaining in queue: ${spend_logs.length}`);
  } else {
    // This will ensure it doesn't falsely claim "No logs to flush." when it's merely below the MIN_LOGS threshold.
    if(spend_logs.length > 0) {
      console.log(`Accumulating logs. Currently at ${spend_logs.length}, waiting for at least ${MIN_LOGS}.`);
    } else {
      console.log("No logs to flush.");
    }
  }
};

// Setup interval for attempting to flush the logs
setInterval(flushLogsToDb, FLUSH_INTERVAL);

// Route to receive log messages
app.post('/spend/update', async (c) => {
  const incomingLogs = await c.req.json<LiteLLM_SpendLogs[]>();

  spend_logs.push(...incomingLogs);

  console.log(`Received and stored ${incomingLogs.length} logs. Total logs in memory: ${spend_logs.length}`);

  return c.json({ message: `Successfully stored ${incomingLogs.length} logs` });
});

const port = 3000
console.log(`Server is running on port ${port}`)

serve({
  fetch: app.fetch,
  port
})

@@ -0,0 +1,13 @@
{
"compilerOptions": {
"target": "ESNext",
"module": "ESNext",
"moduleResolution": "Bundler",
"strict": true,
"types": [
"node"
],
"jsx": "react-jsx",
"jsxImportSource": "hono/jsx",
}
}

@@ -0,0 +1,38 @@
from typing import Optional
import httpx


class HTTPHandler:
    def __init__(self, concurrent_limit=1000):
        # Create a client with a connection pool
        self.client = httpx.AsyncClient(
            limits=httpx.Limits(
                max_connections=concurrent_limit,
                max_keepalive_connections=concurrent_limit,
            )
        )

    async def close(self):
        # Close the client when you're done with it
        await self.client.aclose()

    async def get(
        self, url: str, params: Optional[dict] = None, headers: Optional[dict] = None
    ):
        response = await self.client.get(url, params=params, headers=headers)
        return response

    async def post(
        self,
        url: str,
        data: Optional[dict] = None,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
    ):
        try:
            response = await self.client.post(
                url, data=data, params=params, headers=headers
            )
            return response
        except Exception as e:
            raise e
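
A usage sketch for the handler above (illustrative; the URL is a placeholder). Despite the synchronous-sounding class name, every method is a coroutine and must be awaited:

```python
# Hedged usage sketch for HTTPHandler; run inside an event loop.
import asyncio

async def main() -> None:
    handler = HTTPHandler(concurrent_limit=10)
    try:
        response = await handler.get(
            "http://localhost:3000/",  # placeholder URL
            params={"source": "example"},
        )
        print(response.status_code, response.text)
    finally:
        await handler.close()  # release pooled connections

asyncio.run(main())
```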

@@ -13,6 +13,7 @@ from functools import partial
import dotenv, traceback, random, asyncio, time, contextvars
from copy import deepcopy
import httpx
import litellm
from ._logging import verbose_logger
from litellm import (  # type: ignore

File diff suppressed because one or more lines are too long

@@ -1 +1 @@
-(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{11837:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_c23dc8', '__Inter_Fallback_c23dc8'",fontStyle:"normal"},className:"__className_c23dc8"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=11837)}),_N_E=n.O()}]);
+(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_c23dc8', '__Inter_Fallback_c23dc8'",fontStyle:"normal"},className:"__className_c23dc8"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=87421)}),_N_E=n.O()}]);

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@@ -1 +1 @@
-(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{70377:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(70377)}),_N_E=e.O()}]);
+(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);

@@ -1 +1 @@
-<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-3b0d290a8fe6941d.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-3b0d290a8fe6941d.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/68a21c6e6697f7ca.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[19914,[\"730\",\"static/chunks/730-1411b729a1c79695.js\",\"931\",\"static/chunks/app/page-144687b251040a22.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/68a21c6e6697f7ca.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"aIO8mtlEIEUTmgL8cGjve\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
+<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-3b0d290a8fe6941d.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-3b0d290a8fe6941d.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/68a21c6e6697f7ca.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[92182,[\"730\",\"static/chunks/730-1411b729a1c79695.js\",\"931\",\"static/chunks/app/page-8672e345e79b8043.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/68a21c6e6697f7ca.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"YrQPd5ySk25sMOIe_YoyJ\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

@@ -1,7 +1,7 @@
2:I[77831,[],""]
-3:I[19914,["730","static/chunks/730-1411b729a1c79695.js","931","static/chunks/app/page-144687b251040a22.js"],""]
+3:I[92182,["730","static/chunks/730-1411b729a1c79695.js","931","static/chunks/app/page-8672e345e79b8043.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
-0:["aIO8mtlEIEUTmgL8cGjve",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/68a21c6e6697f7ca.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
+0:["YrQPd5ySk25sMOIe_YoyJ",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/68a21c6e6697f7ca.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

@@ -14,9 +14,9 @@ model_list:
# cache_params:
#   type: redis
#   callbacks: ["batch_redis_requests"]
-# success_callbacks: ["langfuse"]
+# # success_callbacks: ["langfuse"]

general_settings:
  master_key: sk-1234
-  # disable_spend_logs: true
+  disable_spend_logs: true
  database_url: "postgresql://neondb_owner:hz8tyUlJ5ivV@ep-cool-sunset-a5ywubeh.us-east-2.aws.neon.tech/neondb?sslmode=require"

@@ -698,6 +698,8 @@ class LiteLLM_VerificationTokenView(LiteLLM_VerificationToken):
    team_tpm_limit: Optional[int] = None
    team_rpm_limit: Optional[int] = None
    team_max_budget: Optional[float] = None
+    team_models: List = []
+    team_blocked: bool = False
    soft_budget: Optional[float] = None
    team_model_aliases: Optional[Dict] = None

@@ -15,7 +15,7 @@ from litellm.proxy._types import (
    LiteLLM_TeamTable,
    LiteLLMRoutes,
)
-from typing import Optional, Literal
+from typing import Optional, Literal, Union
from litellm.proxy.utils import PrismaClient
from litellm.caching import DualCache
@@ -26,6 +26,8 @@ def common_checks(
    request_body: dict,
    team_object: LiteLLM_TeamTable,
    end_user_object: Optional[LiteLLM_EndUserTable],
+    general_settings: dict,
+    route: str,
) -> bool:
    """
    Common checks across jwt + key-based auth.
@@ -34,6 +36,7 @@ def common_checks(
    2. If team can call model
    3. If team is in budget
    4. If end_user ('user' passed to /chat/completions, /embeddings endpoint) is in budget
+    5. [OPTIONAL] If 'enforce_end_user' enabled - did developer pass in 'user' param for openai endpoints
    """
    _model = request_body.get("model", None)
    if team_object.blocked == True:
@@ -65,6 +68,16 @@ def common_checks(
        raise Exception(
            f"End User={end_user_object.user_id} over budget. Spend={end_user_object.spend}, Budget={end_user_budget}"
        )
+    # 5. [OPTIONAL] If 'enforce_user_param' enabled - did developer pass in 'user' param for openai endpoints
+    if (
+        general_settings.get("enforce_user_param", None) is not None
+        and general_settings["enforce_user_param"] == True
+    ):
+        if route in LiteLLMRoutes.openai_routes.value and "user" not in request_body:
+            raise Exception(
+                f"'user' param not passed in. 'enforce_user_param'={general_settings['enforce_user_param']}"
+            )
    return True

@@ -6,7 +6,6 @@ Currently only supports admin.

JWT token must have 'litellm_proxy_admin' in scope.
"""

-import httpx
import jwt
import json
import os
@@ -14,42 +13,10 @@ from litellm.caching import DualCache
from litellm._logging import verbose_proxy_logger
from litellm.proxy._types import LiteLLM_JWTAuth, LiteLLM_UserTable
from litellm.proxy.utils import PrismaClient
+from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
from typing import Optional

-class HTTPHandler:
-    def __init__(self, concurrent_limit=1000):
-        # Create a client with a connection pool
-        self.client = httpx.AsyncClient(
-            limits=httpx.Limits(
-                max_connections=concurrent_limit,
-                max_keepalive_connections=concurrent_limit,
-            )
-        )
-
-    async def close(self):
-        # Close the client when you're done with it
-        await self.client.aclose()
-
-    async def get(
-        self, url: str, params: Optional[dict] = None, headers: Optional[dict] = None
-    ):
-        response = await self.client.get(url, params=params, headers=headers)
-        return response
-
-    async def post(
-        self,
-        url: str,
-        data: Optional[dict] = None,
-        params: Optional[dict] = None,
-        headers: Optional[dict] = None,
-    ):
-        response = await self.client.post(
-            url, data=data, params=params, headers=headers
-        )
-        return response
-

class JWTHandler:
    """
    - treat the sub id passed in as the user id

@@ -21,8 +21,6 @@ telemetry = None

def append_query_params(url, params):
    from litellm._logging import verbose_proxy_logger
-    from litellm._logging import verbose_proxy_logger
-
    verbose_proxy_logger.debug(f"url: {url}")
    verbose_proxy_logger.debug(f"params: {params}")
    parsed_url = urlparse.urlparse(url)

@@ -97,7 +97,6 @@ from litellm.proxy.utils import (
    _is_projected_spend_over_limit,
    _get_projected_spend_over_limit,
    update_spend,
-    monitor_spend_list,
)
from litellm.proxy.secret_managers.google_kms import load_google_kms
from litellm.proxy.secret_managers.aws_secret_manager import load_aws_secret_manager
@@ -118,6 +117,7 @@ from litellm.proxy.auth.auth_checks import (
    allowed_routes_check,
    get_actual_routes,
)
+from litellm.llms.custom_httpx.httpx_handler import HTTPHandler

try:
    from litellm._version import version
@@ -130,7 +130,6 @@ from fastapi import (
    HTTPException,
    status,
    Depends,
-    BackgroundTasks,
    Header,
    Response,
    Form,
@@ -305,6 +304,8 @@ proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache)
async_result = None
celery_app_conn = None
celery_fn = None  # Redis Queue for handling requests
+### DB WRITER ###
+db_writer_client: Optional[HTTPHandler] = None

### logger ###
@@ -440,6 +441,8 @@ async def user_api_key_auth(
                request_body=request_data,
                team_object=team_object,
                end_user_object=end_user_object,
+                general_settings=general_settings,
+                route=route,
            )
            # save user object in cache
            await user_api_key_cache.async_set_cache(
@@ -867,6 +870,23 @@ async def user_api_key_auth(
                    f"ExceededTokenBudget: Current Team Spend: {valid_token.team_spend}; Max Budget for Team: {valid_token.team_max_budget}"
                )

+            # Check 8: Additional Common Checks across jwt + key auth
+            _team_obj = LiteLLM_TeamTable(
+                team_id=valid_token.team_id,
+                max_budget=valid_token.team_max_budget,
+                spend=valid_token.team_spend,
+                tpm_limit=valid_token.team_tpm_limit,
+                rpm_limit=valid_token.team_rpm_limit,
+                blocked=valid_token.team_blocked,
+                models=valid_token.team_models,
+            )
+            _ = common_checks(
+                request_body=request_data,
+                team_object=_team_obj,
+                end_user_object=None,
+                general_settings=general_settings,
+                route=route,
+            )
            # Token passed all checks
            api_key = valid_token.token
@@ -1233,6 +1253,7 @@ async def update_database(
                user_ids.append(litellm_proxy_budget_name)
            ### KEY CHANGE ###
            for _id in user_ids:
+                if _id is not None:
                    prisma_client.user_list_transactons[_id] = (
                        response_cost
                        + prisma_client.user_list_transactons.get(_id, 0)
@@ -1364,7 +1385,16 @@ async def update_database(
            )
            payload["spend"] = response_cost
-            if prisma_client is not None:
+            if (
+                os.getenv("SPEND_LOGS_URL", None) is not None
+                and prisma_client is not None
+            ):
+                if isinstance(payload["startTime"], datetime):
+                    payload["startTime"] = payload["startTime"].isoformat()
+                if isinstance(payload["endTime"], datetime):
+                    payload["endTime"] = payload["endTime"].isoformat()
+                prisma_client.spend_log_transactons.append(payload)
+            elif prisma_client is not None:
                await prisma_client.insert_data(data=payload, table_name="spend")
        except Exception as e:
            verbose_proxy_logger.debug(
@@ -2615,11 +2645,7 @@ async def async_data_generator(response, user_api_key_dict):
        verbose_proxy_logger.debug(
            f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`"
        )
-        router_model_names = (
-            [m["model_name"] for m in llm_model_list]
-            if llm_model_list is not None
-            else []
-        )
+        router_model_names = llm_router.model_names if llm_router is not None else []
        if user_debug:
            traceback.print_exc()
@@ -2678,7 +2704,7 @@ def on_backoff(details):

@router.on_event("startup")
async def startup_event():
-    global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time, litellm_proxy_admin_name
+    global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time, litellm_proxy_admin_name, db_writer_client
    import json

    ### LOAD MASTER KEY ###
@@ -2711,6 +2737,8 @@ async def startup_event():
    ## COST TRACKING ##
    cost_tracking()

+    db_writer_client = HTTPHandler()
+
    proxy_logging_obj._init_litellm_callbacks()  # INITIALIZE LITELLM CALLBACKS ON SERVER STARTUP <- do this to catch any logging errors on startup, not when calls are being made

    ## JWT AUTH ##
@@ -2821,7 +2849,7 @@ async def startup_event():
            update_spend,
            "interval",
            seconds=batch_writing_interval,
-            args=[prisma_client],
+            args=[prisma_client, db_writer_client],
        )
        scheduler.start()
@@ -2881,7 +2909,6 @@ async def completion(
    fastapi_response: Response,
    model: Optional[str] = None,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
-    background_tasks: BackgroundTasks = BackgroundTasks(),
):
    global user_temperature, user_request_timeout, user_max_tokens, user_api_base
    try:
@@ -2943,11 +2970,7 @@ async def completion(
    start_time = time.time()

    ### ROUTE THE REQUESTs ###
-    router_model_names = (
-        [m["model_name"] for m in llm_model_list]
-        if llm_model_list is not None
-        else []
-    )
+    router_model_names = llm_router.model_names if llm_router is not None else []
    # skip router if user passed their key
    if "api_key" in data:
        response = await litellm.atext_completion(**data)
@@ -3047,7 +3070,6 @@ async def chat_completion(
    fastapi_response: Response,
    model: Optional[str] = None,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
-    background_tasks: BackgroundTasks = BackgroundTasks(),
):
    global general_settings, user_debug, proxy_logging_obj, llm_model_list
    try:
@@ -3161,11 +3183,8 @@ async def chat_completion(
    start_time = time.time()

    ### ROUTE THE REQUEST ###
-    router_model_names = (
-        [m["model_name"] for m in llm_model_list]
-        if llm_model_list is not None
-        else []
-    )
+    # Do not change this - it should be a constant time fetch - ALWAYS
+    router_model_names = llm_router.model_names if llm_router is not None else []
    # skip router if user passed their key
    if "api_key" in data:
        tasks.append(litellm.acompletion(**data))
@ -3238,11 +3257,7 @@ async def chat_completion(
verbose_proxy_logger.debug( verbose_proxy_logger.debug(
f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`" f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`"
) )
router_model_names = ( router_model_names = llm_router.model_names if llm_router is not None else []
[m["model_name"] for m in llm_model_list]
if llm_model_list is not None
else []
)
if user_debug: if user_debug:
traceback.print_exc() traceback.print_exc()
@ -3284,7 +3299,6 @@ async def embeddings(
request: Request, request: Request,
model: Optional[str] = None, model: Optional[str] = None,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
background_tasks: BackgroundTasks = BackgroundTasks(),
): ):
global proxy_logging_obj global proxy_logging_obj
try: try:
@ -3350,11 +3364,7 @@ async def embeddings(
if data["model"] in litellm.model_alias_map: if data["model"] in litellm.model_alias_map:
data["model"] = litellm.model_alias_map[data["model"]] data["model"] = litellm.model_alias_map[data["model"]]
router_model_names = ( router_model_names = llm_router.model_names if llm_router is not None else []
[m["model_name"] for m in llm_model_list]
if llm_model_list is not None
else []
)
if ( if (
"input" in data "input" in data
and isinstance(data["input"], list) and isinstance(data["input"], list)
@ -3460,7 +3470,6 @@ async def embeddings(
async def image_generation( async def image_generation(
request: Request, request: Request,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
background_tasks: BackgroundTasks = BackgroundTasks(),
): ):
global proxy_logging_obj global proxy_logging_obj
try: try:
@ -3526,11 +3535,7 @@ async def image_generation(
if data["model"] in litellm.model_alias_map: if data["model"] in litellm.model_alias_map:
data["model"] = litellm.model_alias_map[data["model"]] data["model"] = litellm.model_alias_map[data["model"]]
router_model_names = ( router_model_names = llm_router.model_names if llm_router is not None else []
[m["model_name"] for m in llm_model_list]
if llm_model_list is not None
else []
)
### CALL HOOKS ### - modify incoming data / reject request before calling the model ### CALL HOOKS ### - modify incoming data / reject request before calling the model
data = await proxy_logging_obj.pre_call_hook( data = await proxy_logging_obj.pre_call_hook(
@ -3674,11 +3679,7 @@ async def audio_transcriptions(
**data, **data,
} # add the team-specific configs to the completion call } # add the team-specific configs to the completion call
router_model_names = ( router_model_names = llm_router.model_names if llm_router is not None else []
[m["model_name"] for m in llm_model_list]
if llm_model_list is not None
else []
)
assert ( assert (
file.filename is not None file.filename is not None
@ -3843,11 +3844,7 @@ async def moderations(
**data, **data,
} # add the team-specific configs to the completion call } # add the team-specific configs to the completion call
router_model_names = ( router_model_names = llm_router.model_names if llm_router is not None else []
[m["model_name"] for m in llm_model_list]
if llm_model_list is not None
else []
)
### CALL HOOKS ### - modify incoming data / reject request before calling the model ### CALL HOOKS ### - modify incoming data / reject request before calling the model
data = await proxy_logging_obj.pre_call_hook( data = await proxy_logging_obj.pre_call_hook(
@ -4353,7 +4350,7 @@ async def info_key_fn(
@router.get( @router.get(
"/spend/keys", "/spend/keys",
tags=["budget & spend Tracking"], tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
) )
async def spend_key_fn(): async def spend_key_fn():
@ -4385,7 +4382,7 @@ async def spend_key_fn():
@router.get( @router.get(
"/spend/users", "/spend/users",
tags=["budget & spend Tracking"], tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
) )
async def spend_user_fn( async def spend_user_fn(
@ -4437,7 +4434,7 @@ async def spend_user_fn(
@router.get( @router.get(
"/spend/tags", "/spend/tags",
tags=["budget & spend Tracking"], tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
responses={ responses={
200: {"model": List[LiteLLM_SpendLogs]}, 200: {"model": List[LiteLLM_SpendLogs]},
@ -4510,6 +4507,77 @@ async def view_spend_tags(
) )
@router.post(
"/spend/calculate",
tags=["Budget & Spend Tracking"],
dependencies=[Depends(user_api_key_auth)],
responses={
200: {
"cost": {
"description": "The calculated cost",
"example": 0.0,
"type": "float",
}
}
},
)
async def calculate_spend(request: Request):
"""
Accepts all the params of completion_cost.

Calculate spend **before** making the call:
```
curl --location 'http://localhost:4000/spend/calculate' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
    "model": "anthropic.claude-v2",
    "messages": [{"role": "user", "content": "Hey, how'\''s it going?"}]
}'
```
Calculate spend **after** making the call:
```
curl --location 'http://localhost:4000/spend/calculate' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
    "completion_response": {
        "id": "chatcmpl-123",
        "object": "chat.completion",
        "created": 1677652288,
        "model": "gpt-3.5-turbo-0125",
        "system_fingerprint": "fp_44709d6fcb",
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": "Hello there, how may I assist you today?"
            },
            "logprobs": null,
            "finish_reason": "stop"
        }],
        "usage": {
            "prompt_tokens": 9,
            "completion_tokens": 12,
            "total_tokens": 21
        }
    }
}'
```
"""
from litellm import completion_cost
data = await request.json()
if "completion_response" in data:
data["completion_response"] = litellm.ModelResponse(
**data["completion_response"]
)
return {"cost": completion_cost(**data)}
@router.get( @router.get(
"/spend/logs", "/spend/logs",
tags=["Budget & Spend Tracking"], tags=["Budget & Spend Tracking"],
@ -5240,6 +5308,7 @@ async def user_info(
user_info = {"spend": spend} user_info = {"spend": spend}
## REMOVE HASHED TOKEN INFO before returning ## ## REMOVE HASHED TOKEN INFO before returning ##
returned_keys = []
for key in keys: for key in keys:
try: try:
key = key.model_dump() # noqa key = key.model_dump() # noqa
@ -5248,10 +5317,24 @@ async def user_info(
key = key.dict() key = key.dict()
key.pop("token", None) key.pop("token", None)
if (
"team_id" in key
and key["team_id"] is not None
and key["team_id"] != "litellm-dashboard"
):
team_info = await prisma_client.get_data(
team_id=key["team_id"], table_name="team"
)
team_alias = getattr(team_info, "team_alias", None)
key["team_alias"] = team_alias
else:
key["team_alias"] = "None"
returned_keys.append(key)
response_data = { response_data = {
"user_id": user_id, "user_id": user_id,
"user_info": user_info, "user_info": user_info,
"keys": keys, "keys": returned_keys,
"teams": team_list, "teams": team_list,
} }
return response_data return response_data
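Put together, the `/user/info` change strips hashed tokens and decorates each key with a readable team alias. A condensed sketch (the `enrich_keys` helper is illustrative; `prisma_client.get_data` follows the call above):

```
# Illustrative post-processing of keys returned by /user/info.
async def enrich_keys(keys: list, prisma_client) -> list:
    returned_keys = []
    for key in keys:
        key = key.model_dump() if hasattr(key, "model_dump") else key.dict()
        key.pop("token", None)  # never expose the hashed token
        team_id = key.get("team_id")
        if team_id is not None and team_id != "litellm-dashboard":
            team_info = await prisma_client.get_data(team_id=team_id, table_name="team")
            key["team_alias"] = getattr(team_info, "team_alias", None)
        else:
            key["team_alias"] = "None"  # string sentinel, matching the diff
        returned_keys.append(key)
    return returned_keys
```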
@ -5639,7 +5722,7 @@ async def new_team(
raise HTTPException( raise HTTPException(
status_code=400, status_code=400,
detail={ detail={
"error": f"tpm limit higher than user max. User tpm limit={user_api_key_dict.tpm_limit}" "error": f"tpm limit higher than user max. User tpm limit={user_api_key_dict.tpm_limit}. User role={user_api_key_dict.user_role}"
}, },
) )
@ -5651,7 +5734,7 @@ async def new_team(
raise HTTPException( raise HTTPException(
status_code=400, status_code=400,
detail={ detail={
"error": f"rpm limit higher than user max. User rpm limit={user_api_key_dict.rpm_limit}" "error": f"rpm limit higher than user max. User rpm limit={user_api_key_dict.rpm_limit}. User role={user_api_key_dict.user_role}"
}, },
) )
@ -5663,7 +5746,7 @@ async def new_team(
raise HTTPException( raise HTTPException(
status_code=400, status_code=400,
detail={ detail={
"error": f"max budget higher than user max. User max budget={user_api_key_dict.max_budget}" "error": f"max budget higher than user max. User max budget={user_api_key_dict.max_budget}. User role={user_api_key_dict.user_role}"
}, },
) )
@ -5673,7 +5756,7 @@ async def new_team(
raise HTTPException( raise HTTPException(
status_code=400, status_code=400,
detail={ detail={
"error": f"Model not in allowed user models. User allowed models={user_api_key_dict.models}" "error": f"Model not in allowed user models. User allowed models={user_api_key_dict.models}. User id={user_api_key_dict.user_id}"
}, },
) )
@ -6170,7 +6253,7 @@ async def block_team(
raise Exception("No DB Connected.") raise Exception("No DB Connected.")
record = await prisma_client.db.litellm_teamtable.update( record = await prisma_client.db.litellm_teamtable.update(
where={"team_id": data.team_id}, data={"blocked": True} where={"team_id": data.team_id}, data={"blocked": True} # type: ignore
) )
return record return record
@ -6192,7 +6275,7 @@ async def unblock_team(
raise Exception("No DB Connected.") raise Exception("No DB Connected.")
record = await prisma_client.db.litellm_teamtable.update( record = await prisma_client.db.litellm_teamtable.update(
where={"team_id": data.team_id}, data={"blocked": False} where={"team_id": data.team_id}, data={"blocked": False} # type: ignore
) )
return record return record
@ -6795,7 +6878,6 @@ async def async_queue_request(
request: Request, request: Request,
model: Optional[str] = None, model: Optional[str] = None,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
background_tasks: BackgroundTasks = BackgroundTasks(),
): ):
global general_settings, user_debug, proxy_logging_obj global general_settings, user_debug, proxy_logging_obj
""" """
@ -7058,6 +7140,13 @@ async def login(request: Request):
except ImportError: except ImportError:
subprocess.run(["pip", "install", "python-multipart"]) subprocess.run(["pip", "install", "python-multipart"])
global master_key global master_key
if master_key is None:
raise ProxyException(
message="Master Key not set for Proxy. Please set Master Key to use Admin UI. Set `LITELLM_MASTER_KEY` in .env or set general_settings:master_key in config.yaml. https://docs.litellm.ai/docs/proxy/virtual_keys. If set, use `--detailed_debug` to debug issue.",
type="auth_error",
param="master_key",
code=status.HTTP_500_INTERNAL_SERVER_ERROR,
)
form = await request.form() form = await request.form()
username = str(form.get("username")) username = str(form.get("username"))
password = str(form.get("password")) password = str(form.get("password"))
@ -7997,6 +8086,8 @@ async def shutdown_event():
await jwt_handler.close() await jwt_handler.close()
if db_writer_client is not None:
await db_writer_client.close()
## RESET CUSTOM VARIABLES ## ## RESET CUSTOM VARIABLES ##
cleanup_router_config_variables() cleanup_router_config_variables()

View file

@ -5,7 +5,6 @@ datasource client {
generator client { generator client {
provider = "prisma-client-py" provider = "prisma-client-py"
previewFeatures = ["jsonProtocol"]
} }
// Budget / Rate Limits for an org // Budget / Rate Limits for an org

View file

@ -13,6 +13,7 @@ from litellm.proxy._types import (
Member, Member,
) )
from litellm.caching import DualCache from litellm.caching import DualCache
from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
from litellm.proxy.hooks.parallel_request_limiter import ( from litellm.proxy.hooks.parallel_request_limiter import (
_PROXY_MaxParallelRequestsHandler, _PROXY_MaxParallelRequestsHandler,
) )
@ -298,6 +299,7 @@ class ProxyLogging:
return return
else: else:
user_info = str(user_info) user_info = str(user_info)
# percent of max_budget left to spend # percent of max_budget left to spend
if user_max_budget > 0: if user_max_budget > 0:
percent_left = (user_max_budget - user_current_spend) / user_max_budget percent_left = (user_max_budget - user_current_spend) / user_max_budget
@ -317,22 +319,35 @@ class ProxyLogging:
) )
return return
## PREVENTATIVE ALERTING ## - https://github.com/BerriAI/litellm/issues/2727
# - Alert once within a 28d period
# - Cache this information
# - Don't re-alert if an alert was already sent
_cache: DualCache = self.call_details["user_api_key_cache"]
# check if 5% of max budget is left # check if 5% of max budget is left
if percent_left <= 0.05: if percent_left <= 0.05:
message = "5% budget left for" + user_info message = "5% budget left for" + user_info
result = await _cache.async_get_cache(key=message)
if result is None:
await self.alerting_handler( await self.alerting_handler(
message=message, message=message,
level="Medium", level="Medium",
) )
await _cache.async_set_cache(key=message, value="SENT", ttl=2419200)
return return
# check if 15% of max budget is left # check if 15% of max budget is left
if percent_left <= 0.15: if percent_left <= 0.15:
message = "15% budget left for" + user_info message = "15% budget left for" + user_info
result = await _cache.async_get_cache(key=message)
if result is None:
await self.alerting_handler( await self.alerting_handler(
message=message, message=message,
level="Low", level="Low",
) )
await _cache.async_set_cache(key=message, value="SENT", ttl=2419200)
return return
return return
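The caching added here is the whole dedup mechanism: the alert message doubles as the cache key, and a 28-day TTL (2419200 seconds) suppresses repeats. A minimal sketch, assuming a DualCache-like object with async get/set (`alert_once` is an illustrative name):

```
# Illustrative cache-gated alert: send once, then suppress for 28 days.
async def alert_once(cache, alerting_handler, message: str, level: str) -> None:
    if await cache.async_get_cache(key=message) is None:
        await alerting_handler(message=message, level=level)
        # The message itself is the cache key; TTL 2419200s = 28 days.
        await cache.async_set_cache(key=message, value="SENT", ttl=2419200)
```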
@ -449,16 +464,15 @@ class ProxyLogging:
Covers: Covers:
1. /chat/completions 1. /chat/completions
""" """
new_response = copy.deepcopy(response)
for callback in litellm.callbacks: for callback in litellm.callbacks:
try: try:
if isinstance(callback, CustomLogger): if isinstance(callback, CustomLogger):
await callback.async_post_call_success_hook( await callback.async_post_call_success_hook(
user_api_key_dict=user_api_key_dict, response=new_response user_api_key_dict=user_api_key_dict, response=response
) )
except Exception as e: except Exception as e:
raise e raise e
return new_response return response
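Dropping the deepcopy trades isolation for throughput: callbacks now receive the live response object, so any mutation they make is visible to the caller. A condensed sketch of the resulting fan-out, assuming litellm-style CustomLogger callbacks:

```
# Illustrative success-hook fan-out after the deepcopy removal.
from litellm.integrations.custom_logger import CustomLogger

async def run_success_hooks(callbacks: list, user_api_key_dict, response):
    for callback in callbacks:
        if isinstance(callback, CustomLogger):
            # The live response is passed through - no per-request copy.
            await callback.async_post_call_success_hook(
                user_api_key_dict=user_api_key_dict, response=response
            )
    return response
```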
async def post_call_streaming_hook( async def post_call_streaming_hook(
self, self,
@ -1013,6 +1027,8 @@ class PrismaClient:
t.max_budget AS team_max_budget, t.max_budget AS team_max_budget,
t.tpm_limit AS team_tpm_limit, t.tpm_limit AS team_tpm_limit,
t.rpm_limit AS team_rpm_limit, t.rpm_limit AS team_rpm_limit,
t.models AS team_models,
t.blocked AS team_blocked,
m.aliases as team_model_aliases m.aliases as team_model_aliases
FROM "LiteLLM_VerificationToken" AS v FROM "LiteLLM_VerificationToken" AS v
LEFT JOIN "LiteLLM_TeamTable" AS t ON v.team_id = t.team_id LEFT JOIN "LiteLLM_TeamTable" AS t ON v.team_id = t.team_id
@ -1023,6 +1039,10 @@ class PrismaClient:
response = await self.db.query_first(query=sql_query) response = await self.db.query_first(query=sql_query)
if response is not None: if response is not None:
if response["team_models"] is None:
response["team_models"] = []
if response["team_blocked"] is None:
response["team_blocked"] = False
response = LiteLLM_VerificationTokenView(**response) response = LiteLLM_VerificationTokenView(**response)
# for prisma we need to cast the expires time to str # for prisma we need to cast the expires time to str
if response.expires is not None and isinstance( if response.expires is not None and isinstance(
@ -1867,7 +1887,7 @@ async def reset_budget(prisma_client: PrismaClient):
async def update_spend( async def update_spend(
prisma_client: PrismaClient, prisma_client: PrismaClient, db_writer_client: Optional[HTTPHandler]
): ):
""" """
Batch write updates to db. Batch write updates to db.
@ -1995,13 +2015,30 @@ async def update_spend(
except Exception as e: except Exception as e:
raise e raise e
### UPDATE SPEND LOGS ###
base_url = os.getenv("SPEND_LOGS_URL", None)
if (
len(prisma_client.spend_log_transactons) > 0
and base_url is not None
and db_writer_client is not None
):
if not base_url.endswith("/"):
base_url += "/"
response = await db_writer_client.post(
url=base_url + "spend/update",
data=json.dumps(prisma_client.spend_log_transactons), # type: ignore
headers={"Content-Type": "application/json"},
)
if response.status_code == 200:
prisma_client.spend_log_transactons = []
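Note that the flush only clears the buffer on a 200, so a failed batch is retried on the next interval. A self-contained sketch of this step, assuming an httpx-style async client (`flush_spend_logs` is an illustrative name):

```
# Illustrative flush of buffered spend logs to the external writer service.
import json
import os

async def flush_spend_logs(prisma_client, db_writer_client) -> None:
    base_url = os.getenv("SPEND_LOGS_URL")
    if not prisma_client.spend_log_transactons or base_url is None or db_writer_client is None:
        return
    if not base_url.endswith("/"):
        base_url += "/"
    response = await db_writer_client.post(
        url=base_url + "spend/update",
        data=json.dumps(prisma_client.spend_log_transactons),
        headers={"Content-Type": "application/json"},
    )
    if response.status_code == 200:
        # Clear only after the writer acknowledged the batch.
        prisma_client.spend_log_transactons = []
```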
async def monitor_spend_list(prisma_client: PrismaClient):
""" # async def monitor_spend_list(prisma_client: PrismaClient):
Check the length of each spend list, if it exceeds a threshold (e.g. 100 items) - write to db # """
""" # Check the length of each spend list, if it exceeds a threshold (e.g. 100 items) - write to db
if len(prisma_client.user_list_transactons) > 10000: # """
await update_spend(prisma_client=prisma_client) # if len(prisma_client.user_list_transactons) > 10000:
# await update_spend(prisma_client=prisma_client)
async def _read_request_body(request): async def _read_request_body(request):

View file

@ -11,6 +11,7 @@ from litellm import token_counter
from litellm.caching import DualCache from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_router_logger from litellm._logging import verbose_router_logger
from litellm.utils import print_verbose
class LowestTPMLoggingHandler(CustomLogger): class LowestTPMLoggingHandler(CustomLogger):
@ -153,27 +154,21 @@ class LowestTPMLoggingHandler(CustomLogger):
# Find lowest used model # Find lowest used model
# ---------------------- # ----------------------
lowest_tpm = float("inf") lowest_tpm = float("inf")
deployment = None
if tpm_dict is None: # base case - none of the deployments have been used if tpm_dict is None: # base case - none of the deployments have been used
# Return the 1st deployment where deployment["tpm"] >= input_tokens # initialize a tpm dict with {model_id: 0}
tpm_dict = {}
for deployment in healthy_deployments: for deployment in healthy_deployments:
_deployment_tpm = ( tpm_dict[deployment["model_info"]["id"]] = 0
deployment.get("tpm", None) else:
or deployment.get("litellm_params", {}).get("tpm", None)
or deployment.get("model_info", {}).get("tpm", None)
or float("inf")
)
if _deployment_tpm >= input_tokens:
return deployment
return None
all_deployments = tpm_dict
for d in healthy_deployments: for d in healthy_deployments:
## if healthy deployment not yet used ## if healthy deployment not yet used
if d["model_info"]["id"] not in all_deployments: if d["model_info"]["id"] not in tpm_dict:
all_deployments[d["model_info"]["id"]] = 0 tpm_dict[d["model_info"]["id"]] = 0
all_deployments = tpm_dict
deployment = None
for item, item_tpm in all_deployments.items(): for item, item_tpm in all_deployments.items():
## get the item from model list ## get the item from model list
_deployment = None _deployment = None
@ -184,24 +179,27 @@ class LowestTPMLoggingHandler(CustomLogger):
if _deployment is None: if _deployment is None:
continue # skip to next one continue # skip to next one
_deployment_tpm = ( _deployment_tpm = None
_deployment.get("tpm", None) if _deployment_tpm is None:
or _deployment.get("litellm_params", {}).get("tpm", None) _deployment_tpm = _deployment.get("tpm")
or _deployment.get("model_info", {}).get("tpm", None) if _deployment_tpm is None:
or float("inf") _deployment_tpm = _deployment.get("litellm_params", {}).get("tpm")
) if _deployment_tpm is None:
_deployment_tpm = _deployment.get("model_info", {}).get("tpm")
if _deployment_tpm is None:
_deployment_tpm = float("inf")
_deployment_rpm = ( _deployment_rpm = None
_deployment.get("rpm", None) if _deployment_rpm is None:
or _deployment.get("litellm_params", {}).get("rpm", None) _deployment_rpm = _deployment.get("rpm")
or _deployment.get("model_info", {}).get("rpm", None) if _deployment_rpm is None:
or float("inf") _deployment_rpm = _deployment.get("litellm_params", {}).get("rpm")
) if _deployment_rpm is None:
_deployment_rpm = _deployment.get("model_info", {}).get("rpm")
if _deployment_rpm is None:
_deployment_rpm = float("inf")
if item_tpm == 0: if item_tpm + input_tokens > _deployment_tpm:
deployment = _deployment
break
elif item_tpm + input_tokens > _deployment_tpm:
continue continue
elif (rpm_dict is not None and item in rpm_dict) and ( elif (rpm_dict is not None and item in rpm_dict) and (
rpm_dict[item] + 1 > _deployment_rpm rpm_dict[item] + 1 > _deployment_rpm
@ -210,5 +208,5 @@ class LowestTPMLoggingHandler(CustomLogger):
elif item_tpm < lowest_tpm: elif item_tpm < lowest_tpm:
lowest_tpm = item_tpm lowest_tpm = item_tpm
deployment = _deployment deployment = _deployment
verbose_router_logger.info("returning picked lowest tpm/rpm deployment.") print_verbose("returning picked lowest tpm/rpm deployment.")
return deployment return deployment
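The point of replacing the `or` chains is that `0` is falsy: `deployment.get("tpm") or ...` silently skips a configured `tpm: 0` and falls through to `float("inf")`, while explicit `None` checks keep it. A condensed sketch of the resolution and selection (`_limit` and `pick_lowest_tpm` are illustrative names):

```
# Illustrative limit resolution + lowest-TPM selection.
from typing import Optional

def _limit(deployment: dict, key: str) -> float:
    # Explicit None checks so a configured 0 survives; an `or` chain would
    # treat 0 as missing and fall through to the next source.
    val = deployment.get(key)
    if val is None:
        val = deployment.get("litellm_params", {}).get(key)
    if val is None:
        val = deployment.get("model_info", {}).get(key)
    return float("inf") if val is None else val

def pick_lowest_tpm(all_deployments: dict, deployments_by_id: dict,
                    rpm_dict: Optional[dict], input_tokens: int):
    lowest_tpm, chosen = float("inf"), None
    for dep_id, used_tpm in all_deployments.items():
        dep = deployments_by_id.get(dep_id)
        if dep is None:
            continue
        if used_tpm + input_tokens > _limit(dep, "tpm"):
            continue  # request would exceed this deployment's TPM cap
        if rpm_dict is not None and dep_id in rpm_dict and rpm_dict[dep_id] + 1 > _limit(dep, "rpm"):
            continue  # already at its RPM cap
        if used_tpm < lowest_tpm:
            lowest_tpm, chosen = used_tpm, dep
    return chosen
```

With `tpm: 0`, every request overflows the cap, no deployment is returned, and the router raises No Models Available - which is what `test_single_deployment_tpm_zero` below asserts.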

View file

@ -1107,6 +1107,7 @@ def test_completion_openai_litellm_key():
# test_ completion_openai_litellm_key() # test_ completion_openai_litellm_key()
@pytest.mark.skip(reason="Unresponsive endpoint.[TODO] Rehost this somewhere else")
def test_completion_ollama_hosted(): def test_completion_ollama_hosted():
try: try:
litellm.request_timeout = 20 # give ollama 20 seconds to response litellm.request_timeout = 20 # give ollama 20 seconds to response

View file

@ -31,6 +31,8 @@ def test_image_generation_openai():
except litellm.ContentPolicyViolationError: except litellm.ContentPolicyViolationError:
pass # OpenAI randomly raises these errors - skip when they occur pass # OpenAI randomly raises these errors - skip when they occur
except Exception as e: except Exception as e:
if "Connection error" in str(e):
pass
pytest.fail(f"An exception occurred - {str(e)}") pytest.fail(f"An exception occurred - {str(e)}")
@ -53,6 +55,8 @@ def test_image_generation_azure():
except Exception as e: except Exception as e:
if "Your task failed as a result of our safety system." in str(e): if "Your task failed as a result of our safety system." in str(e):
pass pass
if "Connection error" in str(e):
pass
else: else:
pytest.fail(f"An exception occurred - {str(e)}") pytest.fail(f"An exception occurred - {str(e)}")
@ -79,6 +83,8 @@ def test_image_generation_azure_dall_e_3():
except Exception as e: except Exception as e:
if "Your task failed as a result of our safety system." in str(e): if "Your task failed as a result of our safety system." in str(e):
pass pass
if "Connection error" in str(e):
pass
else: else:
pytest.fail(f"An exception occurred - {str(e)}") pytest.fail(f"An exception occurred - {str(e)}")
@ -97,6 +103,8 @@ async def test_async_image_generation_openai():
except litellm.ContentPolicyViolationError: except litellm.ContentPolicyViolationError:
pass # openai randomly raises these errors - skip when they occur pass # openai randomly raises these errors - skip when they occur
except Exception as e: except Exception as e:
if "Connection error" in str(e):
pass
pytest.fail(f"An exception occurred - {str(e)}") pytest.fail(f"An exception occurred - {str(e)}")
@ -117,6 +125,8 @@ async def test_async_image_generation_azure():
except Exception as e: except Exception as e:
if "Your task failed as a result of our safety system." in str(e): if "Your task failed as a result of our safety system." in str(e):
pass pass
if "Connection error" in str(e):
pass
else: else:
pytest.fail(f"An exception occurred - {str(e)}") pytest.fail(f"An exception occurred - {str(e)}")

View file

@ -590,6 +590,8 @@ async def test_aimg_gen_on_router():
pass pass
elif "Operation polling timed out" in str(e): elif "Operation polling timed out" in str(e):
pass pass
elif "Connection error" in str(e):
pass
else: else:
traceback.print_exc() traceback.print_exc()
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
@ -671,6 +673,8 @@ def test_aembedding_on_router():
pass pass
elif "Operation polling timed out" in str(e): elif "Operation polling timed out" in str(e):
pass pass
elif "Connection error" in str(e):
pass
else: else:
traceback.print_exc() traceback.print_exc()
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")

View file

@ -264,7 +264,7 @@ def test_router_skip_rate_limited_deployments():
end_time=end_time, end_time=end_time,
) )
## CHECK WHAT'S SELECTED ## - should skip 2, and pick 1 ## CHECK WHAT'S SELECTED ##
# print(router.lowesttpm_logger.get_available_deployments(model_group="azure-model")) # print(router.lowesttpm_logger.get_available_deployments(model_group="azure-model"))
try: try:
router.get_available_deployment( router.get_available_deployment(
@ -273,7 +273,41 @@ def test_router_skip_rate_limited_deployments():
) )
pytest.fail(f"Should have raised No Models Available error") pytest.fail(f"Should have raised No Models Available error")
except Exception as e: except Exception as e:
pass print(f"An exception occurred! {str(e)}")
def test_single_deployment_tpm_zero():
import litellm
import os
from datetime import datetime
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "gpt-3.5-turbo",
"api_key": os.getenv("OPENAI_API_KEY"),
"tpm": 0,
},
}
]
router = litellm.Router(
model_list=model_list,
routing_strategy="usage-based-routing",
cache_responses=True,
)
model = "gpt-3.5-turbo"
messages = [{"content": "Hello, how are you?", "role": "user"}]
try:
router.get_available_deployment(
model=model,
messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
pytest.fail(f"Should have raised No Models Available error")
except Exception as e:
print(f"it worked - {str(e)}! \n{traceback.format_exc()}")
@pytest.mark.asyncio @pytest.mark.asyncio

View file

@ -92,4 +92,7 @@ async def test_batch_update_spend(prisma_client):
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
await litellm.proxy.proxy_server.prisma_client.connect() await litellm.proxy.proxy_server.prisma_client.connect()
await update_spend(prisma_client=litellm.proxy.proxy_server.prisma_client) await update_spend(
prisma_client=litellm.proxy.proxy_server.prisma_client,
db_writer_client=None,
)

View file

@ -2775,7 +2775,6 @@ def client(original_function):
or isinstance(e, openai.Timeout) or isinstance(e, openai.Timeout)
or isinstance(e, openai.APIConnectionError) or isinstance(e, openai.APIConnectionError)
): ):
print_verbose(f"RETRY TRIGGERED!")
kwargs["num_retries"] = num_retries kwargs["num_retries"] = num_retries
return litellm.completion_with_retries(*args, **kwargs) return litellm.completion_with_retries(*args, **kwargs)
elif ( elif (
@ -3598,6 +3597,8 @@ def token_counter(
raise ValueError("text and messages cannot both be None") raise ValueError("text and messages cannot both be None")
elif isinstance(text, List): elif isinstance(text, List):
text = "".join(t for t in text if isinstance(t, str)) text = "".join(t for t in text if isinstance(t, str))
elif isinstance(text, str):
count_response_tokens = True  # user is just counting tokens for plain text; don't add the ChatML +3 tokens per message
if model is not None: if model is not None:
tokenizer_json = _select_tokenizer(model=model) tokenizer_json = _select_tokenizer(model=model)
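The effect of the new branch: a bare string is counted as raw tokens, without the per-message ChatML overhead applied to chat-formatted input. A quick illustration using litellm's public `token_counter` (exact counts depend on the tokenizer):

```
# Plain text vs. chat messages: the latter includes per-message ChatML overhead.
from litellm import token_counter

raw = token_counter(model="gpt-3.5-turbo", text="hello world")
chat = token_counter(model="gpt-3.5-turbo",
                     messages=[{"role": "user", "content": "hello world"}])
assert chat >= raw  # chat counting adds message framing tokens
```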

View file

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "litellm" name = "litellm"
version = "1.34.7" version = "1.34.11"
description = "Library to easily interface with LLM API providers" description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"] authors = ["BerriAI"]
license = "MIT" license = "MIT"
@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api" build-backend = "poetry.core.masonry.api"
[tool.commitizen] [tool.commitizen]
version = "1.34.7" version = "1.34.11"
version_files = [ version_files = [
"pyproject.toml:^version" "pyproject.toml:^version"
] ]

View file

@ -5,7 +5,7 @@ fastapi>=0.109.1 # server dep
pydantic>=2.5 # openai req. pydantic>=2.5 # openai req.
backoff==2.2.1 # server dep backoff==2.2.1 # server dep
pyyaml>=6.0.1 # server dep pyyaml>=6.0.1 # server dep
uvicorn==0.22.0 # server dep uvicorn==0.29.0 # server dep
gunicorn==21.2.0 # server dep gunicorn==21.2.0 # server dep
boto3==1.34.34 # aws bedrock/sagemaker calls boto3==1.34.34 # aws bedrock/sagemaker calls
redis==5.0.0 # caching redis==5.0.0 # caching

View file

@ -5,7 +5,6 @@ datasource client {
generator client { generator client {
provider = "prisma-client-py" provider = "prisma-client-py"
previewFeatures = ["jsonProtocol"]
} }
// Budget / Rate Limits for an org // Budget / Rate Limits for an org

View file

@ -198,6 +198,10 @@ async def image_generation(session, key):
print() print()
if status != 200: if status != 200:
if (
"Connection error" in response_text
): # OpenAI endpoint returns a connection error
return
raise Exception(f"Request did not return a 200 status code: {status}") raise Exception(f"Request did not return a 200 status code: {status}")

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
self.__BUILD_MANIFEST={__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/_error":["static/chunks/pages/_error-d6107f1aac0c574c.js"],sortedPages:["/_app","/_error"]},self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();

View file

@ -0,0 +1 @@
self.__SSG_MANIFEST=new Set([]);self.__SSG_MANIFEST_CB&&self.__SSG_MANIFEST_CB()

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_c23dc8', '__Inter_Fallback_c23dc8'",fontStyle:"normal"},className:"__className_c23dc8"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=87421)}),_N_E=n.O()}]);

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-3b0d290a8fe6941d.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-3b0d290a8fe6941d.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/68a21c6e6697f7ca.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[19914,[\"730\",\"static/chunks/730-1411b729a1c79695.js\",\"931\",\"static/chunks/app/page-144687b251040a22.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/68a21c6e6697f7ca.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"aIO8mtlEIEUTmgL8cGjve\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html> <!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-3b0d290a8fe6941d.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-3b0d290a8fe6941d.js" crossorigin="" 
async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/68a21c6e6697f7ca.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[92182,[\"730\",\"static/chunks/730-1411b729a1c79695.js\",\"931\",\"static/chunks/app/page-8672e345e79b8043.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/68a21c6e6697f7ca.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"YrQPd5ySk25sMOIe_YoyJ\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin 
UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[19914,["730","static/chunks/730-1411b729a1c79695.js","931","static/chunks/app/page-144687b251040a22.js"],""] 3:I[92182,["730","static/chunks/730-1411b729a1c79695.js","931","static/chunks/app/page-8672e345e79b8043.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["aIO8mtlEIEUTmgL8cGjve",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/68a21c6e6697f7ca.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 0:["YrQPd5ySk25sMOIe_YoyJ",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/68a21c6e6697f7ca.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@ -12,33 +12,32 @@ import {
Select, Select,
message, message,
} from "antd"; } from "antd";
import { keyCreateCall, slackBudgetAlertsHealthCheck } from "./networking"; import { keyCreateCall, slackBudgetAlertsHealthCheck, modelAvailableCall } from "./networking";
const { Option } = Select; const { Option } = Select;
interface CreateKeyProps { interface CreateKeyProps {
userID: string; userID: string;
teamID: string | null; team: any | null;
userRole: string | null; userRole: string | null;
accessToken: string; accessToken: string;
data: any[] | null; data: any[] | null;
userModels: string[];
setData: React.Dispatch<React.SetStateAction<any[] | null>>; setData: React.Dispatch<React.SetStateAction<any[] | null>>;
} }
const CreateKey: React.FC<CreateKeyProps> = ({ const CreateKey: React.FC<CreateKeyProps> = ({
userID, userID,
teamID, team,
userRole, userRole,
accessToken, accessToken,
data, data,
userModels,
setData, setData,
}) => { }) => {
const [form] = Form.useForm(); const [form] = Form.useForm();
const [isModalVisible, setIsModalVisible] = useState(false); const [isModalVisible, setIsModalVisible] = useState(false);
const [apiKey, setApiKey] = useState(null); const [apiKey, setApiKey] = useState(null);
const [softBudget, setSoftBudget] = useState(null); const [softBudget, setSoftBudget] = useState(null);
const [userModels, setUserModels] = useState([]);
const handleOk = () => { const handleOk = () => {
setIsModalVisible(false); setIsModalVisible(false);
form.resetFields(); form.resetFields();
@ -50,6 +49,29 @@ const CreateKey: React.FC<CreateKeyProps> = ({
form.resetFields(); form.resetFields();
}; };
useEffect(() => {
const fetchUserModels = async () => {
try {
if (userID === null || userRole === null) {
return;
}
if (accessToken !== null) {
const model_available = await modelAvailableCall(accessToken, userID, userRole);
let available_model_names = model_available["data"].map(
(element: { id: string }) => element.id
);
console.log("available_model_names:", available_model_names);
setUserModels(available_model_names);
}
} catch (error) {
console.error("Error fetching user models:", error);
}
};
fetchUserModels();
}, [accessToken, userID, userRole]);
const handleCreate = async (formValues: Record<string, any>) => { const handleCreate = async (formValues: Record<string, any>) => {
try { try {
message.info("Making API Call"); message.info("Making API Call");
@ -105,12 +127,15 @@ const CreateKey: React.FC<CreateKeyProps> = ({
<Form.Item label="Key Name" name="key_alias"> <Form.Item label="Key Name" name="key_alias">
<Input /> <Input />
</Form.Item> </Form.Item>
<Form.Item label="Team ID" name="team_id"> <Form.Item
<Input label="Team ID"
placeholder="ai_team" name="team_id"
defaultValue={teamID ? teamID : ""} initialValue={team ? team["team_id"] : null}
/> valuePropName="team_id"
>
<Input value={team ? team["team_alias"] : ""} disabled />
</Form.Item> </Form.Item>
<Form.Item label="Models" name="models"> <Form.Item label="Models" name="models">
<Select <Select
mode="multiple" mode="multiple"
@ -158,7 +183,7 @@ const CreateKey: React.FC<CreateKeyProps> = ({
<Input /> <Input />
</Form.Item> </Form.Item>
<Form.Item label="Team ID (Contact Group)" name="team_id"> <Form.Item label="Team ID (Contact Group)" name="team_id">
<Input placeholder="ai_team" /> <Input placeholder="default team (create a new team)" />
</Form.Item> </Form.Item>
<Form.Item label="Description" name="description"> <Form.Item label="Description" name="description">

View file

@ -59,9 +59,6 @@ const Navbar: React.FC<NavbarProps> = ({
target="_blank" target="_blank"
className="mr-2" className="mr-2"
> >
<Button variant="primary" size="lg">
Enable SSO
</Button>
</a> </a>
) : null} ) : null}

View file

@ -102,7 +102,7 @@ const Team: React.FC<TeamProps> = ({
const handleCreate = async (formValues: Record<string, any>) => { const handleCreate = async (formValues: Record<string, any>) => {
try { try {
if (accessToken != null) { if (accessToken != null) {
//message.info("Making API Call"); message.info("Creating Team");
const response: any = await teamCreateCall(accessToken, formValues); const response: any = await teamCreateCall(accessToken, formValues);
if (teams !== null) { if (teams !== null) {
setTeams([...teams, response]); setTeams([...teams, response]);
@ -122,7 +122,7 @@ const Team: React.FC<TeamProps> = ({
const handleMemberCreate = async (formValues: Record<string, any>) => { const handleMemberCreate = async (formValues: Record<string, any>) => {
try { try {
if (accessToken != null && teams != null) { if (accessToken != null && teams != null) {
message.info("Making API Call"); message.info("Adding Member");
const user_role: Member = { const user_role: Member = {
role: "user", role: "user",
user_email: formValues.user_email, user_email: formValues.user_email,
@ -157,7 +157,7 @@ const Team: React.FC<TeamProps> = ({
}; };
console.log(`received teams ${teams}`); console.log(`received teams ${teams}`);
return ( return (
<div className="w-full"> <div className="w-full mx-4">
<Grid numItems={1} className="gap-2 p-2 h-[75vh] w-full"> <Grid numItems={1} className="gap-2 p-2 h-[75vh] w-full">
<Col numColSpan={1}> <Col numColSpan={1}>
<Title level={4}>All Teams</Title> <Title level={4}>All Teams</Title>
@ -168,6 +168,7 @@ const Team: React.FC<TeamProps> = ({
<TableHeaderCell>Team Name</TableHeaderCell> <TableHeaderCell>Team Name</TableHeaderCell>
<TableHeaderCell>Spend (USD)</TableHeaderCell> <TableHeaderCell>Spend (USD)</TableHeaderCell>
<TableHeaderCell>Budget (USD)</TableHeaderCell> <TableHeaderCell>Budget (USD)</TableHeaderCell>
<TableHeaderCell>Models</TableHeaderCell>
<TableHeaderCell>TPM / RPM Limits</TableHeaderCell> <TableHeaderCell>TPM / RPM Limits</TableHeaderCell>
</TableRow> </TableRow>
</TableHead> </TableHead>
@ -176,12 +177,15 @@ const Team: React.FC<TeamProps> = ({
{teams && teams.length > 0 {teams && teams.length > 0
? teams.map((team: any) => ( ? teams.map((team: any) => (
<TableRow key={team.team_id}> <TableRow key={team.team_id}>
<TableCell>{team["team_alias"]}</TableCell> <TableCell style={{ maxWidth: "4px", whiteSpace: "pre-wrap", overflow: "hidden" }}>{team["team_alias"]}</TableCell>
<TableCell>{team["spend"]}</TableCell> <TableCell style={{ maxWidth: "4px", whiteSpace: "pre-wrap", overflow: "hidden" }}>{team["spend"]}</TableCell>
<TableCell> <TableCell style={{ maxWidth: "4px", whiteSpace: "pre-wrap", overflow: "hidden" }}>
{team["max_budget"] ? team["max_budget"] : "No limit"} {team["max_budget"] ? team["max_budget"] : "No limit"}
</TableCell> </TableCell>
<TableCell> <TableCell style={{ maxWidth: "4px", whiteSpace: "pre-wrap", overflow: "hidden" }}>
<Text>{JSON.stringify(team["models"] ? team["models"] : [])}</Text>
</TableCell>
<TableCell style={{ maxWidth: "4px", whiteSpace: "pre-wrap", overflow: "hidden" }}>
<Text> <Text>
TPM Limit:{" "} TPM Limit:{" "}
{team.tpm_limit ? team.tpm_limit : "Unlimited"}{" "} {team.tpm_limit ? team.tpm_limit : "Unlimited"}{" "}
@ -189,9 +193,9 @@ const Team: React.FC<TeamProps> = ({
{team.rpm_limit ? team.rpm_limit : "Unlimited"} {team.rpm_limit ? team.rpm_limit : "Unlimited"}
</Text> </Text>
</TableCell> </TableCell>
<TableCell> {/* <TableCell>
<Icon icon={CogIcon} size="sm" /> <Icon icon={CogIcon} size="sm" />
</TableCell> </TableCell> */}
</TableRow> </TableRow>
)) ))
: null} : null}
@ -293,7 +297,7 @@ const Team: React.FC<TeamProps> = ({
<TableRow> <TableRow>
<TableHeaderCell>Member Name</TableHeaderCell> <TableHeaderCell>Member Name</TableHeaderCell>
<TableHeaderCell>Role</TableHeaderCell> <TableHeaderCell>Role</TableHeaderCell>
<TableHeaderCell>Action</TableHeaderCell> {/* <TableHeaderCell>Action</TableHeaderCell> */}
</TableRow> </TableRow>
</TableHead> </TableHead>
@ -310,9 +314,9 @@ const Team: React.FC<TeamProps> = ({
: null} : null}
</TableCell> </TableCell>
<TableCell>{member["role"]}</TableCell> <TableCell>{member["role"]}</TableCell>
<TableCell> {/* <TableCell>
<Icon icon={CogIcon} size="sm" /> <Icon icon={CogIcon} size="sm" />
</TableCell> </TableCell> */}
</TableRow> </TableRow>
) )
) )

View file

@ -203,6 +203,8 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
); );
} }
console.log("inside user dashboard, selected team", selectedTeam);
return ( return (
<div> <div>
<Grid numItems={1} className="gap-0 p-10 h-[75vh] w-full"> <Grid numItems={1} className="gap-0 p-10 h-[75vh] w-full">
@ -220,10 +222,10 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
setData={setKeys} setData={setKeys}
/> />
<CreateKey <CreateKey
key={selectedTeam ? selectedTeam.team_id : null}
userID={userID} userID={userID}
teamID={selectedTeam ? selectedTeam["team_id"] : null} team={selectedTeam ? selectedTeam : null}
userRole={userRole} userRole={userRole}
userModels={userModels}
accessToken={accessToken} accessToken={accessToken}
data={keys} data={keys}
setData={setKeys} setData={setKeys}

View file

@ -141,7 +141,7 @@ const ViewKeyTable: React.FC<ViewKeyTableProps> = ({
/> />
</TableCell> </TableCell>
<TableCell style={{ maxWidth: "4px", whiteSpace: "pre-wrap", overflow: "hidden" }}> <TableCell style={{ maxWidth: "4px", whiteSpace: "pre-wrap", overflow: "hidden" }}>
<Text>{item.team_id}</Text> <Text>{item.team_alias && item.team_alias != "None" ? item.team_alias : item.team_id}</Text>
</TableCell> </TableCell>
<TableCell style={{ maxWidth: "4px", whiteSpace: "pre-wrap", overflow: "hidden" }}> <TableCell style={{ maxWidth: "4px", whiteSpace: "pre-wrap", overflow: "hidden" }}>
<Text>{JSON.stringify(item.metadata).slice(0, 400)}</Text> <Text>{JSON.stringify(item.metadata).slice(0, 400)}</Text>