mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 03:34:10 +00:00
Merge pull request #2722 from BerriAI/litellm_db_perf_improvement
feat(proxy/utils.py): enable updating db in a separate server
This commit is contained in:
commit
b828290c81
17 changed files with 868 additions and 62 deletions
46
.github/workflows/ghcr_deploy.yml
vendored
46
.github/workflows/ghcr_deploy.yml
vendored
|
@ -43,6 +43,14 @@ jobs:
|
|||
push: true
|
||||
file: Dockerfile.database
|
||||
tags: litellm/litellm-database:${{ github.event.inputs.tag || 'latest' }}
|
||||
-
  # Builds the spend-logs service image (litellm-js/spend-logs) and pushes it to Docker Hub.
  name: Build and push litellm-spend_logs image
  uses: docker/build-push-action@v5
  with:
    push: true
    context: ./litellm-js/spend-logs
    # `file` is resolved from the workspace root, not from `context`, so it
    # must point at the spend-logs Dockerfile explicitly; a bare `Dockerfile`
    # would select the repository's top-level Dockerfile instead.
    file: ./litellm-js/spend-logs/Dockerfile
    tags: litellm/litellm-spend_logs:${{ github.event.inputs.tag || 'latest' }}
|
||||
|
||||
build-and-push-image:
|
||||
runs-on: ubuntu-latest
|
||||
|
@ -120,6 +128,44 @@ jobs:
|
|||
tags: ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-database.outputs.tags }}-latest
|
||||
labels: ${{ steps.meta-database.outputs.labels }}
|
||||
platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
|
||||
|
||||
# Builds the spend-logs service image (litellm-js/spend-logs) for multiple
# platforms and pushes it to the container registry configured in env.REGISTRY.
build-and-push-image-spend-logs:
  runs-on: ubuntu-latest
  permissions:
    contents: read
    packages: write
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Log in to the Container registry
      uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    - name: Extract metadata (tags, labels) for spend-logs Dockerfile
      id: meta-spend-logs
      uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
      with:
        images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-spend_logs
    # Configure multi platform Docker builds
    - name: Set up QEMU
      uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345

    - name: Build and push spend-logs Docker image
      uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
      with:
        context: ./litellm-js/spend-logs
        # `file` is resolved from the workspace root, not from `context`;
        # a bare `Dockerfile` would pick up the repository's top-level Dockerfile.
        file: ./litellm-js/spend-logs/Dockerfile
        push: true
        tags: ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-spend-logs.outputs.tags }}-latest
        labels: ${{ steps.meta-spend-logs.outputs.labels }}
        platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
|
||||
|
||||
build-and-push-helm-chart:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
|
|
|
@ -40,7 +40,13 @@ Use this Docker `CMD`. This will start the proxy with 1 Uvicorn Async Worker
|
|||
CMD ["--port", "4000", "--config", "./proxy_server_config.yaml"]
|
||||
```
|
||||
|
||||
## 3. Switch off spend logging and resetting budgets
|
||||
## 3. Move spend logs to separate server
|
||||
|
||||
Writing each spend log to the database can slow down your proxy. In testing, we saw a 70% improvement in median response time after moving spend-log writes to a separate server.
|
||||
|
||||
|
||||
|
||||
## 4. Switch off resetting budgets
|
||||
|
||||
Add this to your config.yaml. (Only spend per Key, User and Team will be tracked - spend per API Call will not be written to the LiteLLM Database)
|
||||
```yaml
|
||||
|
@ -49,7 +55,7 @@ general_settings:
|
|||
disable_reset_budget: true
|
||||
```
|
||||
|
||||
## 4. Switch of `litellm.telemetry`
|
||||
## 5. Switch off `litellm.telemetry`
|
||||
|
||||
Switch off all telemetry tracking done by litellm
|
||||
|
||||
|
|
26
litellm-js/spend-logs/Dockerfile
Normal file
26
litellm-js/spend-logs/Dockerfile
Normal file
|
@ -0,0 +1,26 @@
|
|||
# Use the specific Node.js v20.11.0 image
|
||||
FROM node:20.11.0
|
||||
|
||||
# Set the working directory inside the container
|
||||
WORKDIR /usr/src/app
|
||||
|
||||
# Copy package.json and package-lock.json to the working directory
|
||||
COPY package*.json ./
|
||||
|
||||
# Install dependencies
|
||||
RUN npm install
|
||||
|
||||
# Install Prisma globally
|
||||
RUN npm install -g prisma
|
||||
|
||||
# Copy the rest of the application code
|
||||
COPY . .
|
||||
|
||||
# Generate Prisma client
|
||||
RUN npx prisma generate
|
||||
|
||||
# Expose the port that the Node.js server will run on
|
||||
EXPOSE 3000
|
||||
|
||||
# Command to run the Node.js app with npm run dev
|
||||
CMD ["npm", "run", "dev"]
|
8
litellm-js/spend-logs/README.md
Normal file
8
litellm-js/spend-logs/README.md
Normal file
|
@ -0,0 +1,8 @@
|
|||
```
|
||||
npm install
|
||||
npm run dev
|
||||
```
|
||||
|
||||
```
|
||||
open http://localhost:3000
|
||||
```
|
508
litellm-js/spend-logs/package-lock.json
generated
Normal file
508
litellm-js/spend-logs/package-lock.json
generated
Normal file
|
@ -0,0 +1,508 @@
|
|||
{
|
||||
"name": "spend-logs",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"dependencies": {
|
||||
"@hono/node-server": "^1.9.0",
|
||||
"hono": "^4.1.5"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^20.11.17",
|
||||
"tsx": "^4.7.1"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/aix-ppc64": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.19.12.tgz",
|
||||
"integrity": "sha512-bmoCYyWdEL3wDQIVbcyzRyeKLgk2WtWLTWz1ZIAZF/EGbNOwSA6ew3PftJ1PqMiOOGu0OyFMzG53L0zqIpPeNA==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"aix"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-arm": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.19.12.tgz",
|
||||
"integrity": "sha512-qg/Lj1mu3CdQlDEEiWrlC4eaPZ1KztwGJ9B6J+/6G+/4ewxJg7gqj8eVYWvao1bXrqGiW2rsBZFSX3q2lcW05w==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-arm64": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.19.12.tgz",
|
||||
"integrity": "sha512-P0UVNGIienjZv3f5zq0DP3Nt2IE/3plFzuaS96vihvD0Hd6H/q4WXUGpCxD/E8YrSXfNyRPbpTq+T8ZQioSuPA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-x64": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.19.12.tgz",
|
||||
"integrity": "sha512-3k7ZoUW6Q6YqhdhIaq/WZ7HwBpnFBlW905Fa4s4qWJyiNOgT1dOqDiVAQFwBH7gBRZr17gLrlFCRzF6jFh7Kew==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/darwin-arm64": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.19.12.tgz",
|
||||
"integrity": "sha512-B6IeSgZgtEzGC42jsI+YYu9Z3HKRxp8ZT3cqhvliEHovq8HSX2YX8lNocDn79gCKJXOSaEot9MVYky7AKjCs8g==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/darwin-x64": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.19.12.tgz",
|
||||
"integrity": "sha512-hKoVkKzFiToTgn+41qGhsUJXFlIjxI/jSYeZf3ugemDYZldIXIxhvwN6erJGlX4t5h417iFuheZ7l+YVn05N3A==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/freebsd-arm64": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.19.12.tgz",
|
||||
"integrity": "sha512-4aRvFIXmwAcDBw9AueDQ2YnGmz5L6obe5kmPT8Vd+/+x/JMVKCgdcRwH6APrbpNXsPz+K653Qg8HB/oXvXVukA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"freebsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/freebsd-x64": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.19.12.tgz",
|
||||
"integrity": "sha512-EYoXZ4d8xtBoVN7CEwWY2IN4ho76xjYXqSXMNccFSx2lgqOG/1TBPW0yPx1bJZk94qu3tX0fycJeeQsKovA8gg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"freebsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-arm": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.19.12.tgz",
|
||||
"integrity": "sha512-J5jPms//KhSNv+LO1S1TX1UWp1ucM6N6XuL6ITdKWElCu8wXP72l9MM0zDTzzeikVyqFE6U8YAV9/tFyj0ti+w==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-arm64": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.19.12.tgz",
|
||||
"integrity": "sha512-EoTjyYyLuVPfdPLsGVVVC8a0p1BFFvtpQDB/YLEhaXyf/5bczaGeN15QkR+O4S5LeJ92Tqotve7i1jn35qwvdA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-ia32": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.19.12.tgz",
|
||||
"integrity": "sha512-Thsa42rrP1+UIGaWz47uydHSBOgTUnwBwNq59khgIwktK6x60Hivfbux9iNR0eHCHzOLjLMLfUMLCypBkZXMHA==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-loong64": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.19.12.tgz",
|
||||
"integrity": "sha512-LiXdXA0s3IqRRjm6rV6XaWATScKAXjI4R4LoDlvO7+yQqFdlr1Bax62sRwkVvRIrwXxvtYEHHI4dm50jAXkuAA==",
|
||||
"cpu": [
|
||||
"loong64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-mips64el": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.19.12.tgz",
|
||||
"integrity": "sha512-fEnAuj5VGTanfJ07ff0gOA6IPsvrVHLVb6Lyd1g2/ed67oU1eFzL0r9WL7ZzscD+/N6i3dWumGE1Un4f7Amf+w==",
|
||||
"cpu": [
|
||||
"mips64el"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-ppc64": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.19.12.tgz",
|
||||
"integrity": "sha512-nYJA2/QPimDQOh1rKWedNOe3Gfc8PabU7HT3iXWtNUbRzXS9+vgB0Fjaqr//XNbd82mCxHzik2qotuI89cfixg==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-riscv64": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.19.12.tgz",
|
||||
"integrity": "sha512-2MueBrlPQCw5dVJJpQdUYgeqIzDQgw3QtiAHUC4RBz9FXPrskyyU3VI1hw7C0BSKB9OduwSJ79FTCqtGMWqJHg==",
|
||||
"cpu": [
|
||||
"riscv64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-s390x": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.19.12.tgz",
|
||||
"integrity": "sha512-+Pil1Nv3Umes4m3AZKqA2anfhJiVmNCYkPchwFJNEJN5QxmTs1uzyy4TvmDrCRNT2ApwSari7ZIgrPeUx4UZDg==",
|
||||
"cpu": [
|
||||
"s390x"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-x64": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.19.12.tgz",
|
||||
"integrity": "sha512-B71g1QpxfwBvNrfyJdVDexenDIt1CiDN1TIXLbhOw0KhJzE78KIFGX6OJ9MrtC0oOqMWf+0xop4qEU8JrJTwCg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/netbsd-x64": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.19.12.tgz",
|
||||
"integrity": "sha512-3ltjQ7n1owJgFbuC61Oj++XhtzmymoCihNFgT84UAmJnxJfm4sYCiSLTXZtE00VWYpPMYc+ZQmB6xbSdVh0JWA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"netbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/openbsd-x64": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.19.12.tgz",
|
||||
"integrity": "sha512-RbrfTB9SWsr0kWmb9srfF+L933uMDdu9BIzdA7os2t0TXhCRjrQyCeOt6wVxr79CKD4c+p+YhCj31HBkYcXebw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"openbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/sunos-x64": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.19.12.tgz",
|
||||
"integrity": "sha512-HKjJwRrW8uWtCQnQOz9qcU3mUZhTUQvi56Q8DPTLLB+DawoiQdjsYq+j+D3s9I8VFtDr+F9CjgXKKC4ss89IeA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"sunos"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-arm64": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.19.12.tgz",
|
||||
"integrity": "sha512-URgtR1dJnmGvX864pn1B2YUYNzjmXkuJOIqG2HdU62MVS4EHpU2946OZoTMnRUHklGtJdJZ33QfzdjGACXhn1A==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-ia32": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.19.12.tgz",
|
||||
"integrity": "sha512-+ZOE6pUkMOJfmxmBZElNOx72NKpIa/HFOMGzu8fqzQJ5kgf6aTGrcJaFsNiVMH4JKpMipyK+7k0n2UXN7a8YKQ==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-x64": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.19.12.tgz",
|
||||
"integrity": "sha512-T1QyPSDCyMXaO3pzBkF96E8xMkiRYbUEZADd29SyPGabqxMViNoii+NcK7eWJAEoU6RZyEm5lVSIjTmcdoB9HA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
}
|
||||
},
|
||||
"node_modules/@hono/node-server": {
|
||||
"version": "1.9.0",
|
||||
"resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.9.0.tgz",
|
||||
"integrity": "sha512-oJjk7WXBlENeHhWiMqSyxPIZ3Kmf5ZYxqdlcSIXyN8Rn50bNJsPl99G4POBS03Jxh56FdfRJ0SEnC8mAVIiavQ==",
|
||||
"engines": {
|
||||
"node": ">=18.14.1"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "20.11.30",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.11.30.tgz",
|
||||
"integrity": "sha512-dHM6ZxwlmuZaRmUPfv1p+KrdD1Dci04FbdEm/9wEMouFqxYoFl5aMkt0VMAUtYRQDyYvD41WJLukhq/ha3YuTw==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"undici-types": "~5.26.4"
|
||||
}
|
||||
},
|
||||
"node_modules/esbuild": {
|
||||
"version": "0.19.12",
|
||||
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.19.12.tgz",
|
||||
"integrity": "sha512-aARqgq8roFBj054KvQr5f1sFu0D65G+miZRCuJyJ0G13Zwx7vRar5Zhn2tkQNzIXcBrNVsv/8stehpj+GAjgbg==",
|
||||
"dev": true,
|
||||
"hasInstallScript": true,
|
||||
"bin": {
|
||||
"esbuild": "bin/esbuild"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@esbuild/aix-ppc64": "0.19.12",
|
||||
"@esbuild/android-arm": "0.19.12",
|
||||
"@esbuild/android-arm64": "0.19.12",
|
||||
"@esbuild/android-x64": "0.19.12",
|
||||
"@esbuild/darwin-arm64": "0.19.12",
|
||||
"@esbuild/darwin-x64": "0.19.12",
|
||||
"@esbuild/freebsd-arm64": "0.19.12",
|
||||
"@esbuild/freebsd-x64": "0.19.12",
|
||||
"@esbuild/linux-arm": "0.19.12",
|
||||
"@esbuild/linux-arm64": "0.19.12",
|
||||
"@esbuild/linux-ia32": "0.19.12",
|
||||
"@esbuild/linux-loong64": "0.19.12",
|
||||
"@esbuild/linux-mips64el": "0.19.12",
|
||||
"@esbuild/linux-ppc64": "0.19.12",
|
||||
"@esbuild/linux-riscv64": "0.19.12",
|
||||
"@esbuild/linux-s390x": "0.19.12",
|
||||
"@esbuild/linux-x64": "0.19.12",
|
||||
"@esbuild/netbsd-x64": "0.19.12",
|
||||
"@esbuild/openbsd-x64": "0.19.12",
|
||||
"@esbuild/sunos-x64": "0.19.12",
|
||||
"@esbuild/win32-arm64": "0.19.12",
|
||||
"@esbuild/win32-ia32": "0.19.12",
|
||||
"@esbuild/win32-x64": "0.19.12"
|
||||
}
|
||||
},
|
||||
"node_modules/fsevents": {
|
||||
"version": "2.3.3",
|
||||
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
|
||||
"integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
|
||||
"dev": true,
|
||||
"hasInstallScript": true,
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/get-tsconfig": {
|
||||
"version": "4.7.3",
|
||||
"resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.7.3.tgz",
|
||||
"integrity": "sha512-ZvkrzoUA0PQZM6fy6+/Hce561s+faD1rsNwhnO5FelNjyy7EMGJ3Rz1AQ8GYDWjhRs/7dBLOEJvhK8MiEJOAFg==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"resolve-pkg-maps": "^1.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/hono": {
|
||||
"version": "4.1.5",
|
||||
"resolved": "https://registry.npmjs.org/hono/-/hono-4.1.5.tgz",
|
||||
"integrity": "sha512-3ChJiIoeCxvkt6vnkxJagplrt1YZg3NyNob7ssVeK2PUqEINp4q1F94HzFnvY9QE8asVmbW5kkTDlyWylfg2vg==",
|
||||
"engines": {
|
||||
"node": ">=16.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/resolve-pkg-maps": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
|
||||
"integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
|
||||
"dev": true,
|
||||
"funding": {
|
||||
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/tsx": {
|
||||
"version": "4.7.1",
|
||||
"resolved": "https://registry.npmjs.org/tsx/-/tsx-4.7.1.tgz",
|
||||
"integrity": "sha512-8d6VuibXHtlN5E3zFkgY8u4DX7Y3Z27zvvPKVmLon/D4AjuKzarkUBTLDBgj9iTQ0hg5xM7c/mYiRVM+HETf0g==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"esbuild": "~0.19.10",
|
||||
"get-tsconfig": "^4.7.2"
|
||||
},
|
||||
"bin": {
|
||||
"tsx": "dist/cli.mjs"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"fsevents": "~2.3.3"
|
||||
}
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
"version": "5.26.5",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
|
||||
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
|
||||
"dev": true
|
||||
}
|
||||
}
|
||||
}
|
13
litellm-js/spend-logs/package.json
Normal file
13
litellm-js/spend-logs/package.json
Normal file
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"scripts": {
|
||||
"dev": "tsx watch src/index.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@hono/node-server": "^1.9.0",
|
||||
"hono": "^4.1.5"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^20.11.17",
|
||||
"tsx": "^4.7.1"
|
||||
}
|
||||
}
|
29
litellm-js/spend-logs/schema.prisma
Normal file
29
litellm-js/spend-logs/schema.prisma
Normal file
|
@ -0,0 +1,29 @@
|
|||
generator client {
|
||||
provider = "prisma-client-js"
|
||||
}
|
||||
|
||||
datasource client {
|
||||
provider = "postgresql"
|
||||
url = env("DATABASE_URL")
|
||||
}
|
||||
|
||||
model LiteLLM_SpendLogs {
|
||||
request_id String @id
|
||||
call_type String
|
||||
api_key String @default("")
|
||||
spend Float @default(0.0)
|
||||
total_tokens Int @default(0)
|
||||
prompt_tokens Int @default(0)
|
||||
completion_tokens Int @default(0)
|
||||
startTime DateTime
|
||||
endTime DateTime
|
||||
model String @default("")
|
||||
api_base String @default("")
|
||||
user String @default("")
|
||||
metadata Json @default("{}")
|
||||
cache_hit String @default("")
|
||||
cache_key String @default("")
|
||||
request_tags Json @default("[]")
|
||||
team_id String?
|
||||
end_user String?
|
||||
}
|
32
litellm-js/spend-logs/src/_types.ts
Normal file
32
litellm-js/spend-logs/src/_types.ts
Normal file
|
@ -0,0 +1,32 @@
|
|||
/**
 * Bundle of pending spend updates: three lists of per-entity increments
 * (keys, users, teams) plus the raw spend-log rows.
 */
export type LiteLLM_IncrementSpend = {
  key_transactions: LiteLLM_IncrementObject[]; // [{"key": spend},..]
  user_transactions: LiteLLM_IncrementObject[];
  team_transactions: LiteLLM_IncrementObject[];
  spend_logs_transactions: LiteLLM_SpendLogs[];
};
|
||||
|
||||
/** A single spend increment: an identifier (`key`) and the amount to add (`spend`). */
export type LiteLLM_IncrementObject = {
  key: string;
  spend: number;
};
|
||||
|
||||
/**
 * TypeScript mirror of the `LiteLLM_SpendLogs` Prisma model; the comments
 * below restate the Prisma attribute each field corresponds to.
 */
export type LiteLLM_SpendLogs = {
  // Primary key (`@id` in the Prisma schema).
  request_id: string;
  call_type: string;
  // Prisma default: "".
  api_key: string;
  // Prisma `Float`.
  spend: number;
  // Prisma `Int` (also applies to the two token counters below).
  total_tokens: number;
  prompt_tokens: number;
  completion_tokens: number;
  // Prisma `DateTime`.
  // NOTE(review): values parsed from a JSON request body are strings until
  // converted with `new Date(...)` - confirm against the producer.
  startTime: Date;
  endTime: Date;
  // Prisma default: "".
  model: string;
  api_base: string;
  user: string;
  // Prisma `Json`; left as `any` because the structure is not fixed here.
  metadata: any;
  cache_hit: string;
  cache_key: string;
  // Prisma `Json` (schema default "[]"); could be narrowed once the shape is known.
  request_tags: any;
  // Nullable column in Prisma (`String?`): may be omitted or null.
  team_id?: string | null;
  end_user?: string | null;
};
|
84
litellm-js/spend-logs/src/index.ts
Normal file
84
litellm-js/spend-logs/src/index.ts
Normal file
|
@ -0,0 +1,84 @@
|
|||
import { serve } from '@hono/node-server'
|
||||
import { Hono } from 'hono'
|
||||
import { PrismaClient } from '@prisma/client'
|
||||
import {LiteLLM_SpendLogs, LiteLLM_IncrementSpend, LiteLLM_IncrementObject} from './_types'
|
||||
|
||||
const app = new Hono()
|
||||
const prisma = new PrismaClient()
|
||||
// In-memory storage for logs
|
||||
let spend_logs: LiteLLM_SpendLogs[] = [];
|
||||
const key_logs: LiteLLM_IncrementObject[] = [];
|
||||
const user_logs: LiteLLM_IncrementObject[] = [];
|
||||
const transaction_logs: LiteLLM_IncrementObject[] = [];
|
||||
|
||||
|
||||
app.get('/', (c) => {
|
||||
return c.text('Hello Hono!')
|
||||
})
|
||||
|
||||
const MIN_LOGS = 1; // Minimum number of logs needed to initiate a flush
const FLUSH_INTERVAL = 5000; // Time in ms to wait before trying to flush again
const BATCH_SIZE = 100; // Preferred size of each batch to write to the database
const MAX_LOGS_PER_INTERVAL = 1000; // Maximum number of logs to flush in a single interval

// True while a flush is awaiting the database. setInterval keeps firing every
// FLUSH_INTERVAL whether or not the previous async flush has finished, so
// without this guard two flushes could write the same queued rows.
let isFlushing = false;

/**
 * Write queued spend logs to the database in batches of BATCH_SIZE, handling
 * at most MAX_LOGS_PER_INTERVAL logs per call.
 *
 * Each batch is removed from the in-memory queue only after its `createMany`
 * call resolves, so a failure leaves the unwritten logs queued for the next
 * interval and already-written batches are never sent twice. Errors are logged
 * rather than thrown: this function runs from setInterval, where a rejected
 * promise would otherwise go unhandled.
 */
const flushLogsToDb = async () => {
  if (isFlushing) {
    // Previous flush is still in progress; let it finish.
    return;
  }

  if (spend_logs.length < MIN_LOGS) {
    // Distinguish "below threshold" from "nothing queued at all".
    if (spend_logs.length > 0) {
      console.log(`Accumulating logs. Currently at ${spend_logs.length}, waiting for at least ${MIN_LOGS}.`);
    } else {
      console.log("No logs to flush.");
    }
    return;
  }

  isFlushing = true;
  let processed = 0;
  try {
    // Fix the amount of work up front; logs pushed while we await the DB are
    // appended to the end of the queue and picked up by a later interval.
    const limit = Math.min(spend_logs.length, MAX_LOGS_PER_INTERVAL);

    while (processed < limit) {
      const batch = spend_logs.slice(0, Math.min(BATCH_SIZE, limit - processed));

      // Convert datetime strings to Date objects
      const batchWithDates = batch.map(entry => ({
        ...entry,
        startTime: new Date(entry.startTime),
        endTime: new Date(entry.endTime),
      }));

      await prisma.liteLLM_SpendLogs.createMany({
        data: batchWithDates,
      });

      // Dequeue only after the write succeeded.
      spend_logs.splice(0, batch.length);
      processed += batch.length;

      console.log(`Flushed ${batch.length} logs to the DB.`);
    }

    console.log(`${processed} logs processed. Remaining in queue: ${spend_logs.length}`);
  } catch (err) {
    console.error(
      `Failed to flush logs to the DB after ${processed} logs. ${spend_logs.length} logs remain queued and will be retried.`,
      err
    );
  } finally {
    isFlushing = false;
  }
};

// Setup interval for attempting to flush the logs
setInterval(flushLogsToDb, FLUSH_INTERVAL);
|
||||
|
||||
// Route to receive log messages.
// Expects a JSON array of spend-log objects and appends them to the in-memory
// queue. Any other body is rejected with HTTP 400 instead of failing inside
// the handler or silently queueing garbage.
app.post('/spend/update', async (c) => {
  let body: unknown;
  try {
    body = await c.req.json();
  } catch {
    return c.json({ error: 'Request body must be valid JSON' }, 400);
  }

  if (!Array.isArray(body)) {
    return c.json({ error: 'Request body must be a JSON array of spend logs' }, 400);
  }

  const incomingLogs = body as LiteLLM_SpendLogs[];

  // Push one at a time: spreading a very large array into push() can exceed
  // the engine's maximum argument count.
  for (const log of incomingLogs) {
    spend_logs.push(log);
  }

  console.log(`Received and stored ${incomingLogs.length} logs. Total logs in memory: ${spend_logs.length}`);

  return c.json({ message: `Successfully stored ${incomingLogs.length} logs` });
});
|
||||
|
||||
|
||||
|
||||
const port = 3000
|
||||
console.log(`Server is running on port ${port}`)
|
||||
|
||||
serve({
|
||||
fetch: app.fetch,
|
||||
port
|
||||
})
|
13
litellm-js/spend-logs/tsconfig.json
Normal file
13
litellm-js/spend-logs/tsconfig.json
Normal file
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"target": "ESNext",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "Bundler",
|
||||
"strict": true,
|
||||
"types": [
|
||||
"node"
|
||||
],
|
||||
"jsx": "react-jsx",
|
||||
"jsxImportSource": "hono/jsx",
|
||||
}
|
||||
}
|
38
litellm/llms/custom_httpx/httpx_handler.py
Normal file
38
litellm/llms/custom_httpx/httpx_handler.py
Normal file
|
@ -0,0 +1,38 @@
|
|||
from typing import Optional
|
||||
import httpx
|
||||
|
||||
|
||||
class HTTPHandler:
    """Small async HTTP helper built around one pooled ``httpx.AsyncClient``.

    The client is created once in ``__init__`` and reused by every ``get`` /
    ``post`` call; call ``close`` when the handler is no longer needed.
    """

    def __init__(self, concurrent_limit=1000):
        """Create the shared client.

        Args:
            concurrent_limit: Value used for both ``max_connections`` and
                ``max_keepalive_connections`` of the client's pool limits.
        """
        # Create a client with a connection pool
        self.client = httpx.AsyncClient(
            limits=httpx.Limits(
                max_connections=concurrent_limit,
                max_keepalive_connections=concurrent_limit,
            )
        )

    async def close(self):
        """Close the underlying client."""
        await self.client.aclose()

    async def get(
        self, url: str, params: Optional[dict] = None, headers: Optional[dict] = None
    ):
        """Send a GET request through the shared client and return its response."""
        return await self.client.get(url, params=params, headers=headers)

    async def post(
        self,
        url: str,
        data: Optional[dict] = None,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
    ):
        """Send a POST request through the shared client and return its response.

        ``data`` is forwarded unchanged as the client's ``data=`` argument.
        Any exception raised by the client propagates to the caller as-is.
        """
        return await self.client.post(
            url, data=data, params=params, headers=headers
        )
|
|
@ -1,21 +1,22 @@
|
|||
model_list:
|
||||
- model_name: fake_openai
|
||||
- model_name: fake-openai-endpoint
|
||||
litellm_params:
|
||||
model: openai/my-fake-model
|
||||
api_key: my-fake-key
|
||||
api_base: http://0.0.0.0:8080
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
- model_name: gpt-3.5-turbo
|
||||
litellm_params:
|
||||
model: gpt-3.5-turbo-1106
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
|
||||
litellm_settings:
|
||||
cache: true
|
||||
cache_params:
|
||||
type: redis
|
||||
callbacks: ["batch_redis_requests"]
|
||||
# success_callbacks: ["langfuse"]
|
||||
# litellm_settings:
|
||||
# cache: true
|
||||
# cache_params:
|
||||
# type: redis
|
||||
# callbacks: ["batch_redis_requests"]
|
||||
# # success_callbacks: ["langfuse"]
|
||||
|
||||
general_settings:
|
||||
master_key: sk-1234
|
||||
disable_spend_logs: true
|
||||
database_url: os.environ/DATABASE_URL
|
|
@ -6,7 +6,6 @@ Currently only supports admin.
|
|||
JWT token must have 'litellm_proxy_admin' in scope.
|
||||
"""
|
||||
|
||||
import httpx
|
||||
import jwt
|
||||
import json
|
||||
import os
|
||||
|
@ -14,42 +13,10 @@ from litellm.caching import DualCache
|
|||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.proxy._types import LiteLLM_JWTAuth, LiteLLM_UserTable
|
||||
from litellm.proxy.utils import PrismaClient
|
||||
from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class HTTPHandler:
    """Async HTTP wrapper that reuses a single pooled ``httpx.AsyncClient``."""

    def __init__(self, concurrent_limit=1000):
        """Build the client; ``concurrent_limit`` caps total and keep-alive connections."""
        pool_limits = httpx.Limits(
            max_connections=concurrent_limit,
            max_keepalive_connections=concurrent_limit,
        )
        self.client = httpx.AsyncClient(limits=pool_limits)

    async def close(self):
        """Shut the shared client down once it is no longer needed."""
        await self.client.aclose()

    async def get(
        self, url: str, params: Optional[dict] = None, headers: Optional[dict] = None
    ):
        """Issue a GET via the shared client and hand back the response."""
        return await self.client.get(url, params=params, headers=headers)

    async def post(
        self,
        url: str,
        data: Optional[dict] = None,
        params: Optional[dict] = None,
        headers: Optional[dict] = None,
    ):
        """Issue a POST via the shared client and hand back the response."""
        return await self.client.post(
            url, data=data, params=params, headers=headers
        )
|
||||
|
||||
|
||||
class JWTHandler:
|
||||
"""
|
||||
- treat the sub id passed in as the user id
|
||||
|
|
|
@ -21,8 +21,6 @@ telemetry = None
|
|||
def append_query_params(url, params):
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
|
||||
verbose_proxy_logger.debug(f"url: {url}")
|
||||
verbose_proxy_logger.debug(f"params: {params}")
|
||||
parsed_url = urlparse.urlparse(url)
|
||||
|
|
|
@ -97,7 +97,6 @@ from litellm.proxy.utils import (
|
|||
_is_projected_spend_over_limit,
|
||||
_get_projected_spend_over_limit,
|
||||
update_spend,
|
||||
monitor_spend_list,
|
||||
)
|
||||
from litellm.proxy.secret_managers.google_kms import load_google_kms
|
||||
from litellm.proxy.secret_managers.aws_secret_manager import load_aws_secret_manager
|
||||
|
@ -118,6 +117,7 @@ from litellm.proxy.auth.auth_checks import (
|
|||
allowed_routes_check,
|
||||
get_actual_routes,
|
||||
)
|
||||
from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
|
||||
|
||||
try:
|
||||
from litellm._version import version
|
||||
|
@ -304,6 +304,8 @@ proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache)
|
|||
async_result = None
|
||||
celery_app_conn = None
|
||||
celery_fn = None # Redis Queue for handling requests
|
||||
### DB WRITER ###
|
||||
db_writer_client: Optional[HTTPHandler] = None
|
||||
### logger ###
|
||||
|
||||
|
||||
|
@ -1249,6 +1251,7 @@ async def update_database(
|
|||
user_ids.append(litellm_proxy_budget_name)
|
||||
### KEY CHANGE ###
|
||||
for _id in user_ids:
|
||||
if _id is not None:
|
||||
prisma_client.user_list_transactons[_id] = (
|
||||
response_cost
|
||||
+ prisma_client.user_list_transactons.get(_id, 0)
|
||||
|
@ -1380,7 +1383,16 @@ async def update_database(
|
|||
)
|
||||
|
||||
payload["spend"] = response_cost
|
||||
if prisma_client is not None:
|
||||
if (
|
||||
os.getenv("SPEND_LOGS_URL", None) is not None
|
||||
and prisma_client is not None
|
||||
):
|
||||
if isinstance(payload["startTime"], datetime):
|
||||
payload["startTime"] = payload["startTime"].isoformat()
|
||||
if isinstance(payload["endTime"], datetime):
|
||||
payload["endTime"] = payload["endTime"].isoformat()
|
||||
prisma_client.spend_log_transactons.append(payload)
|
||||
elif prisma_client is not None:
|
||||
await prisma_client.insert_data(data=payload, table_name="spend")
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.debug(
|
||||
|
@ -2707,7 +2719,7 @@ def on_backoff(details):
|
|||
|
||||
@router.on_event("startup")
|
||||
async def startup_event():
|
||||
global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time, litellm_proxy_admin_name
|
||||
global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time, litellm_proxy_admin_name, db_writer_client
|
||||
import json
|
||||
|
||||
### LOAD MASTER KEY ###
|
||||
|
@ -2740,6 +2752,8 @@ async def startup_event():
|
|||
## COST TRACKING ##
|
||||
cost_tracking()
|
||||
|
||||
db_writer_client = HTTPHandler()
|
||||
|
||||
proxy_logging_obj._init_litellm_callbacks() # INITIALIZE LITELLM CALLBACKS ON SERVER STARTUP <- do this to catch any logging errors on startup, not when calls are being made
|
||||
|
||||
## JWT AUTH ##
|
||||
|
@ -2850,7 +2864,7 @@ async def startup_event():
|
|||
update_spend,
|
||||
"interval",
|
||||
seconds=batch_writing_interval,
|
||||
args=[prisma_client],
|
||||
args=[prisma_client, db_writer_client],
|
||||
)
|
||||
scheduler.start()
|
||||
|
||||
|
@ -8060,6 +8074,8 @@ async def shutdown_event():
|
|||
|
||||
await jwt_handler.close()
|
||||
|
||||
if db_writer_client is not None:
|
||||
await db_writer_client.close()
|
||||
## RESET CUSTOM VARIABLES ##
|
||||
cleanup_router_config_variables()
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@ from litellm.proxy._types import (
|
|||
Member,
|
||||
)
|
||||
from litellm.caching import DualCache
|
||||
from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
|
||||
from litellm.proxy.hooks.parallel_request_limiter import (
|
||||
_PROXY_MaxParallelRequestsHandler,
|
||||
)
|
||||
|
@ -1886,7 +1887,7 @@ async def reset_budget(prisma_client: PrismaClient):
|
|||
|
||||
|
||||
async def update_spend(
|
||||
prisma_client: PrismaClient,
|
||||
prisma_client: PrismaClient, db_writer_client: Optional[HTTPHandler]
|
||||
):
|
||||
"""
|
||||
Batch write updates to db.
|
||||
|
@ -2014,13 +2015,30 @@ async def update_spend(
|
|||
except Exception as e:
|
||||
raise e
|
||||
|
||||
### UPDATE SPEND LOGS ###
|
||||
base_url = os.getenv("SPEND_LOGS_URL", None)
|
||||
if (
|
||||
len(prisma_client.spend_log_transactons) > 0
|
||||
and base_url is not None
|
||||
and db_writer_client is not None
|
||||
):
|
||||
if not base_url.endswith("/"):
|
||||
base_url += "/"
|
||||
response = await db_writer_client.post(
|
||||
url=base_url + "spend/update",
|
||||
data=json.dumps(prisma_client.spend_log_transactons), # type: ignore
|
||||
headers={"Content-Type": "application/json"},
|
||||
)
|
||||
if response.status_code == 200:
|
||||
prisma_client.spend_log_transactons = []
|
||||
|
||||
async def monitor_spend_list(prisma_client: PrismaClient):
|
||||
"""
|
||||
Check the length of each spend list, if it exceeds a threshold (e.g. 100 items) - write to db
|
||||
"""
|
||||
if len(prisma_client.user_list_transactons) > 10000:
|
||||
await update_spend(prisma_client=prisma_client)
|
||||
|
||||
# async def monitor_spend_list(prisma_client: PrismaClient):
|
||||
# """
|
||||
# Check the length of each spend list, if it exceeds a threshold (e.g. 100 items) - write to db
|
||||
# """
|
||||
# if len(prisma_client.user_list_transactons) > 10000:
|
||||
# await update_spend(prisma_client=prisma_client)
|
||||
|
||||
|
||||
async def _read_request_body(request):
|
||||
|
|
|
@ -92,4 +92,7 @@ async def test_batch_update_spend(prisma_client):
|
|||
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
|
||||
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
|
||||
await litellm.proxy.proxy_server.prisma_client.connect()
|
||||
await update_spend(prisma_client=litellm.proxy.proxy_server.prisma_client)
|
||||
await update_spend(
|
||||
prisma_client=litellm.proxy.proxy_server.prisma_client,
|
||||
db_writer_client=None,
|
||||
)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue