diff --git a/.github/workflows/ghcr_deploy.yml b/.github/workflows/ghcr_deploy.yml
index b863fc3fa2..566199c8a6 100644
--- a/.github/workflows/ghcr_deploy.yml
+++ b/.github/workflows/ghcr_deploy.yml
@@ -43,6 +43,14 @@ jobs:
           push: true
           file: Dockerfile.database
           tags: litellm/litellm-database:${{ github.event.inputs.tag || 'latest' }}
+      -
+        name: Build and push litellm-spend_logs image
+        uses: docker/build-push-action@v5
+        with:
+          push: true
+          context: ./litellm-js/spend-logs
+          file: Dockerfile
+          tags: litellm/litellm-spend_logs:${{ github.event.inputs.tag || 'latest' }}

   build-and-push-image:
     runs-on: ubuntu-latest
@@ -120,6 +128,44 @@ jobs:
           tags: ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-database.outputs.tags }}-latest
           labels: ${{ steps.meta-database.outputs.labels }}
           platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
+
+  build-and-push-image-spend-logs:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Log in to the Container registry
+        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for spend-logs Dockerfile
+        id: meta-spend-logs
+        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-spend_logs
+      # Configure multi platform Docker builds
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345
+
+      - name: Build and push spend-logs Docker image
+        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
+        with:
+          context: ./litellm-js/spend-logs
+          file: Dockerfile
+          push: true
+          tags: ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-spend-logs.outputs.tags }}-latest
+          labels: ${{ steps.meta-spend-logs.outputs.labels }}
+          platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
+
   build-and-push-helm-chart:
     runs-on: ubuntu-latest
     steps:
diff --git a/docs/my-website/docs/proxy/prod.md b/docs/my-website/docs/proxy/prod.md
index a4dbf6b5f1..f287aea644 100644
--- a/docs/my-website/docs/proxy/prod.md
+++ b/docs/my-website/docs/proxy/prod.md
@@ -40,7 +40,13 @@ Use this Docker `CMD`. This will start the proxy with 1 Uvicorn Async Worker
 CMD ["--port", "4000", "--config", "./proxy_server_config.yaml"]
 ```

-## 3. Switch off spend logging and resetting budgets
+## 3. Move spend logs to a separate server
+
+Writing each spend log to the db can slow down your proxy. In testing, we saw a 70% improvement in median response time after moving spend log writes to a separate server.
+
+
+
+## 4. Switch off resetting budgets

 Add this to your config.yaml. (Only spend per Key, User and Team will be tracked - spend per API Call will not be written to the LiteLLM Database)
 ```yaml
@@ -49,7 +55,7 @@ general_settings:
   disable_reset_budget: true
 ```

-## 4. Switch of `litellm.telemetry`
+## 5.
Switch of `litellm.telemetry` Switch of all telemetry tracking done by litellm diff --git a/litellm-js/spend-logs/Dockerfile b/litellm-js/spend-logs/Dockerfile new file mode 100644 index 0000000000..7bd4fefba5 --- /dev/null +++ b/litellm-js/spend-logs/Dockerfile @@ -0,0 +1,26 @@ +# Use the specific Node.js v20.11.0 image +FROM node:20.11.0 + +# Set the working directory inside the container +WORKDIR /usr/src/app + +# Copy package.json and package-lock.json to the working directory +COPY package*.json ./ + +# Install dependencies +RUN npm install + +# Install Prisma globally +RUN npm install -g prisma + +# Copy the rest of the application code +COPY . . + +# Generate Prisma client +RUN npx prisma generate + +# Expose the port that the Node.js server will run on +EXPOSE 3000 + +# Command to run the Node.js app with npm run dev +CMD ["npm", "run", "dev"] \ No newline at end of file diff --git a/litellm-js/spend-logs/README.md b/litellm-js/spend-logs/README.md new file mode 100644 index 0000000000..e12b31db70 --- /dev/null +++ b/litellm-js/spend-logs/README.md @@ -0,0 +1,8 @@ +``` +npm install +npm run dev +``` + +``` +open http://localhost:3000 +``` diff --git a/litellm-js/spend-logs/package-lock.json b/litellm-js/spend-logs/package-lock.json new file mode 100644 index 0000000000..b74f5e9d70 --- /dev/null +++ b/litellm-js/spend-logs/package-lock.json @@ -0,0 +1,508 @@ +{ + "name": "spend-logs", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "dependencies": { + "@hono/node-server": "^1.9.0", + "hono": "^4.1.5" + }, + "devDependencies": { + "@types/node": "^20.11.17", + "tsx": "^4.7.1" + } + }, + "node_modules/@esbuild/aix-ppc64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.19.12.tgz", + "integrity": "sha512-bmoCYyWdEL3wDQIVbcyzRyeKLgk2WtWLTWz1ZIAZF/EGbNOwSA6ew3PftJ1PqMiOOGu0OyFMzG53L0zqIpPeNA==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.19.12.tgz", + "integrity": "sha512-qg/Lj1mu3CdQlDEEiWrlC4eaPZ1KztwGJ9B6J+/6G+/4ewxJg7gqj8eVYWvao1bXrqGiW2rsBZFSX3q2lcW05w==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-arm64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.19.12.tgz", + "integrity": "sha512-P0UVNGIienjZv3f5zq0DP3Nt2IE/3plFzuaS96vihvD0Hd6H/q4WXUGpCxD/E8YrSXfNyRPbpTq+T8ZQioSuPA==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/android-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.19.12.tgz", + "integrity": "sha512-3k7ZoUW6Q6YqhdhIaq/WZ7HwBpnFBlW905Fa4s4qWJyiNOgT1dOqDiVAQFwBH7gBRZr17gLrlFCRzF6jFh7Kew==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-arm64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.19.12.tgz", + "integrity": "sha512-B6IeSgZgtEzGC42jsI+YYu9Z3HKRxp8ZT3cqhvliEHovq8HSX2YX8lNocDn79gCKJXOSaEot9MVYky7AKjCs8g==", + "cpu": [ + "arm64" + ], + "dev": true, + 
"optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/darwin-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.19.12.tgz", + "integrity": "sha512-hKoVkKzFiToTgn+41qGhsUJXFlIjxI/jSYeZf3ugemDYZldIXIxhvwN6erJGlX4t5h417iFuheZ7l+YVn05N3A==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-arm64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.19.12.tgz", + "integrity": "sha512-4aRvFIXmwAcDBw9AueDQ2YnGmz5L6obe5kmPT8Vd+/+x/JMVKCgdcRwH6APrbpNXsPz+K653Qg8HB/oXvXVukA==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/freebsd-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.19.12.tgz", + "integrity": "sha512-EYoXZ4d8xtBoVN7CEwWY2IN4ho76xjYXqSXMNccFSx2lgqOG/1TBPW0yPx1bJZk94qu3tX0fycJeeQsKovA8gg==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-arm": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.19.12.tgz", + "integrity": "sha512-J5jPms//KhSNv+LO1S1TX1UWp1ucM6N6XuL6ITdKWElCu8wXP72l9MM0zDTzzeikVyqFE6U8YAV9/tFyj0ti+w==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-arm64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.19.12.tgz", + "integrity": "sha512-EoTjyYyLuVPfdPLsGVVVC8a0p1BFFvtpQDB/YLEhaXyf/5bczaGeN15QkR+O4S5LeJ92Tqotve7i1jn35qwvdA==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-ia32": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.19.12.tgz", + "integrity": "sha512-Thsa42rrP1+UIGaWz47uydHSBOgTUnwBwNq59khgIwktK6x60Hivfbux9iNR0eHCHzOLjLMLfUMLCypBkZXMHA==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-loong64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.19.12.tgz", + "integrity": "sha512-LiXdXA0s3IqRRjm6rV6XaWATScKAXjI4R4LoDlvO7+yQqFdlr1Bax62sRwkVvRIrwXxvtYEHHI4dm50jAXkuAA==", + "cpu": [ + "loong64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-mips64el": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.19.12.tgz", + "integrity": "sha512-fEnAuj5VGTanfJ07ff0gOA6IPsvrVHLVb6Lyd1g2/ed67oU1eFzL0r9WL7ZzscD+/N6i3dWumGE1Un4f7Amf+w==", + "cpu": [ + "mips64el" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-ppc64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.19.12.tgz", + "integrity": "sha512-nYJA2/QPimDQOh1rKWedNOe3Gfc8PabU7HT3iXWtNUbRzXS9+vgB0Fjaqr//XNbd82mCxHzik2qotuI89cfixg==", + "cpu": [ 
+ "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-riscv64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.19.12.tgz", + "integrity": "sha512-2MueBrlPQCw5dVJJpQdUYgeqIzDQgw3QtiAHUC4RBz9FXPrskyyU3VI1hw7C0BSKB9OduwSJ79FTCqtGMWqJHg==", + "cpu": [ + "riscv64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-s390x": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.19.12.tgz", + "integrity": "sha512-+Pil1Nv3Umes4m3AZKqA2anfhJiVmNCYkPchwFJNEJN5QxmTs1uzyy4TvmDrCRNT2ApwSari7ZIgrPeUx4UZDg==", + "cpu": [ + "s390x" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/linux-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.19.12.tgz", + "integrity": "sha512-B71g1QpxfwBvNrfyJdVDexenDIt1CiDN1TIXLbhOw0KhJzE78KIFGX6OJ9MrtC0oOqMWf+0xop4qEU8JrJTwCg==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/netbsd-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.19.12.tgz", + "integrity": "sha512-3ltjQ7n1owJgFbuC61Oj++XhtzmymoCihNFgT84UAmJnxJfm4sYCiSLTXZtE00VWYpPMYc+ZQmB6xbSdVh0JWA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/openbsd-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.19.12.tgz", + "integrity": "sha512-RbrfTB9SWsr0kWmb9srfF+L933uMDdu9BIzdA7os2t0TXhCRjrQyCeOt6wVxr79CKD4c+p+YhCj31HBkYcXebw==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/sunos-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.19.12.tgz", + "integrity": "sha512-HKjJwRrW8uWtCQnQOz9qcU3mUZhTUQvi56Q8DPTLLB+DawoiQdjsYq+j+D3s9I8VFtDr+F9CjgXKKC4ss89IeA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-arm64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.19.12.tgz", + "integrity": "sha512-URgtR1dJnmGvX864pn1B2YUYNzjmXkuJOIqG2HdU62MVS4EHpU2946OZoTMnRUHklGtJdJZ33QfzdjGACXhn1A==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-ia32": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.19.12.tgz", + "integrity": "sha512-+ZOE6pUkMOJfmxmBZElNOx72NKpIa/HFOMGzu8fqzQJ5kgf6aTGrcJaFsNiVMH4JKpMipyK+7k0n2UXN7a8YKQ==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@esbuild/win32-x64": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.19.12.tgz", + "integrity": "sha512-T1QyPSDCyMXaO3pzBkF96E8xMkiRYbUEZADd29SyPGabqxMViNoii+NcK7eWJAEoU6RZyEm5lVSIjTmcdoB9HA==", + "cpu": [ + 
"x64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=12" + } + }, + "node_modules/@hono/node-server": { + "version": "1.9.0", + "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.9.0.tgz", + "integrity": "sha512-oJjk7WXBlENeHhWiMqSyxPIZ3Kmf5ZYxqdlcSIXyN8Rn50bNJsPl99G4POBS03Jxh56FdfRJ0SEnC8mAVIiavQ==", + "engines": { + "node": ">=18.14.1" + } + }, + "node_modules/@types/node": { + "version": "20.11.30", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.11.30.tgz", + "integrity": "sha512-dHM6ZxwlmuZaRmUPfv1p+KrdD1Dci04FbdEm/9wEMouFqxYoFl5aMkt0VMAUtYRQDyYvD41WJLukhq/ha3YuTw==", + "dev": true, + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/esbuild": { + "version": "0.19.12", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.19.12.tgz", + "integrity": "sha512-aARqgq8roFBj054KvQr5f1sFu0D65G+miZRCuJyJ0G13Zwx7vRar5Zhn2tkQNzIXcBrNVsv/8stehpj+GAjgbg==", + "dev": true, + "hasInstallScript": true, + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=12" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.19.12", + "@esbuild/android-arm": "0.19.12", + "@esbuild/android-arm64": "0.19.12", + "@esbuild/android-x64": "0.19.12", + "@esbuild/darwin-arm64": "0.19.12", + "@esbuild/darwin-x64": "0.19.12", + "@esbuild/freebsd-arm64": "0.19.12", + "@esbuild/freebsd-x64": "0.19.12", + "@esbuild/linux-arm": "0.19.12", + "@esbuild/linux-arm64": "0.19.12", + "@esbuild/linux-ia32": "0.19.12", + "@esbuild/linux-loong64": "0.19.12", + "@esbuild/linux-mips64el": "0.19.12", + "@esbuild/linux-ppc64": "0.19.12", + "@esbuild/linux-riscv64": "0.19.12", + "@esbuild/linux-s390x": "0.19.12", + "@esbuild/linux-x64": "0.19.12", + "@esbuild/netbsd-x64": "0.19.12", + "@esbuild/openbsd-x64": "0.19.12", + "@esbuild/sunos-x64": "0.19.12", + "@esbuild/win32-arm64": "0.19.12", + "@esbuild/win32-ia32": "0.19.12", + "@esbuild/win32-x64": "0.19.12" + } + }, + "node_modules/fsevents": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", + "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, + "hasInstallScript": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/get-tsconfig": { + "version": "4.7.3", + "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.7.3.tgz", + "integrity": "sha512-ZvkrzoUA0PQZM6fy6+/Hce561s+faD1rsNwhnO5FelNjyy7EMGJ3Rz1AQ8GYDWjhRs/7dBLOEJvhK8MiEJOAFg==", + "dev": true, + "dependencies": { + "resolve-pkg-maps": "^1.0.0" + }, + "funding": { + "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" + } + }, + "node_modules/hono": { + "version": "4.1.5", + "resolved": "https://registry.npmjs.org/hono/-/hono-4.1.5.tgz", + "integrity": "sha512-3ChJiIoeCxvkt6vnkxJagplrt1YZg3NyNob7ssVeK2PUqEINp4q1F94HzFnvY9QE8asVmbW5kkTDlyWylfg2vg==", + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/resolve-pkg-maps": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", + "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==", + "dev": true, + "funding": { + "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" + } + }, + "node_modules/tsx": { + "version": "4.7.1", + "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.7.1.tgz", + 
"integrity": "sha512-8d6VuibXHtlN5E3zFkgY8u4DX7Y3Z27zvvPKVmLon/D4AjuKzarkUBTLDBgj9iTQ0hg5xM7c/mYiRVM+HETf0g==", + "dev": true, + "dependencies": { + "esbuild": "~0.19.10", + "get-tsconfig": "^4.7.2" + }, + "bin": { + "tsx": "dist/cli.mjs" + }, + "engines": { + "node": ">=18.0.0" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + } + }, + "node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "dev": true + } + } +} diff --git a/litellm-js/spend-logs/package.json b/litellm-js/spend-logs/package.json new file mode 100644 index 0000000000..0fc9b11af8 --- /dev/null +++ b/litellm-js/spend-logs/package.json @@ -0,0 +1,13 @@ +{ + "scripts": { + "dev": "tsx watch src/index.ts" + }, + "dependencies": { + "@hono/node-server": "^1.9.0", + "hono": "^4.1.5" + }, + "devDependencies": { + "@types/node": "^20.11.17", + "tsx": "^4.7.1" + } +} diff --git a/litellm-js/spend-logs/schema.prisma b/litellm-js/spend-logs/schema.prisma new file mode 100644 index 0000000000..b0403f277a --- /dev/null +++ b/litellm-js/spend-logs/schema.prisma @@ -0,0 +1,29 @@ +generator client { + provider = "prisma-client-js" +} + +datasource client { + provider = "postgresql" + url = env("DATABASE_URL") +} + +model LiteLLM_SpendLogs { + request_id String @id + call_type String + api_key String @default("") + spend Float @default(0.0) + total_tokens Int @default(0) + prompt_tokens Int @default(0) + completion_tokens Int @default(0) + startTime DateTime + endTime DateTime + model String @default("") + api_base String @default("") + user String @default("") + metadata Json @default("{}") + cache_hit String @default("") + cache_key String @default("") + request_tags Json @default("[]") + team_id String? + end_user String? +} \ No newline at end of file diff --git a/litellm-js/spend-logs/src/_types.ts b/litellm-js/spend-logs/src/_types.ts new file mode 100644 index 0000000000..6a9b499171 --- /dev/null +++ b/litellm-js/spend-logs/src/_types.ts @@ -0,0 +1,32 @@ +export type LiteLLM_IncrementSpend = { + key_transactions: Array, // [{"key": spend},..] + user_transactions: Array, + team_transactions: Array, + spend_logs_transactions: Array +} + +export type LiteLLM_IncrementObject = { + key: string, + spend: number +} + +export type LiteLLM_SpendLogs = { + request_id: string; // @id means it's a unique identifier + call_type: string; + api_key: string; // @default("") means it defaults to an empty string if not provided + spend: number; // Float in Prisma corresponds to number in TypeScript + total_tokens: number; // Int in Prisma corresponds to number in TypeScript + prompt_tokens: number; + completion_tokens: number; + startTime: Date; // DateTime in Prisma corresponds to Date in TypeScript + endTime: Date; + model: string; // @default("") means it defaults to an empty string if not provided + api_base: string; + user: string; + metadata: any; // Json type in Prisma is represented by any in TypeScript; could also use a more specific type if the structure of JSON is known + cache_hit: string; + cache_key: string; + request_tags: any; // Similarly, this could be an array or a more specific type depending on the expected structure + team_id?: string | null; // ? 
indicates it's optional and can be undefined, but could also be null if not provided + end_user?: string | null; +}; \ No newline at end of file diff --git a/litellm-js/spend-logs/src/index.ts b/litellm-js/spend-logs/src/index.ts new file mode 100644 index 0000000000..3581d95c83 --- /dev/null +++ b/litellm-js/spend-logs/src/index.ts @@ -0,0 +1,84 @@ +import { serve } from '@hono/node-server' +import { Hono } from 'hono' +import { PrismaClient } from '@prisma/client' +import {LiteLLM_SpendLogs, LiteLLM_IncrementSpend, LiteLLM_IncrementObject} from './_types' + +const app = new Hono() +const prisma = new PrismaClient() +// In-memory storage for logs +let spend_logs: LiteLLM_SpendLogs[] = []; +const key_logs: LiteLLM_IncrementObject[] = []; +const user_logs: LiteLLM_IncrementObject[] = []; +const transaction_logs: LiteLLM_IncrementObject[] = []; + + +app.get('/', (c) => { + return c.text('Hello Hono!') +}) + +const MIN_LOGS = 1; // Minimum number of logs needed to initiate a flush +const FLUSH_INTERVAL = 5000; // Time in ms to wait before trying to flush again +const BATCH_SIZE = 100; // Preferred size of each batch to write to the database +const MAX_LOGS_PER_INTERVAL = 1000; // Maximum number of logs to flush in a single interval + +const flushLogsToDb = async () => { + if (spend_logs.length >= MIN_LOGS) { + // Limit the logs to process in this interval to MAX_LOGS_PER_INTERVAL or less + const logsToProcess = spend_logs.slice(0, MAX_LOGS_PER_INTERVAL); + + for (let i = 0; i < logsToProcess.length; i += BATCH_SIZE) { + // Create subarray for current batch, ensuring it doesn't exceed the BATCH_SIZE + const batch = logsToProcess.slice(i, i + BATCH_SIZE); + + // Convert datetime strings to Date objects + const batchWithDates = batch.map(entry => ({ + ...entry, + startTime: new Date(entry.startTime), + endTime: new Date(entry.endTime), + // Repeat for any other DateTime fields you may have + })); + + await prisma.liteLLM_SpendLogs.createMany({ + data: batchWithDates, + }); + + console.log(`Flushed ${batch.length} logs to the DB.`); + } + + // Remove the processed logs from spend_logs + spend_logs = spend_logs.slice(logsToProcess.length); + + console.log(`${logsToProcess.length} logs processed. Remaining in queue: ${spend_logs.length}`); + } else { + // This will ensure it doesn't falsely claim "No logs to flush." when it's merely below the MIN_LOGS threshold. + if(spend_logs.length > 0) { + console.log(`Accumulating logs. Currently at ${spend_logs.length}, waiting for at least ${MIN_LOGS}.`); + } else { + console.log("No logs to flush."); + } + } +}; + +// Setup interval for attempting to flush the logs +setInterval(flushLogsToDb, FLUSH_INTERVAL); + +// Route to receive log messages +app.post('/spend/update', async (c) => { + const incomingLogs = await c.req.json(); + + spend_logs.push(...incomingLogs); + + console.log(`Received and stored ${incomingLogs.length} logs. 
Total logs in memory: ${spend_logs.length}`); + + return c.json({ message: `Successfully stored ${incomingLogs.length} logs` }); +}); + + + +const port = 3000 +console.log(`Server is running on port ${port}`) + +serve({ + fetch: app.fetch, + port +}) diff --git a/litellm-js/spend-logs/tsconfig.json b/litellm-js/spend-logs/tsconfig.json new file mode 100644 index 0000000000..028c03b6a8 --- /dev/null +++ b/litellm-js/spend-logs/tsconfig.json @@ -0,0 +1,13 @@ +{ + "compilerOptions": { + "target": "ESNext", + "module": "ESNext", + "moduleResolution": "Bundler", + "strict": true, + "types": [ + "node" + ], + "jsx": "react-jsx", + "jsxImportSource": "hono/jsx", + } +} \ No newline at end of file diff --git a/litellm/llms/custom_httpx/httpx_handler.py b/litellm/llms/custom_httpx/httpx_handler.py new file mode 100644 index 0000000000..3f3bd09bac --- /dev/null +++ b/litellm/llms/custom_httpx/httpx_handler.py @@ -0,0 +1,38 @@ +from typing import Optional +import httpx + + +class HTTPHandler: + def __init__(self, concurrent_limit=1000): + # Create a client with a connection pool + self.client = httpx.AsyncClient( + limits=httpx.Limits( + max_connections=concurrent_limit, + max_keepalive_connections=concurrent_limit, + ) + ) + + async def close(self): + # Close the client when you're done with it + await self.client.aclose() + + async def get( + self, url: str, params: Optional[dict] = None, headers: Optional[dict] = None + ): + response = await self.client.get(url, params=params, headers=headers) + return response + + async def post( + self, + url: str, + data: Optional[dict] = None, + params: Optional[dict] = None, + headers: Optional[dict] = None, + ): + try: + response = await self.client.post( + url, data=data, params=params, headers=headers + ) + return response + except Exception as e: + raise e diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index bd277bbdfc..07a24dd7e8 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,21 +1,22 @@ model_list: -- model_name: fake_openai +- model_name: fake-openai-endpoint litellm_params: model: openai/my-fake-model api_key: my-fake-key - api_base: http://0.0.0.0:8080 + api_base: https://exampleopenaiendpoint-production.up.railway.app/ - model_name: gpt-3.5-turbo litellm_params: model: gpt-3.5-turbo-1106 api_key: os.environ/OPENAI_API_KEY -litellm_settings: - cache: true - cache_params: - type: redis - callbacks: ["batch_redis_requests"] - # success_callbacks: ["langfuse"] +# litellm_settings: +# cache: true +# cache_params: +# type: redis +# callbacks: ["batch_redis_requests"] +# # success_callbacks: ["langfuse"] general_settings: master_key: sk-1234 + disable_spend_logs: true database_url: "postgresql://neondb_owner:hz8tyUlJ5ivV@ep-cool-sunset-a5ywubeh.us-east-2.aws.neon.tech/neondb?sslmode=require" \ No newline at end of file diff --git a/litellm/proxy/auth/handle_jwt.py b/litellm/proxy/auth/handle_jwt.py index 08ffc0955b..4689ffe7bf 100644 --- a/litellm/proxy/auth/handle_jwt.py +++ b/litellm/proxy/auth/handle_jwt.py @@ -6,7 +6,6 @@ Currently only supports admin. JWT token must have 'litellm_proxy_admin' in scope. 
""" -import httpx import jwt import json import os @@ -14,42 +13,10 @@ from litellm.caching import DualCache from litellm._logging import verbose_proxy_logger from litellm.proxy._types import LiteLLM_JWTAuth, LiteLLM_UserTable from litellm.proxy.utils import PrismaClient +from litellm.llms.custom_httpx.httpx_handler import HTTPHandler from typing import Optional -class HTTPHandler: - def __init__(self, concurrent_limit=1000): - # Create a client with a connection pool - self.client = httpx.AsyncClient( - limits=httpx.Limits( - max_connections=concurrent_limit, - max_keepalive_connections=concurrent_limit, - ) - ) - - async def close(self): - # Close the client when you're done with it - await self.client.aclose() - - async def get( - self, url: str, params: Optional[dict] = None, headers: Optional[dict] = None - ): - response = await self.client.get(url, params=params, headers=headers) - return response - - async def post( - self, - url: str, - data: Optional[dict] = None, - params: Optional[dict] = None, - headers: Optional[dict] = None, - ): - response = await self.client.post( - url, data=data, params=params, headers=headers - ) - return response - - class JWTHandler: """ - treat the sub id passed in as the user id diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index b1d7b8026c..b8d7926963 100644 --- a/litellm/proxy/proxy_cli.py +++ b/litellm/proxy/proxy_cli.py @@ -21,8 +21,6 @@ telemetry = None def append_query_params(url, params): from litellm._logging import verbose_proxy_logger - from litellm._logging import verbose_proxy_logger - verbose_proxy_logger.debug(f"url: {url}") verbose_proxy_logger.debug(f"params: {params}") parsed_url = urlparse.urlparse(url) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 2fe4a3523d..4e91afb843 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -97,7 +97,6 @@ from litellm.proxy.utils import ( _is_projected_spend_over_limit, _get_projected_spend_over_limit, update_spend, - monitor_spend_list, ) from litellm.proxy.secret_managers.google_kms import load_google_kms from litellm.proxy.secret_managers.aws_secret_manager import load_aws_secret_manager @@ -118,6 +117,7 @@ from litellm.proxy.auth.auth_checks import ( allowed_routes_check, get_actual_routes, ) +from litellm.llms.custom_httpx.httpx_handler import HTTPHandler try: from litellm._version import version @@ -304,6 +304,8 @@ proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache) async_result = None celery_app_conn = None celery_fn = None # Redis Queue for handling requests +### DB WRITER ### +db_writer_client: Optional[HTTPHandler] = None ### logger ### @@ -1249,10 +1251,11 @@ async def update_database( user_ids.append(litellm_proxy_budget_name) ### KEY CHANGE ### for _id in user_ids: - prisma_client.user_list_transactons[_id] = ( - response_cost - + prisma_client.user_list_transactons.get(_id, 0) - ) + if _id is not None: + prisma_client.user_list_transactons[_id] = ( + response_cost + + prisma_client.user_list_transactons.get(_id, 0) + ) if end_user_id is not None: prisma_client.end_user_list_transactons[end_user_id] = ( response_cost @@ -1380,7 +1383,16 @@ async def update_database( ) payload["spend"] = response_cost - if prisma_client is not None: + if ( + os.getenv("SPEND_LOGS_URL", None) is not None + and prisma_client is not None + ): + if isinstance(payload["startTime"], datetime): + payload["startTime"] = payload["startTime"].isoformat() + if isinstance(payload["endTime"], datetime): + 
payload["endTime"] = payload["endTime"].isoformat() + prisma_client.spend_log_transactons.append(payload) + elif prisma_client is not None: await prisma_client.insert_data(data=payload, table_name="spend") except Exception as e: verbose_proxy_logger.debug( @@ -2707,7 +2719,7 @@ def on_backoff(details): @router.on_event("startup") async def startup_event(): - global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time, litellm_proxy_admin_name + global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time, litellm_proxy_admin_name, db_writer_client import json ### LOAD MASTER KEY ### @@ -2740,6 +2752,8 @@ async def startup_event(): ## COST TRACKING ## cost_tracking() + db_writer_client = HTTPHandler() + proxy_logging_obj._init_litellm_callbacks() # INITIALIZE LITELLM CALLBACKS ON SERVER STARTUP <- do this to catch any logging errors on startup, not when calls are being made ## JWT AUTH ## @@ -2850,7 +2864,7 @@ async def startup_event(): update_spend, "interval", seconds=batch_writing_interval, - args=[prisma_client], + args=[prisma_client, db_writer_client], ) scheduler.start() @@ -8060,6 +8074,8 @@ async def shutdown_event(): await jwt_handler.close() + if db_writer_client is not None: + await db_writer_client.close() ## RESET CUSTOM VARIABLES ## cleanup_router_config_variables() diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index bf625d6441..5923e0a5a2 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -13,6 +13,7 @@ from litellm.proxy._types import ( Member, ) from litellm.caching import DualCache +from litellm.llms.custom_httpx.httpx_handler import HTTPHandler from litellm.proxy.hooks.parallel_request_limiter import ( _PROXY_MaxParallelRequestsHandler, ) @@ -1886,7 +1887,7 @@ async def reset_budget(prisma_client: PrismaClient): async def update_spend( - prisma_client: PrismaClient, + prisma_client: PrismaClient, db_writer_client: Optional[HTTPHandler] ): """ Batch write updates to db. @@ -2014,13 +2015,30 @@ async def update_spend( except Exception as e: raise e + ### UPDATE SPEND LOGS ### + base_url = os.getenv("SPEND_LOGS_URL", None) + if ( + len(prisma_client.spend_log_transactons) > 0 + and base_url is not None + and db_writer_client is not None + ): + if not base_url.endswith("/"): + base_url += "/" + response = await db_writer_client.post( + url=base_url + "spend/update", + data=json.dumps(prisma_client.spend_log_transactons), # type: ignore + headers={"Content-Type": "application/json"}, + ) + if response.status_code == 200: + prisma_client.spend_log_transactons = [] -async def monitor_spend_list(prisma_client: PrismaClient): - """ - Check the length of each spend list, if it exceeds a threshold (e.g. 100 items) - write to db - """ - if len(prisma_client.user_list_transactons) > 10000: - await update_spend(prisma_client=prisma_client) + +# async def monitor_spend_list(prisma_client: PrismaClient): +# """ +# Check the length of each spend list, if it exceeds a threshold (e.g. 
100 items) - write to db +# """ +# if len(prisma_client.user_list_transactons) > 10000: +# await update_spend(prisma_client=prisma_client) async def _read_request_body(request): diff --git a/litellm/tests/test_update_spend.py b/litellm/tests/test_update_spend.py index 0fd5d9bcf9..4bc7f9fe4f 100644 --- a/litellm/tests/test_update_spend.py +++ b/litellm/tests/test_update_spend.py @@ -92,4 +92,7 @@ async def test_batch_update_spend(prisma_client): setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") await litellm.proxy.proxy_server.prisma_client.connect() - await update_spend(prisma_client=litellm.proxy.proxy_server.prisma_client) + await update_spend( + prisma_client=litellm.proxy.proxy_server.prisma_client, + db_writer_client=None, + )
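
Example: exercising the new spend-logs service. The patch makes the proxy buffer spend-log payloads in `prisma_client.spend_log_transactons` and, when `SPEND_LOGS_URL` is set, POST them as a JSON array to `<SPEND_LOGS_URL>/spend/update`; the Hono service buffers the rows in memory and writes them to Postgres in batches via `prisma.liteLLM_SpendLogs.createMany()` on a 5-second flush timer. Below is a minimal smoke-test sketch (not part of the patch) for hitting that endpoint by hand. It assumes the service is running locally on port 3000 with `DATABASE_URL` set, and every field value is a placeholder that merely matches the `LiteLLM_SpendLogs` shape from `schema.prisma`.

```python
# Minimal smoke test for the spend-logs service added in this PR (not part of the patch).
# Assumes the service is running locally on port 3000 (npm run dev, or the
# litellm/litellm-spend_logs image) with DATABASE_URL set. All field values below are
# placeholders that satisfy the LiteLLM_SpendLogs shape defined in schema.prisma.
from datetime import datetime, timezone

import httpx

SPEND_LOGS_URL = "http://localhost:3000"  # same value the proxy reads from the SPEND_LOGS_URL env var

now = datetime.now(timezone.utc).isoformat()
payload = [
    {
        "request_id": "smoke-test-1",  # primary key in LiteLLM_SpendLogs, must be unique
        "call_type": "acompletion",
        "api_key": "hashed-api-key",
        "spend": 0.000123,
        "total_tokens": 30,
        "prompt_tokens": 20,
        "completion_tokens": 10,
        # The proxy serializes DateTime fields to ISO strings before POSTing;
        # index.ts converts them back with new Date(...) before prisma.createMany().
        "startTime": now,
        "endTime": now,
        "model": "gpt-3.5-turbo",
        "api_base": "",
        "user": "smoke-test-user",
        "metadata": {},
        "cache_hit": "",
        "cache_key": "",
        "request_tags": [],
        "team_id": None,
        "end_user": None,
    }
]

# /spend/update expects a JSON array and only buffers it; the rows reach Postgres on
# the next FLUSH_INTERVAL tick (5000 ms in index.ts).
response = httpx.post(f"{SPEND_LOGS_URL}/spend/update", json=payload)
print(response.status_code, response.json())
```

A 200 response with `Successfully stored 1 logs` only means the service buffered the batch in memory; the rows are written on the next flush interval, so anything still queued is lost if the spend-logs process exits before that tick.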
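The patch also moves the proxy-side `HTTPHandler` out of `handle_jwt.py` into `litellm/llms/custom_httpx/httpx_handler.py`, so the JWT auth code and the new spend-log writer (`db_writer_client`) share one pooled async client that is created in `startup_event` and closed in `shutdown_event`. The sketch below (again, not from the patch) shows that intended lifecycle; the liveness check against the service's `/` route is an assumed usage for illustration, not something the proxy itself does.

```python
# Usage sketch for the extracted HTTPHandler (litellm/llms/custom_httpx/httpx_handler.py).
# Not part of the patch; it mirrors how proxy_server.py treats db_writer_client:
# build one instance at startup, reuse its pooled httpx.AsyncClient, close it at shutdown.
import asyncio

from litellm.llms.custom_httpx.httpx_handler import HTTPHandler


async def main():
    client = HTTPHandler(concurrent_limit=1000)  # pool size, same default as the class
    try:
        # Any async HTTP call reuses the pool; here we hit the spend-logs
        # service's root route ("Hello Hono!") as a cheap liveness check.
        response = await client.get("http://localhost:3000/")
        print(response.status_code, response.text)
    finally:
        await client.close()  # mirrors shutdown_event closing db_writer_client


asyncio.run(main())
```

Keeping a single long-lived `httpx.AsyncClient` means the spend-log batches reuse pooled connections instead of opening a new connection on every flush.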