Merge branch 'main' into litellm_llamaguard_custom_categories

Krish Dholakia 2024-02-17 21:36:40 -08:00 committed by GitHub
commit 038ba426ab
57 changed files with 585 additions and 364 deletions

.gitignore

@ -40,7 +40,7 @@ ui/litellm-dashboard/node_modules
ui/litellm-dashboard/next-env.d.ts
ui/litellm-dashboard/package.json
ui/litellm-dashboard/package-lock.json
deploy/charts/litellm-helm/*.tgz
deploy/charts/litellm-helm/charts/*
deploy/charts/litellm/*.tgz
deploy/charts/litellm/charts/*
deploy/charts/*.tgz
litellm/proxy/vertex_key.json


@ -1,89 +0,0 @@
{{- if .Values.ui.enabled -}}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "litellm.fullname" . }}-ui
labels:
{{- include "litellm.labels" . | nindent 4 }}
spec:
{{- if not .Values.ui.autoscaling.enabled }}
replicas: {{ .Values.ui.replicaCount }}
{{- end }}
selector:
matchLabels:
{{- include "litellm.ui.selectorLabels" . | nindent 6 }}
template:
metadata:
{{- with .Values.podAnnotations }}
annotations:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "litellm.ui.labels" . | nindent 8 }}
{{- with .Values.ui.podLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
serviceAccountName: {{ include "litellm.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.ui.podSecurityContext | nindent 8 }}
containers:
- name: {{ include "litellm.name" . }}-ui
securityContext:
{{- toYaml .Values.ui.securityContext | nindent 12 }}
image: "{{ .Values.ui.image.repository }}:{{ .Values.ui.image.tag | default (printf "main-%s" .Chart.AppVersion) }}"
imagePullPolicy: {{ .Values.ui.image.pullPolicy }}
env:
- name: BASE_URL
value: {{ (index .Values.ui.ingress.hosts 0).host | default "example.com" }}
ports:
- name: http
containerPort: {{ .Values.ui.service.port }}
protocol: TCP
livenessProbe:
httpGet:
path: /
port: http
readinessProbe:
httpGet:
path: /
port: http
# Give the container time to start up. Up to 5 minutes (10 * 30 seconds)
startupProbe:
httpGet:
path: /
port: http
failureThreshold: 30
periodSeconds: 10
resources:
{{- toYaml .Values.ui.resources | nindent 12 }}
volumeMounts:
- name: tmp
mountPath: /tmp
{{- with .Values.ui.volumeMounts }}
{{- toYaml . | nindent 12 }}
{{- end }}
volumes:
- name: tmp
emptyDir:
sizeLimit: 500Mi
{{- with .Values.ui.volumes }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.ui.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.ui.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.ui.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end -}}


@ -1,61 +0,0 @@
{{- if .Values.ui.ingress.enabled -}}
{{- $fullName := (printf "%s%s" (include "litellm.fullname" .) "-ui") -}}
{{- $svcPort := .Values.ui.service.port -}}
{{- if and .Values.ui.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
{{- if not (hasKey .Values.ui.ingress.annotations "kubernetes.io/ingress.class") }}
{{- $_ := set .Values.ui.ingress.annotations "kubernetes.io/ingress.class" .Values.ui.ingress.className}}
{{- end }}
{{- end }}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1beta1
{{- else -}}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
metadata:
name: {{ $fullName }}
labels:
{{- include "litellm.ui.labels" . | nindent 4 }}
{{- with .Values.ui.ingress.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if and .Values.ui.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
ingressClassName: {{ .Values.ui.ingress.className }}
{{- end }}
{{- if .Values.ui.ingress.tls }}
tls:
{{- range .Values.ui.ingress.tls }}
- hosts:
{{- range .hosts }}
- {{ . | quote }}
{{- end }}
secretName: {{ .secretName }}
{{- end }}
{{- end }}
rules:
{{- range .Values.ui.ingress.hosts }}
- host: {{ .host | quote }}
http:
paths:
{{- range .paths }}
- path: {{ .path }}
{{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
pathType: {{ .pathType }}
{{- end }}
backend:
{{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
service:
name: {{ $fullName }}
port:
number: {{ $svcPort }}
{{- else }}
serviceName: {{ $fullName }}
servicePort: {{ $svcPort }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}


@ -1,17 +0,0 @@
{{- if .Values.ui.enabled -}}
apiVersion: v1
kind: Service
metadata:
name: {{ include "litellm.fullname" . }}-ui
labels:
{{- include "litellm.labels" . | nindent 4 }}
spec:
type: {{ .Values.ui.service.type }}
ports:
- port: {{ .Values.ui.service.port }}
targetPort: http
protocol: TCP
name: http
selector:
{{- include "litellm.ui.selectorLabels" . | nindent 4 }}
{{ end -}}


@ -2,7 +2,7 @@ apiVersion: v2
# We can't call ourselves just "litellm" because then we couldn't publish to the
# same OCI repository as the "litellm" OCI image
name: litellm-helm
name: litellm
description: Call all LLM APIs using the OpenAI format
# A chart can be either an 'application' or a 'library' chart.
@ -18,17 +18,16 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
version: 0.2.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: v1.18.9
appVersion: v1.24.5
dependencies:
- name: "postgresql"
version: ">=13.3.0"
repository: oci://registry-1.docker.io/bitnamicharts
condition: db.deployStandalone


@ -43,20 +43,6 @@ data:
type: Opaque
```
### LiteLLM Admin UI Settings
| Name | Description | Value |
| ---------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----- |
| `ui.enabled` | Should the LiteLLM Admin UI be deployed | `true` |
| `ui.replicaCount` | The number of LiteLLM Admin UI pods to be deployed | `1` |
| `ui.image.repository` | LiteLLM Admin UI image repository | `ghcr.io/berriai/litellm` |
| `ui.image.pullPolicy` | LiteLLM Admin UI image pull policy | `IfNotPresent` |
| `ui.image.tag` | Overrides the image tag; the default is the latest version of LiteLLM at the time this chart was published. | `""` |
| `ui.imagePullSecrets` | Registry credentials for the above images. | `[]` |
| `ui.service.type` | Kubernetes Service type (e.g. `LoadBalancer`, `ClusterIP`, etc.) | `ClusterIP` |
| `ui.service.port` | TCP port that the Kubernetes Service will listen on. Also the TCP port within the Pod that the web server will listen on. | `8000` |
| `ui.ingress.*` | See [values.yaml](./values.yaml) for example settings | N/A |
### Database Settings
| Name | Description | Value |
| ---------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----- |
@ -86,18 +72,18 @@ type: Opaque
```
## Accessing the Admin UI
When browsing to the URL published per the settings in `ui.ingress.*`, you will
When browsing to the URL published per the settings in `ingress.*`, you will
be prompted for **Admin Configuration**. The **Proxy Endpoint** is the internal
(from the `litellm-ui` pod's perspective) URL published by the `litellm-proxy`
(from the `litellm` pod's perspective) URL published by the `<RELEASE>-litellm`
Kubernetes Service. If the deployment uses the default settings for this
service, the **Proxy Endpoint** should be set to `http://litellm-proxy:8000`.
service, the **Proxy Endpoint** should be set to `http://<RELEASE>-litellm:8000`.
The **Proxy Key** is the value specified for `masterkey` or, if a `masterkey`
was not provided to the helm command line, the `masterkey` is a randomly
generated string stored in the `litellm-masterkey` Kubernetes Secret.
generated string stored in the `<RELEASE>-litellm-masterkey` Kubernetes Secret.
```bash
kubectl -n litellm get secret litellm-masterkey -o jsonpath="{.data.masterkey}"
kubectl -n litellm get secret <RELEASE>-litellm-masterkey -o jsonpath="{.data.masterkey}"
```
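The secret value returned by the `jsonpath` query is base64-encoded, as with any Kubernetes Secret; a minimal decoding sketch (assuming a shell with `base64` available):
```bash
kubectl -n litellm get secret <RELEASE>-litellm-masterkey \
  -o jsonpath="{.data.masterkey}" | base64 --decode
```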
## Admin UI Limitations


@ -41,14 +41,6 @@ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{- define "litellm.ui.labels" -}}
helm.sh/chart: {{ include "litellm.chart" . }}
{{ include "litellm.ui.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels
@ -57,10 +49,6 @@ Selector labels
app.kubernetes.io/name: {{ include "litellm.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{- define "litellm.ui.selectorLabels" -}}
app.kubernetes.io/name: {{ include "litellm.name" . }}-ui
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Create the name of the service account to use


@ -1,7 +1,7 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "litellm.fullname" . }}-proxy
name: {{ include "litellm.fullname" . }}
labels:
{{- include "litellm.labels" . | nindent 4 }}
spec:
@ -41,12 +41,12 @@ spec:
- name: DATABASE_USERNAME
valueFrom:
secretKeyRef:
name: {{ include "litellm.name" . }}-dbcredentials
name: {{ include "litellm.fullname" . }}-dbcredentials
key: username
- name: PGPASSWORD
valueFrom:
secretKeyRef:
name: {{ include "litellm.name" . }}-dbcredentials
name: {{ include "litellm.fullname" . }}-dbcredentials
key: password
- name: DATABASE_HOST
value: {{ .Release.Name }}-postgresql
@ -108,12 +108,12 @@ spec:
- name: DATABASE_USERNAME
valueFrom:
secretKeyRef:
name: {{ include "litellm.name" . }}-dbcredentials
name: {{ include "litellm.fullname" . }}-dbcredentials
key: username
- name: DATABASE_PASSWORD
valueFrom:
secretKeyRef:
name: {{ include "litellm.name" . }}-dbcredentials
name: {{ include "litellm.fullname" . }}-dbcredentials
key: password
- name: DATABASE_HOST
value: {{ .Release.Name }}-postgresql
@ -140,7 +140,7 @@ spec:
- name: PROXY_MASTER_KEY
valueFrom:
secretKeyRef:
name: {{ include "litellm.name" . }}-masterkey
name: {{ include "litellm.fullname" . }}-masterkey
key: masterkey
envFrom:
{{- range .Values.environmentSecrets }}
@ -150,16 +150,7 @@ spec:
args:
- --config
- /etc/litellm/config.yaml
# command:
# - bash
# - -c
# - |
# ls -la /etc/litellm/; cat /etc/litellm/config.yaml; export
# find / 2>/dev/null | grep -v -e '^/proc' -e '^/sys' -e '^/dev' >/tmp/before.list
# prisma generate
# find / 2>/dev/null | grep -v -e '^/proc' -e '^/sys' -e '^/dev' >/tmp/after.list
# diff -ruN /tmp/before.list /tmp/after.list
# sleep 3600
- --run_gunicorn
ports:
- name: http
containerPort: {{ .Values.service.port }}


@ -1,5 +1,5 @@
{{- if .Values.ingress.enabled -}}
{{- $fullName := (printf "%s%s" (include "litellm.fullname" .) "-proxy") -}}
{{- $fullName := include "litellm.fullname" . -}}
{{- $svcPort := .Values.service.port -}}
{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
{{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}


@ -2,7 +2,7 @@
apiVersion: v1
kind: Secret
metadata:
name: {{ include "litellm.name" . }}-dbcredentials
name: {{ include "litellm.fullname" . }}-dbcredentials
data:
# Password for the "postgres" user
postgres-password: {{ ( index .Values.postgresql.auth "postgres-password") | default "litellm" | b64enc }}


@ -2,7 +2,7 @@
apiVersion: v1
kind: Secret
metadata:
name: {{ include "litellm.name" . }}-masterkey
name: {{ include "litellm.fullname" . }}-masterkey
data:
masterkey: {{ $masterkey | b64enc }}
type: Opaque


@ -1,7 +1,7 @@
apiVersion: v1
kind: Service
metadata:
name: {{ include "litellm.fullname" . }}-proxy
name: {{ include "litellm.fullname" . }}
labels:
{{- include "litellm.labels" . | nindent 4 }}
spec:


@ -11,5 +11,5 @@ spec:
- name: wget
image: busybox
command: ['wget']
args: ['{{ include "litellm.fullname" . }}:{{ .Values.service.port }}']
args: ['{{ include "litellm.fullname" . }}:{{ .Values.service.port }}/health/readiness']
restartPolicy: Never


@ -5,7 +5,9 @@
replicaCount: 1
image:
repository: ghcr.io/berriai/litellm
# Use "ghcr.io/berriai/litellm-database" for optimized image with database
# Alternatively, use "ghcr.io/berriai/litellm" for the default image
repository: ghcr.io/berriai/litellm-database
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
# tag: "main-latest"
@ -56,7 +58,7 @@ service:
port: 8000
ingress:
enabled: true
enabled: false
className: "nginx"
annotations: {}
# kubernetes.io/ingress.class: nginx
@ -71,6 +73,8 @@ ingress:
# hosts:
# - chart-example.local
# masterkey: changeit
# The elements within proxy_config are rendered as config.yaml for the proxy
# Examples: https://github.com/BerriAI/litellm/tree/main/litellm/proxy/example_config_yaml
# Reference: https://docs.litellm.ai/docs/proxy/configs
@ -159,61 +163,6 @@ postgresql:
# A secret is created by this chart (litellm-helm) with the credentials that
# the new Postgres instance should use.
existingSecret: litellm-dbcredentials
secretKeys:
userPasswordKey: password
ui:
enabled: true
replicaCount: 1
autoscaling:
enabled: false
image:
repository: ghcr.io/berriai/litellm-ui
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
# tag: "main-latest"
# TODO: Switch to BerriAI repo and tags if/when they provide a ui image
# https://github.com/BerriAI/litellm/pull/1505
tag: ""
service:
type: ClusterIP
port: 8501
ingress:
enabled: true
className: "nginx"
annotations: {}
hosts:
- host: ui.example.local
paths:
- path: /
pathType: ImplementationSpecific
tls: []
podAnnotations: {}
podLabels: {}
podSecurityContext:
fsGroup: 1000
securityContext:
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
resources: {}
volumes: []
volumeMounts: []
nodeSelector: {}
tolerations: []
affinity: {}
# existingSecret: ""
# secretKeys:
# userPasswordKey: password


@ -16,6 +16,34 @@ response = completion(
)
```
## Specifying Safety Settings
In certain use-cases you may need to make calls to the models and pass [safety settings](https://ai.google.dev/docs/safety_setting_gemini) different from the defaults. To do so, simply pass the `safety_settings` argument to `completion` or `acompletion`. For example:
```python
response = completion(
model="gemini/gemini-pro",
messages=[{"role": "user", "content": "write code for saying hi from LiteLLM"}],
safety_settings=[
{
"category": "HARM_CATEGORY_HARASSMENT",
"threshold": "BLOCK_NONE",
},
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"threshold": "BLOCK_NONE",
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"threshold": "BLOCK_NONE",
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"threshold": "BLOCK_NONE",
},
]
)
```
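The same settings can be passed to the async entry point, `acompletion`; a minimal sketch (the single-category `safety_settings` list is illustrative, any of the categories above can be included):
```python
import asyncio
from litellm import acompletion

async def main():
    # identical arguments to the sync example above, awaited instead
    response = await acompletion(
        model="gemini/gemini-pro",
        messages=[{"role": "user", "content": "write code for saying hi from LiteLLM"}],
        safety_settings=[
            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
        ],
    )
    print(response)

asyncio.run(main())
```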
# Gemini-Pro-Vision
LiteLLM supports the following image types passed in `url`
- Images with direct links - https://storage.googleapis.com/github-repo/img/gemini/intro/landmark3.jpg


@ -538,17 +538,13 @@ model_list: # will route requests to the least busy ollama model
api_base: "http://127.0.0.1:8003"
```
## Max Parallel Requests
To rate limit a user based on the number of parallel requests, e.g.:
- if a user's parallel requests > x, send a 429 error
- if a user's parallel requests <= x, let them use the API freely.

Set the max parallel request limit in config.yaml (note: this expects the user to be passing in an API key).
## Configure DB Pool Limits + Connection Timeouts
```yaml
general_settings:
max_parallel_requests: 100 # max parallel requests for a user = 100
general_settings:
database_connection_pool_limit: 100 # sets connection pool for prisma client to postgres db at 100
database_connection_timeout: 60 # sets a 60s timeout for any connection call to the db
```
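As the proxy CLI changes later in this diff show, these two values are appended to the Prisma database URL as `connection_limit` and `pool_timeout` query parameters; a sketch of the resulting URL (host and credentials are illustrative):
```bash
# DATABASE_URL after the proxy appends the pool settings
DATABASE_URL="postgresql://user:pass@host:5432/litellm?connection_limit=100&pool_timeout=60"
```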
## All settings
@ -577,6 +573,8 @@ general_settings:
"key_management_system": "google_kms", # either google_kms or azure_kms
"master_key": "string",
"database_url": "string",
"database_connection_pool_limit": 0, # default 100
"database_connection_timeout": 0, # default 60s
"database_type": "dynamo_db",
"database_args": {
"billing_mode": "PROVISIONED_THROUGHPUT",


@ -151,10 +151,54 @@ kubectl port-forward service/litellm-service 4000:4000
Your OpenAI proxy server is now running on `http://0.0.0.0:4000`.
</TabItem>
<TabItem value="helm-deploy" label="Helm">
### Step 1. Clone the repository
```bash
git clone https://github.com/BerriAI/litellm.git
```
### Step 2. Deploy with Helm
```bash
helm install \
--set masterkey=SuPeRsEcReT \
mydeploy \
deploy/charts/litellm
```
### Step 3. Expose the service to localhost
```bash
kubectl \
port-forward \
service/mydeploy-litellm \
8000:8000
```
Your OpenAI proxy server is now running on `http://127.0.0.1:8000`.
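As a quick smoke test, the readiness endpoint that the chart's helm-test pod probes can be hit directly (assuming your proxy version exposes `/health/readiness`):
```bash
curl http://127.0.0.1:8000/health/readiness
```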
</TabItem>
</Tabs>
## Setting SSL Certification
## Advanced Deployment Settings
### Customization of the server root path
:::info
In a Kubernetes deployment, it's possible to utilize a shared DNS to host multiple applications by modifying the virtual service
:::
Customize the root path to eliminate the need for employing multiple DNS configurations during deployment.
👉 Set `SERVER_ROOT_PATH` in your .env and this will be set as your server root path
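A minimal sketch, assuming a hypothetical `/api/v1` prefix:
```bash
# .env — the prefix value is illustrative
SERVER_ROOT_PATH="/api/v1"
```
All proxy routes are then served under that prefix, so one hostname can front multiple applications.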
### Setting SSL Certification
Use this if you need to set SSL certificates for your on-prem LiteLLM proxy


@ -72,3 +72,78 @@ curl --location 'http://0.0.0.0:8000/key/generate' \
```
## Turn on/off per request
The proxy supports 2 request-level PII controls:
- *no-pii*: Optional(bool) - Allow the user to turn off PII masking per request.
- *output_parse_pii*: Optional(bool) - Allow the user to turn off PII output parsing per request.
### Usage
**Step 1. Create key with pii permissions**
Set `allow_pii_controls` to true for a given key. This will allow the user to set request-level PII controls.
```bash
curl --location 'http://0.0.0.0:8000/key/generate' \
--header 'Authorization: Bearer my-master-key' \
--header 'Content-Type: application/json' \
--data '{
"permissions": {"allow_pii_controls": true}
}'
```
**Step 2. Turn off pii output parsing**
```python
import os
from openai import OpenAI
client = OpenAI(
# This is the default and can be omitted
api_key=os.environ.get("OPENAI_API_KEY"),
base_url="http://0.0.0.0:8000"
)
chat_completion = client.chat.completions.create(
messages=[
{
"role": "user",
"content": "My name is Jane Doe, my number is 8382043839",
}
],
model="gpt-3.5-turbo",
extra_body={
"content_safety": {"output_parse_pii": False}
}
)
```
**Step 3: See response**
```
{
"id": "chatcmpl-8c5qbGTILZa1S4CK3b31yj5N40hFN",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": "Hi [PERSON], what can I help you with?",
"role": "assistant"
}
}
],
"created": 1704089632,
"model": "gpt-35-turbo",
"object": "chat.completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 47,
"prompt_tokens": 12,
"total_tokens": 59
},
"_response_ms": 1753.426
}
```


@ -93,6 +93,7 @@ Request Params:
- `config`: *Optional[dict]* - any key-specific configs, overrides config in config.yaml
- `spend`: *Optional[int]* - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend
- `max_budget`: *Optional[float]* - Specify max budget for a given key.
- `model_max_budget`: *Optional[dict[str, float]]* - Specify max budget for each model, `model_max_budget={"gpt4": 0.5, "gpt-5": 0.01}` (see the curl sketch after this list)
- `max_parallel_requests`: *Optional[int]* - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
- `metadata`: *Optional[dict]* - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
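A sketch of setting a per-model budget at key creation (budget values are illustrative; the endpoint and headers follow the examples elsewhere in these docs):
```bash
curl --location 'http://0.0.0.0:8000/key/generate' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
    "model_max_budget": {"gpt-4": 0.5, "gpt-3.5-turbo": 0.5}
}'
```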
@ -676,8 +677,6 @@ general_settings:
### [BETA] Dynamo DB
Only live in `v1.16.21.dev1`.
#### Step 1. Save keys to env
```shell


@ -129,7 +129,7 @@ const sidebars = {
"proxy/caching",
{
"type": "category",
"label": "Logging, Alerting, Caching",
"label": "Logging, Alerting",
"items": [
"proxy/logging",
"proxy/alerting",


@ -6,9 +6,4 @@ Code in this folder is licensed under a commercial license. Please review the [L
👉 **Using in an Enterprise / Need specific features?** Meet with us [here](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat?month=2024-02)
## Enterprise Features:
- Track, View spend per tag https://docs.litellm.ai/docs/proxy/spend
- Custom API / microservice callbacks
- Google Text Moderation API
See all Enterprise Features here 👉 [Docs](https://docs.litellm.ai/docs/proxy/enterprise)


@ -110,7 +110,6 @@ class _ENTERPRISE_LlamaGuard(CustomLogger):
-1
] # get the last response - llama guard has a 4k token limit
self.set_custom_prompt_template(messages=[safety_check_messages])
# print(f"self.model: {self.model}")
response = await litellm.acompletion(
model=self.model,
messages=[safety_check_messages],


@ -121,6 +121,13 @@ def completion(
## Load Config
inference_params = copy.deepcopy(optional_params)
stream = inference_params.pop("stream", None)
# Handle safety settings
safety_settings_param = inference_params.pop("safety_settings", None)
safety_settings = None
if safety_settings_param:
safety_settings = [genai.types.SafetySettingDict(x) for x in safety_settings_param]
config = litellm.GeminiConfig.get_config()
for k, v in config.items():
if (
@ -141,11 +148,13 @@ def completion(
response = _model.generate_content(
contents=prompt,
generation_config=genai.types.GenerationConfig(**inference_params),
safety_settings=safety_settings,
)
else:
response = _model.generate_content(
contents=prompt,
generation_config=genai.types.GenerationConfig(**inference_params),
safety_settings=safety_settings,
stream=True,
)
return response

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/c18941d97fb7245b.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48016,[\"145\",\"static/chunks/145-9c160ad5539e000f.js\",\"931\",\"static/chunks/app/page-7bb820bd6902dbf2.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/c18941d97fb7245b.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"unBuvDqydg0yodtP5c3nQ\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/c18941d97fb7245b.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48016,[\"145\",\"static/chunks/145-9c160ad5539e000f.js\",\"931\",\"static/chunks/app/page-2322bcdc2ec71284.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/c18941d97fb7245b.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"S_8LZOnl2nyURq-NYnh2p\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>


@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[48016,["145","static/chunks/145-9c160ad5539e000f.js","931","static/chunks/app/page-7bb820bd6902dbf2.js"],""]
3:I[48016,["145","static/chunks/145-9c160ad5539e000f.js","931","static/chunks/app/page-2322bcdc2ec71284.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["unBuvDqydg0yodtP5c3nQ",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/c18941d97fb7245b.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["S_8LZOnl2nyURq-NYnh2p",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/c18941d97fb7245b.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null


@ -155,6 +155,9 @@ class GenerateKeyRequest(GenerateRequestBase):
aliases: Optional[dict] = {}
config: Optional[dict] = {}
permissions: Optional[dict] = {}
model_max_budget: Optional[dict] = (
{}
) # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
class GenerateKeyResponse(GenerateKeyRequest):
@ -167,7 +170,13 @@ class GenerateKeyResponse(GenerateKeyRequest):
def set_model_info(cls, values):
if values.get("token") is not None:
values.update({"key": values.get("token")})
dict_fields = ["metadata", "aliases", "config", "permissions"]
dict_fields = [
"metadata",
"aliases",
"config",
"permissions",
"model_max_budget",
]
for field in dict_fields:
value = values.get(field)
if value is not None and isinstance(value, str):
@ -302,6 +311,13 @@ class ConfigGeneralSettings(LiteLLMBase):
None,
description="connect to a postgres db - needed for generating temporary keys + tracking spend / key",
)
database_connection_pool_limit: Optional[int] = Field(
100,
description="default connection pool for prisma client connecting to postgres db",
)
database_connection_timeout: Optional[float] = Field(
60, description="default timeout for a connection to the database"
)
database_type: Optional[Literal["dynamo_db"]] = Field(
None, description="to use dynamodb instead of postgres db"
)
@ -383,6 +399,8 @@ class LiteLLM_VerificationToken(LiteLLMBase):
budget_reset_at: Optional[datetime] = None
allowed_cache_controls: Optional[list] = []
permissions: Dict = {}
model_spend: Dict = {}
model_max_budget: Dict = {}
class UserAPIKeyAuth(
@ -410,6 +428,8 @@ class LiteLLM_UserTable(LiteLLMBase):
user_id: str
max_budget: Optional[float]
spend: float = 0.0
model_max_budget: Optional[Dict] = {}
model_spend: Optional[Dict] = {}
user_email: Optional[str]
models: list = []


@ -287,6 +287,8 @@ class DynamoDBWrapper(CustomDB):
or k == "config"
or k == "metadata"
or k == "permissions"
or k == "model_spend"
or k == "model_max_budget"
)
and v is not None
and isinstance(v, str)


@ -119,6 +119,9 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
call_type: str,
):
"""
- Check if request turned off pii
- Check if user allowed to turn off pii (key permissions -> 'allow_pii_controls')
- Take the request data
- Call /analyze -> get the results
- Call /anonymize w/ the analyze results -> get the redacted text
@ -126,13 +129,59 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
For multiple messages in /chat/completions, we'll need to call them in parallel.
"""
permissions = user_api_key_dict.permissions
if permissions.get("pii", True) == False: # allow key to turn off pii masking
return data
output_parse_pii = permissions.get(
"output_parse_pii", litellm.output_parse_pii
) # allow key to turn on/off output parsing for pii
no_pii = permissions.get(
"no-pii", None
) # allow key to turn on/off pii masking (if user is allowed to set pii controls, then they can override the key defaults)
if no_pii is None:
# check older way of turning on/off pii
no_pii = not permissions.get("pii", True)
content_safety = data.get("content_safety", None)
verbose_proxy_logger.debug(f"content_safety: {content_safety}")
## Request-level turn on/off PII controls ##
if content_safety is not None and isinstance(content_safety, dict):
# pii masking ##
if (
content_safety.get("no-pii", None) is not None
and content_safety.get("no-pii") == True
):
# check if user allowed to turn this off
if permissions.get("allow_pii_controls", False) == False:
raise HTTPException(
status_code=400,
detail={"error": "Not allowed to set PII controls per request"},
)
else: # user allowed to turn off pii masking
no_pii = content_safety.get("no-pii")
if not isinstance(no_pii, bool):
raise HTTPException(
status_code=400,
detail={"error": "no_pii needs to be a boolean value"},
)
## pii output parsing ##
if content_safety.get("output_parse_pii", None) is not None:
# check if user allowed to turn this off
if permissions.get("allow_pii_controls", False) == False:
raise HTTPException(
status_code=400,
detail={"error": "Not allowed to set PII controls per request"},
)
else: # user allowed to turn on/off pii output parsing
output_parse_pii = content_safety.get("output_parse_pii")
if not isinstance(output_parse_pii, bool):
raise HTTPException(
status_code=400,
detail={
"error": "output_parse_pii needs to be a boolean value"
},
)
if no_pii == False: # turn off pii masking
return data
if call_type == "completion": # /chat/completions requests
messages = data["messages"]


@ -409,6 +409,8 @@ def run_server(
"uvicorn, gunicorn needs to be imported. Run - `pip install 'litellm[proxy]'`"
)
db_connection_pool_limit = 100
db_connection_timeout = 60
if config is not None:
"""
Allow user to pass in db url via config
@ -427,6 +429,12 @@ def run_server(
proxy_config.load_config(router=None, config_file_path=config)
)
database_url = general_settings.get("database_url", None)
db_connection_pool_limit = general_settings.get(
"database_connection_pool_limit", 100
)
db_connection_timeout = general_settings.get(
"database_connection_timeout", 60
)
if database_url and database_url.startswith("os.environ/"):
original_dir = os.getcwd()
# set the working directory to where this script is
@ -447,14 +455,19 @@ def run_server(
try:
if os.getenv("DATABASE_URL", None) is not None:
### add connection pool + pool timeout args
params = {"connection_limit": 100, "pool_timeout": 60}
params = {
"connection_limit": db_connection_pool_limit,
"pool_timeout": db_connection_timeout,
}
database_url = os.getenv("DATABASE_URL")
modified_url = append_query_params(database_url, params)
os.environ["DATABASE_URL"] = modified_url
###
if os.getenv("DIRECT_URL", None) is not None:
### add connection pool + pool timeout args
params = {"connection_limit": 100, "pool_timeout": 60}
params = {
"connection_limit": db_connection_pool_limit,
"pool_timeout": db_connection_timeout,
}
database_url = os.getenv("DIRECT_URL")
modified_url = append_query_params(database_url, params)
os.environ["DIRECT_URL"] = modified_url


@ -93,6 +93,7 @@ from litellm.proxy.utils import (
html_form,
_read_request_body,
_is_valid_team_configs,
_is_user_proxy_admin,
)
from litellm.proxy.secret_managers.google_kms import load_google_kms
import pydantic
@ -143,6 +144,9 @@ app = FastAPI(
title="LiteLLM API",
description=f"Proxy Server to call 100+ LLMs in the OpenAI format\n\n{ui_message}",
version=version,
root_path=os.environ.get(
"SERVER_ROOT_PATH", ""
), # check if user passed root path, FastAPI defaults this value to ""
)
@ -376,6 +380,11 @@ async def user_api_key_auth(
# 3. If 'user' passed to /chat/completions, /embeddings endpoint is in budget
# 4. If token is expired
# 5. If token spend is under Budget for the token
# 6. If token spend per model is under budget per model
request_data = await _read_request_body(
request=request
) # request data, used across all checks. Making this easily available
# Check 1. If token can call model
litellm.model_alias_map = valid_token.aliases
@ -450,7 +459,6 @@ async def user_api_key_auth(
if (
litellm.max_user_budget is not None
): # Check if 'user' passed in /chat/completions is in budget, only checked if litellm.max_user_budget is set
request_data = await _read_request_body(request=request)
user_passed_to_chat_completions = request_data.get("user", None)
if user_passed_to_chat_completions is not None:
user_id_list.append(user_passed_to_chat_completions)
@ -496,11 +504,7 @@ async def user_api_key_auth(
continue
assert isinstance(_user, dict)
# check if user is admin #
if (
_user.get("user_role", None) is not None
and _user.get("user_role") == "proxy_admin"
):
return UserAPIKeyAuth(api_key=master_key)
# Token exists, not expired now check if its in budget for the user
user_max_budget = _user.get("max_budget", None)
user_current_spend = _user.get("spend", None)
@ -587,6 +591,25 @@ async def user_api_key_auth(
f"ExceededTokenBudget: Current spend for token: {valid_token.spend}; Max Budget for Token: {valid_token.max_budget}"
)
# Check 5. Token Model Spend is under Model budget
max_budget_per_model = valid_token.model_max_budget
spend_per_model = valid_token.model_spend
if max_budget_per_model is not None and spend_per_model is not None:
current_model = request_data.get("model")
if current_model is not None:
current_model_spend = spend_per_model.get(current_model, None)
current_model_budget = max_budget_per_model.get(current_model, None)
if (
current_model_spend is not None
and current_model_budget is not None
):
if current_model_spend > current_model_budget:
raise Exception(
f"ExceededModelBudget: Current spend for model: {current_model_spend}; Max Budget for Model: {current_model_budget}"
)
# Token passed all checks
api_key = valid_token.token
@ -616,11 +639,15 @@ async def user_api_key_auth(
)
)
if (
route.startswith("/key/")
or route.startswith("/user/")
or route.startswith("/model/")
or route.startswith("/spend/")
) and (not is_master_key_valid):
(
route.startswith("/key/")
or route.startswith("/user/")
or route.startswith("/model/")
or route.startswith("/spend/")
)
and (not is_master_key_valid)
and (not _is_user_proxy_admin(user_id_information))
):
allow_user_auth = False
if (
general_settings.get("allow_user_auth", False) == True
@ -712,9 +739,12 @@ async def user_api_key_auth(
# Do something if the current route starts with any of the allowed routes
pass
else:
raise Exception(
f"This key is made for LiteLLM UI, Tried to access route: {route}. Not allowed"
)
if _is_user_proxy_admin(user_id_information):
pass
else:
raise Exception(
f"This key is made for LiteLLM UI, Tried to access route: {route}. Not allowed"
)
return UserAPIKeyAuth(api_key=api_key, **valid_token_dict)
except Exception as e:
# verbose_proxy_logger.debug(f"An exception occurred - {traceback.format_exc()}")
@ -937,13 +967,26 @@ async def update_database(
# Calculate the new cost by adding the existing cost and response_cost
existing_spend_obj.spend = existing_spend + response_cost
# track cost per model, for the given user
spend_per_model = existing_spend_obj.model_spend or {}
current_model = kwargs.get("model")
if current_model is not None and spend_per_model is not None:
if spend_per_model.get(current_model) is None:
spend_per_model[current_model] = response_cost
else:
spend_per_model[current_model] += response_cost
existing_spend_obj.model_spend = spend_per_model
valid_token = user_api_key_cache.get_cache(key=id)
if valid_token is not None and isinstance(valid_token, dict):
user_api_key_cache.set_cache(
key=id, value=existing_spend_obj.json()
)
verbose_proxy_logger.debug(f"new cost: {existing_spend_obj.spend}")
verbose_proxy_logger.debug(
f"user - new cost: {existing_spend_obj.spend}, user_id: {id}"
)
data_list.append(existing_spend_obj)
# Update the cost column for the given user id
@ -980,15 +1023,28 @@ async def update_database(
# Calculate the new cost by adding the existing cost and response_cost
new_spend = existing_spend + response_cost
verbose_proxy_logger.debug(f"new cost: {new_spend}")
# track cost per model, for the given key
spend_per_model = existing_spend_obj.model_spend or {}
current_model = kwargs.get("model")
if current_model is not None and spend_per_model is not None:
if spend_per_model.get(current_model) is None:
spend_per_model[current_model] = response_cost
else:
spend_per_model[current_model] += response_cost
verbose_proxy_logger.debug(
f"new cost: {new_spend}, new spend per model: {spend_per_model}"
)
# Update the cost column for the given token
await prisma_client.update_data(
token=token, data={"spend": new_spend}
token=token,
data={"spend": new_spend, "model_spend": spend_per_model},
)
valid_token = user_api_key_cache.get_cache(key=token)
if valid_token is not None:
valid_token.spend = new_spend
valid_token.model_spend = spend_per_model
user_api_key_cache.set_cache(key=token, value=valid_token)
elif custom_db_client is not None:
# Fetch the existing cost for the given token
@ -1068,10 +1124,21 @@ async def update_database(
# Calculate the new cost by adding the existing cost and response_cost
new_spend = existing_spend + response_cost
# track cost per model, for the given team
spend_per_model = existing_spend_obj.model_spend or {}
current_model = kwargs.get("model")
if current_model is not None and spend_per_model is not None:
if spend_per_model.get(current_model) is None:
spend_per_model[current_model] = response_cost
else:
spend_per_model[current_model] += response_cost
verbose_proxy_logger.debug(f"new cost: {new_spend}")
# Update the cost column for the given token
await prisma_client.update_data(
team_id=team_id, data={"spend": new_spend}, table_name="team"
team_id=team_id,
data={"spend": new_spend, "model_spend": spend_per_model},
table_name="team",
)
elif custom_db_client is not None:
@ -1645,6 +1712,7 @@ async def generate_key_helper_fn(
key_alias: Optional[str] = None,
allowed_cache_controls: Optional[list] = [],
permissions: Optional[dict] = {},
model_max_budget: Optional[dict] = {},
):
global prisma_client, custom_db_client, user_api_key_cache
@ -1678,6 +1746,8 @@ async def generate_key_helper_fn(
config_json = json.dumps(config)
permissions_json = json.dumps(permissions)
metadata_json = json.dumps(metadata)
model_max_budget_json = json.dumps(model_max_budget)
user_id = user_id or str(uuid.uuid4())
user_role = user_role or "app_user"
tpm_limit = tpm_limit
@ -1720,6 +1790,7 @@ async def generate_key_helper_fn(
"budget_reset_at": key_reset_at,
"allowed_cache_controls": allowed_cache_controls,
"permissions": permissions_json,
"model_max_budget": model_max_budget_json,
}
if (
general_settings.get("allow_user_auth", False) == True
@ -1735,6 +1806,11 @@ async def generate_key_helper_fn(
saved_token["metadata"] = json.loads(saved_token["metadata"])
if isinstance(saved_token["permissions"], str):
saved_token["permissions"] = json.loads(saved_token["permissions"])
if isinstance(saved_token["model_max_budget"], str):
saved_token["model_max_budget"] = json.loads(
saved_token["model_max_budget"]
)
if saved_token.get("expires", None) is not None and isinstance(
saved_token["expires"], datetime
):
@ -3078,6 +3154,20 @@ async def generate_key_fn(
- max_parallel_requests: Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
- metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
- permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). Example - {"pii": false}
- model_max_budget: Optional[dict] - key-specific model budget in USD. Example - {"text-davinci-002": 0.5, "gpt-3.5-turbo": 0.5}. IF null or {} then no model specific budget.
Examples:
1. Allow users to turn on/off pii masking
```bash
curl --location 'http://0.0.0.0:8000/key/generate' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"permissions": {"allow_pii_controls": true}
}'
```
Returns:
- key: (str) The generated api key
@ -4871,7 +4961,7 @@ async def auth_callback(request: Request):
if user_id is None:
user_id = getattr(result, "first_name", "") + getattr(result, "last_name", "")
response = await generate_key_helper_fn(
**{"duration": "1hr", "key_max_budget": 0, "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": user_id, "team_id": "litellm-dashboard", "user_email": user_email} # type: ignore
**{"duration": "1hr", "key_max_budget": 0.01, "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": user_id, "team_id": "litellm-dashboard", "user_email": user_email} # type: ignore
)
key = response["token"] # type: ignore
user_id = response["user_id"] # type: ignore


@ -24,6 +24,8 @@ model LiteLLM_TeamTable {
budget_reset_at DateTime?
created_at DateTime @default(now()) @map("created_at")
updated_at DateTime @default(now()) @updatedAt @map("updated_at")
model_spend Json @default("{}")
model_max_budget Json @default("{}")
}
// Track spend, rate limit, budget Users
@ -41,6 +43,8 @@ model LiteLLM_UserTable {
budget_duration String?
budget_reset_at DateTime?
allowed_cache_controls String[] @default([])
model_spend Json @default("{}")
model_max_budget Json @default("{}")
}
// Generate Tokens for Proxy
@ -64,6 +68,8 @@ model LiteLLM_VerificationToken {
budget_duration String?
budget_reset_at DateTime?
allowed_cache_controls String[] @default([])
model_spend Json @default("{}")
model_max_budget Json @default("{}")
}
// store proxy config.yaml


@ -1379,19 +1379,22 @@ async def _read_request_body(request):
"""
import ast, json
request_data = {}
if request is None:
return request_data
body = await request.body()
if body == b"" or body is None:
return request_data
body_str = body.decode()
try:
request_data = ast.literal_eval(body_str)
request_data = {}
if request is None:
return request_data
body = await request.body()
if body == b"" or body is None:
return request_data
body_str = body.decode()
try:
request_data = ast.literal_eval(body_str)
except:
request_data = json.loads(body_str)
return request_data
except:
request_data = json.loads(body_str)
return request_data
return {}
def _is_valid_team_configs(team_id=None, team_config=None, request_data=None):
@ -1408,6 +1411,22 @@ def _is_valid_team_configs(team_id=None, team_config=None, request_data=None):
return
def _is_user_proxy_admin(user_id_information=None):
if (
user_id_information == None
or len(user_id_information) == 0
or user_id_information[0] == None
):
return False
_user = user_id_information[0]
if (
_user.get("user_role", None) is not None
and _user.get("user_role") == "proxy_admin"
):
return True
return False
# LiteLLM Admin UI - Non SSO Login
html_form = """
<!DOCTYPE html>


@ -1912,7 +1912,7 @@ def test_mistral_anyscale_stream():
# test_baseten_wizardLMcompletion_withbase()
# def test_baseten_mosaic_ML_completion_withbase():
# model_name = "31dxrj3"
# model_name = "31dxrj3",
# litellm.api_base = "https://app.baseten.co"
# try:
# response = completion(model=model_name, messages=messages)


@ -1101,6 +1101,116 @@ def test_call_with_key_over_budget(prisma_client):
print(vars(e))
def test_call_with_key_over_model_budget(prisma_client):
# 12. Make a call with a key that is over its model-specific budget, expect to fail
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
try:
async def test():
await litellm.proxy.proxy_server.prisma_client.connect()
# set budget for chatgpt-v-2 to 0.000001, expect the next request to fail
request = GenerateKeyRequest(
max_budget=1000,
model_max_budget={
"chatgpt-v-2": 0.000001,
},
metadata={"user_api_key": 0.0001},
)
key = await generate_key_fn(request)
print(key)
generated_key = key.key
user_id = key.user_id
bearer_token = "Bearer " + generated_key
request = Request(scope={"type": "http"})
request._url = URL(url="/chat/completions")
async def return_body():
return b'{"model": "chatgpt-v-2"}'
request.body = return_body
# use generated key to auth in
result = await user_api_key_auth(request=request, api_key=bearer_token)
print("result from user auth with new key", result)
# update spend using track_cost callback, make 2nd request, it should fail
from litellm.proxy.proxy_server import (
_PROXY_track_cost_callback as track_cost_callback,
)
from litellm import ModelResponse, Choices, Message, Usage
from litellm.caching import Cache
litellm.cache = Cache()
import time
request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{time.time()}"
resp = ModelResponse(
id=request_id,
choices=[
Choices(
finish_reason=None,
index=0,
message=Message(
content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
role="assistant",
),
)
],
model="gpt-35-turbo", # azure always has model written like this
usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
)
await track_cost_callback(
kwargs={
"model": "chatgpt-v-2",
"stream": False,
"litellm_params": {
"metadata": {
"user_api_key": hash_token(generated_key),
"user_api_key_user_id": user_id,
}
},
"response_cost": 0.00002,
},
completion_response=resp,
start_time=datetime.now(),
end_time=datetime.now(),
)
await asyncio.sleep(10)
# test spend_log was written and we can read it
spend_logs = await view_spend_logs(request_id=request_id)
print("read spend logs", spend_logs)
assert len(spend_logs) == 1
spend_log = spend_logs[0]
assert spend_log.request_id == request_id
assert spend_log.spend == float("2e-05")
assert spend_log.model == "chatgpt-v-2"
assert (
spend_log.cache_key
== "a61ae14fe4a8b8014a61e6ae01a100c8bc6770ac37c293242afed954bc69207d"
)
# use generated key to auth in
result = await user_api_key_auth(request=request, api_key=bearer_token)
print("result from user auth with new key", result)
pytest.fail(f"This should have failed!. They key crossed it's budget")
asyncio.run(test())
except Exception as e:
# print(f"Error - {str(e)}")
traceback.print_exc()
error_detail = e.message
assert "Authentication Error, ExceededModelBudget:" in error_detail
print(vars(e))
@pytest.mark.asyncio()
async def test_call_with_key_never_over_budget(prisma_client):
# Make a call with a key with budget=None, it should never fail
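
This test drives the new per-model budget path: the key has a large key-wide max_budget (1000) but a tiny model_max_budget for chatgpt-v-2, so the second request, made after $0.00002 of tracked spend, must be rejected with ExceededModelBudget. A rough sketch of the kind of check being exercised (illustrative only, not the proxy's enforcement code):

def check_model_budget(model: str, model_spend: dict, model_max_budget: dict) -> None:
    # Raise once recorded spend for the model reaches its per-model cap.
    cap = model_max_budget.get(model)
    if cap is not None and model_spend.get(model, 0.0) >= cap:
        raise Exception(
            f"ExceededModelBudget: {model} spend {model_spend.get(model, 0.0)} >= budget {cap}"
        )

budgets = {"chatgpt-v-2": 0.000001}
check_model_budget("chatgpt-v-2", {}, budgets)  # first request: no spend yet, passes
try:
    # after track_cost_callback records $0.00002 of spend, the next request fails
    check_model_budget("chatgpt-v-2", {"chatgpt-v-2": 0.00002}, budgets)
except Exception as e:
    print(e)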

View file

@@ -89,7 +89,6 @@ from .exceptions import (
     UnprocessableEntityError,
 )

-verbose_logger.debug(f"sys.path: {sys.path}")

 try:
     from .proxy.enterprise.enterprise_callbacks.generic_api_callback import (
         GenericAPILogger,
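
The surrounding try: is the usual optional-dependency guard: the enterprise-only callback is imported when available, and an open-source install keeps working without it. A hedged sketch of the pattern (the except arm below is an assumption; the real module may recover differently):

try:
    from litellm.proxy.enterprise.enterprise_callbacks.generic_api_callback import (
        GenericAPILogger,  # enterprise-only callback, absent in plain installs
    )
except ImportError:
    GenericAPILogger = None  # assumption: treat the callback as unavailable

if GenericAPILogger is None:
    print("enterprise callbacks not installed; continuing without GenericAPILogger")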

View file

@@ -1,11 +1,16 @@
 [tool.poetry]
 name = "litellm"
-version = "1.25.0"
+version = "1.25.2"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
 readme = "README.md"

+[tool.poetry.urls]
+homepage = "https://litellm.ai"
+repository = "https://github.com/BerriAI/litellm"
+documentation = "https://docs.litellm.ai"
+
 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0, !=3.9.7"
 openai = ">=1.0.0"
@@ -69,7 +74,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"

 [tool.commitizen]
-version = "1.25.0"
+version = "1.25.2"
 version_files = [
     "pyproject.toml:^version"
 ]

View file

@@ -24,6 +24,8 @@ model LiteLLM_TeamTable {
   budget_reset_at DateTime?
   created_at DateTime @default(now()) @map("created_at")
   updated_at DateTime @default(now()) @updatedAt @map("updated_at")
+  model_spend Json @default("{}")
+  model_max_budget Json @default("{}")
 }

 // Track spend, rate limit, budget Users
@@ -41,6 +43,8 @@ model LiteLLM_UserTable {
   budget_duration String?
   budget_reset_at DateTime?
   allowed_cache_controls String[] @default([])
+  model_spend Json @default("{}")
+  model_max_budget Json @default("{}")
 }

 // Generate Tokens for Proxy
@@ -64,6 +68,8 @@ model LiteLLM_VerificationToken {
   budget_duration String?
   budget_reset_at DateTime?
   allowed_cache_controls String[] @default([])
+  model_spend Json @default("{}")
+  model_max_budget Json @default("{}")
 }

 // store proxy config.yaml
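
All three tables gain the same pair of Json columns, defaulting to "{}", so a fresh team, user, or key starts with an empty per-model ledger. A hedged sketch of accumulating spend into such a column value (the helper name and flow are illustrative, not the proxy's actual code):

import json

def add_model_spend(model_spend_json: str, model: str, cost: float) -> str:
    # Accumulate per-model spend inside the JSON blob stored on the row.
    spend = json.loads(model_spend_json or "{}")
    spend[model] = spend.get(model, 0.0) + cost
    return json.dumps(spend)

row_value = "{}"  # fresh key: the column default
row_value = add_model_spend(row_value, "chatgpt-v-2", 0.00002)
print(row_value)  # {"chatgpt-v-2": 2e-05}
row_value = add_model_spend(row_value, "chatgpt-v-2", 0.00002)
print(row_value)  # {"chatgpt-v-2": 4e-05}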

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@@ -1 +1 @@
(Diff of the prebuilt admin UI HTML suppressed: both versions are one long generated Next.js page; the only changes are the page chunk, page-7bb820bd6902dbf2.js -> page-2322bcdc2ec71284.js, and the buildId, unBuvDqydg0yodtP5c3nQ -> S_8LZOnl2nyURq-NYnh2p.)

View file

@@ -1,7 +1,7 @@
 2:I[77831,[],""]
-3:I[48016,["145","static/chunks/145-9c160ad5539e000f.js","931","static/chunks/app/page-7bb820bd6902dbf2.js"],""]
+3:I[48016,["145","static/chunks/145-9c160ad5539e000f.js","931","static/chunks/app/page-2322bcdc2ec71284.js"],""]
 4:I[5613,[],""]
 5:I[31778,[],""]
-0:["unBuvDqydg0yodtP5c3nQ", ... (rest of the long generated RSC payload line, otherwise unchanged)
+0:["S_8LZOnl2nyURq-NYnh2p", ... (rest of the long generated RSC payload line, otherwise unchanged)
 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
 1:null

View file

@@ -239,7 +239,7 @@ export const userSpendLogsCall = async (
 export const keyInfoCall = async (accessToken: String, keys: String[]) => {
   try {
-    let url = proxyBaseUrl ? `${proxyBaseUrl}/v2/key/info` : `/key/info`;
+    let url = proxyBaseUrl ? `${proxyBaseUrl}/v2/key/info` : `/v2/key/info`;
     const response = await fetch(url, {
       method: "POST",

View file

@@ -49,6 +49,14 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
   const [accessToken, setAccessToken] = useState<string | null>(null);
   const [userModels, setUserModels] = useState<string[]>([]);

+  // check if window is not undefined
+  if (typeof window !== "undefined") {
+    window.addEventListener('beforeunload', function() {
+      // Clear session storage
+      sessionStorage.clear();
+    });
+  }
+
   function formatUserRole(userRole: string) {
     if (!userRole) {
       return "Undefined Role";
@@ -70,6 +78,7 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
   // Moved useEffect inside the component and used a condition to run fetch only if the params are available
   useEffect(() => {
     if (token) {
       const decoded = jwtDecode(token) as { [key: string]: any };
       if (decoded) {
@@ -97,22 +106,22 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
       }
     }

     if (userID && accessToken && userRole && !data) {
-      const cachedData = localStorage.getItem("userData" + userID);
-      const cachedSpendData = localStorage.getItem("userSpendData" + userID);
-      const cachedUserModels = localStorage.getItem("userModels" + userID);
+      const cachedData = sessionStorage.getItem("userData" + userID);
+      const cachedSpendData = sessionStorage.getItem("userSpendData" + userID);
+      const cachedUserModels = sessionStorage.getItem("userModels" + userID);
       if (cachedData && cachedSpendData && cachedUserModels) {
         setData(JSON.parse(cachedData));
         setUserSpendData(JSON.parse(cachedSpendData));
         setUserModels(JSON.parse(cachedUserModels));
       } else {
         const fetchData = async () => {
           try {
             const response = await userInfoCall(accessToken, userID, userRole);
             setUserSpendData(response["user_info"]);
             setData(response["keys"]); // Assuming this is the correct path to your data
-            localStorage.setItem("userData" + userID, JSON.stringify(response["keys"]));
-            localStorage.setItem(
+            sessionStorage.setItem("userData" + userID, JSON.stringify(response["keys"]));
+            sessionStorage.setItem(
               "userSpendData" + userID,
               JSON.stringify(response["user_info"])
             );
@@ -126,7 +135,7 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
           console.log("userModels:", userModels);

-          localStorage.setItem("userModels" + userID, JSON.stringify(available_model_names));
+          sessionStorage.setItem("userModels" + userID, JSON.stringify(available_model_names));