forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_llamaguard_custom_categories
This commit is contained in:
commit
038ba426ab
57 changed files with 585 additions and 364 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
@ -40,7 +40,7 @@ ui/litellm-dashboard/node_modules
|
||||||
ui/litellm-dashboard/next-env.d.ts
|
ui/litellm-dashboard/next-env.d.ts
|
||||||
ui/litellm-dashboard/package.json
|
ui/litellm-dashboard/package.json
|
||||||
ui/litellm-dashboard/package-lock.json
|
ui/litellm-dashboard/package-lock.json
|
||||||
deploy/charts/litellm-helm/*.tgz
|
deploy/charts/litellm/*.tgz
|
||||||
deploy/charts/litellm-helm/charts/*
|
deploy/charts/litellm/charts/*
|
||||||
deploy/charts/*.tgz
|
deploy/charts/*.tgz
|
||||||
litellm/proxy/vertex_key.json
|
litellm/proxy/vertex_key.json
|
||||||
|
|
|
@ -1,89 +0,0 @@
|
||||||
{{- if .Values.ui.enabled -}}
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: {{ include "litellm.fullname" . }}-ui
|
|
||||||
labels:
|
|
||||||
{{- include "litellm.labels" . | nindent 4 }}
|
|
||||||
spec:
|
|
||||||
{{- if not .Values.ui.autoscaling.enabled }}
|
|
||||||
replicas: {{ .Values.ui.replicaCount }}
|
|
||||||
{{- end }}
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
{{- include "litellm.ui.selectorLabels" . | nindent 6 }}
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
{{- with .Values.podAnnotations }}
|
|
||||||
annotations:
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
labels:
|
|
||||||
{{- include "litellm.ui.labels" . | nindent 8 }}
|
|
||||||
{{- with .Values.ui.podLabels }}
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
spec:
|
|
||||||
{{- with .Values.imagePullSecrets }}
|
|
||||||
imagePullSecrets:
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
serviceAccountName: {{ include "litellm.serviceAccountName" . }}
|
|
||||||
securityContext:
|
|
||||||
{{- toYaml .Values.ui.podSecurityContext | nindent 8 }}
|
|
||||||
containers:
|
|
||||||
- name: {{ include "litellm.name" . }}-ui
|
|
||||||
securityContext:
|
|
||||||
{{- toYaml .Values.ui.securityContext | nindent 12 }}
|
|
||||||
image: "{{ .Values.ui.image.repository }}:{{ .Values.ui.image.tag | default (printf "main-%s" .Chart.AppVersion) }}"
|
|
||||||
imagePullPolicy: {{ .Values.ui.image.pullPolicy }}
|
|
||||||
env:
|
|
||||||
- name: BASE_URL
|
|
||||||
value: {{ (index .Values.ui.ingress.hosts 0).host | default "example.com" }}
|
|
||||||
ports:
|
|
||||||
- name: http
|
|
||||||
containerPort: {{ .Values.ui.service.port }}
|
|
||||||
protocol: TCP
|
|
||||||
livenessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /
|
|
||||||
port: http
|
|
||||||
readinessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /
|
|
||||||
port: http
|
|
||||||
# Give the container time to start up. Up to 5 minutes (10 * 30 seconds)
|
|
||||||
startupProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /
|
|
||||||
port: http
|
|
||||||
failureThreshold: 30
|
|
||||||
periodSeconds: 10
|
|
||||||
resources:
|
|
||||||
{{- toYaml .Values.ui.resources | nindent 12 }}
|
|
||||||
volumeMounts:
|
|
||||||
- name: tmp
|
|
||||||
mountPath: /tmp
|
|
||||||
{{- with .Values.ui.volumeMounts }}
|
|
||||||
{{- toYaml . | nindent 12 }}
|
|
||||||
{{- end }}
|
|
||||||
volumes:
|
|
||||||
- name: tmp
|
|
||||||
emptyDir:
|
|
||||||
sizeLimit: 500Mi
|
|
||||||
{{- with .Values.ui.volumes }}
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
{{- with .Values.ui.nodeSelector }}
|
|
||||||
nodeSelector:
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
{{- with .Values.ui.affinity }}
|
|
||||||
affinity:
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
{{- with .Values.ui.tolerations }}
|
|
||||||
tolerations:
|
|
||||||
{{- toYaml . | nindent 8 }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end -}}
|
|
|
@ -1,61 +0,0 @@
|
||||||
{{- if .Values.ui.ingress.enabled -}}
|
|
||||||
{{- $fullName := (printf "%s%s" (include "litellm.fullname" .) "-ui") -}}
|
|
||||||
{{- $svcPort := .Values.ui.service.port -}}
|
|
||||||
{{- if and .Values.ui.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
|
|
||||||
{{- if not (hasKey .Values.ui.ingress.annotations "kubernetes.io/ingress.class") }}
|
|
||||||
{{- $_ := set .Values.ui.ingress.annotations "kubernetes.io/ingress.class" .Values.ui.ingress.className}}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
|
|
||||||
apiVersion: networking.k8s.io/v1
|
|
||||||
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
|
|
||||||
apiVersion: networking.k8s.io/v1beta1
|
|
||||||
{{- else -}}
|
|
||||||
apiVersion: extensions/v1beta1
|
|
||||||
{{- end }}
|
|
||||||
kind: Ingress
|
|
||||||
metadata:
|
|
||||||
name: {{ $fullName }}
|
|
||||||
labels:
|
|
||||||
{{- include "litellm.ui.labels" . | nindent 4 }}
|
|
||||||
{{- with .Values.ui.ingress.annotations }}
|
|
||||||
annotations:
|
|
||||||
{{- toYaml . | nindent 4 }}
|
|
||||||
{{- end }}
|
|
||||||
spec:
|
|
||||||
{{- if and .Values.ui.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
|
|
||||||
ingressClassName: {{ .Values.ui.ingress.className }}
|
|
||||||
{{- end }}
|
|
||||||
{{- if .Values.ui.ingress.tls }}
|
|
||||||
tls:
|
|
||||||
{{- range .Values.ui.ingress.tls }}
|
|
||||||
- hosts:
|
|
||||||
{{- range .hosts }}
|
|
||||||
- {{ . | quote }}
|
|
||||||
{{- end }}
|
|
||||||
secretName: {{ .secretName }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
rules:
|
|
||||||
{{- range .Values.ui.ingress.hosts }}
|
|
||||||
- host: {{ .host | quote }}
|
|
||||||
http:
|
|
||||||
paths:
|
|
||||||
{{- range .paths }}
|
|
||||||
- path: {{ .path }}
|
|
||||||
{{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
|
|
||||||
pathType: {{ .pathType }}
|
|
||||||
{{- end }}
|
|
||||||
backend:
|
|
||||||
{{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
|
|
||||||
service:
|
|
||||||
name: {{ $fullName }}
|
|
||||||
port:
|
|
||||||
number: {{ $svcPort }}
|
|
||||||
{{- else }}
|
|
||||||
serviceName: {{ $fullName }}
|
|
||||||
servicePort: {{ $svcPort }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
||||||
{{- end }}
|
|
|
@ -1,17 +0,0 @@
|
||||||
{{- if .Values.ui.enabled -}}
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: {{ include "litellm.fullname" . }}-ui
|
|
||||||
labels:
|
|
||||||
{{- include "litellm.labels" . | nindent 4 }}
|
|
||||||
spec:
|
|
||||||
type: {{ .Values.ui.service.type }}
|
|
||||||
ports:
|
|
||||||
- port: {{ .Values.ui.service.port }}
|
|
||||||
targetPort: http
|
|
||||||
protocol: TCP
|
|
||||||
name: http
|
|
||||||
selector:
|
|
||||||
{{- include "litellm.ui.selectorLabels" . | nindent 4 }}
|
|
||||||
{{ end -}}
|
|
|
@ -2,7 +2,7 @@ apiVersion: v2
|
||||||
|
|
||||||
# We can't call ourselves just "litellm" because then we couldn't publish to the
|
# We can't call ourselves just "litellm" because then we couldn't publish to the
|
||||||
# same OCI repository as the "litellm" OCI image
|
# same OCI repository as the "litellm" OCI image
|
||||||
name: litellm-helm
|
name: litellm
|
||||||
description: Call all LLM APIs using the OpenAI format
|
description: Call all LLM APIs using the OpenAI format
|
||||||
|
|
||||||
# A chart can be either an 'application' or a 'library' chart.
|
# A chart can be either an 'application' or a 'library' chart.
|
||||||
|
@ -18,17 +18,16 @@ type: application
|
||||||
# This is the chart version. This version number should be incremented each time you make changes
|
# This is the chart version. This version number should be incremented each time you make changes
|
||||||
# to the chart and its templates, including the app version.
|
# to the chart and its templates, including the app version.
|
||||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||||
version: 0.1.0
|
version: 0.2.0
|
||||||
|
|
||||||
# This is the version number of the application being deployed. This version number should be
|
# This is the version number of the application being deployed. This version number should be
|
||||||
# incremented each time you make changes to the application. Versions are not expected to
|
# incremented each time you make changes to the application. Versions are not expected to
|
||||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||||
# It is recommended to use it with quotes.
|
# It is recommended to use it with quotes.
|
||||||
appVersion: v1.18.9
|
appVersion: v1.24.5
|
||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
- name: "postgresql"
|
- name: "postgresql"
|
||||||
version: ">=13.3.0"
|
version: ">=13.3.0"
|
||||||
repository: oci://registry-1.docker.io/bitnamicharts
|
repository: oci://registry-1.docker.io/bitnamicharts
|
||||||
condition: db.deployStandalone
|
condition: db.deployStandalone
|
||||||
|
|
|
@ -43,20 +43,6 @@ data:
|
||||||
type: Opaque
|
type: Opaque
|
||||||
```
|
```
|
||||||
|
|
||||||
### LiteLLM Admin UI Settings
|
|
||||||
|
|
||||||
| Name | Description | Value |
|
|
||||||
| ---------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----- |
|
|
||||||
| `ui.enabled` | Should the LiteLLM Admin UI be deployed | `true` |
|
|
||||||
| `ui.replicaCount` | The number of LiteLLM Admin UI pods to be deployed | `1` |
|
|
||||||
| `ui.image.repository` | LiteLLM Admin UI image repository | `ghcr.io/berriai/litellm` |
|
|
||||||
| `ui.image.pullPolicy` | LiteLLM Admin UI image pull policy | `IfNotPresent` |
|
|
||||||
| `ui.image.tag` | Overrides the image tag whose default the latest version of LiteLLM at the time this chart was published. | `""` |
|
|
||||||
| `ui.imagePullSecrets` | Registry credentials for the above images. | `[]` |
|
|
||||||
| `ui.service.type` | Kubernetes Service type (e.g. `LoadBalancer`, `ClusterIP`, etc.) | `ClusterIP` |
|
|
||||||
| `ui.service.port` | TCP port that the Kubernetes Service will listen on. Also the TCP port within the Pod that the web server will listen on. | `8000` |
|
|
||||||
| `ui.ingress.*` | See [values.yaml](./values.yaml) for example settings | N/A |
|
|
||||||
|
|
||||||
### Database Settings
|
### Database Settings
|
||||||
| Name | Description | Value |
|
| Name | Description | Value |
|
||||||
| ---------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----- |
|
| ---------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----- |
|
||||||
|
@ -86,18 +72,18 @@ type: Opaque
|
||||||
```
|
```
|
||||||
|
|
||||||
## Accessing the Admin UI
|
## Accessing the Admin UI
|
||||||
When browsing to the URL published per the settings in `ui.ingress.*`, you will
|
When browsing to the URL published per the settings in `ingress.*`, you will
|
||||||
be prompted for **Admin Configuration**. The **Proxy Endpoint** is the internal
|
be prompted for **Admin Configuration**. The **Proxy Endpoint** is the internal
|
||||||
(from the `litellm-ui` pod's perspective) URL published by the `litellm-proxy`
|
(from the `litellm` pod's perspective) URL published by the `<RELEASE>-litellm`
|
||||||
Kubernetes Service. If the deployment uses the default settings for this
|
Kubernetes Service. If the deployment uses the default settings for this
|
||||||
service, the **Proxy Endpoint** should be set to `http://litellm-proxy:8000`.
|
service, the **Proxy Endpoint** should be set to `http://<RELEASE>-litellm:8000`.
|
||||||
|
|
||||||
The **Proxy Key** is the value specified for `masterkey` or, if a `masterkey`
|
The **Proxy Key** is the value specified for `masterkey` or, if a `masterkey`
|
||||||
was not provided to the helm command line, the `masterkey` is a randomly
|
was not provided to the helm command line, the `masterkey` is a randomly
|
||||||
generated string stored in the `litellm-masterkey` Kubernetes Secret.
|
generated string stored in the `<RELEASE>-litellm-masterkey` Kubernetes Secret.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
kubectl -n litellm get secret litellm-masterkey -o jsonpath="{.data.masterkey}"
|
kubectl -n litellm get secret <RELEASE>-litellm-masterkey -o jsonpath="{.data.masterkey}"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Admin UI Limitations
|
## Admin UI Limitations
|
|
@ -41,14 +41,6 @@ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- define "litellm.ui.labels" -}}
|
|
||||||
helm.sh/chart: {{ include "litellm.chart" . }}
|
|
||||||
{{ include "litellm.ui.selectorLabels" . }}
|
|
||||||
{{- if .Chart.AppVersion }}
|
|
||||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
|
||||||
{{- end }}
|
|
||||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
{{/*
|
{{/*
|
||||||
Selector labels
|
Selector labels
|
||||||
|
@ -57,10 +49,6 @@ Selector labels
|
||||||
app.kubernetes.io/name: {{ include "litellm.name" . }}
|
app.kubernetes.io/name: {{ include "litellm.name" . }}
|
||||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{- define "litellm.ui.selectorLabels" -}}
|
|
||||||
app.kubernetes.io/name: {{ include "litellm.name" . }}-ui
|
|
||||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
|
||||||
{{- end }}
|
|
||||||
|
|
||||||
{{/*
|
{{/*
|
||||||
Create the name of the service account to use
|
Create the name of the service account to use
|
|
@ -1,7 +1,7 @@
|
||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
name: {{ include "litellm.fullname" . }}-proxy
|
name: {{ include "litellm.fullname" . }}
|
||||||
labels:
|
labels:
|
||||||
{{- include "litellm.labels" . | nindent 4 }}
|
{{- include "litellm.labels" . | nindent 4 }}
|
||||||
spec:
|
spec:
|
||||||
|
@ -41,12 +41,12 @@ spec:
|
||||||
- name: DATABASE_USERNAME
|
- name: DATABASE_USERNAME
|
||||||
valueFrom:
|
valueFrom:
|
||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
name: {{ include "litellm.name" . }}-dbcredentials
|
name: {{ include "litellm.fullname" . }}-dbcredentials
|
||||||
key: username
|
key: username
|
||||||
- name: PGPASSWORD
|
- name: PGPASSWORD
|
||||||
valueFrom:
|
valueFrom:
|
||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
name: {{ include "litellm.name" . }}-dbcredentials
|
name: {{ include "litellm.fullname" . }}-dbcredentials
|
||||||
key: password
|
key: password
|
||||||
- name: DATABASE_HOST
|
- name: DATABASE_HOST
|
||||||
value: {{ .Release.Name }}-postgresql
|
value: {{ .Release.Name }}-postgresql
|
||||||
|
@ -108,12 +108,12 @@ spec:
|
||||||
- name: DATABASE_USERNAME
|
- name: DATABASE_USERNAME
|
||||||
valueFrom:
|
valueFrom:
|
||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
name: {{ include "litellm.name" . }}-dbcredentials
|
name: {{ include "litellm.fullname" . }}-dbcredentials
|
||||||
key: username
|
key: username
|
||||||
- name: DATABASE_PASSWORD
|
- name: DATABASE_PASSWORD
|
||||||
valueFrom:
|
valueFrom:
|
||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
name: {{ include "litellm.name" . }}-dbcredentials
|
name: {{ include "litellm.fullname" . }}-dbcredentials
|
||||||
key: password
|
key: password
|
||||||
- name: DATABASE_HOST
|
- name: DATABASE_HOST
|
||||||
value: {{ .Release.Name }}-postgresql
|
value: {{ .Release.Name }}-postgresql
|
||||||
|
@ -140,7 +140,7 @@ spec:
|
||||||
- name: PROXY_MASTER_KEY
|
- name: PROXY_MASTER_KEY
|
||||||
valueFrom:
|
valueFrom:
|
||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
name: {{ include "litellm.name" . }}-masterkey
|
name: {{ include "litellm.fullname" . }}-masterkey
|
||||||
key: masterkey
|
key: masterkey
|
||||||
envFrom:
|
envFrom:
|
||||||
{{- range .Values.environmentSecrets }}
|
{{- range .Values.environmentSecrets }}
|
||||||
|
@ -150,16 +150,7 @@ spec:
|
||||||
args:
|
args:
|
||||||
- --config
|
- --config
|
||||||
- /etc/litellm/config.yaml
|
- /etc/litellm/config.yaml
|
||||||
# command:
|
- --run_gunicorn
|
||||||
# - bash
|
|
||||||
# - -c
|
|
||||||
# - |
|
|
||||||
# ls -la /etc/litellm/; cat /etc/litellm/config.yaml; export
|
|
||||||
# find / 2>/dev/null | grep -v -e '^/proc' -e '^/sys' -e '^/dev' >/tmp/before.list
|
|
||||||
# prisma generate
|
|
||||||
# find / 2>/dev/null | grep -v -e '^/proc' -e '^/sys' -e '^/dev' >/tmp/after.list
|
|
||||||
# diff -ruN /tmp/before.list /tmp/after.list
|
|
||||||
# sleep 3600
|
|
||||||
ports:
|
ports:
|
||||||
- name: http
|
- name: http
|
||||||
containerPort: {{ .Values.service.port }}
|
containerPort: {{ .Values.service.port }}
|
|
@ -1,5 +1,5 @@
|
||||||
{{- if .Values.ingress.enabled -}}
|
{{- if .Values.ingress.enabled -}}
|
||||||
{{- $fullName := (printf "%s%s" (include "litellm.fullname" .) "-proxy") -}}
|
{{- $fullName := include "litellm.fullname" . -}}
|
||||||
{{- $svcPort := .Values.service.port -}}
|
{{- $svcPort := .Values.service.port -}}
|
||||||
{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
|
{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
|
||||||
{{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
|
{{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
|
|
@ -2,7 +2,7 @@
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Secret
|
kind: Secret
|
||||||
metadata:
|
metadata:
|
||||||
name: {{ include "litellm.name" . }}-dbcredentials
|
name: {{ include "litellm.fullname" . }}-dbcredentials
|
||||||
data:
|
data:
|
||||||
# Password for the "postgres" user
|
# Password for the "postgres" user
|
||||||
postgres-password: {{ ( index .Values.postgresql.auth "postgres-password") | default "litellm" | b64enc }}
|
postgres-password: {{ ( index .Values.postgresql.auth "postgres-password") | default "litellm" | b64enc }}
|
|
@ -2,7 +2,7 @@
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Secret
|
kind: Secret
|
||||||
metadata:
|
metadata:
|
||||||
name: {{ include "litellm.name" . }}-masterkey
|
name: {{ include "litellm.fullname" . }}-masterkey
|
||||||
data:
|
data:
|
||||||
masterkey: {{ $masterkey | b64enc }}
|
masterkey: {{ $masterkey | b64enc }}
|
||||||
type: Opaque
|
type: Opaque
|
|
@ -1,7 +1,7 @@
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Service
|
kind: Service
|
||||||
metadata:
|
metadata:
|
||||||
name: {{ include "litellm.fullname" . }}-proxy
|
name: {{ include "litellm.fullname" . }}
|
||||||
labels:
|
labels:
|
||||||
{{- include "litellm.labels" . | nindent 4 }}
|
{{- include "litellm.labels" . | nindent 4 }}
|
||||||
spec:
|
spec:
|
|
@ -11,5 +11,5 @@ spec:
|
||||||
- name: wget
|
- name: wget
|
||||||
image: busybox
|
image: busybox
|
||||||
command: ['wget']
|
command: ['wget']
|
||||||
args: ['{{ include "litellm.fullname" . }}:{{ .Values.service.port }}']
|
args: ['{{ include "litellm.fullname" . }}:{{ .Values.service.port }}/health/readiness']
|
||||||
restartPolicy: Never
|
restartPolicy: Never
|
|
@ -5,7 +5,9 @@
|
||||||
replicaCount: 1
|
replicaCount: 1
|
||||||
|
|
||||||
image:
|
image:
|
||||||
repository: ghcr.io/berriai/litellm
|
# Use "ghcr.io/berriai/litellm-database" for optimized image with database
|
||||||
|
# Alternatively, use "ghcr.io/berriai/litellm" for the default image
|
||||||
|
repository: ghcr.io/berriai/litellm-database
|
||||||
pullPolicy: IfNotPresent
|
pullPolicy: IfNotPresent
|
||||||
# Overrides the image tag whose default is the chart appVersion.
|
# Overrides the image tag whose default is the chart appVersion.
|
||||||
# tag: "main-latest"
|
# tag: "main-latest"
|
||||||
|
@ -56,7 +58,7 @@ service:
|
||||||
port: 8000
|
port: 8000
|
||||||
|
|
||||||
ingress:
|
ingress:
|
||||||
enabled: true
|
enabled: false
|
||||||
className: "nginx"
|
className: "nginx"
|
||||||
annotations: {}
|
annotations: {}
|
||||||
# kubernetes.io/ingress.class: nginx
|
# kubernetes.io/ingress.class: nginx
|
||||||
|
@ -71,6 +73,8 @@ ingress:
|
||||||
# hosts:
|
# hosts:
|
||||||
# - chart-example.local
|
# - chart-example.local
|
||||||
|
|
||||||
|
# masterkey: changeit
|
||||||
|
|
||||||
# The elements within proxy_config are rendered as config.yaml for the proxy
|
# The elements within proxy_config are rendered as config.yaml for the proxy
|
||||||
# Examples: https://github.com/BerriAI/litellm/tree/main/litellm/proxy/example_config_yaml
|
# Examples: https://github.com/BerriAI/litellm/tree/main/litellm/proxy/example_config_yaml
|
||||||
# Reference: https://docs.litellm.ai/docs/proxy/configs
|
# Reference: https://docs.litellm.ai/docs/proxy/configs
|
||||||
|
@ -159,61 +163,6 @@ postgresql:
|
||||||
|
|
||||||
# A secret is created by this chart (litellm-helm) with the credentials that
|
# A secret is created by this chart (litellm-helm) with the credentials that
|
||||||
# the new Postgres instance should use.
|
# the new Postgres instance should use.
|
||||||
existingSecret: litellm-dbcredentials
|
# existingSecret: ""
|
||||||
secretKeys:
|
# secretKeys:
|
||||||
userPasswordKey: password
|
# userPasswordKey: password
|
||||||
|
|
||||||
ui:
|
|
||||||
enabled: true
|
|
||||||
replicaCount: 1
|
|
||||||
autoscaling:
|
|
||||||
enabled: false
|
|
||||||
image:
|
|
||||||
repository: ghcr.io/berriai/litellm-ui
|
|
||||||
pullPolicy: IfNotPresent
|
|
||||||
# Overrides the image tag whose default is the chart appVersion.
|
|
||||||
# tag: "main-latest"
|
|
||||||
# TODO: Switch to BerryAI repo and tags if/when they provide a ui image
|
|
||||||
# https://github.com/BerriAI/litellm/pull/1505
|
|
||||||
tag: ""
|
|
||||||
|
|
||||||
service:
|
|
||||||
type: ClusterIP
|
|
||||||
port: 8501
|
|
||||||
|
|
||||||
ingress:
|
|
||||||
enabled: true
|
|
||||||
className: "nginx"
|
|
||||||
annotations: {}
|
|
||||||
hosts:
|
|
||||||
- host: ui.example.local
|
|
||||||
paths:
|
|
||||||
- path: /
|
|
||||||
pathType: ImplementationSpecific
|
|
||||||
tls: []
|
|
||||||
|
|
||||||
podAnnotations: {}
|
|
||||||
podLabels: {}
|
|
||||||
|
|
||||||
podSecurityContext:
|
|
||||||
fsGroup: 1000
|
|
||||||
|
|
||||||
securityContext:
|
|
||||||
capabilities:
|
|
||||||
drop:
|
|
||||||
- ALL
|
|
||||||
readOnlyRootFilesystem: true
|
|
||||||
runAsNonRoot: true
|
|
||||||
runAsUser: 1000
|
|
||||||
|
|
||||||
resources: {}
|
|
||||||
|
|
||||||
volumes: []
|
|
||||||
|
|
||||||
volumeMounts: []
|
|
||||||
|
|
||||||
nodeSelector: {}
|
|
||||||
|
|
||||||
tolerations: []
|
|
||||||
|
|
||||||
affinity: {}
|
|
|
@ -16,6 +16,34 @@ response = completion(
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Specifying Safety Settings
|
||||||
|
In certain use cases you may need to call the models with [safety settings](https://ai.google.dev/docs/safety_setting_gemini) that differ from the defaults. To do so, simply pass the `safety_settings` argument to `completion` or `acompletion`. For example:
|
||||||
|
|
||||||
|
```python
|
||||||
|
response = completion(
|
||||||
|
model="gemini/gemini-pro",
|
||||||
|
messages=[{"role": "user", "content": "write code for saying hi from LiteLLM"}],
|
||||||
|
safety_settings=[
|
||||||
|
{
|
||||||
|
"category": "HARM_CATEGORY_HARASSMENT",
|
||||||
|
"threshold": "BLOCK_NONE",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"category": "HARM_CATEGORY_HATE_SPEECH",
|
||||||
|
"threshold": "BLOCK_NONE",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
|
||||||
|
"threshold": "BLOCK_NONE",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
|
||||||
|
"threshold": "BLOCK_NONE",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
# Gemini-Pro-Vision
|
# Gemini-Pro-Vision
|
||||||
LiteLLM Supports the following image types passed in `url`
|
LiteLLM Supports the following image types passed in `url`
|
||||||
- Images with direct links - https://storage.googleapis.com/github-repo/img/gemini/intro/landmark3.jpg
|
- Images with direct links - https://storage.googleapis.com/github-repo/img/gemini/intro/landmark3.jpg
|
||||||
|
|
|
@ -538,17 +538,13 @@ model_list: # will route requests to the least busy ollama model
|
||||||
api_base: "http://127.0.0.1:8003"
|
api_base: "http://127.0.0.1:8003"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Max Parallel Requests
|
|
||||||
|
|
||||||
To rate limit a user based on the number of parallel requests, e.g.:
|
## Configure DB Pool Limits + Connection Timeouts
|
||||||
if user's parallel requests > x, send a 429 error
|
|
||||||
if user's parallel requests <= x, let them use the API freely.
|
|
||||||
|
|
||||||
set the max parallel request limit on the config.yaml (note: this expects the user to be passing in an api key).
|
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
general_settings:
|
general_settings:
|
||||||
max_parallel_requests: 100 # max parallel requests for a user = 100
|
database_connection_pool_limit: 100 # sets connection pool for prisma client to postgres db at 100
|
||||||
|
database_connection_timeout: 60 # sets a 60s timeout for any connection call to the db
|
||||||
```
|
```
|
||||||
|
|
||||||
## All settings
|
## All settings
|
||||||
|
@ -577,6 +573,8 @@ general_settings:
|
||||||
"key_management_system": "google_kms", # either google_kms or azure_kms
|
"key_management_system": "google_kms", # either google_kms or azure_kms
|
||||||
"master_key": "string",
|
"master_key": "string",
|
||||||
"database_url": "string",
|
"database_url": "string",
|
||||||
|
"database_connection_pool_limit": 0, # default 100
|
||||||
|
"database_connection_timeout": 0, # default 60s
|
||||||
"database_type": "dynamo_db",
|
"database_type": "dynamo_db",
|
||||||
"database_args": {
|
"database_args": {
|
||||||
"billing_mode": "PROVISIONED_THROUGHPUT",
|
"billing_mode": "PROVISIONED_THROUGHPUT",
|
||||||
|
|
|
@ -151,10 +151,54 @@ kubectl port-forward service/litellm-service 4000:4000
|
||||||
|
|
||||||
Your OpenAI proxy server is now running on `http://0.0.0.0:4000`.
|
Your OpenAI proxy server is now running on `http://0.0.0.0:4000`.
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="helm-deploy" label="Helm">
|
||||||
|
|
||||||
|
### Step 1. Clone the repository
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git clone https://github.com/BerriAI/litellm.git
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 2. Deploy with Helm
|
||||||
|
|
||||||
|
```bash
|
||||||
|
helm install \
|
||||||
|
--set masterkey=SuPeRsEcReT \
|
||||||
|
mydeploy \
|
||||||
|
deploy/charts/litellm
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 3. Expose the service to localhost
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl \
|
||||||
|
port-forward \
|
||||||
|
service/mydeploy-litellm \
|
||||||
|
8000:8000
|
||||||
|
```
|
||||||
|
|
||||||
|
Your OpenAI proxy server is now running on `http://127.0.0.1:8000`.
|
||||||
|
|
||||||
</TabItem>
|
</TabItem>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
|
|
||||||
## Setting SSL Certification
|
## Advanced Deployment Settings
|
||||||
|
|
||||||
|
### Customization of the server root path
|
||||||
|
|
||||||
|
:::info
|
||||||
|
|
||||||
|
In a Kubernetes deployment, it's possible to utilize a shared DNS to host multiple applications by modifying the virtual service.
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
Customize the root path to eliminate the need for employing multiple DNS configurations during deployment.
|
||||||
|
|
||||||
|
👉 Set `SERVER_ROOT_PATH` in your .env and this will be set as your server root path
|
||||||
|
|
||||||
|
|
||||||
|
### Setting SSL Certification
|
||||||
|
|
||||||
Use this if you need to set SSL certificates for your on-prem LiteLLM proxy.
|
Use this if you need to set SSL certificates for your on-prem LiteLLM proxy.
|
||||||
|
|
||||||
|
|
|
@ -72,3 +72,78 @@ curl --location 'http://0.0.0.0:8000/key/generate' \
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Turn on/off per request
|
||||||
|
|
||||||
|
The proxy supports 2 request-level PII controls:
|
||||||
|
|
||||||
|
- *no-pii*: Optional(bool) - Allow user to turn off pii masking per request.
|
||||||
|
- *output_parse_pii*: Optional(bool) - Allow user to turn off pii output parsing per request.
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
**Step 1. Create key with pii permissions**
|
||||||
|
|
||||||
|
Set `allow_pii_controls` to true for a given key. This will allow the user to set request-level PII controls.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl --location 'http://0.0.0.0:8000/key/generate' \
|
||||||
|
--header 'Authorization: Bearer my-master-key' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--data '{
|
||||||
|
"permissions": {"allow_pii_controls": true}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Step 2. Turn off pii output parsing**
|
||||||
|
|
||||||
|
```python
|
||||||
|
import os
|
||||||
|
from openai import OpenAI
|
||||||
|
|
||||||
|
client = OpenAI(
|
||||||
|
# This is the default and can be omitted
|
||||||
|
api_key=os.environ.get("OPENAI_API_KEY"),
|
||||||
|
base_url="http://0.0.0.0:8000"
|
||||||
|
)
|
||||||
|
|
||||||
|
chat_completion = client.chat.completions.create(
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": "My name is Jane Doe, my number is 8382043839",
|
||||||
|
}
|
||||||
|
],
|
||||||
|
model="gpt-3.5-turbo",
|
||||||
|
extra_body={
|
||||||
|
"content_safety": {"output_parse_pii": False}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Step 3: See response**
|
||||||
|
|
||||||
|
```
|
||||||
|
{
|
||||||
|
"id": "chatcmpl-8c5qbGTILZa1S4CK3b31yj5N40hFN",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": "stop",
|
||||||
|
"index": 0,
|
||||||
|
"message": {
|
||||||
|
"content": "Hi [PERSON], what can I help you with?",
|
||||||
|
"role": "assistant"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 1704089632,
|
||||||
|
"model": "gpt-35-turbo",
|
||||||
|
"object": "chat.completion",
|
||||||
|
"system_fingerprint": null,
|
||||||
|
"usage": {
|
||||||
|
"completion_tokens": 47,
|
||||||
|
"prompt_tokens": 12,
|
||||||
|
"total_tokens": 59
|
||||||
|
},
|
||||||
|
"_response_ms": 1753.426
|
||||||
|
}
|
||||||
|
```
|
|
@ -93,6 +93,7 @@ Request Params:
|
||||||
- `config`: *Optional[dict]* - any key-specific configs, overrides config in config.yaml
|
- `config`: *Optional[dict]* - any key-specific configs, overrides config in config.yaml
|
||||||
- `spend`: *Optional[int]* - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend
|
- `spend`: *Optional[int]* - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend
|
||||||
- `max_budget`: *Optional[float]* - Specify max budget for a given key.
|
- `max_budget`: *Optional[float]* - Specify max budget for a given key.
|
||||||
|
- `model_max_budget`: *Optional[dict[str, float]]* - Specify a max budget per model, e.g. `model_max_budget={"gpt4": 0.5, "gpt-5": 0.01}`
|
||||||
- `max_parallel_requests`: *Optional[int]* - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
|
- `max_parallel_requests`: *Optional[int]* - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
|
||||||
- `metadata`: *Optional[dict]* - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
|
- `metadata`: *Optional[dict]* - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
|
||||||
|
|
||||||
|
@ -676,8 +677,6 @@ general_settings:
|
||||||
|
|
||||||
### [BETA] Dynamo DB
|
### [BETA] Dynamo DB
|
||||||
|
|
||||||
Only live in `v1.16.21.dev1`.
|
|
||||||
|
|
||||||
#### Step 1. Save keys to env
|
#### Step 1. Save keys to env
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
|
|
|
@ -129,7 +129,7 @@ const sidebars = {
|
||||||
"proxy/caching",
|
"proxy/caching",
|
||||||
{
|
{
|
||||||
"type": "category",
|
"type": "category",
|
||||||
"label": "Logging, Alerting, Caching",
|
"label": "Logging, Alerting",
|
||||||
"items": [
|
"items": [
|
||||||
"proxy/logging",
|
"proxy/logging",
|
||||||
"proxy/alerting",
|
"proxy/alerting",
|
||||||
|
|
|
@ -6,9 +6,4 @@ Code in this folder is licensed under a commercial license. Please review the [L
|
||||||
|
|
||||||
👉 **Using in an Enterprise / Need specific features ?** Meet with us [here](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat?month=2024-02)
|
👉 **Using in an Enterprise / Need specific features ?** Meet with us [here](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat?month=2024-02)
|
||||||
|
|
||||||
## Enterprise Features:
|
See all Enterprise Features here 👉 [Docs](https://docs.litellm.ai/docs/proxy/enterprise)
|
||||||
|
|
||||||
- Track, View spend per tag https://docs.litellm.ai/docs/proxy/spend
|
|
||||||
- Custom API / microservice callbacks
|
|
||||||
- Google Text Moderation API
|
|
||||||
|
|
||||||
|
|
|
@ -110,7 +110,6 @@ class _ENTERPRISE_LlamaGuard(CustomLogger):
|
||||||
-1
|
-1
|
||||||
] # get the last response - llama guard has a 4k token limit
|
] # get the last response - llama guard has a 4k token limit
|
||||||
self.set_custom_prompt_template(messages=[safety_check_messages])
|
self.set_custom_prompt_template(messages=[safety_check_messages])
|
||||||
# print(f"self.model: {self.model}")
|
|
||||||
response = await litellm.acompletion(
|
response = await litellm.acompletion(
|
||||||
model=self.model,
|
model=self.model,
|
||||||
messages=[safety_check_messages],
|
messages=[safety_check_messages],
|
||||||
|
|
|
@ -121,6 +121,13 @@ def completion(
|
||||||
## Load Config
|
## Load Config
|
||||||
inference_params = copy.deepcopy(optional_params)
|
inference_params = copy.deepcopy(optional_params)
|
||||||
stream = inference_params.pop("stream", None)
|
stream = inference_params.pop("stream", None)
|
||||||
|
|
||||||
|
# Handle safety settings
|
||||||
|
safety_settings_param = inference_params.pop("safety_settings", None)
|
||||||
|
safety_settings = None
|
||||||
|
if safety_settings_param:
|
||||||
|
safety_settings = [genai.types.SafetySettingDict(x) for x in safety_settings_param]
|
||||||
|
|
||||||
config = litellm.GeminiConfig.get_config()
|
config = litellm.GeminiConfig.get_config()
|
||||||
for k, v in config.items():
|
for k, v in config.items():
|
||||||
if (
|
if (
|
||||||
|
@ -141,11 +148,13 @@ def completion(
|
||||||
response = _model.generate_content(
|
response = _model.generate_content(
|
||||||
contents=prompt,
|
contents=prompt,
|
||||||
generation_config=genai.types.GenerationConfig(**inference_params),
|
generation_config=genai.types.GenerationConfig(**inference_params),
|
||||||
|
safety_settings=safety_settings,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
response = _model.generate_content(
|
response = _model.generate_content(
|
||||||
contents=prompt,
|
contents=prompt,
|
||||||
generation_config=genai.types.GenerationConfig(**inference_params),
|
generation_config=genai.types.GenerationConfig(**inference_params),
|
||||||
|
safety_settings=safety_settings,
|
||||||
stream=True,
|
stream=True,
|
||||||
)
|
)
|
||||||
return response
|
return response
|
||||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
||||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/c18941d97fb7245b.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48016,[\"145\",\"static/chunks/145-9c160ad5539e000f.js\",\"931\",\"static/chunks/app/page-7bb820bd6902dbf2.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/c18941d97fb7245b.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"unBuvDqydg0yodtP5c3nQ\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"
children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be 
found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/c18941d97fb7245b.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48016,[\"145\",\"static/chunks/145-9c160ad5539e000f.js\",\"931\",\"static/chunks/app/page-2322bcdc2ec71284.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/c18941d97fb7245b.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"S_8LZOnl2nyURq-NYnh2p\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"
children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be 
found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
||||||
2:I[77831,[],""]
|
2:I[77831,[],""]
|
||||||
3:I[48016,["145","static/chunks/145-9c160ad5539e000f.js","931","static/chunks/app/page-7bb820bd6902dbf2.js"],""]
|
3:I[48016,["145","static/chunks/145-9c160ad5539e000f.js","931","static/chunks/app/page-2322bcdc2ec71284.js"],""]
|
||||||
4:I[5613,[],""]
|
4:I[5613,[],""]
|
||||||
5:I[31778,[],""]
|
5:I[31778,[],""]
|
||||||
0:["unBuvDqydg0yodtP5c3nQ",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/c18941d97fb7245b.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
0:["S_8LZOnl2nyURq-NYnh2p",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/c18941d97fb7245b.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||||
1:null
|
1:null
|
||||||
|
|
|
@ -155,6 +155,9 @@ class GenerateKeyRequest(GenerateRequestBase):
|
||||||
aliases: Optional[dict] = {}
|
aliases: Optional[dict] = {}
|
||||||
config: Optional[dict] = {}
|
config: Optional[dict] = {}
|
||||||
permissions: Optional[dict] = {}
|
permissions: Optional[dict] = {}
|
||||||
|
model_max_budget: Optional[dict] = (
|
||||||
|
{}
|
||||||
|
) # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
|
||||||
|
|
||||||
|
|
||||||
class GenerateKeyResponse(GenerateKeyRequest):
|
class GenerateKeyResponse(GenerateKeyRequest):
|
||||||
|
@ -167,7 +170,13 @@ class GenerateKeyResponse(GenerateKeyRequest):
|
||||||
def set_model_info(cls, values):
|
def set_model_info(cls, values):
|
||||||
if values.get("token") is not None:
|
if values.get("token") is not None:
|
||||||
values.update({"key": values.get("token")})
|
values.update({"key": values.get("token")})
|
||||||
dict_fields = ["metadata", "aliases", "config", "permissions"]
|
dict_fields = [
|
||||||
|
"metadata",
|
||||||
|
"aliases",
|
||||||
|
"config",
|
||||||
|
"permissions",
|
||||||
|
"model_max_budget",
|
||||||
|
]
|
||||||
for field in dict_fields:
|
for field in dict_fields:
|
||||||
value = values.get(field)
|
value = values.get(field)
|
||||||
if value is not None and isinstance(value, str):
|
if value is not None and isinstance(value, str):
|
||||||
|
@ -302,6 +311,13 @@ class ConfigGeneralSettings(LiteLLMBase):
|
||||||
None,
|
None,
|
||||||
description="connect to a postgres db - needed for generating temporary keys + tracking spend / key",
|
description="connect to a postgres db - needed for generating temporary keys + tracking spend / key",
|
||||||
)
|
)
|
||||||
|
database_connection_pool_limit: Optional[int] = Field(
|
||||||
|
100,
|
||||||
|
description="default connection pool for prisma client connecting to postgres db",
|
||||||
|
)
|
||||||
|
database_connection_timeout: Optional[float] = Field(
|
||||||
|
60, description="default timeout for a connection to the database"
|
||||||
|
)
|
||||||
database_type: Optional[Literal["dynamo_db"]] = Field(
|
database_type: Optional[Literal["dynamo_db"]] = Field(
|
||||||
None, description="to use dynamodb instead of postgres db"
|
None, description="to use dynamodb instead of postgres db"
|
||||||
)
|
)
|
||||||
|
@ -383,6 +399,8 @@ class LiteLLM_VerificationToken(LiteLLMBase):
|
||||||
budget_reset_at: Optional[datetime] = None
|
budget_reset_at: Optional[datetime] = None
|
||||||
allowed_cache_controls: Optional[list] = []
|
allowed_cache_controls: Optional[list] = []
|
||||||
permissions: Dict = {}
|
permissions: Dict = {}
|
||||||
|
model_spend: Dict = {}
|
||||||
|
model_max_budget: Dict = {}
|
||||||
|
|
||||||
|
|
||||||
class UserAPIKeyAuth(
|
class UserAPIKeyAuth(
|
||||||
|
@ -410,6 +428,8 @@ class LiteLLM_UserTable(LiteLLMBase):
|
||||||
user_id: str
|
user_id: str
|
||||||
max_budget: Optional[float]
|
max_budget: Optional[float]
|
||||||
spend: float = 0.0
|
spend: float = 0.0
|
||||||
|
model_max_budget: Optional[Dict] = {}
|
||||||
|
model_spend: Optional[Dict] = {}
|
||||||
user_email: Optional[str]
|
user_email: Optional[str]
|
||||||
models: list = []
|
models: list = []
|
||||||
|
|
||||||
|
|
|
@ -287,6 +287,8 @@ class DynamoDBWrapper(CustomDB):
|
||||||
or k == "config"
|
or k == "config"
|
||||||
or k == "metadata"
|
or k == "metadata"
|
||||||
or k == "permissions"
|
or k == "permissions"
|
||||||
|
or k == "model_spend"
|
||||||
|
or k == "model_max_budget"
|
||||||
)
|
)
|
||||||
and v is not None
|
and v is not None
|
||||||
and isinstance(v, str)
|
and isinstance(v, str)
|
||||||
|
|
|
@ -119,6 +119,9 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
|
||||||
call_type: str,
|
call_type: str,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
|
- Check if request turned off pii
|
||||||
|
- Check if user allowed to turn off pii (key permissions -> 'allow_pii_controls')
|
||||||
|
|
||||||
- Take the request data
|
- Take the request data
|
||||||
- Call /analyze -> get the results
|
- Call /analyze -> get the results
|
||||||
- Call /anonymize w/ the analyze results -> get the redacted text
|
- Call /anonymize w/ the analyze results -> get the redacted text
|
||||||
|
@ -126,13 +129,59 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
|
||||||
For multiple messages in /chat/completions, we'll need to call them in parallel.
|
For multiple messages in /chat/completions, we'll need to call them in parallel.
|
||||||
"""
|
"""
|
||||||
permissions = user_api_key_dict.permissions
|
permissions = user_api_key_dict.permissions
|
||||||
|
|
||||||
if permissions.get("pii", True) == False: # allow key to turn off pii masking
|
|
||||||
return data
|
|
||||||
|
|
||||||
output_parse_pii = permissions.get(
|
output_parse_pii = permissions.get(
|
||||||
"output_parse_pii", litellm.output_parse_pii
|
"output_parse_pii", litellm.output_parse_pii
|
||||||
) # allow key to turn on/off output parsing for pii
|
) # allow key to turn on/off output parsing for pii
|
||||||
|
no_pii = permissions.get(
|
||||||
|
"no-pii", None
|
||||||
|
) # allow key to turn on/off pii masking (if user is allowed to set pii controls, then they can override the key defaults)
|
||||||
|
|
||||||
|
if no_pii is None:
|
||||||
|
# check older way of turning on/off pii
|
||||||
|
no_pii = not permissions.get("pii", True)
|
||||||
|
|
||||||
|
content_safety = data.get("content_safety", None)
|
||||||
|
verbose_proxy_logger.debug(f"content_safety: {content_safety}")
|
||||||
|
## Request-level turn on/off PII controls ##
|
||||||
|
if content_safety is not None and isinstance(content_safety, dict):
|
||||||
|
# pii masking ##
|
||||||
|
if (
|
||||||
|
content_safety.get("no-pii", None) is not None
|
||||||
|
and content_safety.get("no-pii") == True
|
||||||
|
):
|
||||||
|
# check if user allowed to turn this off
|
||||||
|
if permissions.get("allow_pii_controls", False) == False:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail={"error": "Not allowed to set PII controls per request"},
|
||||||
|
)
|
||||||
|
else: # user allowed to turn off pii masking
|
||||||
|
no_pii = content_safety.get("no-pii")
|
||||||
|
if not isinstance(no_pii, bool):
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail={"error": "no_pii needs to be a boolean value"},
|
||||||
|
)
|
||||||
|
## pii output parsing ##
|
||||||
|
if content_safety.get("output_parse_pii", None) is not None:
|
||||||
|
# check if user allowed to turn this off
|
||||||
|
if permissions.get("allow_pii_controls", False) == False:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail={"error": "Not allowed to set PII controls per request"},
|
||||||
|
)
|
||||||
|
else: # user allowed to turn on/off pii output parsing
|
||||||
|
output_parse_pii = content_safety.get("output_parse_pii")
|
||||||
|
if not isinstance(output_parse_pii, bool):
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail={
|
||||||
|
"error": "output_parse_pii needs to be a boolean value"
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
if no_pii == False: # turn off pii masking
|
||||||
|
return data
|
||||||
|
|
||||||
if call_type == "completion": # /chat/completions requests
|
if call_type == "completion": # /chat/completions requests
|
||||||
messages = data["messages"]
|
messages = data["messages"]
|
||||||
|
|
|
@ -409,6 +409,8 @@ def run_server(
|
||||||
"uvicorn, gunicorn needs to be imported. Run - `pip install 'litellm[proxy]'`"
|
"uvicorn, gunicorn needs to be imported. Run - `pip install 'litellm[proxy]'`"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
db_connection_pool_limit = 100
|
||||||
|
db_connection_timeout = 60
|
||||||
if config is not None:
|
if config is not None:
|
||||||
"""
|
"""
|
||||||
Allow user to pass in db url via config
|
Allow user to pass in db url via config
|
||||||
|
@ -427,6 +429,12 @@ def run_server(
|
||||||
proxy_config.load_config(router=None, config_file_path=config)
|
proxy_config.load_config(router=None, config_file_path=config)
|
||||||
)
|
)
|
||||||
database_url = general_settings.get("database_url", None)
|
database_url = general_settings.get("database_url", None)
|
||||||
|
db_connection_pool_limit = general_settings.get(
|
||||||
|
"database_connection_pool_limit", 100
|
||||||
|
)
|
||||||
|
db_connection_timeout = general_settings.get(
|
||||||
|
"database_connection_timeout", 60
|
||||||
|
)
|
||||||
if database_url and database_url.startswith("os.environ/"):
|
if database_url and database_url.startswith("os.environ/"):
|
||||||
original_dir = os.getcwd()
|
original_dir = os.getcwd()
|
||||||
# set the working directory to where this script is
|
# set the working directory to where this script is
|
||||||
|
@ -447,14 +455,19 @@ def run_server(
|
||||||
try:
|
try:
|
||||||
if os.getenv("DATABASE_URL", None) is not None:
|
if os.getenv("DATABASE_URL", None) is not None:
|
||||||
### add connection pool + pool timeout args
|
### add connection pool + pool timeout args
|
||||||
params = {"connection_limit": 100, "pool_timeout": 60}
|
params = {
|
||||||
|
"connection_limit": db_connection_pool_limit,
|
||||||
|
"pool_timeout": db_connection_timeout,
|
||||||
|
}
|
||||||
database_url = os.getenv("DATABASE_URL")
|
database_url = os.getenv("DATABASE_URL")
|
||||||
modified_url = append_query_params(database_url, params)
|
modified_url = append_query_params(database_url, params)
|
||||||
os.environ["DATABASE_URL"] = modified_url
|
os.environ["DATABASE_URL"] = modified_url
|
||||||
###
|
|
||||||
if os.getenv("DIRECT_URL", None) is not None:
|
if os.getenv("DIRECT_URL", None) is not None:
|
||||||
### add connection pool + pool timeout args
|
### add connection pool + pool timeout args
|
||||||
params = {"connection_limit": 100, "pool_timeout": 60}
|
params = {
|
||||||
|
"connection_limit": db_connection_pool_limit,
|
||||||
|
"pool_timeout": db_connection_timeout,
|
||||||
|
}
|
||||||
database_url = os.getenv("DIRECT_URL")
|
database_url = os.getenv("DIRECT_URL")
|
||||||
modified_url = append_query_params(database_url, params)
|
modified_url = append_query_params(database_url, params)
|
||||||
os.environ["DIRECT_URL"] = modified_url
|
os.environ["DIRECT_URL"] = modified_url
|
||||||
|
|
|
@ -93,6 +93,7 @@ from litellm.proxy.utils import (
|
||||||
html_form,
|
html_form,
|
||||||
_read_request_body,
|
_read_request_body,
|
||||||
_is_valid_team_configs,
|
_is_valid_team_configs,
|
||||||
|
_is_user_proxy_admin,
|
||||||
)
|
)
|
||||||
from litellm.proxy.secret_managers.google_kms import load_google_kms
|
from litellm.proxy.secret_managers.google_kms import load_google_kms
|
||||||
import pydantic
|
import pydantic
|
||||||
|
@ -143,6 +144,9 @@ app = FastAPI(
|
||||||
title="LiteLLM API",
|
title="LiteLLM API",
|
||||||
description=f"Proxy Server to call 100+ LLMs in the OpenAI format\n\n{ui_message}",
|
description=f"Proxy Server to call 100+ LLMs in the OpenAI format\n\n{ui_message}",
|
||||||
version=version,
|
version=version,
|
||||||
|
root_path=os.environ.get(
|
||||||
|
"SERVER_ROOT_PATH", ""
|
||||||
|
), # check if user passed root path, FastAPI defaults this value to ""
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -376,6 +380,11 @@ async def user_api_key_auth(
|
||||||
# 3. If 'user' passed to /chat/completions, /embeddings endpoint is in budget
|
# 3. If 'user' passed to /chat/completions, /embeddings endpoint is in budget
|
||||||
# 4. If token is expired
|
# 4. If token is expired
|
||||||
# 5. If token spend is under Budget for the token
|
# 5. If token spend is under Budget for the token
|
||||||
|
# 6. If token spend per model is under budget per model
|
||||||
|
|
||||||
|
request_data = await _read_request_body(
|
||||||
|
request=request
|
||||||
|
) # request data, used across all checks. Making this easily available
|
||||||
|
|
||||||
# Check 1. If token can call model
|
# Check 1. If token can call model
|
||||||
litellm.model_alias_map = valid_token.aliases
|
litellm.model_alias_map = valid_token.aliases
|
||||||
|
@ -450,7 +459,6 @@ async def user_api_key_auth(
|
||||||
if (
|
if (
|
||||||
litellm.max_user_budget is not None
|
litellm.max_user_budget is not None
|
||||||
): # Check if 'user' passed in /chat/completions is in budget, only checked if litellm.max_user_budget is set
|
): # Check if 'user' passed in /chat/completions is in budget, only checked if litellm.max_user_budget is set
|
||||||
request_data = await _read_request_body(request=request)
|
|
||||||
user_passed_to_chat_completions = request_data.get("user", None)
|
user_passed_to_chat_completions = request_data.get("user", None)
|
||||||
if user_passed_to_chat_completions is not None:
|
if user_passed_to_chat_completions is not None:
|
||||||
user_id_list.append(user_passed_to_chat_completions)
|
user_id_list.append(user_passed_to_chat_completions)
|
||||||
|
@ -496,11 +504,7 @@ async def user_api_key_auth(
|
||||||
continue
|
continue
|
||||||
assert isinstance(_user, dict)
|
assert isinstance(_user, dict)
|
||||||
# check if user is admin #
|
# check if user is admin #
|
||||||
if (
|
|
||||||
_user.get("user_role", None) is not None
|
|
||||||
and _user.get("user_role") == "proxy_admin"
|
|
||||||
):
|
|
||||||
return UserAPIKeyAuth(api_key=master_key)
|
|
||||||
# Token exists, not expired now check if its in budget for the user
|
# Token exists, not expired now check if its in budget for the user
|
||||||
user_max_budget = _user.get("max_budget", None)
|
user_max_budget = _user.get("max_budget", None)
|
||||||
user_current_spend = _user.get("spend", None)
|
user_current_spend = _user.get("spend", None)
|
||||||
|
@ -587,6 +591,25 @@ async def user_api_key_auth(
|
||||||
f"ExceededTokenBudget: Current spend for token: {valid_token.spend}; Max Budget for Token: {valid_token.max_budget}"
|
f"ExceededTokenBudget: Current spend for token: {valid_token.spend}; Max Budget for Token: {valid_token.max_budget}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Check 5. Token Model Spend is under Model budget
|
||||||
|
max_budget_per_model = valid_token.model_max_budget
|
||||||
|
spend_per_model = valid_token.model_spend
|
||||||
|
|
||||||
|
if max_budget_per_model is not None and spend_per_model is not None:
|
||||||
|
current_model = request_data.get("model")
|
||||||
|
if current_model is not None:
|
||||||
|
current_model_spend = spend_per_model.get(current_model, None)
|
||||||
|
current_model_budget = max_budget_per_model.get(current_model, None)
|
||||||
|
|
||||||
|
if (
|
||||||
|
current_model_spend is not None
|
||||||
|
and current_model_budget is not None
|
||||||
|
):
|
||||||
|
if current_model_spend > current_model_budget:
|
||||||
|
raise Exception(
|
||||||
|
f"ExceededModelBudget: Current spend for model: {current_model_spend}; Max Budget for Model: {current_model_budget}"
|
||||||
|
)
|
||||||
|
|
||||||
# Token passed all checks
|
# Token passed all checks
|
||||||
api_key = valid_token.token
|
api_key = valid_token.token
|
||||||
|
|
||||||
|
@ -616,11 +639,15 @@ async def user_api_key_auth(
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
if (
|
if (
|
||||||
|
(
|
||||||
route.startswith("/key/")
|
route.startswith("/key/")
|
||||||
or route.startswith("/user/")
|
or route.startswith("/user/")
|
||||||
or route.startswith("/model/")
|
or route.startswith("/model/")
|
||||||
or route.startswith("/spend/")
|
or route.startswith("/spend/")
|
||||||
) and (not is_master_key_valid):
|
)
|
||||||
|
and (not is_master_key_valid)
|
||||||
|
and (not _is_user_proxy_admin(user_id_information))
|
||||||
|
):
|
||||||
allow_user_auth = False
|
allow_user_auth = False
|
||||||
if (
|
if (
|
||||||
general_settings.get("allow_user_auth", False) == True
|
general_settings.get("allow_user_auth", False) == True
|
||||||
|
@ -711,6 +738,9 @@ async def user_api_key_auth(
|
||||||
):
|
):
|
||||||
# Do something if the current route starts with any of the allowed routes
|
# Do something if the current route starts with any of the allowed routes
|
||||||
pass
|
pass
|
||||||
|
else:
|
||||||
|
if _is_user_proxy_admin(user_id_information):
|
||||||
|
pass
|
||||||
else:
|
else:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"This key is made for LiteLLM UI, Tried to access route: {route}. Not allowed"
|
f"This key is made for LiteLLM UI, Tried to access route: {route}. Not allowed"
|
||||||
|
@ -937,13 +967,26 @@ async def update_database(
|
||||||
# Calculate the new cost by adding the existing cost and response_cost
|
# Calculate the new cost by adding the existing cost and response_cost
|
||||||
existing_spend_obj.spend = existing_spend + response_cost
|
existing_spend_obj.spend = existing_spend + response_cost
|
||||||
|
|
||||||
|
# track cost per model, for the given user
|
||||||
|
spend_per_model = existing_spend_obj.model_spend or {}
|
||||||
|
current_model = kwargs.get("model")
|
||||||
|
|
||||||
|
if current_model is not None and spend_per_model is not None:
|
||||||
|
if spend_per_model.get(current_model) is None:
|
||||||
|
spend_per_model[current_model] = response_cost
|
||||||
|
else:
|
||||||
|
spend_per_model[current_model] += response_cost
|
||||||
|
existing_spend_obj.model_spend = spend_per_model
|
||||||
|
|
||||||
valid_token = user_api_key_cache.get_cache(key=id)
|
valid_token = user_api_key_cache.get_cache(key=id)
|
||||||
if valid_token is not None and isinstance(valid_token, dict):
|
if valid_token is not None and isinstance(valid_token, dict):
|
||||||
user_api_key_cache.set_cache(
|
user_api_key_cache.set_cache(
|
||||||
key=id, value=existing_spend_obj.json()
|
key=id, value=existing_spend_obj.json()
|
||||||
)
|
)
|
||||||
|
|
||||||
verbose_proxy_logger.debug(f"new cost: {existing_spend_obj.spend}")
|
verbose_proxy_logger.debug(
|
||||||
|
f"user - new cost: {existing_spend_obj.spend}, user_id: {id}"
|
||||||
|
)
|
||||||
data_list.append(existing_spend_obj)
|
data_list.append(existing_spend_obj)
|
||||||
|
|
||||||
# Update the cost column for the given user id
|
# Update the cost column for the given user id
|
||||||
|
@ -980,15 +1023,28 @@ async def update_database(
|
||||||
# Calculate the new cost by adding the existing cost and response_cost
|
# Calculate the new cost by adding the existing cost and response_cost
|
||||||
new_spend = existing_spend + response_cost
|
new_spend = existing_spend + response_cost
|
||||||
|
|
||||||
verbose_proxy_logger.debug(f"new cost: {new_spend}")
|
# track cost per model, for the given key
|
||||||
|
spend_per_model = existing_spend_obj.model_spend or {}
|
||||||
|
current_model = kwargs.get("model")
|
||||||
|
if current_model is not None and spend_per_model is not None:
|
||||||
|
if spend_per_model.get(current_model) is None:
|
||||||
|
spend_per_model[current_model] = response_cost
|
||||||
|
else:
|
||||||
|
spend_per_model[current_model] += response_cost
|
||||||
|
|
||||||
|
verbose_proxy_logger.debug(
|
||||||
|
f"new cost: {new_spend}, new spend per model: {spend_per_model}"
|
||||||
|
)
|
||||||
# Update the cost column for the given token
|
# Update the cost column for the given token
|
||||||
await prisma_client.update_data(
|
await prisma_client.update_data(
|
||||||
token=token, data={"spend": new_spend}
|
token=token,
|
||||||
|
data={"spend": new_spend, "model_spend": spend_per_model},
|
||||||
)
|
)
|
||||||
|
|
||||||
valid_token = user_api_key_cache.get_cache(key=token)
|
valid_token = user_api_key_cache.get_cache(key=token)
|
||||||
if valid_token is not None:
|
if valid_token is not None:
|
||||||
valid_token.spend = new_spend
|
valid_token.spend = new_spend
|
||||||
|
valid_token.model_spend = spend_per_model
|
||||||
user_api_key_cache.set_cache(key=token, value=valid_token)
|
user_api_key_cache.set_cache(key=token, value=valid_token)
|
||||||
elif custom_db_client is not None:
|
elif custom_db_client is not None:
|
||||||
# Fetch the existing cost for the given token
|
# Fetch the existing cost for the given token
|
||||||
|
@ -1068,10 +1124,21 @@ async def update_database(
|
||||||
# Calculate the new cost by adding the existing cost and response_cost
|
# Calculate the new cost by adding the existing cost and response_cost
|
||||||
new_spend = existing_spend + response_cost
|
new_spend = existing_spend + response_cost
|
||||||
|
|
||||||
|
# track cost per model, for the given team
|
||||||
|
spend_per_model = existing_spend_obj.model_spend or {}
|
||||||
|
current_model = kwargs.get("model")
|
||||||
|
if current_model is not None and spend_per_model is not None:
|
||||||
|
if spend_per_model.get(current_model) is None:
|
||||||
|
spend_per_model[current_model] = response_cost
|
||||||
|
else:
|
||||||
|
spend_per_model[current_model] += response_cost
|
||||||
|
|
||||||
verbose_proxy_logger.debug(f"new cost: {new_spend}")
|
verbose_proxy_logger.debug(f"new cost: {new_spend}")
|
||||||
# Update the cost column for the given token
|
# Update the cost column for the given token
|
||||||
await prisma_client.update_data(
|
await prisma_client.update_data(
|
||||||
team_id=team_id, data={"spend": new_spend}, table_name="team"
|
team_id=team_id,
|
||||||
|
data={"spend": new_spend, "model_spend": spend_per_model},
|
||||||
|
table_name="team",
|
||||||
)
|
)
|
||||||
|
|
||||||
elif custom_db_client is not None:
|
elif custom_db_client is not None:
|
||||||
|
@ -1645,6 +1712,7 @@ async def generate_key_helper_fn(
|
||||||
key_alias: Optional[str] = None,
|
key_alias: Optional[str] = None,
|
||||||
allowed_cache_controls: Optional[list] = [],
|
allowed_cache_controls: Optional[list] = [],
|
||||||
permissions: Optional[dict] = {},
|
permissions: Optional[dict] = {},
|
||||||
|
model_max_budget: Optional[dict] = {},
|
||||||
):
|
):
|
||||||
global prisma_client, custom_db_client, user_api_key_cache
|
global prisma_client, custom_db_client, user_api_key_cache
|
||||||
|
|
||||||
|
@ -1678,6 +1746,8 @@ async def generate_key_helper_fn(
|
||||||
config_json = json.dumps(config)
|
config_json = json.dumps(config)
|
||||||
permissions_json = json.dumps(permissions)
|
permissions_json = json.dumps(permissions)
|
||||||
metadata_json = json.dumps(metadata)
|
metadata_json = json.dumps(metadata)
|
||||||
|
model_max_budget_json = json.dumps(model_max_budget)
|
||||||
|
|
||||||
user_id = user_id or str(uuid.uuid4())
|
user_id = user_id or str(uuid.uuid4())
|
||||||
user_role = user_role or "app_user"
|
user_role = user_role or "app_user"
|
||||||
tpm_limit = tpm_limit
|
tpm_limit = tpm_limit
|
||||||
|
@ -1720,6 +1790,7 @@ async def generate_key_helper_fn(
|
||||||
"budget_reset_at": key_reset_at,
|
"budget_reset_at": key_reset_at,
|
||||||
"allowed_cache_controls": allowed_cache_controls,
|
"allowed_cache_controls": allowed_cache_controls,
|
||||||
"permissions": permissions_json,
|
"permissions": permissions_json,
|
||||||
|
"model_max_budget": model_max_budget_json,
|
||||||
}
|
}
|
||||||
if (
|
if (
|
||||||
general_settings.get("allow_user_auth", False) == True
|
general_settings.get("allow_user_auth", False) == True
|
||||||
|
@ -1735,6 +1806,11 @@ async def generate_key_helper_fn(
|
||||||
saved_token["metadata"] = json.loads(saved_token["metadata"])
|
saved_token["metadata"] = json.loads(saved_token["metadata"])
|
||||||
if isinstance(saved_token["permissions"], str):
|
if isinstance(saved_token["permissions"], str):
|
||||||
saved_token["permissions"] = json.loads(saved_token["permissions"])
|
saved_token["permissions"] = json.loads(saved_token["permissions"])
|
||||||
|
if isinstance(saved_token["model_max_budget"], str):
|
||||||
|
saved_token["model_max_budget"] = json.loads(
|
||||||
|
saved_token["model_max_budget"]
|
||||||
|
)
|
||||||
|
|
||||||
if saved_token.get("expires", None) is not None and isinstance(
|
if saved_token.get("expires", None) is not None and isinstance(
|
||||||
saved_token["expires"], datetime
|
saved_token["expires"], datetime
|
||||||
):
|
):
|
||||||
|
@ -3078,6 +3154,20 @@ async def generate_key_fn(
|
||||||
- max_parallel_requests: Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
|
- max_parallel_requests: Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
|
||||||
- metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
|
- metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
|
||||||
- permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). Example - {"pii": false}
|
- permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). Example - {"pii": false}
|
||||||
|
- model_max_budget: Optional[dict] - key-specific model budget in USD. Example - {"text-davinci-002": 0.5, "gpt-3.5-turbo": 0.5}. IF null or {} then no model specific budget.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
1. Allow users to turn on/off pii masking
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl --location 'http://0.0.0.0:8000/key/generate' \
|
||||||
|
--header 'Authorization: Bearer sk-1234' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--data '{
|
||||||
|
"permissions": {"allow_pii_controls": true}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
- key: (str) The generated api key
|
- key: (str) The generated api key
|
||||||
|
@ -4871,7 +4961,7 @@ async def auth_callback(request: Request):
|
||||||
if user_id is None:
|
if user_id is None:
|
||||||
user_id = getattr(result, "first_name", "") + getattr(result, "last_name", "")
|
user_id = getattr(result, "first_name", "") + getattr(result, "last_name", "")
|
||||||
response = await generate_key_helper_fn(
|
response = await generate_key_helper_fn(
|
||||||
**{"duration": "1hr", "key_max_budget": 0, "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": user_id, "team_id": "litellm-dashboard", "user_email": user_email} # type: ignore
|
**{"duration": "1hr", "key_max_budget": 0.01, "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": user_id, "team_id": "litellm-dashboard", "user_email": user_email} # type: ignore
|
||||||
)
|
)
|
||||||
key = response["token"] # type: ignore
|
key = response["token"] # type: ignore
|
||||||
user_id = response["user_id"] # type: ignore
|
user_id = response["user_id"] # type: ignore
|
||||||
|
|
|
@ -24,6 +24,8 @@ model LiteLLM_TeamTable {
|
||||||
budget_reset_at DateTime?
|
budget_reset_at DateTime?
|
||||||
created_at DateTime @default(now()) @map("created_at")
|
created_at DateTime @default(now()) @map("created_at")
|
||||||
updated_at DateTime @default(now()) @updatedAt @map("updated_at")
|
updated_at DateTime @default(now()) @updatedAt @map("updated_at")
|
||||||
|
model_spend Json @default("{}")
|
||||||
|
model_max_budget Json @default("{}")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Track spend, rate limit, budget Users
|
// Track spend, rate limit, budget Users
|
||||||
|
@ -41,6 +43,8 @@ model LiteLLM_UserTable {
|
||||||
budget_duration String?
|
budget_duration String?
|
||||||
budget_reset_at DateTime?
|
budget_reset_at DateTime?
|
||||||
allowed_cache_controls String[] @default([])
|
allowed_cache_controls String[] @default([])
|
||||||
|
model_spend Json @default("{}")
|
||||||
|
model_max_budget Json @default("{}")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate Tokens for Proxy
|
// Generate Tokens for Proxy
|
||||||
|
@ -64,6 +68,8 @@ model LiteLLM_VerificationToken {
|
||||||
budget_duration String?
|
budget_duration String?
|
||||||
budget_reset_at DateTime?
|
budget_reset_at DateTime?
|
||||||
allowed_cache_controls String[] @default([])
|
allowed_cache_controls String[] @default([])
|
||||||
|
model_spend Json @default("{}")
|
||||||
|
model_max_budget Json @default("{}")
|
||||||
}
|
}
|
||||||
|
|
||||||
// store proxy config.yaml
|
// store proxy config.yaml
|
||||||
|
|
|
@ -1379,6 +1379,7 @@ async def _read_request_body(request):
|
||||||
"""
|
"""
|
||||||
import ast, json
|
import ast, json
|
||||||
|
|
||||||
|
try:
|
||||||
request_data = {}
|
request_data = {}
|
||||||
if request is None:
|
if request is None:
|
||||||
return request_data
|
return request_data
|
||||||
|
@ -1392,6 +1393,8 @@ async def _read_request_body(request):
|
||||||
except:
|
except:
|
||||||
request_data = json.loads(body_str)
|
request_data = json.loads(body_str)
|
||||||
return request_data
|
return request_data
|
||||||
|
except:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
def _is_valid_team_configs(team_id=None, team_config=None, request_data=None):
|
def _is_valid_team_configs(team_id=None, team_config=None, request_data=None):
|
||||||
|
@ -1408,6 +1411,22 @@ def _is_valid_team_configs(team_id=None, team_config=None, request_data=None):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
def _is_user_proxy_admin(user_id_information=None):
|
||||||
|
if (
|
||||||
|
user_id_information == None
|
||||||
|
or len(user_id_information) == 0
|
||||||
|
or user_id_information[0] == None
|
||||||
|
):
|
||||||
|
return False
|
||||||
|
_user = user_id_information[0]
|
||||||
|
if (
|
||||||
|
_user.get("user_role", None) is not None
|
||||||
|
and _user.get("user_role") == "proxy_admin"
|
||||||
|
):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
# LiteLLM Admin UI - Non SSO Login
|
# LiteLLM Admin UI - Non SSO Login
|
||||||
html_form = """
|
html_form = """
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
|
|
|
@ -1912,7 +1912,7 @@ def test_mistral_anyscale_stream():
|
||||||
# test_baseten_wizardLMcompletion_withbase()
|
# test_baseten_wizardLMcompletion_withbase()
|
||||||
|
|
||||||
# def test_baseten_mosaic_ML_completion_withbase():
|
# def test_baseten_mosaic_ML_completion_withbase():
|
||||||
# model_name = "31dxrj3"
|
# model_name = "31dxrj3",
|
||||||
# litellm.api_base = "https://app.baseten.co"
|
# litellm.api_base = "https://app.baseten.co"
|
||||||
# try:
|
# try:
|
||||||
# response = completion(model=model_name, messages=messages)
|
# response = completion(model=model_name, messages=messages)
|
||||||
|
|
|
@ -1101,6 +1101,116 @@ def test_call_with_key_over_budget(prisma_client):
|
||||||
print(vars(e))
|
print(vars(e))
|
||||||
|
|
||||||
|
|
||||||
|
def test_call_with_key_over_model_budget(prisma_client):
|
||||||
|
# 12. Make a call with a key over budget, expect to fail
|
||||||
|
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
|
||||||
|
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
|
||||||
|
try:
|
||||||
|
|
||||||
|
async def test():
|
||||||
|
await litellm.proxy.proxy_server.prisma_client.connect()
|
||||||
|
|
||||||
|
# set budget for chatgpt-v-2 to 0.000001, expect the next request to fail
|
||||||
|
request = GenerateKeyRequest(
|
||||||
|
max_budget=1000,
|
||||||
|
model_max_budget={
|
||||||
|
"chatgpt-v-2": 0.000001,
|
||||||
|
},
|
||||||
|
metadata={"user_api_key": 0.0001},
|
||||||
|
)
|
||||||
|
key = await generate_key_fn(request)
|
||||||
|
print(key)
|
||||||
|
|
||||||
|
generated_key = key.key
|
||||||
|
user_id = key.user_id
|
||||||
|
bearer_token = "Bearer " + generated_key
|
||||||
|
|
||||||
|
request = Request(scope={"type": "http"})
|
||||||
|
request._url = URL(url="/chat/completions")
|
||||||
|
|
||||||
|
async def return_body():
|
||||||
|
return b'{"model": "chatgpt-v-2"}'
|
||||||
|
|
||||||
|
request.body = return_body
|
||||||
|
|
||||||
|
# use generated key to auth in
|
||||||
|
result = await user_api_key_auth(request=request, api_key=bearer_token)
|
||||||
|
print("result from user auth with new key", result)
|
||||||
|
|
||||||
|
# update spend using track_cost callback, make 2nd request, it should fail
|
||||||
|
from litellm.proxy.proxy_server import (
|
||||||
|
_PROXY_track_cost_callback as track_cost_callback,
|
||||||
|
)
|
||||||
|
from litellm import ModelResponse, Choices, Message, Usage
|
||||||
|
from litellm.caching import Cache
|
||||||
|
|
||||||
|
litellm.cache = Cache()
|
||||||
|
import time
|
||||||
|
|
||||||
|
request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{time.time()}"
|
||||||
|
|
||||||
|
resp = ModelResponse(
|
||||||
|
id=request_id,
|
||||||
|
choices=[
|
||||||
|
Choices(
|
||||||
|
finish_reason=None,
|
||||||
|
index=0,
|
||||||
|
message=Message(
|
||||||
|
content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
|
||||||
|
role="assistant",
|
||||||
|
),
|
||||||
|
)
|
||||||
|
],
|
||||||
|
model="gpt-35-turbo", # azure always has model written like this
|
||||||
|
usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
|
||||||
|
)
|
||||||
|
await track_cost_callback(
|
||||||
|
kwargs={
|
||||||
|
"model": "chatgpt-v-2",
|
||||||
|
"stream": False,
|
||||||
|
"litellm_params": {
|
||||||
|
"metadata": {
|
||||||
|
"user_api_key": hash_token(generated_key),
|
||||||
|
"user_api_key_user_id": user_id,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"response_cost": 0.00002,
|
||||||
|
},
|
||||||
|
completion_response=resp,
|
||||||
|
start_time=datetime.now(),
|
||||||
|
end_time=datetime.now(),
|
||||||
|
)
|
||||||
|
await asyncio.sleep(10)
|
||||||
|
# test spend_log was written and we can read it
|
||||||
|
spend_logs = await view_spend_logs(request_id=request_id)
|
||||||
|
|
||||||
|
print("read spend logs", spend_logs)
|
||||||
|
assert len(spend_logs) == 1
|
||||||
|
|
||||||
|
spend_log = spend_logs[0]
|
||||||
|
|
||||||
|
assert spend_log.request_id == request_id
|
||||||
|
assert spend_log.spend == float("2e-05")
|
||||||
|
assert spend_log.model == "chatgpt-v-2"
|
||||||
|
assert (
|
||||||
|
spend_log.cache_key
|
||||||
|
== "a61ae14fe4a8b8014a61e6ae01a100c8bc6770ac37c293242afed954bc69207d"
|
||||||
|
)
|
||||||
|
|
||||||
|
# use generated key to auth in
|
||||||
|
result = await user_api_key_auth(request=request, api_key=bearer_token)
|
||||||
|
print("result from user auth with new key", result)
|
||||||
|
pytest.fail(f"This should have failed!. They key crossed it's budget")
|
||||||
|
|
||||||
|
asyncio.run(test())
|
||||||
|
except Exception as e:
|
||||||
|
# print(f"Error - {str(e)}")
|
||||||
|
traceback.print_exc()
|
||||||
|
error_detail = e.message
|
||||||
|
assert "Authentication Error, ExceededModelBudget:" in error_detail
|
||||||
|
print(vars(e))
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio()
|
@pytest.mark.asyncio()
|
||||||
async def test_call_with_key_never_over_budget(prisma_client):
|
async def test_call_with_key_never_over_budget(prisma_client):
|
||||||
# Make a call with a key with budget=None, it should never fail
|
# Make a call with a key with budget=None, it should never fail
|
||||||
|
|
|
@ -89,7 +89,6 @@ from .exceptions import (
|
||||||
UnprocessableEntityError,
|
UnprocessableEntityError,
|
||||||
)
|
)
|
||||||
|
|
||||||
verbose_logger.debug(f"sys.path: {sys.path}")
|
|
||||||
try:
|
try:
|
||||||
from .proxy.enterprise.enterprise_callbacks.generic_api_callback import (
|
from .proxy.enterprise.enterprise_callbacks.generic_api_callback import (
|
||||||
GenericAPILogger,
|
GenericAPILogger,
|
||||||
|
|
|
@ -1,11 +1,16 @@
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "litellm"
|
name = "litellm"
|
||||||
version = "1.25.0"
|
version = "1.25.2"
|
||||||
description = "Library to easily interface with LLM API providers"
|
description = "Library to easily interface with LLM API providers"
|
||||||
authors = ["BerriAI"]
|
authors = ["BerriAI"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
|
|
||||||
|
[tool.poetry.urls]
|
||||||
|
homepage = "https://litellm.ai"
|
||||||
|
repository = "https://github.com/BerriAI/litellm"
|
||||||
|
documentation = "https://docs.litellm.ai"
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = ">=3.8.1,<4.0, !=3.9.7"
|
python = ">=3.8.1,<4.0, !=3.9.7"
|
||||||
openai = ">=1.0.0"
|
openai = ">=1.0.0"
|
||||||
|
@ -69,7 +74,7 @@ requires = ["poetry-core", "wheel"]
|
||||||
build-backend = "poetry.core.masonry.api"
|
build-backend = "poetry.core.masonry.api"
|
||||||
|
|
||||||
[tool.commitizen]
|
[tool.commitizen]
|
||||||
version = "1.25.0"
|
version = "1.25.2"
|
||||||
version_files = [
|
version_files = [
|
||||||
"pyproject.toml:^version"
|
"pyproject.toml:^version"
|
||||||
]
|
]
|
||||||
|
|
|
@ -24,6 +24,8 @@ model LiteLLM_TeamTable {
|
||||||
budget_reset_at DateTime?
|
budget_reset_at DateTime?
|
||||||
created_at DateTime @default(now()) @map("created_at")
|
created_at DateTime @default(now()) @map("created_at")
|
||||||
updated_at DateTime @default(now()) @updatedAt @map("updated_at")
|
updated_at DateTime @default(now()) @updatedAt @map("updated_at")
|
||||||
|
model_spend Json @default("{}")
|
||||||
|
model_max_budget Json @default("{}")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Track spend, rate limit, budget Users
|
// Track spend, rate limit, budget Users
|
||||||
|
@ -41,6 +43,8 @@ model LiteLLM_UserTable {
|
||||||
budget_duration String?
|
budget_duration String?
|
||||||
budget_reset_at DateTime?
|
budget_reset_at DateTime?
|
||||||
allowed_cache_controls String[] @default([])
|
allowed_cache_controls String[] @default([])
|
||||||
|
model_spend Json @default("{}")
|
||||||
|
model_max_budget Json @default("{}")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate Tokens for Proxy
|
// Generate Tokens for Proxy
|
||||||
|
@ -64,6 +68,8 @@ model LiteLLM_VerificationToken {
|
||||||
budget_duration String?
|
budget_duration String?
|
||||||
budget_reset_at DateTime?
|
budget_reset_at DateTime?
|
||||||
allowed_cache_controls String[] @default([])
|
allowed_cache_controls String[] @default([])
|
||||||
|
model_spend Json @default("{}")
|
||||||
|
model_max_budget Json @default("{}")
|
||||||
}
|
}
|
||||||
|
|
||||||
// store proxy config.yaml
|
// store proxy config.yaml
|
||||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
||||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/c18941d97fb7245b.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48016,[\"145\",\"static/chunks/145-9c160ad5539e000f.js\",\"931\",\"static/chunks/app/page-7bb820bd6902dbf2.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/c18941d97fb7245b.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"unBuvDqydg0yodtP5c3nQ\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"
children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be 
found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/c18941d97fb7245b.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48016,[\"145\",\"static/chunks/145-9c160ad5539e000f.js\",\"931\",\"static/chunks/app/page-2322bcdc2ec71284.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/c18941d97fb7245b.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"S_8LZOnl2nyURq-NYnh2p\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"
children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be 
found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
||||||
2:I[77831,[],""]
|
2:I[77831,[],""]
|
||||||
3:I[48016,["145","static/chunks/145-9c160ad5539e000f.js","931","static/chunks/app/page-7bb820bd6902dbf2.js"],""]
|
3:I[48016,["145","static/chunks/145-9c160ad5539e000f.js","931","static/chunks/app/page-2322bcdc2ec71284.js"],""]
|
||||||
4:I[5613,[],""]
|
4:I[5613,[],""]
|
||||||
5:I[31778,[],""]
|
5:I[31778,[],""]
|
||||||
0:["unBuvDqydg0yodtP5c3nQ",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/c18941d97fb7245b.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
0:["S_8LZOnl2nyURq-NYnh2p",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/c18941d97fb7245b.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||||
1:null
|
1:null
|
||||||
|
|
|
@ -239,7 +239,7 @@ export const userSpendLogsCall = async (
|
||||||
|
|
||||||
export const keyInfoCall = async (accessToken: String, keys: String[]) => {
|
export const keyInfoCall = async (accessToken: String, keys: String[]) => {
|
||||||
try {
|
try {
|
||||||
let url = proxyBaseUrl ? `${proxyBaseUrl}/v2/key/info` : `/key/info`;
|
let url = proxyBaseUrl ? `${proxyBaseUrl}/v2/key/info` : `/v2/key/info`;
|
||||||
|
|
||||||
const response = await fetch(url, {
|
const response = await fetch(url, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
|
|
|
@ -49,6 +49,14 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
|
||||||
const [accessToken, setAccessToken] = useState<string | null>(null);
|
const [accessToken, setAccessToken] = useState<string | null>(null);
|
||||||
const [userModels, setUserModels] = useState<string[]>([]);
|
const [userModels, setUserModels] = useState<string[]>([]);
|
||||||
|
|
||||||
|
// check if window is not undefined
|
||||||
|
if (typeof window !== "undefined") {
|
||||||
|
window.addEventListener('beforeunload', function() {
|
||||||
|
// Clear session storage
|
||||||
|
sessionStorage.clear();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
function formatUserRole(userRole: string) {
|
function formatUserRole(userRole: string) {
|
||||||
if (!userRole) {
|
if (!userRole) {
|
||||||
return "Undefined Role";
|
return "Undefined Role";
|
||||||
|
@ -70,6 +78,7 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
|
||||||
|
|
||||||
// Moved useEffect inside the component and used a condition to run fetch only if the params are available
|
// Moved useEffect inside the component and used a condition to run fetch only if the params are available
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
|
|
||||||
if (token) {
|
if (token) {
|
||||||
const decoded = jwtDecode(token) as { [key: string]: any };
|
const decoded = jwtDecode(token) as { [key: string]: any };
|
||||||
if (decoded) {
|
if (decoded) {
|
||||||
|
@ -97,9 +106,9 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (userID && accessToken && userRole && !data) {
|
if (userID && accessToken && userRole && !data) {
|
||||||
const cachedData = localStorage.getItem("userData" + userID);
|
const cachedData = sessionStorage.getItem("userData" + userID);
|
||||||
const cachedSpendData = localStorage.getItem("userSpendData" + userID);
|
const cachedSpendData = sessionStorage.getItem("userSpendData" + userID);
|
||||||
const cachedUserModels = localStorage.getItem("userModels" + userID);
|
const cachedUserModels = sessionStorage.getItem("userModels" + userID);
|
||||||
if (cachedData && cachedSpendData && cachedUserModels) {
|
if (cachedData && cachedSpendData && cachedUserModels) {
|
||||||
setData(JSON.parse(cachedData));
|
setData(JSON.parse(cachedData));
|
||||||
setUserSpendData(JSON.parse(cachedSpendData));
|
setUserSpendData(JSON.parse(cachedSpendData));
|
||||||
|
@ -111,8 +120,8 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
|
||||||
const response = await userInfoCall(accessToken, userID, userRole);
|
const response = await userInfoCall(accessToken, userID, userRole);
|
||||||
setUserSpendData(response["user_info"]);
|
setUserSpendData(response["user_info"]);
|
||||||
setData(response["keys"]); // Assuming this is the correct path to your data
|
setData(response["keys"]); // Assuming this is the correct path to your data
|
||||||
localStorage.setItem("userData" + userID, JSON.stringify(response["keys"]));
|
sessionStorage.setItem("userData" + userID, JSON.stringify(response["keys"]));
|
||||||
localStorage.setItem(
|
sessionStorage.setItem(
|
||||||
"userSpendData" + userID,
|
"userSpendData" + userID,
|
||||||
JSON.stringify(response["user_info"])
|
JSON.stringify(response["user_info"])
|
||||||
);
|
);
|
||||||
|
@ -126,7 +135,7 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
|
||||||
|
|
||||||
console.log("userModels:", userModels);
|
console.log("userModels:", userModels);
|
||||||
|
|
||||||
localStorage.setItem("userModels" + userID, JSON.stringify(available_model_names));
|
sessionStorage.setItem("userModels" + userID, JSON.stringify(available_model_names));
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue