forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_llamaguard_custom_categories
This commit is contained in:
commit
038ba426ab
57 changed files with 585 additions and 364 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
@ -40,7 +40,7 @@ ui/litellm-dashboard/node_modules
|
|||
ui/litellm-dashboard/next-env.d.ts
|
||||
ui/litellm-dashboard/package.json
|
||||
ui/litellm-dashboard/package-lock.json
|
||||
deploy/charts/litellm-helm/*.tgz
|
||||
deploy/charts/litellm-helm/charts/*
|
||||
deploy/charts/litellm/*.tgz
|
||||
deploy/charts/litellm/charts/*
|
||||
deploy/charts/*.tgz
|
||||
litellm/proxy/vertex_key.json
|
||||
|
|
|
@ -1,89 +0,0 @@
|
|||
{{- if .Values.ui.enabled -}}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ include "litellm.fullname" . }}-ui
|
||||
labels:
|
||||
{{- include "litellm.labels" . | nindent 4 }}
|
||||
spec:
|
||||
{{- if not .Values.ui.autoscaling.enabled }}
|
||||
replicas: {{ .Values.ui.replicaCount }}
|
||||
{{- end }}
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "litellm.ui.selectorLabels" . | nindent 6 }}
|
||||
template:
|
||||
metadata:
|
||||
{{- with .Values.podAnnotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
labels:
|
||||
{{- include "litellm.ui.labels" . | nindent 8 }}
|
||||
{{- with .Values.ui.podLabels }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- with .Values.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
serviceAccountName: {{ include "litellm.serviceAccountName" . }}
|
||||
securityContext:
|
||||
{{- toYaml .Values.ui.podSecurityContext | nindent 8 }}
|
||||
containers:
|
||||
- name: {{ include "litellm.name" . }}-ui
|
||||
securityContext:
|
||||
{{- toYaml .Values.ui.securityContext | nindent 12 }}
|
||||
image: "{{ .Values.ui.image.repository }}:{{ .Values.ui.image.tag | default (printf "main-%s" .Chart.AppVersion) }}"
|
||||
imagePullPolicy: {{ .Values.ui.image.pullPolicy }}
|
||||
env:
|
||||
- name: BASE_URL
|
||||
value: {{ (index .Values.ui.ingress.hosts 0).host | default "example.com" }}
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: {{ .Values.ui.service.port }}
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: http
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: http
|
||||
# Give the container time to start up. Up to 5 minutes (10 * 30 seconds)
|
||||
startupProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: http
|
||||
failureThreshold: 30
|
||||
periodSeconds: 10
|
||||
resources:
|
||||
{{- toYaml .Values.ui.resources | nindent 12 }}
|
||||
volumeMounts:
|
||||
- name: tmp
|
||||
mountPath: /tmp
|
||||
{{- with .Values.ui.volumeMounts }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir:
|
||||
sizeLimit: 500Mi
|
||||
{{- with .Values.ui.volumes }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.ui.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.ui.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.ui.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end -}}
|
|
@ -1,61 +0,0 @@
|
|||
{{- if .Values.ui.ingress.enabled -}}
|
||||
{{- $fullName := (printf "%s%s" (include "litellm.fullname" .) "-ui") -}}
|
||||
{{- $svcPort := .Values.ui.service.port -}}
|
||||
{{- if and .Values.ui.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
|
||||
{{- if not (hasKey .Values.ui.ingress.annotations "kubernetes.io/ingress.class") }}
|
||||
{{- $_ := set .Values.ui.ingress.annotations "kubernetes.io/ingress.class" .Values.ui.ingress.className}}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
|
||||
apiVersion: networking.k8s.io/v1beta1
|
||||
{{- else -}}
|
||||
apiVersion: extensions/v1beta1
|
||||
{{- end }}
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: {{ $fullName }}
|
||||
labels:
|
||||
{{- include "litellm.ui.labels" . | nindent 4 }}
|
||||
{{- with .Values.ui.ingress.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- if and .Values.ui.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
|
||||
ingressClassName: {{ .Values.ui.ingress.className }}
|
||||
{{- end }}
|
||||
{{- if .Values.ui.ingress.tls }}
|
||||
tls:
|
||||
{{- range .Values.ui.ingress.tls }}
|
||||
- hosts:
|
||||
{{- range .hosts }}
|
||||
- {{ . | quote }}
|
||||
{{- end }}
|
||||
secretName: {{ .secretName }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
rules:
|
||||
{{- range .Values.ui.ingress.hosts }}
|
||||
- host: {{ .host | quote }}
|
||||
http:
|
||||
paths:
|
||||
{{- range .paths }}
|
||||
- path: {{ .path }}
|
||||
{{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
|
||||
pathType: {{ .pathType }}
|
||||
{{- end }}
|
||||
backend:
|
||||
{{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
|
||||
service:
|
||||
name: {{ $fullName }}
|
||||
port:
|
||||
number: {{ $svcPort }}
|
||||
{{- else }}
|
||||
serviceName: {{ $fullName }}
|
||||
servicePort: {{ $svcPort }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
|
@ -1,17 +0,0 @@
|
|||
{{- if .Values.ui.enabled -}}
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ include "litellm.fullname" . }}-ui
|
||||
labels:
|
||||
{{- include "litellm.labels" . | nindent 4 }}
|
||||
spec:
|
||||
type: {{ .Values.ui.service.type }}
|
||||
ports:
|
||||
- port: {{ .Values.ui.service.port }}
|
||||
targetPort: http
|
||||
protocol: TCP
|
||||
name: http
|
||||
selector:
|
||||
{{- include "litellm.ui.selectorLabels" . | nindent 4 }}
|
||||
{{ end -}}
|
|
@ -2,7 +2,7 @@ apiVersion: v2
|
|||
|
||||
# We can't call ourselves just "litellm" because then we couldn't publish to the
|
||||
# same OCI repository as the "litellm" OCI image
|
||||
name: litellm-helm
|
||||
name: litellm
|
||||
description: Call all LLM APIs using the OpenAI format
|
||||
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
|
@ -18,17 +18,16 @@ type: application
|
|||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
version: 0.2.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: v1.18.9
|
||||
appVersion: v1.24.5
|
||||
|
||||
dependencies:
|
||||
- name: "postgresql"
|
||||
version: ">=13.3.0"
|
||||
repository: oci://registry-1.docker.io/bitnamicharts
|
||||
condition: db.deployStandalone
|
||||
|
|
@ -43,20 +43,6 @@ data:
|
|||
type: Opaque
|
||||
```
|
||||
|
||||
### LiteLLM Admin UI Settings
|
||||
|
||||
| Name | Description | Value |
|
||||
| ---------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----- |
|
||||
| `ui.enabled` | Should the LiteLLM Admin UI be deployed | `true` |
|
||||
| `ui.replicaCount` | The number of LiteLLM Admin UI pods to be deployed | `1` |
|
||||
| `ui.image.repository` | LiteLLM Admin UI image repository | `ghcr.io/berriai/litellm` |
|
||||
| `ui.image.pullPolicy` | LiteLLM Admin UI image pull policy | `IfNotPresent` |
|
||||
| `ui.image.tag` | Overrides the image tag whose default the latest version of LiteLLM at the time this chart was published. | `""` |
|
||||
| `ui.imagePullSecrets` | Registry credentials for the above images. | `[]` |
|
||||
| `ui.service.type` | Kubernetes Service type (e.g. `LoadBalancer`, `ClusterIP`, etc.) | `ClusterIP` |
|
||||
| `ui.service.port` | TCP port that the Kubernetes Service will listen on. Also the TCP port within the Pod that the web server will listen on. | `8000` |
|
||||
| `ui.ingress.*` | See [values.yaml](./values.yaml) for example settings | N/A |
|
||||
|
||||
### Database Settings
|
||||
| Name | Description | Value |
|
||||
| ---------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----- |
|
||||
|
@ -86,18 +72,18 @@ type: Opaque
|
|||
```
|
||||
|
||||
## Accessing the Admin UI
|
||||
When browsing to the URL published per the settings in `ui.ingress.*`, you will
|
||||
When browsing to the URL published per the settings in `ingress.*`, you will
|
||||
be prompted for **Admin Configuration**. The **Proxy Endpoint** is the internal
|
||||
(from the `litellm-ui` pod's perspective) URL published by the `litellm-proxy`
|
||||
(from the `litellm` pod's perspective) URL published by the `<RELEASE>-litellm`
|
||||
Kubernetes Service. If the deployment uses the default settings for this
|
||||
service, the **Proxy Endpoint** should be set to `http://litellm-proxy:8000`.
|
||||
service, the **Proxy Endpoint** should be set to `http://<RELEASE>-litellm:8000`.
|
||||
|
||||
The **Proxy Key** is the value specified for `masterkey` or, if a `masterkey`
|
||||
was not provided to the helm command line, the `masterkey` is a randomly
|
||||
generated string stored in the `litellm-masterkey` Kubernetes Secret.
|
||||
generated string stored in the `<RELEASE>-litellm-masterkey` Kubernetes Secret.
|
||||
|
||||
```bash
|
||||
kubectl -n litellm get secret litellm-masterkey -o jsonpath="{.data.masterkey}"
|
||||
kubectl -n litellm get secret <RELEASE>-litellm-masterkey -o jsonpath="{.data.masterkey}"
|
||||
```
|
||||
|
||||
## Admin UI Limitations
|
|
@ -41,14 +41,6 @@ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
|||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- end }}
|
||||
{{- define "litellm.ui.labels" -}}
|
||||
helm.sh/chart: {{ include "litellm.chart" . }}
|
||||
{{ include "litellm.ui.selectorLabels" . }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Selector labels
|
||||
|
@ -57,10 +49,6 @@ Selector labels
|
|||
app.kubernetes.io/name: {{ include "litellm.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
{{- define "litellm.ui.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "litellm.name" . }}-ui
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the service account to use
|
|
@ -1,7 +1,7 @@
|
|||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ include "litellm.fullname" . }}-proxy
|
||||
name: {{ include "litellm.fullname" . }}
|
||||
labels:
|
||||
{{- include "litellm.labels" . | nindent 4 }}
|
||||
spec:
|
||||
|
@ -41,12 +41,12 @@ spec:
|
|||
- name: DATABASE_USERNAME
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "litellm.name" . }}-dbcredentials
|
||||
name: {{ include "litellm.fullname" . }}-dbcredentials
|
||||
key: username
|
||||
- name: PGPASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "litellm.name" . }}-dbcredentials
|
||||
name: {{ include "litellm.fullname" . }}-dbcredentials
|
||||
key: password
|
||||
- name: DATABASE_HOST
|
||||
value: {{ .Release.Name }}-postgresql
|
||||
|
@ -108,12 +108,12 @@ spec:
|
|||
- name: DATABASE_USERNAME
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "litellm.name" . }}-dbcredentials
|
||||
name: {{ include "litellm.fullname" . }}-dbcredentials
|
||||
key: username
|
||||
- name: DATABASE_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "litellm.name" . }}-dbcredentials
|
||||
name: {{ include "litellm.fullname" . }}-dbcredentials
|
||||
key: password
|
||||
- name: DATABASE_HOST
|
||||
value: {{ .Release.Name }}-postgresql
|
||||
|
@ -140,7 +140,7 @@ spec:
|
|||
- name: PROXY_MASTER_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "litellm.name" . }}-masterkey
|
||||
name: {{ include "litellm.fullname" . }}-masterkey
|
||||
key: masterkey
|
||||
envFrom:
|
||||
{{- range .Values.environmentSecrets }}
|
||||
|
@ -150,16 +150,7 @@ spec:
|
|||
args:
|
||||
- --config
|
||||
- /etc/litellm/config.yaml
|
||||
# command:
|
||||
# - bash
|
||||
# - -c
|
||||
# - |
|
||||
# ls -la /etc/litellm/; cat /etc/litellm/config.yaml; export
|
||||
# find / 2>/dev/null | grep -v -e '^/proc' -e '^/sys' -e '^/dev' >/tmp/before.list
|
||||
# prisma generate
|
||||
# find / 2>/dev/null | grep -v -e '^/proc' -e '^/sys' -e '^/dev' >/tmp/after.list
|
||||
# diff -ruN /tmp/before.list /tmp/after.list
|
||||
# sleep 3600
|
||||
- --run_gunicorn
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: {{ .Values.service.port }}
|
|
@ -1,5 +1,5 @@
|
|||
{{- if .Values.ingress.enabled -}}
|
||||
{{- $fullName := (printf "%s%s" (include "litellm.fullname" .) "-proxy") -}}
|
||||
{{- $fullName := include "litellm.fullname" . -}}
|
||||
{{- $svcPort := .Values.service.port -}}
|
||||
{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
|
||||
{{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
|
|
@ -2,7 +2,7 @@
|
|||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: {{ include "litellm.name" . }}-dbcredentials
|
||||
name: {{ include "litellm.fullname" . }}-dbcredentials
|
||||
data:
|
||||
# Password for the "postgres" user
|
||||
postgres-password: {{ ( index .Values.postgresql.auth "postgres-password") | default "litellm" | b64enc }}
|
|
@ -2,7 +2,7 @@
|
|||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: {{ include "litellm.name" . }}-masterkey
|
||||
name: {{ include "litellm.fullname" . }}-masterkey
|
||||
data:
|
||||
masterkey: {{ $masterkey | b64enc }}
|
||||
type: Opaque
|
|
@ -1,7 +1,7 @@
|
|||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ include "litellm.fullname" . }}-proxy
|
||||
name: {{ include "litellm.fullname" . }}
|
||||
labels:
|
||||
{{- include "litellm.labels" . | nindent 4 }}
|
||||
spec:
|
|
@ -11,5 +11,5 @@ spec:
|
|||
- name: wget
|
||||
image: busybox
|
||||
command: ['wget']
|
||||
args: ['{{ include "litellm.fullname" . }}:{{ .Values.service.port }}']
|
||||
args: ['{{ include "litellm.fullname" . }}:{{ .Values.service.port }}/health/readiness']
|
||||
restartPolicy: Never
|
|
@ -5,7 +5,9 @@
|
|||
replicaCount: 1
|
||||
|
||||
image:
|
||||
repository: ghcr.io/berriai/litellm
|
||||
# Use "ghcr.io/berriai/litellm-database" for optimized image with database
|
||||
# Alternatively, use "ghcr.io/berriai/litellm" for the default image
|
||||
repository: ghcr.io/berriai/litellm-database
|
||||
pullPolicy: IfNotPresent
|
||||
# Overrides the image tag whose default is the chart appVersion.
|
||||
# tag: "main-latest"
|
||||
|
@ -56,7 +58,7 @@ service:
|
|||
port: 8000
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
enabled: false
|
||||
className: "nginx"
|
||||
annotations: {}
|
||||
# kubernetes.io/ingress.class: nginx
|
||||
|
@ -71,6 +73,8 @@ ingress:
|
|||
# hosts:
|
||||
# - chart-example.local
|
||||
|
||||
# masterkey: changeit
|
||||
|
||||
# The elements within proxy_config are rendered as config.yaml for the proxy
|
||||
# Examples: https://github.com/BerriAI/litellm/tree/main/litellm/proxy/example_config_yaml
|
||||
# Reference: https://docs.litellm.ai/docs/proxy/configs
|
||||
|
@ -159,61 +163,6 @@ postgresql:
|
|||
|
||||
# A secret is created by this chart (litellm-helm) with the credentials that
|
||||
# the new Postgres instance should use.
|
||||
existingSecret: litellm-dbcredentials
|
||||
secretKeys:
|
||||
userPasswordKey: password
|
||||
|
||||
ui:
|
||||
enabled: true
|
||||
replicaCount: 1
|
||||
autoscaling:
|
||||
enabled: false
|
||||
image:
|
||||
repository: ghcr.io/berriai/litellm-ui
|
||||
pullPolicy: IfNotPresent
|
||||
# Overrides the image tag whose default is the chart appVersion.
|
||||
# tag: "main-latest"
|
||||
# TODO: Switch to BerryAI repo and tags if/when they provide a ui image
|
||||
# https://github.com/BerriAI/litellm/pull/1505
|
||||
tag: ""
|
||||
|
||||
service:
|
||||
type: ClusterIP
|
||||
port: 8501
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
className: "nginx"
|
||||
annotations: {}
|
||||
hosts:
|
||||
- host: ui.example.local
|
||||
paths:
|
||||
- path: /
|
||||
pathType: ImplementationSpecific
|
||||
tls: []
|
||||
|
||||
podAnnotations: {}
|
||||
podLabels: {}
|
||||
|
||||
podSecurityContext:
|
||||
fsGroup: 1000
|
||||
|
||||
securityContext:
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
|
||||
resources: {}
|
||||
|
||||
volumes: []
|
||||
|
||||
volumeMounts: []
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
# existingSecret: ""
|
||||
# secretKeys:
|
||||
# userPasswordKey: password
|
|
@ -16,6 +16,34 @@ response = completion(
|
|||
)
|
||||
```
|
||||
|
||||
## Specifying Safety Settings
|
||||
In certain use-cases you may need to make calls to the models and pass [safety settings](https://ai.google.dev/docs/safety_setting_gemini) different from the defaults. To do so, simply pass the `safety_settings` argument to `completion` or `acompletion`. For example:
|
||||
|
||||
```python
|
||||
response = completion(
|
||||
model="gemini/gemini-pro",
|
||||
messages=[{"role": "user", "content": "write code for saying hi from LiteLLM"}],
|
||||
safety_settings=[
|
||||
{
|
||||
"category": "HARM_CATEGORY_HARASSMENT",
|
||||
"threshold": "BLOCK_NONE",
|
||||
},
|
||||
{
|
||||
"category": "HARM_CATEGORY_HATE_SPEECH",
|
||||
"threshold": "BLOCK_NONE",
|
||||
},
|
||||
{
|
||||
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
|
||||
"threshold": "BLOCK_NONE",
|
||||
},
|
||||
{
|
||||
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
|
||||
"threshold": "BLOCK_NONE",
|
||||
},
|
||||
]
|
||||
)
|
||||
```
|
||||
|
||||
# Gemini-Pro-Vision
|
||||
LiteLLM Supports the following image types passed in `url`
|
||||
- Images with direct links - https://storage.googleapis.com/github-repo/img/gemini/intro/landmark3.jpg
|
||||
|
|
|
@ -538,17 +538,13 @@ model_list: # will route requests to the least busy ollama model
|
|||
api_base: "http://127.0.0.1:8003"
|
||||
```
|
||||
|
||||
## Max Parallel Requests
|
||||
|
||||
To rate limit a user based on the number of parallel requests, e.g.:
|
||||
if user's parallel requests > x, send a 429 error
|
||||
if user's parallel requests <= x, let them use the API freely.
|
||||
|
||||
set the max parallel request limit on the config.yaml (note: this expects the user to be passing in an api key).
|
||||
## Configure DB Pool Limits + Connection Timeouts
|
||||
|
||||
```yaml
|
||||
general_settings:
|
||||
max_parallel_requests: 100 # max parallel requests for a user = 100
|
||||
general_settings:
|
||||
database_connection_pool_limit: 100 # sets connection pool for prisma client to postgres db at 100
|
||||
database_connection_timeout: 60 # sets a 60s timeout for any connection call to the db
|
||||
```
|
||||
|
||||
## All settings
|
||||
|
@ -577,6 +573,8 @@ general_settings:
|
|||
"key_management_system": "google_kms", # either google_kms or azure_kms
|
||||
"master_key": "string",
|
||||
"database_url": "string",
|
||||
"database_connection_pool_limit": 0, # default 100
|
||||
"database_connection_timeout": 0, # default 60s
|
||||
"database_type": "dynamo_db",
|
||||
"database_args": {
|
||||
"billing_mode": "PROVISIONED_THROUGHPUT",
|
||||
|
|
|
@ -151,10 +151,54 @@ kubectl port-forward service/litellm-service 4000:4000
|
|||
|
||||
Your OpenAI proxy server is now running on `http://0.0.0.0:4000`.
|
||||
|
||||
</TabItem>
|
||||
<TabItem value="helm-deploy" label="Helm">
|
||||
|
||||
### Step 1. Clone the repository
|
||||
|
||||
```bash
|
||||
git clone https://github.com/BerriAI/litellm.git
|
||||
```
|
||||
|
||||
### Step 2. Deploy with Helm
|
||||
|
||||
```bash
|
||||
helm install \
|
||||
--set masterkey=SuPeRsEcReT \
|
||||
mydeploy \
|
||||
deploy/charts/litellm
|
||||
```
|
||||
|
||||
### Step 3. Expose the service to localhost
|
||||
|
||||
```bash
|
||||
kubectl \
|
||||
port-forward \
|
||||
service/mydeploy-litellm \
|
||||
8000:8000
|
||||
```
|
||||
|
||||
Your OpenAI proxy server is now running on `http://127.0.0.1:8000`.
|
||||
|
||||
</TabItem>
|
||||
</Tabs>
|
||||
|
||||
## Setting SSL Certification
|
||||
## Advanced Deployment Settings
|
||||
|
||||
### Customization of the server root path
|
||||
|
||||
:::info
|
||||
|
||||
In a Kubernetes deployment, it's possible to utilize a shared DNS to host multiple applications by modifying the virtual service
|
||||
|
||||
:::
|
||||
|
||||
Customize the root path to eliminate the need for employing multiple DNS configurations during deployment.
|
||||
|
||||
👉 Set `SERVER_ROOT_PATH` in your .env and this will be set as your server root path
|
||||
|
||||
|
||||
### Setting SSL Certification
|
||||
|
||||
Use this if you need to set SSL certificates for your on-prem LiteLLM proxy.
|
||||
|
||||
|
|
|
@ -72,3 +72,78 @@ curl --location 'http://0.0.0.0:8000/key/generate' \
|
|||
```
|
||||
|
||||
|
||||
## Turn on/off per request
|
||||
|
||||
The proxy supports 2 request-level PII controls:
|
||||
|
||||
- *no-pii*: Optional(bool) - Allow user to turn off pii masking per request.
|
||||
- *output_parse_pii*: Optional(bool) - Allow user to turn off pii output parsing per request.
|
||||
|
||||
### Usage
|
||||
|
||||
**Step 1. Create key with pii permissions**
|
||||
|
||||
Set `allow_pii_controls` to true for a given key. This will allow the user to set request-level PII controls.
|
||||
|
||||
```bash
|
||||
curl --location 'http://0.0.0.0:8000/key/generate' \
|
||||
--header 'Authorization: Bearer my-master-key' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data '{
|
||||
"permissions": {"allow_pii_controls": true}
|
||||
}'
|
||||
```
|
||||
|
||||
**Step 2. Turn off pii output parsing**
|
||||
|
||||
```python
|
||||
import os
|
||||
from openai import OpenAI
|
||||
|
||||
client = OpenAI(
|
||||
# This is the default and can be omitted
|
||||
api_key=os.environ.get("OPENAI_API_KEY"),
|
||||
base_url="http://0.0.0.0:8000"
|
||||
)
|
||||
|
||||
chat_completion = client.chat.completions.create(
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": "My name is Jane Doe, my number is 8382043839",
|
||||
}
|
||||
],
|
||||
model="gpt-3.5-turbo",
|
||||
extra_body={
|
||||
"content_safety": {"output_parse_pii": False}
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
**Step 3: See response**
|
||||
|
||||
```
|
||||
{
|
||||
"id": "chatcmpl-8c5qbGTILZa1S4CK3b31yj5N40hFN",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"message": {
|
||||
"content": "Hi [PERSON], what can I help you with?",
|
||||
"role": "assistant"
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1704089632,
|
||||
"model": "gpt-35-turbo",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": null,
|
||||
"usage": {
|
||||
"completion_tokens": 47,
|
||||
"prompt_tokens": 12,
|
||||
"total_tokens": 59
|
||||
},
|
||||
"_response_ms": 1753.426
|
||||
}
|
||||
```
|
|
@ -93,6 +93,7 @@ Request Params:
|
|||
- `config`: *Optional[dict]* - any key-specific configs, overrides config in config.yaml
|
||||
- `spend`: *Optional[int]* - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend
|
||||
- `max_budget`: *Optional[float]* - Specify max budget for a given key.
|
||||
- `model_max_budget`: *Optional[dict[str, float]]* - Specify max budget for each model, `model_max_budget={"gpt4": 0.5, "gpt-5": 0.01}`
|
||||
- `max_parallel_requests`: *Optional[int]* - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
|
||||
- `metadata`: *Optional[dict]* - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
|
||||
|
||||
|
@ -676,8 +677,6 @@ general_settings:
|
|||
|
||||
### [BETA] Dynamo DB
|
||||
|
||||
Only live in `v1.16.21.dev1`.
|
||||
|
||||
#### Step 1. Save keys to env
|
||||
|
||||
```shell
|
||||
|
|
|
@ -129,7 +129,7 @@ const sidebars = {
|
|||
"proxy/caching",
|
||||
{
|
||||
"type": "category",
|
||||
"label": "Logging, Alerting, Caching",
|
||||
"label": "Logging, Alerting",
|
||||
"items": [
|
||||
"proxy/logging",
|
||||
"proxy/alerting",
|
||||
|
|
|
@ -6,9 +6,4 @@ Code in this folder is licensed under a commercial license. Please review the [L
|
|||
|
||||
👉 **Using in an Enterprise / Need specific features ?** Meet with us [here](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat?month=2024-02)
|
||||
|
||||
## Enterprise Features:
|
||||
|
||||
- Track, View spend per tag https://docs.litellm.ai/docs/proxy/spend
|
||||
- Custom API / microservice callbacks
|
||||
- Google Text Moderation API
|
||||
|
||||
See all Enterprise Features here 👉 [Docs](https://docs.litellm.ai/docs/proxy/enterprise)
|
||||
|
|
|
@ -110,7 +110,6 @@ class _ENTERPRISE_LlamaGuard(CustomLogger):
|
|||
-1
|
||||
] # get the last response - llama guard has a 4k token limit
|
||||
self.set_custom_prompt_template(messages=[safety_check_messages])
|
||||
# print(f"self.model: {self.model}")
|
||||
response = await litellm.acompletion(
|
||||
model=self.model,
|
||||
messages=[safety_check_messages],
|
||||
|
|
|
@ -121,6 +121,13 @@ def completion(
|
|||
## Load Config
|
||||
inference_params = copy.deepcopy(optional_params)
|
||||
stream = inference_params.pop("stream", None)
|
||||
|
||||
# Handle safety settings
|
||||
safety_settings_param = inference_params.pop("safety_settings", None)
|
||||
safety_settings = None
|
||||
if safety_settings_param:
|
||||
safety_settings = [genai.types.SafetySettingDict(x) for x in safety_settings_param]
|
||||
|
||||
config = litellm.GeminiConfig.get_config()
|
||||
for k, v in config.items():
|
||||
if (
|
||||
|
@ -141,11 +148,13 @@ def completion(
|
|||
response = _model.generate_content(
|
||||
contents=prompt,
|
||||
generation_config=genai.types.GenerationConfig(**inference_params),
|
||||
safety_settings=safety_settings,
|
||||
)
|
||||
else:
|
||||
response = _model.generate_content(
|
||||
contents=prompt,
|
||||
generation_config=genai.types.GenerationConfig(**inference_params),
|
||||
safety_settings=safety_settings,
|
||||
stream=True,
|
||||
)
|
||||
return response
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/c18941d97fb7245b.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48016,[\"145\",\"static/chunks/145-9c160ad5539e000f.js\",\"931\",\"static/chunks/app/page-7bb820bd6902dbf2.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/c18941d97fb7245b.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"unBuvDqydg0yodtP5c3nQ\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"
children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be 
found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/c18941d97fb7245b.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48016,[\"145\",\"static/chunks/145-9c160ad5539e000f.js\",\"931\",\"static/chunks/app/page-2322bcdc2ec71284.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/c18941d97fb7245b.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"S_8LZOnl2nyURq-NYnh2p\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"
children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be 
found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[48016,["145","static/chunks/145-9c160ad5539e000f.js","931","static/chunks/app/page-7bb820bd6902dbf2.js"],""]
|
||||
3:I[48016,["145","static/chunks/145-9c160ad5539e000f.js","931","static/chunks/app/page-2322bcdc2ec71284.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["unBuvDqydg0yodtP5c3nQ",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/c18941d97fb7245b.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["S_8LZOnl2nyURq-NYnh2p",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/c18941d97fb7245b.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -155,6 +155,9 @@ class GenerateKeyRequest(GenerateRequestBase):
|
|||
aliases: Optional[dict] = {}
|
||||
config: Optional[dict] = {}
|
||||
permissions: Optional[dict] = {}
|
||||
model_max_budget: Optional[dict] = (
|
||||
{}
|
||||
) # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
|
||||
|
||||
|
||||
class GenerateKeyResponse(GenerateKeyRequest):
|
||||
|
@ -167,7 +170,13 @@ class GenerateKeyResponse(GenerateKeyRequest):
|
|||
def set_model_info(cls, values):
|
||||
if values.get("token") is not None:
|
||||
values.update({"key": values.get("token")})
|
||||
dict_fields = ["metadata", "aliases", "config", "permissions"]
|
||||
dict_fields = [
|
||||
"metadata",
|
||||
"aliases",
|
||||
"config",
|
||||
"permissions",
|
||||
"model_max_budget",
|
||||
]
|
||||
for field in dict_fields:
|
||||
value = values.get(field)
|
||||
if value is not None and isinstance(value, str):
|
||||
|
@ -302,6 +311,13 @@ class ConfigGeneralSettings(LiteLLMBase):
|
|||
None,
|
||||
description="connect to a postgres db - needed for generating temporary keys + tracking spend / key",
|
||||
)
|
||||
database_connection_pool_limit: Optional[int] = Field(
|
||||
100,
|
||||
description="default connection pool for prisma client connecting to postgres db",
|
||||
)
|
||||
database_connection_timeout: Optional[float] = Field(
|
||||
60, description="default timeout for a connection to the database"
|
||||
)
|
||||
database_type: Optional[Literal["dynamo_db"]] = Field(
|
||||
None, description="to use dynamodb instead of postgres db"
|
||||
)
|
||||
|
@ -383,6 +399,8 @@ class LiteLLM_VerificationToken(LiteLLMBase):
|
|||
budget_reset_at: Optional[datetime] = None
|
||||
allowed_cache_controls: Optional[list] = []
|
||||
permissions: Dict = {}
|
||||
model_spend: Dict = {}
|
||||
model_max_budget: Dict = {}
|
||||
|
||||
|
||||
class UserAPIKeyAuth(
|
||||
|
@ -410,6 +428,8 @@ class LiteLLM_UserTable(LiteLLMBase):
|
|||
user_id: str
|
||||
max_budget: Optional[float]
|
||||
spend: float = 0.0
|
||||
model_max_budget: Optional[Dict] = {}
|
||||
model_spend: Optional[Dict] = {}
|
||||
user_email: Optional[str]
|
||||
models: list = []
|
||||
|
||||
|
|
|
@ -287,6 +287,8 @@ class DynamoDBWrapper(CustomDB):
|
|||
or k == "config"
|
||||
or k == "metadata"
|
||||
or k == "permissions"
|
||||
or k == "model_spend"
|
||||
or k == "model_max_budget"
|
||||
)
|
||||
and v is not None
|
||||
and isinstance(v, str)
|
||||
|
|
|
@ -119,6 +119,9 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
|
|||
call_type: str,
|
||||
):
|
||||
"""
|
||||
- Check if request turned off pii
|
||||
- Check if user allowed to turn off pii (key permissions -> 'allow_pii_controls')
|
||||
|
||||
- Take the request data
|
||||
- Call /analyze -> get the results
|
||||
- Call /anonymize w/ the analyze results -> get the redacted text
|
||||
|
@ -126,13 +129,59 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
|
|||
For multiple messages in /chat/completions, we'll need to call them in parallel.
|
||||
"""
|
||||
permissions = user_api_key_dict.permissions
|
||||
|
||||
if permissions.get("pii", True) == False: # allow key to turn off pii masking
|
||||
return data
|
||||
|
||||
output_parse_pii = permissions.get(
|
||||
"output_parse_pii", litellm.output_parse_pii
|
||||
) # allow key to turn on/off output parsing for pii
|
||||
no_pii = permissions.get(
|
||||
"no-pii", None
|
||||
) # allow key to turn on/off pii masking (if user is allowed to set pii controls, then they can override the key defaults)
|
||||
|
||||
if no_pii is None:
|
||||
# check older way of turning on/off pii
|
||||
no_pii = not permissions.get("pii", True)
|
||||
|
||||
content_safety = data.get("content_safety", None)
|
||||
verbose_proxy_logger.debug(f"content_safety: {content_safety}")
|
||||
## Request-level turn on/off PII controls ##
|
||||
if content_safety is not None and isinstance(content_safety, dict):
|
||||
# pii masking ##
|
||||
if (
|
||||
content_safety.get("no-pii", None) is not None
|
||||
and content_safety.get("no-pii") == True
|
||||
):
|
||||
# check if user allowed to turn this off
|
||||
if permissions.get("allow_pii_controls", False) == False:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail={"error": "Not allowed to set PII controls per request"},
|
||||
)
|
||||
else: # user allowed to turn off pii masking
|
||||
no_pii = content_safety.get("no-pii")
|
||||
if not isinstance(no_pii, bool):
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail={"error": "no_pii needs to be a boolean value"},
|
||||
)
|
||||
## pii output parsing ##
|
||||
if content_safety.get("output_parse_pii", None) is not None:
|
||||
# check if user allowed to turn this off
|
||||
if permissions.get("allow_pii_controls", False) == False:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail={"error": "Not allowed to set PII controls per request"},
|
||||
)
|
||||
else: # user allowed to turn on/off pii output parsing
|
||||
output_parse_pii = content_safety.get("output_parse_pii")
|
||||
if not isinstance(output_parse_pii, bool):
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail={
|
||||
"error": "output_parse_pii needs to be a boolean value"
|
||||
},
|
||||
)
|
||||
|
||||
if no_pii == False: # turn off pii masking
|
||||
return data
|
||||
|
||||
if call_type == "completion": # /chat/completions requests
|
||||
messages = data["messages"]
|
||||
|
|
|
@ -409,6 +409,8 @@ def run_server(
|
|||
"uvicorn, gunicorn needs to be imported. Run - `pip install 'litellm[proxy]'`"
|
||||
)
|
||||
|
||||
db_connection_pool_limit = 100
|
||||
db_connection_timeout = 60
|
||||
if config is not None:
|
||||
"""
|
||||
Allow user to pass in db url via config
|
||||
|
@ -427,6 +429,12 @@ def run_server(
|
|||
proxy_config.load_config(router=None, config_file_path=config)
|
||||
)
|
||||
database_url = general_settings.get("database_url", None)
|
||||
db_connection_pool_limit = general_settings.get(
|
||||
"database_connection_pool_limit", 100
|
||||
)
|
||||
db_connection_timeout = general_settings.get(
|
||||
"database_connection_timeout", 60
|
||||
)
|
||||
if database_url and database_url.startswith("os.environ/"):
|
||||
original_dir = os.getcwd()
|
||||
# set the working directory to where this script is
|
||||
|
@ -447,14 +455,19 @@ def run_server(
|
|||
try:
|
||||
if os.getenv("DATABASE_URL", None) is not None:
|
||||
### add connection pool + pool timeout args
|
||||
params = {"connection_limit": 100, "pool_timeout": 60}
|
||||
params = {
|
||||
"connection_limit": db_connection_pool_limit,
|
||||
"pool_timeout": db_connection_timeout,
|
||||
}
|
||||
database_url = os.getenv("DATABASE_URL")
|
||||
modified_url = append_query_params(database_url, params)
|
||||
os.environ["DATABASE_URL"] = modified_url
|
||||
###
|
||||
if os.getenv("DIRECT_URL", None) is not None:
|
||||
### add connection pool + pool timeout args
|
||||
params = {"connection_limit": 100, "pool_timeout": 60}
|
||||
params = {
|
||||
"connection_limit": db_connection_pool_limit,
|
||||
"pool_timeout": db_connection_timeout,
|
||||
}
|
||||
database_url = os.getenv("DIRECT_URL")
|
||||
modified_url = append_query_params(database_url, params)
|
||||
os.environ["DIRECT_URL"] = modified_url
|
||||
|
|
|
@ -93,6 +93,7 @@ from litellm.proxy.utils import (
|
|||
html_form,
|
||||
_read_request_body,
|
||||
_is_valid_team_configs,
|
||||
_is_user_proxy_admin,
|
||||
)
|
||||
from litellm.proxy.secret_managers.google_kms import load_google_kms
|
||||
import pydantic
|
||||
|
@ -143,6 +144,9 @@ app = FastAPI(
|
|||
title="LiteLLM API",
|
||||
description=f"Proxy Server to call 100+ LLMs in the OpenAI format\n\n{ui_message}",
|
||||
version=version,
|
||||
root_path=os.environ.get(
|
||||
"SERVER_ROOT_PATH", ""
|
||||
), # check if user passed root path, FastAPI defaults this value to ""
|
||||
)
|
||||
|
||||
|
||||
|
@ -376,6 +380,11 @@ async def user_api_key_auth(
|
|||
# 3. If 'user' passed to /chat/completions, /embeddings endpoint is in budget
|
||||
# 4. If token is expired
|
||||
# 5. If token spend is under Budget for the token
|
||||
# 6. If token spend per model is under budget per model
|
||||
|
||||
request_data = await _read_request_body(
|
||||
request=request
|
||||
) # request data, used across all checks. Making this easily available
|
||||
|
||||
# Check 1. If token can call model
|
||||
litellm.model_alias_map = valid_token.aliases
|
||||
|
@ -450,7 +459,6 @@ async def user_api_key_auth(
|
|||
if (
|
||||
litellm.max_user_budget is not None
|
||||
): # Check if 'user' passed in /chat/completions is in budget, only checked if litellm.max_user_budget is set
|
||||
request_data = await _read_request_body(request=request)
|
||||
user_passed_to_chat_completions = request_data.get("user", None)
|
||||
if user_passed_to_chat_completions is not None:
|
||||
user_id_list.append(user_passed_to_chat_completions)
|
||||
|
@ -496,11 +504,7 @@ async def user_api_key_auth(
|
|||
continue
|
||||
assert isinstance(_user, dict)
|
||||
# check if user is admin #
|
||||
if (
|
||||
_user.get("user_role", None) is not None
|
||||
and _user.get("user_role") == "proxy_admin"
|
||||
):
|
||||
return UserAPIKeyAuth(api_key=master_key)
|
||||
|
||||
# Token exists, not expired now check if its in budget for the user
|
||||
user_max_budget = _user.get("max_budget", None)
|
||||
user_current_spend = _user.get("spend", None)
|
||||
|
@ -587,6 +591,25 @@ async def user_api_key_auth(
|
|||
f"ExceededTokenBudget: Current spend for token: {valid_token.spend}; Max Budget for Token: {valid_token.max_budget}"
|
||||
)
|
||||
|
||||
# Check 6. Token spend per model is under the per-model budget
|
||||
max_budget_per_model = valid_token.model_max_budget
|
||||
spend_per_model = valid_token.model_spend
|
||||
|
||||
if max_budget_per_model is not None and spend_per_model is not None:
|
||||
current_model = request_data.get("model")
|
||||
if current_model is not None:
|
||||
current_model_spend = spend_per_model.get(current_model, None)
|
||||
current_model_budget = max_budget_per_model.get(current_model, None)
|
||||
|
||||
if (
|
||||
current_model_spend is not None
|
||||
and current_model_budget is not None
|
||||
):
|
||||
if current_model_spend > current_model_budget:
|
||||
raise Exception(
|
||||
f"ExceededModelBudget: Current spend for model: {current_model_spend}; Max Budget for Model: {current_model_budget}"
|
||||
)
|
||||
|
||||
# Token passed all checks
|
||||
api_key = valid_token.token
|
||||
|
||||
|
@ -616,11 +639,15 @@ async def user_api_key_auth(
|
|||
)
|
||||
)
|
||||
if (
|
||||
route.startswith("/key/")
|
||||
or route.startswith("/user/")
|
||||
or route.startswith("/model/")
|
||||
or route.startswith("/spend/")
|
||||
) and (not is_master_key_valid):
|
||||
(
|
||||
route.startswith("/key/")
|
||||
or route.startswith("/user/")
|
||||
or route.startswith("/model/")
|
||||
or route.startswith("/spend/")
|
||||
)
|
||||
and (not is_master_key_valid)
|
||||
and (not _is_user_proxy_admin(user_id_information))
|
||||
):
|
||||
allow_user_auth = False
|
||||
if (
|
||||
general_settings.get("allow_user_auth", False) == True
|
||||
|
@ -712,9 +739,12 @@ async def user_api_key_auth(
|
|||
# Do something if the current route starts with any of the allowed routes
|
||||
pass
|
||||
else:
|
||||
raise Exception(
|
||||
f"This key is made for LiteLLM UI, Tried to access route: {route}. Not allowed"
|
||||
)
|
||||
if _is_user_proxy_admin(user_id_information):
|
||||
pass
|
||||
else:
|
||||
raise Exception(
|
||||
f"This key is made for LiteLLM UI, Tried to access route: {route}. Not allowed"
|
||||
)
|
||||
return UserAPIKeyAuth(api_key=api_key, **valid_token_dict)
|
||||
except Exception as e:
|
||||
# verbose_proxy_logger.debug(f"An exception occurred - {traceback.format_exc()}")
|
||||
|
@ -937,13 +967,26 @@ async def update_database(
|
|||
# Calculate the new cost by adding the existing cost and response_cost
|
||||
existing_spend_obj.spend = existing_spend + response_cost
|
||||
|
||||
# track cost per model, for the given user
|
||||
spend_per_model = existing_spend_obj.model_spend or {}
|
||||
current_model = kwargs.get("model")
|
||||
|
||||
if current_model is not None and spend_per_model is not None:
|
||||
if spend_per_model.get(current_model) is None:
|
||||
spend_per_model[current_model] = response_cost
|
||||
else:
|
||||
spend_per_model[current_model] += response_cost
|
||||
existing_spend_obj.model_spend = spend_per_model
|
||||
|
||||
valid_token = user_api_key_cache.get_cache(key=id)
|
||||
if valid_token is not None and isinstance(valid_token, dict):
|
||||
user_api_key_cache.set_cache(
|
||||
key=id, value=existing_spend_obj.json()
|
||||
)
|
||||
|
||||
verbose_proxy_logger.debug(f"new cost: {existing_spend_obj.spend}")
|
||||
verbose_proxy_logger.debug(
|
||||
f"user - new cost: {existing_spend_obj.spend}, user_id: {id}"
|
||||
)
|
||||
data_list.append(existing_spend_obj)
|
||||
|
||||
# Update the cost column for the given user id
|
||||
|
@ -980,15 +1023,28 @@ async def update_database(
|
|||
# Calculate the new cost by adding the existing cost and response_cost
|
||||
new_spend = existing_spend + response_cost
|
||||
|
||||
verbose_proxy_logger.debug(f"new cost: {new_spend}")
|
||||
# track cost per model, for the given key
|
||||
spend_per_model = existing_spend_obj.model_spend or {}
|
||||
current_model = kwargs.get("model")
|
||||
if current_model is not None and spend_per_model is not None:
|
||||
if spend_per_model.get(current_model) is None:
|
||||
spend_per_model[current_model] = response_cost
|
||||
else:
|
||||
spend_per_model[current_model] += response_cost
|
||||
|
||||
verbose_proxy_logger.debug(
|
||||
f"new cost: {new_spend}, new spend per model: {spend_per_model}"
|
||||
)
|
||||
# Update the cost column for the given token
|
||||
await prisma_client.update_data(
|
||||
token=token, data={"spend": new_spend}
|
||||
token=token,
|
||||
data={"spend": new_spend, "model_spend": spend_per_model},
|
||||
)
|
||||
|
||||
valid_token = user_api_key_cache.get_cache(key=token)
|
||||
if valid_token is not None:
|
||||
valid_token.spend = new_spend
|
||||
valid_token.model_spend = spend_per_model
|
||||
user_api_key_cache.set_cache(key=token, value=valid_token)
|
||||
elif custom_db_client is not None:
|
||||
# Fetch the existing cost for the given token
|
||||
|
@ -1068,10 +1124,21 @@ async def update_database(
|
|||
# Calculate the new cost by adding the existing cost and response_cost
|
||||
new_spend = existing_spend + response_cost
|
||||
|
||||
# track cost per model, for the given team
|
||||
spend_per_model = existing_spend_obj.model_spend or {}
|
||||
current_model = kwargs.get("model")
|
||||
if current_model is not None and spend_per_model is not None:
|
||||
if spend_per_model.get(current_model) is None:
|
||||
spend_per_model[current_model] = response_cost
|
||||
else:
|
||||
spend_per_model[current_model] += response_cost
|
||||
|
||||
verbose_proxy_logger.debug(f"new cost: {new_spend}")
|
||||
# Update the cost column for the given team
|
||||
await prisma_client.update_data(
|
||||
team_id=team_id, data={"spend": new_spend}, table_name="team"
|
||||
team_id=team_id,
|
||||
data={"spend": new_spend, "model_spend": spend_per_model},
|
||||
table_name="team",
|
||||
)
|
||||
|
||||
elif custom_db_client is not None:
|
||||
|
@ -1645,6 +1712,7 @@ async def generate_key_helper_fn(
|
|||
key_alias: Optional[str] = None,
|
||||
allowed_cache_controls: Optional[list] = [],
|
||||
permissions: Optional[dict] = {},
|
||||
model_max_budget: Optional[dict] = {},
|
||||
):
|
||||
global prisma_client, custom_db_client, user_api_key_cache
|
||||
|
||||
|
@ -1678,6 +1746,8 @@ async def generate_key_helper_fn(
|
|||
config_json = json.dumps(config)
|
||||
permissions_json = json.dumps(permissions)
|
||||
metadata_json = json.dumps(metadata)
|
||||
model_max_budget_json = json.dumps(model_max_budget)
|
||||
|
||||
user_id = user_id or str(uuid.uuid4())
|
||||
user_role = user_role or "app_user"
|
||||
tpm_limit = tpm_limit
|
||||
|
@ -1720,6 +1790,7 @@ async def generate_key_helper_fn(
|
|||
"budget_reset_at": key_reset_at,
|
||||
"allowed_cache_controls": allowed_cache_controls,
|
||||
"permissions": permissions_json,
|
||||
"model_max_budget": model_max_budget_json,
|
||||
}
|
||||
if (
|
||||
general_settings.get("allow_user_auth", False) == True
|
||||
|
@ -1735,6 +1806,11 @@ async def generate_key_helper_fn(
|
|||
saved_token["metadata"] = json.loads(saved_token["metadata"])
|
||||
if isinstance(saved_token["permissions"], str):
|
||||
saved_token["permissions"] = json.loads(saved_token["permissions"])
|
||||
if isinstance(saved_token["model_max_budget"], str):
|
||||
saved_token["model_max_budget"] = json.loads(
|
||||
saved_token["model_max_budget"]
|
||||
)
|
||||
|
||||
if saved_token.get("expires", None) is not None and isinstance(
|
||||
saved_token["expires"], datetime
|
||||
):
|
||||
|
@ -3078,6 +3154,20 @@ async def generate_key_fn(
|
|||
- max_parallel_requests: Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
|
||||
- metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
|
||||
- permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). Example - {"pii": false}
|
||||
- model_max_budget: Optional[dict] - key-specific model budget in USD. Example - {"text-davinci-002": 0.5, "gpt-3.5-turbo": 0.5}. If null or {}, no model-specific budget is applied.
|
||||
|
||||
Examples:
|
||||
|
||||
1. Allow users to turn on/off pii masking
|
||||
|
||||
```bash
|
||||
curl --location 'http://0.0.0.0:8000/key/generate' \
|
||||
--header 'Authorization: Bearer sk-1234' \
|
||||
--header 'Content-Type: application/json' \
|
||||
--data '{
|
||||
"permissions": {"allow_pii_controls": true}
|
||||
}'
|
||||
```
|
||||
|
||||
Returns:
|
||||
- key: (str) The generated api key
|
||||
|
@ -4871,7 +4961,7 @@ async def auth_callback(request: Request):
|
|||
if user_id is None:
|
||||
user_id = getattr(result, "first_name", "") + getattr(result, "last_name", "")
|
||||
response = await generate_key_helper_fn(
|
||||
**{"duration": "1hr", "key_max_budget": 0, "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": user_id, "team_id": "litellm-dashboard", "user_email": user_email} # type: ignore
|
||||
**{"duration": "1hr", "key_max_budget": 0.01, "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": user_id, "team_id": "litellm-dashboard", "user_email": user_email} # type: ignore
|
||||
)
|
||||
key = response["token"] # type: ignore
|
||||
user_id = response["user_id"] # type: ignore
|
||||
|
|
|
@ -24,6 +24,8 @@ model LiteLLM_TeamTable {
|
|||
budget_reset_at DateTime?
|
||||
created_at DateTime @default(now()) @map("created_at")
|
||||
updated_at DateTime @default(now()) @updatedAt @map("updated_at")
|
||||
model_spend Json @default("{}")
|
||||
model_max_budget Json @default("{}")
|
||||
}
|
||||
|
||||
// Track spend, rate limit, budget Users
|
||||
|
@ -41,6 +43,8 @@ model LiteLLM_UserTable {
|
|||
budget_duration String?
|
||||
budget_reset_at DateTime?
|
||||
allowed_cache_controls String[] @default([])
|
||||
model_spend Json @default("{}")
|
||||
model_max_budget Json @default("{}")
|
||||
}
|
||||
|
||||
// Generate Tokens for Proxy
|
||||
|
@ -64,6 +68,8 @@ model LiteLLM_VerificationToken {
|
|||
budget_duration String?
|
||||
budget_reset_at DateTime?
|
||||
allowed_cache_controls String[] @default([])
|
||||
model_spend Json @default("{}")
|
||||
model_max_budget Json @default("{}")
|
||||
}
|
||||
|
||||
// store proxy config.yaml
|
||||
|
|
|
@ -1379,19 +1379,22 @@ async def _read_request_body(request):
|
|||
"""
|
||||
import ast, json
|
||||
|
||||
request_data = {}
|
||||
if request is None:
|
||||
return request_data
|
||||
body = await request.body()
|
||||
|
||||
if body == b"" or body is None:
|
||||
return request_data
|
||||
body_str = body.decode()
|
||||
try:
|
||||
request_data = ast.literal_eval(body_str)
|
||||
request_data = {}
|
||||
if request is None:
|
||||
return request_data
|
||||
body = await request.body()
|
||||
|
||||
if body == b"" or body is None:
|
||||
return request_data
|
||||
body_str = body.decode()
|
||||
try:
|
||||
request_data = ast.literal_eval(body_str)
|
||||
except:
|
||||
request_data = json.loads(body_str)
|
||||
return request_data
|
||||
except:
|
||||
request_data = json.loads(body_str)
|
||||
return request_data
|
||||
return {}
|
||||
|
||||
|
||||
def _is_valid_team_configs(team_id=None, team_config=None, request_data=None):
|
||||
|
@ -1408,6 +1411,22 @@ def _is_valid_team_configs(team_id=None, team_config=None, request_data=None):
|
|||
return
|
||||
|
||||
|
||||
def _is_user_proxy_admin(user_id_information=None):
|
||||
if (
|
||||
user_id_information == None
|
||||
or len(user_id_information) == 0
|
||||
or user_id_information[0] == None
|
||||
):
|
||||
return False
|
||||
_user = user_id_information[0]
|
||||
if (
|
||||
_user.get("user_role", None) is not None
|
||||
and _user.get("user_role") == "proxy_admin"
|
||||
):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
# LiteLLM Admin UI - Non SSO Login
|
||||
html_form = """
|
||||
<!DOCTYPE html>
|
||||
|
|
|
@ -1912,7 +1912,7 @@ def test_mistral_anyscale_stream():
|
|||
# test_baseten_wizardLMcompletion_withbase()
|
||||
|
||||
# def test_baseten_mosaic_ML_completion_withbase():
|
||||
# model_name = "31dxrj3"
|
||||
# model_name = "31dxrj3",
|
||||
# litellm.api_base = "https://app.baseten.co"
|
||||
# try:
|
||||
# response = completion(model=model_name, messages=messages)
|
||||
|
|
|
@ -1101,6 +1101,116 @@ def test_call_with_key_over_budget(prisma_client):
|
|||
print(vars(e))
|
||||
|
||||
|
||||
def test_call_with_key_over_model_budget(prisma_client):
    """
    Make a call with a key that is over its per-model budget, expect auth to fail.

    Flow:
    1. Generate a key whose `model_max_budget` for "chatgpt-v-2" is 0.000001.
    2. Authenticate once with that key for a "chatgpt-v-2" request.
    3. Record a 0.00002 spend for that model through the proxy cost callback.
    4. Authenticate again; this is expected to raise an error whose message
       contains "Authentication Error, ExceededModelBudget:".
    """
    setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
    setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
    try:

        async def test():
            await litellm.proxy.proxy_server.prisma_client.connect()

            # set budget for chatgpt-v-2 to 0.000001, expect the next request to fail
            key_request = GenerateKeyRequest(
                max_budget=1000,
                model_max_budget={
                    "chatgpt-v-2": 0.000001,
                },
                metadata={"user_api_key": 0.0001},
            )
            key = await generate_key_fn(key_request)
            print(key)

            generated_key = key.key
            user_id = key.user_id
            bearer_token = "Bearer " + generated_key

            http_request = Request(scope={"type": "http"})
            http_request._url = URL(url="/chat/completions")

            # Stub the request body so the auth check sees the budgeted model.
            async def return_body():
                return b'{"model": "chatgpt-v-2"}'

            http_request.body = return_body

            # use generated key to auth in
            result = await user_api_key_auth(request=http_request, api_key=bearer_token)
            print("result from user auth with new key", result)

            # update spend using track_cost callback, make 2nd request, it should fail
            from litellm.proxy.proxy_server import (
                _PROXY_track_cost_callback as track_cost_callback,
            )
            from litellm import ModelResponse, Choices, Message, Usage
            from litellm.caching import Cache

            litellm.cache = Cache()
            import time

            # time.time() suffix keeps the request id distinct between runs
            request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{time.time()}"

            resp = ModelResponse(
                id=request_id,
                choices=[
                    Choices(
                        finish_reason=None,
                        index=0,
                        message=Message(
                            content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                            role="assistant",
                        ),
                    )
                ],
                model="gpt-35-turbo",  # azure always has model written like this
                usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
            )
            await track_cost_callback(
                kwargs={
                    "model": "chatgpt-v-2",
                    "stream": False,
                    "litellm_params": {
                        "metadata": {
                            "user_api_key": hash_token(generated_key),
                            "user_api_key_user_id": user_id,
                        }
                    },
                    "response_cost": 0.00002,
                },
                completion_response=resp,
                start_time=datetime.now(),
                end_time=datetime.now(),
            )
            # presumably gives the spend write time to land before reading it back - TODO confirm
            await asyncio.sleep(10)
            # test spend_log was written and we can read it
            spend_logs = await view_spend_logs(request_id=request_id)

            print("read spend logs", spend_logs)
            assert len(spend_logs) == 1

            spend_log = spend_logs[0]

            assert spend_log.request_id == request_id
            assert spend_log.spend == float("2e-05")
            assert spend_log.model == "chatgpt-v-2"
            assert (
                spend_log.cache_key
                == "a61ae14fe4a8b8014a61e6ae01a100c8bc6770ac37c293242afed954bc69207d"
            )

            # use generated key to auth in
            result = await user_api_key_auth(request=http_request, api_key=bearer_token)
            print("result from user auth with new key", result)
            pytest.fail("This should have failed!. They key crossed it's budget")

        asyncio.run(test())
    except Exception as e:
        # print(f"Error - {str(e)}")
        traceback.print_exc()
        # Not every exception reaching here has `.message` (e.g. an AssertionError
        # from the spend-log checks above); fall back to str(e) so the assertion
        # below reports the real error instead of an AttributeError.
        error_detail = getattr(e, "message", str(e))
        assert "Authentication Error, ExceededModelBudget:" in error_detail
        print(vars(e))
|
||||
|
||||
|
||||
@pytest.mark.asyncio()
|
||||
async def test_call_with_key_never_over_budget(prisma_client):
|
||||
# Make a call with a key with budget=None, it should never fail
|
||||
|
|
|
@ -89,7 +89,6 @@ from .exceptions import (
|
|||
UnprocessableEntityError,
|
||||
)
|
||||
|
||||
verbose_logger.debug(f"sys.path: {sys.path}")
|
||||
try:
|
||||
from .proxy.enterprise.enterprise_callbacks.generic_api_callback import (
|
||||
GenericAPILogger,
|
||||
|
|
|
@ -1,11 +1,16 @@
|
|||
[tool.poetry]
|
||||
name = "litellm"
|
||||
version = "1.25.0"
|
||||
version = "1.25.2"
|
||||
description = "Library to easily interface with LLM API providers"
|
||||
authors = ["BerriAI"]
|
||||
license = "MIT"
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.urls]
|
||||
homepage = "https://litellm.ai"
|
||||
repository = "https://github.com/BerriAI/litellm"
|
||||
documentation = "https://docs.litellm.ai"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.8.1,<4.0, !=3.9.7"
|
||||
openai = ">=1.0.0"
|
||||
|
@ -69,7 +74,7 @@ requires = ["poetry-core", "wheel"]
|
|||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.commitizen]
|
||||
version = "1.25.0"
|
||||
version = "1.25.2"
|
||||
version_files = [
|
||||
"pyproject.toml:^version"
|
||||
]
|
||||
|
|
|
@ -24,6 +24,8 @@ model LiteLLM_TeamTable {
|
|||
budget_reset_at DateTime?
|
||||
created_at DateTime @default(now()) @map("created_at")
|
||||
updated_at DateTime @default(now()) @updatedAt @map("updated_at")
|
||||
model_spend Json @default("{}")
|
||||
model_max_budget Json @default("{}")
|
||||
}
|
||||
|
||||
// Track spend, rate limit, budget Users
|
||||
|
@ -41,6 +43,8 @@ model LiteLLM_UserTable {
|
|||
budget_duration String?
|
||||
budget_reset_at DateTime?
|
||||
allowed_cache_controls String[] @default([])
|
||||
model_spend Json @default("{}")
|
||||
model_max_budget Json @default("{}")
|
||||
}
|
||||
|
||||
// Generate Tokens for Proxy
|
||||
|
@ -64,6 +68,8 @@ model LiteLLM_VerificationToken {
|
|||
budget_duration String?
|
||||
budget_reset_at DateTime?
|
||||
allowed_cache_controls String[] @default([])
|
||||
model_spend Json @default("{}")
|
||||
model_max_budget Json @default("{}")
|
||||
}
|
||||
|
||||
// store proxy config.yaml
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -1 +1 @@
|
|||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/c18941d97fb7245b.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48016,[\"145\",\"static/chunks/145-9c160ad5539e000f.js\",\"931\",\"static/chunks/app/page-7bb820bd6902dbf2.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/c18941d97fb7245b.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"unBuvDqydg0yodtP5c3nQ\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"
children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be 
found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
||||
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/c18941d97fb7245b.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48016,[\"145\",\"static/chunks/145-9c160ad5539e000f.js\",\"931\",\"static/chunks/app/page-2322bcdc2ec71284.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/c18941d97fb7245b.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"S_8LZOnl2nyURq-NYnh2p\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"
children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be 
found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[48016,["145","static/chunks/145-9c160ad5539e000f.js","931","static/chunks/app/page-7bb820bd6902dbf2.js"],""]
|
||||
3:I[48016,["145","static/chunks/145-9c160ad5539e000f.js","931","static/chunks/app/page-2322bcdc2ec71284.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["unBuvDqydg0yodtP5c3nQ",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/c18941d97fb7245b.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["S_8LZOnl2nyURq-NYnh2p",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/c18941d97fb7245b.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -239,7 +239,7 @@ export const userSpendLogsCall = async (
|
|||
|
||||
export const keyInfoCall = async (accessToken: String, keys: String[]) => {
|
||||
try {
|
||||
let url = proxyBaseUrl ? `${proxyBaseUrl}/v2/key/info` : `/key/info`;
|
||||
let url = proxyBaseUrl ? `${proxyBaseUrl}/v2/key/info` : `/v2/key/info`;
|
||||
|
||||
const response = await fetch(url, {
|
||||
method: "POST",
|
||||
|
|
|
@ -49,6 +49,14 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
|
|||
const [accessToken, setAccessToken] = useState<string | null>(null);
|
||||
const [userModels, setUserModels] = useState<string[]>([]);
|
||||
|
||||
// check if window is not undefined
|
||||
if (typeof window !== "undefined") {
|
||||
window.addEventListener('beforeunload', function() {
|
||||
// Clear session storage
|
||||
sessionStorage.clear();
|
||||
});
|
||||
}
|
||||
|
||||
function formatUserRole(userRole: string) {
|
||||
if (!userRole) {
|
||||
return "Undefined Role";
|
||||
|
@ -70,6 +78,7 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
|
|||
|
||||
// Moved useEffect inside the component and used a condition to run fetch only if the params are available
|
||||
useEffect(() => {
|
||||
|
||||
if (token) {
|
||||
const decoded = jwtDecode(token) as { [key: string]: any };
|
||||
if (decoded) {
|
||||
|
@ -97,22 +106,22 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
|
|||
}
|
||||
}
|
||||
if (userID && accessToken && userRole && !data) {
|
||||
const cachedData = localStorage.getItem("userData" + userID);
|
||||
const cachedSpendData = localStorage.getItem("userSpendData" + userID);
|
||||
const cachedUserModels = localStorage.getItem("userModels" + userID);
|
||||
const cachedData = sessionStorage.getItem("userData" + userID);
|
||||
const cachedSpendData = sessionStorage.getItem("userSpendData" + userID);
|
||||
const cachedUserModels = sessionStorage.getItem("userModels" + userID);
|
||||
if (cachedData && cachedSpendData && cachedUserModels) {
|
||||
setData(JSON.parse(cachedData));
|
||||
setUserSpendData(JSON.parse(cachedSpendData));
|
||||
setUserModels(JSON.parse(cachedUserModels));
|
||||
|
||||
|
||||
} else {
|
||||
const fetchData = async () => {
|
||||
try {
|
||||
const response = await userInfoCall(accessToken, userID, userRole);
|
||||
setUserSpendData(response["user_info"]);
|
||||
setData(response["keys"]); // Assuming this is the correct path to your data
|
||||
localStorage.setItem("userData" + userID, JSON.stringify(response["keys"]));
|
||||
localStorage.setItem(
|
||||
sessionStorage.setItem("userData" + userID, JSON.stringify(response["keys"]));
|
||||
sessionStorage.setItem(
|
||||
"userSpendData" + userID,
|
||||
JSON.stringify(response["user_info"])
|
||||
);
|
||||
|
@ -126,7 +135,7 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
|
|||
|
||||
console.log("userModels:", userModels);
|
||||
|
||||
localStorage.setItem("userModels" + userID, JSON.stringify(available_model_names));
|
||||
sessionStorage.setItem("userModels" + userID, JSON.stringify(available_model_names));
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue