Merge branch 'main' into litellm_llamaguard_custom_categories

Krish Dholakia 2024-02-17 21:36:40 -08:00 committed by GitHub
commit 038ba426ab
57 changed files with 585 additions and 364 deletions

.gitignore

@ -40,7 +40,7 @@ ui/litellm-dashboard/node_modules
ui/litellm-dashboard/next-env.d.ts
ui/litellm-dashboard/package.json
ui/litellm-dashboard/package-lock.json
deploy/charts/litellm-helm/*.tgz
deploy/charts/litellm-helm/charts/*
deploy/charts/litellm/*.tgz
deploy/charts/litellm/charts/*
deploy/charts/*.tgz
litellm/proxy/vertex_key.json


@ -1,89 +0,0 @@
{{- if .Values.ui.enabled -}}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "litellm.fullname" . }}-ui
labels:
{{- include "litellm.labels" . | nindent 4 }}
spec:
{{- if not .Values.ui.autoscaling.enabled }}
replicas: {{ .Values.ui.replicaCount }}
{{- end }}
selector:
matchLabels:
{{- include "litellm.ui.selectorLabels" . | nindent 6 }}
template:
metadata:
{{- with .Values.podAnnotations }}
annotations:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "litellm.ui.labels" . | nindent 8 }}
{{- with .Values.ui.podLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
serviceAccountName: {{ include "litellm.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.ui.podSecurityContext | nindent 8 }}
containers:
- name: {{ include "litellm.name" . }}-ui
securityContext:
{{- toYaml .Values.ui.securityContext | nindent 12 }}
image: "{{ .Values.ui.image.repository }}:{{ .Values.ui.image.tag | default (printf "main-%s" .Chart.AppVersion) }}"
imagePullPolicy: {{ .Values.ui.image.pullPolicy }}
env:
- name: BASE_URL
value: {{ (index .Values.ui.ingress.hosts 0).host | default "example.com" }}
ports:
- name: http
containerPort: {{ .Values.ui.service.port }}
protocol: TCP
livenessProbe:
httpGet:
path: /
port: http
readinessProbe:
httpGet:
path: /
port: http
# Give the container time to start up. Up to 5 minutes (10 * 30 seconds)
startupProbe:
httpGet:
path: /
port: http
failureThreshold: 30
periodSeconds: 10
resources:
{{- toYaml .Values.ui.resources | nindent 12 }}
volumeMounts:
- name: tmp
mountPath: /tmp
{{- with .Values.ui.volumeMounts }}
{{- toYaml . | nindent 12 }}
{{- end }}
volumes:
- name: tmp
emptyDir:
sizeLimit: 500Mi
{{- with .Values.ui.volumes }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.ui.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.ui.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.ui.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end -}}


@ -1,61 +0,0 @@
{{- if .Values.ui.ingress.enabled -}}
{{- $fullName := (printf "%s%s" (include "litellm.fullname" .) "-ui") -}}
{{- $svcPort := .Values.ui.service.port -}}
{{- if and .Values.ui.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
{{- if not (hasKey .Values.ui.ingress.annotations "kubernetes.io/ingress.class") }}
{{- $_ := set .Values.ui.ingress.annotations "kubernetes.io/ingress.class" .Values.ui.ingress.className}}
{{- end }}
{{- end }}
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
apiVersion: networking.k8s.io/v1beta1
{{- else -}}
apiVersion: extensions/v1beta1
{{- end }}
kind: Ingress
metadata:
name: {{ $fullName }}
labels:
{{- include "litellm.ui.labels" . | nindent 4 }}
{{- with .Values.ui.ingress.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
spec:
{{- if and .Values.ui.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
ingressClassName: {{ .Values.ui.ingress.className }}
{{- end }}
{{- if .Values.ui.ingress.tls }}
tls:
{{- range .Values.ui.ingress.tls }}
- hosts:
{{- range .hosts }}
- {{ . | quote }}
{{- end }}
secretName: {{ .secretName }}
{{- end }}
{{- end }}
rules:
{{- range .Values.ui.ingress.hosts }}
- host: {{ .host | quote }}
http:
paths:
{{- range .paths }}
- path: {{ .path }}
{{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
pathType: {{ .pathType }}
{{- end }}
backend:
{{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
service:
name: {{ $fullName }}
port:
number: {{ $svcPort }}
{{- else }}
serviceName: {{ $fullName }}
servicePort: {{ $svcPort }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}


@ -1,17 +0,0 @@
{{- if .Values.ui.enabled -}}
apiVersion: v1
kind: Service
metadata:
name: {{ include "litellm.fullname" . }}-ui
labels:
{{- include "litellm.labels" . | nindent 4 }}
spec:
type: {{ .Values.ui.service.type }}
ports:
- port: {{ .Values.ui.service.port }}
targetPort: http
protocol: TCP
name: http
selector:
{{- include "litellm.ui.selectorLabels" . | nindent 4 }}
{{ end -}}


@ -2,7 +2,7 @@ apiVersion: v2
# We can't call ourselves just "litellm" because then we couldn't publish to the
# same OCI repository as the "litellm" OCI image
name: litellm-helm
name: litellm
description: Call all LLM APIs using the OpenAI format
# A chart can be either an 'application' or a 'library' chart.
@ -18,17 +18,16 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
version: 0.2.0
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: v1.18.9
appVersion: v1.24.5
dependencies:
- name: "postgresql"
version: ">=13.3.0"
repository: oci://registry-1.docker.io/bitnamicharts
condition: db.deployStandalone


@ -43,20 +43,6 @@ data:
type: Opaque
```
### LiteLLM Admin UI Settings
| Name | Description | Value |
| ---------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----- |
| `ui.enabled` | Should the LiteLLM Admin UI be deployed | `true` |
| `ui.replicaCount` | The number of LiteLLM Admin UI pods to be deployed | `1` |
| `ui.image.repository` | LiteLLM Admin UI image repository | `ghcr.io/berriai/litellm` |
| `ui.image.pullPolicy` | LiteLLM Admin UI image pull policy | `IfNotPresent` |
| `ui.image.tag` | Overrides the image tag; the default is the latest version of LiteLLM at the time this chart was published. | `""` |
| `ui.imagePullSecrets` | Registry credentials for the above images. | `[]` |
| `ui.service.type` | Kubernetes Service type (e.g. `LoadBalancer`, `ClusterIP`, etc.) | `ClusterIP` |
| `ui.service.port` | TCP port that the Kubernetes Service will listen on. Also the TCP port within the Pod that the web server will listen on. | `8000` |
| `ui.ingress.*` | See [values.yaml](./values.yaml) for example settings | N/A |
### Database Settings
| Name | Description | Value |
| ---------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----- |
@ -86,18 +72,18 @@ type: Opaque
```
## Accessing the Admin UI
When browsing to the URL published per the settings in `ui.ingress.*`, you will
When browsing to the URL published per the settings in `ingress.*`, you will
be prompted for **Admin Configuration**. The **Proxy Endpoint** is the internal
(from the `litellm-ui` pod's perspective) URL published by the `litellm-proxy`
(from the `litellm` pod's perspective) URL published by the `<RELEASE>-litellm`
Kubernetes Service. If the deployment uses the default settings for this
service, the **Proxy Endpoint** should be set to `http://litellm-proxy:8000`.
service, the **Proxy Endpoint** should be set to `http://<RELEASE>-litellm:8000`.
The **Proxy Key** is the value specified for `masterkey` or, if a `masterkey`
was not provided to the helm command line, the `masterkey` is a randomly
generated string stored in the `litellm-masterkey` Kubernetes Secret.
generated string stored in the `<RELEASE>-litellm-masterkey` Kubernetes Secret.
```bash
kubectl -n litellm get secret litellm-masterkey -o jsonpath="{.data.masterkey}"
kubectl -n litellm get secret <RELEASE>-litellm-masterkey -o jsonpath="{.data.masterkey}"
```
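The secret value returned by the `jsonpath` query is base64-encoded, as with any Kubernetes Secret; a minimal decoding sketch (assuming a shell with `base64` available):
```bash
kubectl -n litellm get secret <RELEASE>-litellm-masterkey \
  -o jsonpath="{.data.masterkey}" | base64 --decode
```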
## Admin UI Limitations


@ -41,14 +41,6 @@ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{- define "litellm.ui.labels" -}}
helm.sh/chart: {{ include "litellm.chart" . }}
{{ include "litellm.ui.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
{{/*
Selector labels
@ -57,10 +49,6 @@ Selector labels
app.kubernetes.io/name: {{ include "litellm.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{- define "litellm.ui.selectorLabels" -}}
app.kubernetes.io/name: {{ include "litellm.name" . }}-ui
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
{{/*
Create the name of the service account to use


@ -1,7 +1,7 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "litellm.fullname" . }}-proxy
name: {{ include "litellm.fullname" . }}
labels:
{{- include "litellm.labels" . | nindent 4 }}
spec:
@ -41,12 +41,12 @@ spec:
- name: DATABASE_USERNAME
valueFrom:
secretKeyRef:
name: {{ include "litellm.name" . }}-dbcredentials
name: {{ include "litellm.fullname" . }}-dbcredentials
key: username
- name: PGPASSWORD
valueFrom:
secretKeyRef:
name: {{ include "litellm.name" . }}-dbcredentials
name: {{ include "litellm.fullname" . }}-dbcredentials
key: password
- name: DATABASE_HOST
value: {{ .Release.Name }}-postgresql
@ -108,12 +108,12 @@ spec:
- name: DATABASE_USERNAME
valueFrom:
secretKeyRef:
name: {{ include "litellm.name" . }}-dbcredentials
name: {{ include "litellm.fullname" . }}-dbcredentials
key: username
- name: DATABASE_PASSWORD
valueFrom:
secretKeyRef:
name: {{ include "litellm.name" . }}-dbcredentials
name: {{ include "litellm.fullname" . }}-dbcredentials
key: password
- name: DATABASE_HOST
value: {{ .Release.Name }}-postgresql
@ -140,7 +140,7 @@ spec:
- name: PROXY_MASTER_KEY
valueFrom:
secretKeyRef:
name: {{ include "litellm.name" . }}-masterkey
name: {{ include "litellm.fullname" . }}-masterkey
key: masterkey
envFrom:
{{- range .Values.environmentSecrets }}
@ -150,16 +150,7 @@ spec:
args:
- --config
- /etc/litellm/config.yaml
# command:
# - bash
# - -c
# - |
# ls -la /etc/litellm/; cat /etc/litellm/config.yaml; export
# find / 2>/dev/null | grep -v -e '^/proc' -e '^/sys' -e '^/dev' >/tmp/before.list
# prisma generate
# find / 2>/dev/null | grep -v -e '^/proc' -e '^/sys' -e '^/dev' >/tmp/after.list
# diff -ruN /tmp/before.list /tmp/after.list
# sleep 3600
- --run_gunicorn
ports:
- name: http
containerPort: {{ .Values.service.port }}


@ -1,5 +1,5 @@
{{- if .Values.ingress.enabled -}}
{{- $fullName := (printf "%s%s" (include "litellm.fullname" .) "-proxy") -}}
{{- $fullName := include "litellm.fullname" . -}}
{{- $svcPort := .Values.service.port -}}
{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
{{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}


@ -2,7 +2,7 @@
apiVersion: v1
kind: Secret
metadata:
name: {{ include "litellm.name" . }}-dbcredentials
name: {{ include "litellm.fullname" . }}-dbcredentials
data:
# Password for the "postgres" user
postgres-password: {{ ( index .Values.postgresql.auth "postgres-password") | default "litellm" | b64enc }}


@ -2,7 +2,7 @@
apiVersion: v1
kind: Secret
metadata:
name: {{ include "litellm.name" . }}-masterkey
name: {{ include "litellm.fullname" . }}-masterkey
data:
masterkey: {{ $masterkey | b64enc }}
type: Opaque


@ -1,7 +1,7 @@
apiVersion: v1
kind: Service
metadata:
name: {{ include "litellm.fullname" . }}-proxy
name: {{ include "litellm.fullname" . }}
labels:
{{- include "litellm.labels" . | nindent 4 }}
spec:


@ -11,5 +11,5 @@ spec:
- name: wget
image: busybox
command: ['wget']
args: ['{{ include "litellm.fullname" . }}:{{ .Values.service.port }}']
args: ['{{ include "litellm.fullname" . }}:{{ .Values.service.port }}/health/readiness']
restartPolicy: Never


@ -5,7 +5,9 @@
replicaCount: 1
image:
repository: ghcr.io/berriai/litellm
# Use "ghcr.io/berriai/litellm-database" for optimized image with database
# Alternatively, use "ghcr.io/berriai/litellm" for the default image
repository: ghcr.io/berriai/litellm-database
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
# tag: "main-latest"
@ -56,7 +58,7 @@ service:
port: 8000
ingress:
enabled: true
enabled: false
className: "nginx"
annotations: {}
# kubernetes.io/ingress.class: nginx
@ -71,6 +73,8 @@ ingress:
# hosts:
# - chart-example.local
# masterkey: changeit
# The elements within proxy_config are rendered as config.yaml for the proxy
# Examples: https://github.com/BerriAI/litellm/tree/main/litellm/proxy/example_config_yaml
# Reference: https://docs.litellm.ai/docs/proxy/configs
@ -159,61 +163,6 @@ postgresql:
# A secret is created by this chart (litellm-helm) with the credentials that
# the new Postgres instance should use.
existingSecret: litellm-dbcredentials
secretKeys:
userPasswordKey: password
ui:
enabled: true
replicaCount: 1
autoscaling:
enabled: false
image:
repository: ghcr.io/berriai/litellm-ui
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
# tag: "main-latest"
# TODO: Switch to BerriAI repo and tags if/when they provide a ui image
# https://github.com/BerriAI/litellm/pull/1505
tag: ""
service:
type: ClusterIP
port: 8501
ingress:
enabled: true
className: "nginx"
annotations: {}
hosts:
- host: ui.example.local
paths:
- path: /
pathType: ImplementationSpecific
tls: []
podAnnotations: {}
podLabels: {}
podSecurityContext:
fsGroup: 1000
securityContext:
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
resources: {}
volumes: []
volumeMounts: []
nodeSelector: {}
tolerations: []
affinity: {}
# existingSecret: ""
# secretKeys:
# userPasswordKey: password


@ -16,6 +16,34 @@ response = completion(
)
```
## Specifying Safety Settings
In certain use-cases you may need to make calls to the models and pass [safety settings](https://ai.google.dev/docs/safety_setting_gemini) different from the defaults. To do so, simply pass the `safety_settings` argument to `completion` or `acompletion`. For example:
```python
response = completion(
model="gemini/gemini-pro",
messages=[{"role": "user", "content": "write code for saying hi from LiteLLM"}],
safety_settings=[
{
"category": "HARM_CATEGORY_HARASSMENT",
"threshold": "BLOCK_NONE",
},
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"threshold": "BLOCK_NONE",
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"threshold": "BLOCK_NONE",
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"threshold": "BLOCK_NONE",
},
]
)
```
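The same settings can be passed to the async entry point, `acompletion`; a minimal sketch (the single-category `safety_settings` list is illustrative, any of the categories above can be included):
```python
import asyncio
from litellm import acompletion

async def main():
    # identical arguments to the sync example above, awaited instead
    response = await acompletion(
        model="gemini/gemini-pro",
        messages=[{"role": "user", "content": "write code for saying hi from LiteLLM"}],
        safety_settings=[
            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
        ],
    )
    print(response)

asyncio.run(main())
```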
# Gemini-Pro-Vision
LiteLLM supports the following image types passed in `url`
- Images with direct links - https://storage.googleapis.com/github-repo/img/gemini/intro/landmark3.jpg


@ -538,17 +538,13 @@ model_list: # will route requests to the least busy ollama model
api_base: "http://127.0.0.1:8003"
```
## Max Parallel Requests
To rate limit a user based on the number of parallel requests, e.g.:
- if a user's parallel requests > x, send a 429 error
- if a user's parallel requests <= x, let them use the API freely.

Set the max parallel request limit in config.yaml (note: this expects the user to be passing in an API key).
## Configure DB Pool Limits + Connection Timeouts
```yaml
general_settings:
max_parallel_requests: 100 # max parallel requests for a user = 100
general_settings:
database_connection_pool_limit: 100 # sets connection pool for prisma client to postgres db at 100
database_connection_timeout: 60 # sets a 60s timeout for any connection call to the db
```
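As the proxy CLI changes later in this diff show, these two values are appended to the Prisma database URL as `connection_limit` and `pool_timeout` query parameters; a sketch of the resulting URL (host and credentials are illustrative):
```bash
# DATABASE_URL after the proxy appends the pool settings
DATABASE_URL="postgresql://user:pass@host:5432/litellm?connection_limit=100&pool_timeout=60"
```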
## All settings
@ -577,6 +573,8 @@ general_settings:
"key_management_system": "google_kms", # either google_kms or azure_kms
"master_key": "string",
"database_url": "string",
"database_connection_pool_limit": 0, # default 100
"database_connection_timeout": 0, # default 60s
"database_type": "dynamo_db",
"database_args": {
"billing_mode": "PROVISIONED_THROUGHPUT",


@ -151,10 +151,54 @@ kubectl port-forward service/litellm-service 4000:4000
Your OpenAI proxy server is now running on `http://0.0.0.0:4000`.
</TabItem>
<TabItem value="helm-deploy" label="Helm">
### Step 1. Clone the repository
```bash
git clone https://github.com/BerriAI/litellm.git
```
### Step 2. Deploy with Helm
```bash
helm install \
--set masterkey=SuPeRsEcReT \
mydeploy \
deploy/charts/litellm
```
### Step 3. Expose the service to localhost
```bash
kubectl \
port-forward \
service/mydeploy-litellm \
8000:8000
```
Your OpenAI proxy server is now running on `http://127.0.0.1:8000`.
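As a quick smoke test, the readiness endpoint that the chart's helm-test pod probes can be hit directly (assuming your proxy version exposes `/health/readiness`):
```bash
curl http://127.0.0.1:8000/health/readiness
```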
</TabItem>
</Tabs>
## Setting SSL Certification
## Advanced Deployment Settings
### Customization of the server root path
:::info
In a Kubernetes deployment, it's possible to utilize a shared DNS to host multiple applications by modifying the virtual service
:::
Customize the root path to eliminate the need for employing multiple DNS configurations during deployment.
👉 Set `SERVER_ROOT_PATH` in your .env and this will be set as your server root path
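A minimal sketch, assuming a hypothetical `/api/v1` prefix:
```bash
# .env — the prefix value is illustrative
SERVER_ROOT_PATH="/api/v1"
```
All proxy routes are then served under that prefix, so one hostname can front multiple applications.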
### Setting SSL Certification
Use this if you need to set SSL certificates for your on-prem LiteLLM proxy


@ -72,3 +72,78 @@ curl --location 'http://0.0.0.0:8000/key/generate' \
```
## Turn on/off per request
The proxy supports 2 request-level PII controls:
- *no-pii*: Optional(bool) - Allow the user to turn off PII masking per request.
- *output_parse_pii*: Optional(bool) - Allow the user to turn off PII output parsing per request.
### Usage
**Step 1. Create key with pii permissions**
Set `allow_pii_controls` to true for a given key. This will allow the user to set request-level PII controls.
```bash
curl --location 'http://0.0.0.0:8000/key/generate' \
--header 'Authorization: Bearer my-master-key' \
--header 'Content-Type: application/json' \
--data '{
"permissions": {"allow_pii_controls": true}
}'
```
**Step 2. Turn off pii output parsing**
```python
import os
from openai import OpenAI
client = OpenAI(
# This is the default and can be omitted
api_key=os.environ.get("OPENAI_API_KEY"),
base_url="http://0.0.0.0:8000"
)
chat_completion = client.chat.completions.create(
messages=[
{
"role": "user",
"content": "My name is Jane Doe, my number is 8382043839",
}
],
model="gpt-3.5-turbo",
extra_body={
"content_safety": {"output_parse_pii": False}
}
)
```
**Step 3: See response**
```
{
"id": "chatcmpl-8c5qbGTILZa1S4CK3b31yj5N40hFN",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"content": "Hi [PERSON], what can I help you with?",
"role": "assistant"
}
}
],
"created": 1704089632,
"model": "gpt-35-turbo",
"object": "chat.completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 47,
"prompt_tokens": 12,
"total_tokens": 59
},
"_response_ms": 1753.426
}
```


@ -93,6 +93,7 @@ Request Params:
- `config`: *Optional[dict]* - any key-specific configs, overrides config in config.yaml
- `spend`: *Optional[int]* - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend
- `max_budget`: *Optional[float]* - Specify max budget for a given key.
- `model_max_budget`: *Optional[dict[str, float]]* - Specify max budget for each model, `model_max_budget={"gpt4": 0.5, "gpt-5": 0.01}` (see the curl sketch after this list)
- `max_parallel_requests`: *Optional[int]* - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
- `metadata`: *Optional[dict]* - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
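A sketch of setting a per-model budget at key creation (budget values are illustrative; the endpoint and headers follow the examples elsewhere in these docs):
```bash
curl --location 'http://0.0.0.0:8000/key/generate' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
    "model_max_budget": {"gpt-4": 0.5, "gpt-3.5-turbo": 0.5}
}'
```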
@ -676,8 +677,6 @@ general_settings:
### [BETA] Dynamo DB
Only live in `v1.16.21.dev1`.
#### Step 1. Save keys to env
```shell


@ -129,7 +129,7 @@ const sidebars = {
"proxy/caching",
{
"type": "category",
"label": "Logging, Alerting, Caching",
"label": "Logging, Alerting",
"items": [
"proxy/logging",
"proxy/alerting",


@ -6,9 +6,4 @@ Code in this folder is licensed under a commercial license. Please review the [L
👉 **Using in an Enterprise / Need specific features?** Meet with us [here](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat?month=2024-02)
## Enterprise Features:
- Track, View spend per tag https://docs.litellm.ai/docs/proxy/spend
- Custom API / microservice callbacks
- Google Text Moderation API
See all Enterprise Features here 👉 [Docs](https://docs.litellm.ai/docs/proxy/enterprise)


@ -110,7 +110,6 @@ class _ENTERPRISE_LlamaGuard(CustomLogger):
-1
] # get the last response - llama guard has a 4k token limit
self.set_custom_prompt_template(messages=[safety_check_messages])
# print(f"self.model: {self.model}")
response = await litellm.acompletion(
model=self.model,
messages=[safety_check_messages],


@ -121,6 +121,13 @@ def completion(
## Load Config
inference_params = copy.deepcopy(optional_params)
stream = inference_params.pop("stream", None)
# Handle safety settings
safety_settings_param = inference_params.pop("safety_settings", None)
safety_settings = None
if safety_settings_param:
safety_settings = [genai.types.SafetySettingDict(x) for x in safety_settings_param]
config = litellm.GeminiConfig.get_config()
for k, v in config.items():
if (
@ -141,11 +148,13 @@ def completion(
response = _model.generate_content(
contents=prompt,
generation_config=genai.types.GenerationConfig(**inference_params),
safety_settings=safety_settings,
)
else:
response = _model.generate_content(
contents=prompt,
generation_config=genai.types.GenerationConfig(**inference_params),
safety_settings=safety_settings,
stream=True,
)
return response

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/c18941d97fb7245b.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48016,[\"145\",\"static/chunks/145-9c160ad5539e000f.js\",\"931\",\"static/chunks/app/page-7bb820bd6902dbf2.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/c18941d97fb7245b.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"unBuvDqydg0yodtP5c3nQ\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-a85b2c176012d8e5.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e1b183dda365ec86.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>🚅 LiteLLM</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-db47c93f042d6d15.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/c18941d97fb7245b.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48016,[\"145\",\"static/chunks/145-9c160ad5539e000f.js\",\"931\",\"static/chunks/app/page-2322bcdc2ec71284.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/c18941d97fb7245b.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"S_8LZOnl2nyURq-NYnh2p\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>


@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[48016,["145","static/chunks/145-9c160ad5539e000f.js","931","static/chunks/app/page-7bb820bd6902dbf2.js"],""]
3:I[48016,["145","static/chunks/145-9c160ad5539e000f.js","931","static/chunks/app/page-2322bcdc2ec71284.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["unBuvDqydg0yodtP5c3nQ",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/c18941d97fb7245b.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["S_8LZOnl2nyURq-NYnh2p",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/c18941d97fb7245b.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null


@ -155,6 +155,9 @@ class GenerateKeyRequest(GenerateRequestBase):
aliases: Optional[dict] = {}
config: Optional[dict] = {}
permissions: Optional[dict] = {}
model_max_budget: Optional[dict] = (
{}
) # {"gpt-4": 5.0, "gpt-3.5-turbo": 5.0}, defaults to {}
class GenerateKeyResponse(GenerateKeyRequest):
@ -167,7 +170,13 @@ class GenerateKeyResponse(GenerateKeyRequest):
def set_model_info(cls, values):
if values.get("token") is not None:
values.update({"key": values.get("token")})
dict_fields = ["metadata", "aliases", "config", "permissions"]
dict_fields = [
"metadata",
"aliases",
"config",
"permissions",
"model_max_budget",
]
for field in dict_fields:
value = values.get(field)
if value is not None and isinstance(value, str):
@ -302,6 +311,13 @@ class ConfigGeneralSettings(LiteLLMBase):
None,
description="connect to a postgres db - needed for generating temporary keys + tracking spend / key",
)
database_connection_pool_limit: Optional[int] = Field(
100,
description="default connection pool for prisma client connecting to postgres db",
)
database_connection_timeout: Optional[float] = Field(
60, description="default timeout for a connection to the database"
)
database_type: Optional[Literal["dynamo_db"]] = Field(
None, description="to use dynamodb instead of postgres db"
)
@ -383,6 +399,8 @@ class LiteLLM_VerificationToken(LiteLLMBase):
budget_reset_at: Optional[datetime] = None
allowed_cache_controls: Optional[list] = []
permissions: Dict = {}
model_spend: Dict = {}
model_max_budget: Dict = {}
class UserAPIKeyAuth(
@ -410,6 +428,8 @@ class LiteLLM_UserTable(LiteLLMBase):
user_id: str
max_budget: Optional[float]
spend: float = 0.0
model_max_budget: Optional[Dict] = {}
model_spend: Optional[Dict] = {}
user_email: Optional[str]
models: list = []


@ -287,6 +287,8 @@ class DynamoDBWrapper(CustomDB):
or k == "config"
or k == "metadata"
or k == "permissions"
or k == "model_spend"
or k == "model_max_budget"
)
and v is not None
and isinstance(v, str)


@ -119,6 +119,9 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
call_type: str,
):
"""
- Check if request turned off pii
- Check if user allowed to turn off pii (key permissions -> 'allow_pii_controls')
- Take the request data
- Call /analyze -> get the results
- Call /anonymize w/ the analyze results -> get the redacted text
@ -126,13 +129,59 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
For multiple messages in /chat/completions, we'll need to call them in parallel.
"""
permissions = user_api_key_dict.permissions
if permissions.get("pii", True) == False: # allow key to turn off pii masking
return data
output_parse_pii = permissions.get(
"output_parse_pii", litellm.output_parse_pii
) # allow key to turn on/off output parsing for pii
no_pii = permissions.get(
"no-pii", None
) # allow key to turn on/off pii masking (if user is allowed to set pii controls, then they can override the key defaults)
if no_pii is None:
# check older way of turning on/off pii
no_pii = not permissions.get("pii", True)
content_safety = data.get("content_safety", None)
verbose_proxy_logger.debug(f"content_safety: {content_safety}")
## Request-level turn on/off PII controls ##
if content_safety is not None and isinstance(content_safety, dict):
# pii masking ##
if (
content_safety.get("no-pii", None) is not None
and content_safety.get("no-pii") == True
):
# check if user allowed to turn this off
if permissions.get("allow_pii_controls", False) == False:
raise HTTPException(
status_code=400,
detail={"error": "Not allowed to set PII controls per request"},
)
else: # user allowed to turn off pii masking
no_pii = content_safety.get("no-pii")
if not isinstance(no_pii, bool):
raise HTTPException(
status_code=400,
detail={"error": "no_pii needs to be a boolean value"},
)
## pii output parsing ##
if content_safety.get("output_parse_pii", None) is not None:
# check if user allowed to turn this off
if permissions.get("allow_pii_controls", False) == False:
raise HTTPException(
status_code=400,
detail={"error": "Not allowed to set PII controls per request"},
)
else: # user allowed to turn on/off pii output parsing
output_parse_pii = content_safety.get("output_parse_pii")
if not isinstance(output_parse_pii, bool):
raise HTTPException(
status_code=400,
detail={
"error": "output_parse_pii needs to be a boolean value"
},
)
if no_pii == False: # turn off pii masking
return data
if call_type == "completion": # /chat/completions requests
messages = data["messages"]


@ -409,6 +409,8 @@ def run_server(
"uvicorn, gunicorn needs to be imported. Run - `pip install 'litellm[proxy]'`"
)
db_connection_pool_limit = 100
db_connection_timeout = 60
if config is not None:
"""
Allow user to pass in db url via config
@ -427,6 +429,12 @@ def run_server(
proxy_config.load_config(router=None, config_file_path=config)
)
database_url = general_settings.get("database_url", None)
db_connection_pool_limit = general_settings.get(
"database_connection_pool_limit", 100
)
db_connection_timeout = general_settings.get(
"database_connection_timeout", 60
)
if database_url and database_url.startswith("os.environ/"):
original_dir = os.getcwd()
# set the working directory to where this script is
@ -447,14 +455,19 @@ def run_server(
try:
if os.getenv("DATABASE_URL", None) is not None:
### add connection pool + pool timeout args
params = {"connection_limit": 100, "pool_timeout": 60}
params = {
"connection_limit": db_connection_pool_limit,
"pool_timeout": db_connection_timeout,
}
database_url = os.getenv("DATABASE_URL")
modified_url = append_query_params(database_url, params)
os.environ["DATABASE_URL"] = modified_url
###
if os.getenv("DIRECT_URL", None) is not None:
### add connection pool + pool timeout args
params = {"connection_limit": 100, "pool_timeout": 60}
params = {
"connection_limit": db_connection_pool_limit,
"pool_timeout": db_connection_timeout,
}
database_url = os.getenv("DIRECT_URL")
modified_url = append_query_params(database_url, params)
os.environ["DIRECT_URL"] = modified_url


@ -93,6 +93,7 @@ from litellm.proxy.utils import (
html_form,
_read_request_body,
_is_valid_team_configs,
_is_user_proxy_admin,
)
from litellm.proxy.secret_managers.google_kms import load_google_kms
import pydantic
@ -143,6 +144,9 @@ app = FastAPI(
title="LiteLLM API",
description=f"Proxy Server to call 100+ LLMs in the OpenAI format\n\n{ui_message}",
version=version,
root_path=os.environ.get(
"SERVER_ROOT_PATH", ""
), # check if user passed root path, FastAPI defaults this value to ""
)
@ -376,6 +380,11 @@ async def user_api_key_auth(
# 3. If 'user' passed to /chat/completions, /embeddings endpoint is in budget
# 4. If token is expired
# 5. If token spend is under Budget for the token
# 6. If token spend per model is under budget per model
request_data = await _read_request_body(
request=request
) # request data, used across all checks. Making this easily available
# Check 1. If token can call model
litellm.model_alias_map = valid_token.aliases
@ -450,7 +459,6 @@ async def user_api_key_auth(
if (
litellm.max_user_budget is not None
): # Check if 'user' passed in /chat/completions is in budget, only checked if litellm.max_user_budget is set
request_data = await _read_request_body(request=request)
user_passed_to_chat_completions = request_data.get("user", None)
if user_passed_to_chat_completions is not None:
user_id_list.append(user_passed_to_chat_completions)
@ -496,11 +504,7 @@ async def user_api_key_auth(
continue
assert isinstance(_user, dict)
# check if user is admin #
if (
_user.get("user_role", None) is not None
and _user.get("user_role") == "proxy_admin"
):
return UserAPIKeyAuth(api_key=master_key)
# Token exists, not expired now check if its in budget for the user
user_max_budget = _user.get("max_budget", None)
user_current_spend = _user.get("spend", None)
@ -587,6 +591,25 @@ async def user_api_key_auth(
f"ExceededTokenBudget: Current spend for token: {valid_token.spend}; Max Budget for Token: {valid_token.max_budget}"
)
# Check 5. Token Model Spend is under Model budget
max_budget_per_model = valid_token.model_max_budget
spend_per_model = valid_token.model_spend
if max_budget_per_model is not None and spend_per_model is not None:
current_model = request_data.get("model")
if current_model is not None:
current_model_spend = spend_per_model.get(current_model, None)
current_model_budget = max_budget_per_model.get(current_model, None)
if (
current_model_spend is not None
and current_model_budget is not None
):
if current_model_spend > current_model_budget:
raise Exception(
f"ExceededModelBudget: Current spend for model: {current_model_spend}; Max Budget for Model: {current_model_budget}"
)
# Token passed all checks
api_key = valid_token.token
@ -616,11 +639,15 @@ async def user_api_key_auth(
)
)
if (
route.startswith("/key/")
or route.startswith("/user/")
or route.startswith("/model/")
or route.startswith("/spend/")
) and (not is_master_key_valid):
(
route.startswith("/key/")
or route.startswith("/user/")
or route.startswith("/model/")
or route.startswith("/spend/")
)
and (not is_master_key_valid)
and (not _is_user_proxy_admin(user_id_information))
):
allow_user_auth = False
if (
general_settings.get("allow_user_auth", False) == True
@ -712,9 +739,12 @@ async def user_api_key_auth(
# Do something if the current route starts with any of the allowed routes
pass
else:
raise Exception(
f"This key is made for LiteLLM UI, Tried to access route: {route}. Not allowed"
)
if _is_user_proxy_admin(user_id_information):
pass
else:
raise Exception(
f"This key is made for LiteLLM UI, Tried to access route: {route}. Not allowed"
)
return UserAPIKeyAuth(api_key=api_key, **valid_token_dict)
except Exception as e:
# verbose_proxy_logger.debug(f"An exception occurred - {traceback.format_exc()}")
@ -937,13 +967,26 @@ async def update_database(
# Calculate the new cost by adding the existing cost and response_cost
existing_spend_obj.spend = existing_spend + response_cost
# track cost per model, for the given user
spend_per_model = existing_spend_obj.model_spend or {}
current_model = kwargs.get("model")
if current_model is not None and spend_per_model is not None:
if spend_per_model.get(current_model) is None:
spend_per_model[current_model] = response_cost
else:
spend_per_model[current_model] += response_cost
existing_spend_obj.model_spend = spend_per_model
valid_token = user_api_key_cache.get_cache(key=id)
if valid_token is not None and isinstance(valid_token, dict):
user_api_key_cache.set_cache(
key=id, value=existing_spend_obj.json()
)
verbose_proxy_logger.debug(f"new cost: {existing_spend_obj.spend}")
verbose_proxy_logger.debug(
f"user - new cost: {existing_spend_obj.spend}, user_id: {id}"
)
data_list.append(existing_spend_obj)
# Update the cost column for the given user id
@ -980,15 +1023,28 @@ async def update_database(
# Calculate the new cost by adding the existing cost and response_cost
new_spend = existing_spend + response_cost
verbose_proxy_logger.debug(f"new cost: {new_spend}")
# track cost per model, for the given key
spend_per_model = existing_spend_obj.model_spend or {}
current_model = kwargs.get("model")
if current_model is not None and spend_per_model is not None:
if spend_per_model.get(current_model) is None:
spend_per_model[current_model] = response_cost
else:
spend_per_model[current_model] += response_cost
verbose_proxy_logger.debug(
f"new cost: {new_spend}, new spend per model: {spend_per_model}"
)
# Update the cost column for the given token
await prisma_client.update_data(
token=token, data={"spend": new_spend}
token=token,
data={"spend": new_spend, "model_spend": spend_per_model},
)
valid_token = user_api_key_cache.get_cache(key=token)
if valid_token is not None:
valid_token.spend = new_spend
valid_token.model_spend = spend_per_model
user_api_key_cache.set_cache(key=token, value=valid_token)
elif custom_db_client is not None:
# Fetch the existing cost for the given token
@ -1068,10 +1124,21 @@ async def update_database(
# Calculate the new cost by adding the existing cost and response_cost
new_spend = existing_spend + response_cost
# track cost per model, for the given team
spend_per_model = existing_spend_obj.model_spend or {}
current_model = kwargs.get("model")
if current_model is not None and spend_per_model is not None:
if spend_per_model.get(current_model) is None:
spend_per_model[current_model] = response_cost
else:
spend_per_model[current_model] += response_cost
verbose_proxy_logger.debug(f"new cost: {new_spend}")
# Update the cost column for the given token
await prisma_client.update_data(
team_id=team_id, data={"spend": new_spend}, table_name="team"
team_id=team_id,
data={"spend": new_spend, "model_spend": spend_per_model},
table_name="team",
)
elif custom_db_client is not None:
@ -1645,6 +1712,7 @@ async def generate_key_helper_fn(
key_alias: Optional[str] = None,
allowed_cache_controls: Optional[list] = [],
permissions: Optional[dict] = {},
model_max_budget: Optional[dict] = {},
):
global prisma_client, custom_db_client, user_api_key_cache
@ -1678,6 +1746,8 @@ async def generate_key_helper_fn(
config_json = json.dumps(config)
permissions_json = json.dumps(permissions)
metadata_json = json.dumps(metadata)
model_max_budget_json = json.dumps(model_max_budget)
user_id = user_id or str(uuid.uuid4())
user_role = user_role or "app_user"
tpm_limit = tpm_limit
@ -1720,6 +1790,7 @@ async def generate_key_helper_fn(
"budget_reset_at": key_reset_at,
"allowed_cache_controls": allowed_cache_controls,
"permissions": permissions_json,
"model_max_budget": model_max_budget_json,
}
if (
general_settings.get("allow_user_auth", False) == True
@ -1735,6 +1806,11 @@ async def generate_key_helper_fn(
saved_token["metadata"] = json.loads(saved_token["metadata"])
if isinstance(saved_token["permissions"], str):
saved_token["permissions"] = json.loads(saved_token["permissions"])
if isinstance(saved_token["model_max_budget"], str):
saved_token["model_max_budget"] = json.loads(
saved_token["model_max_budget"]
)
if saved_token.get("expires", None) is not None and isinstance(
saved_token["expires"], datetime
):
@ -3078,6 +3154,20 @@ async def generate_key_fn(
- max_parallel_requests: Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
- metadata: Optional[dict] - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }
- permissions: Optional[dict] - key-specific permissions. Currently just used for turning off pii masking (if connected). Example - {"pii": false}
- model_max_budget: Optional[dict] - key-specific model budget in USD. Example - {"text-davinci-002": 0.5, "gpt-3.5-turbo": 0.5}. IF null or {} then no model specific budget.
Examples:
1. Allow users to turn on/off pii masking
```bash
curl --location 'http://0.0.0.0:8000/key/generate' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"permissions": {"allow_pii_controls": true}
}'
```
Returns:
- key: (str) The generated api key
@ -4871,7 +4961,7 @@ async def auth_callback(request: Request):
if user_id is None:
user_id = getattr(result, "first_name", "") + getattr(result, "last_name", "")
response = await generate_key_helper_fn(
**{"duration": "1hr", "key_max_budget": 0, "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": user_id, "team_id": "litellm-dashboard", "user_email": user_email} # type: ignore
**{"duration": "1hr", "key_max_budget": 0.01, "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": user_id, "team_id": "litellm-dashboard", "user_email": user_email} # type: ignore
)
key = response["token"] # type: ignore
user_id = response["user_id"] # type: ignore


@ -24,6 +24,8 @@ model LiteLLM_TeamTable {
budget_reset_at DateTime?
created_at DateTime @default(now()) @map("created_at")
updated_at DateTime @default(now()) @updatedAt @map("updated_at")
model_spend Json @default("{}")
model_max_budget Json @default("{}")
}
// Track spend, rate limit, budget Users
@ -41,6 +43,8 @@ model LiteLLM_UserTable {
budget_duration String?
budget_reset_at DateTime?
allowed_cache_controls String[] @default([])
model_spend Json @default("{}")
model_max_budget Json @default("{}")
}
// Generate Tokens for Proxy
@ -64,6 +68,8 @@ model LiteLLM_VerificationToken {
budget_duration String?
budget_reset_at DateTime?
allowed_cache_controls String[] @default([])
model_spend Json @default("{}")
model_max_budget Json @default("{}")
}
// store proxy config.yaml


@ -1379,19 +1379,22 @@ async def _read_request_body(request):
"""
import ast, json
request_data = {}
if request is None:
return request_data
body = await request.body()
if body == b"" or body is None:
return request_data
body_str = body.decode()
try:
request_data = ast.literal_eval(body_str)
request_data = {}
if request is None:
return request_data
body = await request.body()
if body == b"" or body is None:
return request_data
body_str = body.decode()
try:
request_data = ast.literal_eval(body_str)
except:
request_data = json.loads(body_str)
return request_data
except:
request_data = json.loads(body_str)
return request_data
return {}
def _is_valid_team_configs(team_id=None, team_config=None, request_data=None):
@ -1408,6 +1411,22 @@ def _is_valid_team_configs(team_id=None, team_config=None, request_data=None):
return
def _is_user_proxy_admin(user_id_information=None):
if (
user_id_information == None
or len(user_id_information) == 0
or user_id_information[0] == None
):
return False
_user = user_id_information[0]
if (
_user.get("user_role", None) is not None
and _user.get("user_role") == "proxy_admin"
):
return True
return False
# LiteLLM Admin UI - Non SSO Login
html_form = """
<!DOCTYPE html>


@ -1912,7 +1912,7 @@ def test_mistral_anyscale_stream():
# test_baseten_wizardLMcompletion_withbase()
# def test_baseten_mosaic_ML_completion_withbase():
# model_name = "31dxrj3"
# model_name = "31dxrj3",
# litellm.api_base = "https://app.baseten.co"
# try:
# response = completion(model=model_name, messages=messages)


@ -1101,6 +1101,116 @@ def test_call_with_key_over_budget(prisma_client):
print(vars(e))
def test_call_with_key_over_model_budget(prisma_client):
# 12. Make a call with a key that is over its model-specific budget, expect to fail
setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client)
setattr(litellm.proxy.proxy_server, "master_key", "sk-1234")
try:
async def test():
await litellm.proxy.proxy_server.prisma_client.connect()
# set budget for chatgpt-v-2 to 0.000001, expect the next request to fail
request = GenerateKeyRequest(
max_budget=1000,
model_max_budget={
"chatgpt-v-2": 0.000001,
},
metadata={"user_api_key": 0.0001},
)
key = await generate_key_fn(request)
print(key)
generated_key = key.key
user_id = key.user_id
bearer_token = "Bearer " + generated_key
request = Request(scope={"type": "http"})
request._url = URL(url="/chat/completions")
async def return_body():
return b'{"model": "chatgpt-v-2"}'
request.body = return_body
# use generated key to auth in
result = await user_api_key_auth(request=request, api_key=bearer_token)
print("result from user auth with new key", result)
# update spend using track_cost callback, make 2nd request, it should fail
from litellm.proxy.proxy_server import (
_PROXY_track_cost_callback as track_cost_callback,
)
from litellm import ModelResponse, Choices, Message, Usage
from litellm.caching import Cache
litellm.cache = Cache()
import time
request_id = f"chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac{time.time()}"
resp = ModelResponse(
id=request_id,
choices=[
Choices(
finish_reason=None,
index=0,
message=Message(
content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
role="assistant",
),
)
],
model="gpt-35-turbo", # azure always has model written like this
usage=Usage(prompt_tokens=210, completion_tokens=200, total_tokens=410),
)
await track_cost_callback(
kwargs={
"model": "chatgpt-v-2",
"stream": False,
"litellm_params": {
"metadata": {
"user_api_key": hash_token(generated_key),
"user_api_key_user_id": user_id,
}
},
"response_cost": 0.00002,
},
completion_response=resp,
start_time=datetime.now(),
end_time=datetime.now(),
)
await asyncio.sleep(10)
# test spend_log was written and we can read it
spend_logs = await view_spend_logs(request_id=request_id)
print("read spend logs", spend_logs)
assert len(spend_logs) == 1
spend_log = spend_logs[0]
assert spend_log.request_id == request_id
assert spend_log.spend == float("2e-05")
assert spend_log.model == "chatgpt-v-2"
assert (
spend_log.cache_key
== "a61ae14fe4a8b8014a61e6ae01a100c8bc6770ac37c293242afed954bc69207d"
)
# use generated key to auth in
result = await user_api_key_auth(request=request, api_key=bearer_token)
print("result from user auth with new key", result)
pytest.fail(f"This should have failed!. They key crossed it's budget")
asyncio.run(test())
except Exception as e:
# print(f"Error - {str(e)}")
traceback.print_exc()
error_detail = e.message
assert "Authentication Error, ExceededModelBudget:" in error_detail
print(vars(e))
@pytest.mark.asyncio()
async def test_call_with_key_never_over_budget(prisma_client):
# Make a call with a key with budget=None, it should never fail
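
This test drives the new per-model budget path: the key has a large key-wide max_budget (1000) but a tiny model_max_budget for chatgpt-v-2, so the second request, made after $0.00002 of tracked spend, must be rejected with ExceededModelBudget. A rough sketch of the kind of check being exercised (illustrative only, not the proxy's enforcement code):

def check_model_budget(model: str, model_spend: dict, model_max_budget: dict) -> None:
    # Raise once recorded spend for the model reaches its per-model cap.
    cap = model_max_budget.get(model)
    if cap is not None and model_spend.get(model, 0.0) >= cap:
        raise Exception(
            f"ExceededModelBudget: {model} spend {model_spend.get(model, 0.0)} >= budget {cap}"
        )

budgets = {"chatgpt-v-2": 0.000001}
check_model_budget("chatgpt-v-2", {}, budgets)  # first request: no spend yet, passes
try:
    # after track_cost_callback records $0.00002 of spend, the next request fails
    check_model_budget("chatgpt-v-2", {"chatgpt-v-2": 0.00002}, budgets)
except Exception as e:
    print(e)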

View file

@@ -89,7 +89,6 @@ from .exceptions import (
     UnprocessableEntityError,
 )

-verbose_logger.debug(f"sys.path: {sys.path}")

 try:
     from .proxy.enterprise.enterprise_callbacks.generic_api_callback import (
         GenericAPILogger,
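
The surrounding try: is the usual optional-dependency guard: the enterprise-only callback is imported when available, and an open-source install keeps working without it. A hedged sketch of the pattern (the except arm below is an assumption; the real module may recover differently):

try:
    from litellm.proxy.enterprise.enterprise_callbacks.generic_api_callback import (
        GenericAPILogger,  # enterprise-only callback, absent in plain installs
    )
except ImportError:
    GenericAPILogger = None  # assumption: treat the callback as unavailable

if GenericAPILogger is None:
    print("enterprise callbacks not installed; continuing without GenericAPILogger")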

View file

@@ -1,11 +1,16 @@
 [tool.poetry]
 name = "litellm"
-version = "1.25.0"
+version = "1.25.2"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
 readme = "README.md"

+[tool.poetry.urls]
+homepage = "https://litellm.ai"
+repository = "https://github.com/BerriAI/litellm"
+documentation = "https://docs.litellm.ai"
+
 [tool.poetry.dependencies]
 python = ">=3.8.1,<4.0, !=3.9.7"
 openai = ">=1.0.0"
@@ -69,7 +74,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"

 [tool.commitizen]
-version = "1.25.0"
+version = "1.25.2"
 version_files = [
     "pyproject.toml:^version"
 ]

View file

@@ -24,6 +24,8 @@ model LiteLLM_TeamTable {
   budget_reset_at DateTime?
   created_at DateTime @default(now()) @map("created_at")
   updated_at DateTime @default(now()) @updatedAt @map("updated_at")
+  model_spend Json @default("{}")
+  model_max_budget Json @default("{}")
 }

 // Track spend, rate limit, budget Users
@@ -41,6 +43,8 @@ model LiteLLM_UserTable {
   budget_duration String?
   budget_reset_at DateTime?
   allowed_cache_controls String[] @default([])
+  model_spend Json @default("{}")
+  model_max_budget Json @default("{}")
 }

 // Generate Tokens for Proxy
@@ -64,6 +68,8 @@ model LiteLLM_VerificationToken {
   budget_duration String?
   budget_reset_at DateTime?
   allowed_cache_controls String[] @default([])
+  model_spend Json @default("{}")
+  model_max_budget Json @default("{}")
 }

 // store proxy config.yaml
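
All three tables gain the same pair of Json columns, defaulting to "{}", so a fresh team, user, or key starts with an empty per-model ledger. A hedged sketch of accumulating spend into such a column value (the helper name and flow are illustrative, not the proxy's actual code):

import json

def add_model_spend(model_spend_json: str, model: str, cost: float) -> str:
    # Accumulate per-model spend inside the JSON blob stored on the row.
    spend = json.loads(model_spend_json or "{}")
    spend[model] = spend.get(model, 0.0) + cost
    return json.dumps(spend)

row_value = "{}"  # fresh key: the column default
row_value = add_model_spend(row_value, "chatgpt-v-2", 0.00002)
print(row_value)  # {"chatgpt-v-2": 2e-05}
row_value = add_model_spend(row_value, "chatgpt-v-2", 0.00002)
print(row_value)  # {"chatgpt-v-2": 4e-05}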

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@@ -1 +1 @@
(Diff of the prebuilt admin UI HTML suppressed: both versions are one long generated Next.js page; the only changes are the page chunk, page-7bb820bd6902dbf2.js -> page-2322bcdc2ec71284.js, and the buildId, unBuvDqydg0yodtP5c3nQ -> S_8LZOnl2nyURq-NYnh2p.)

View file

@@ -1,7 +1,7 @@
 2:I[77831,[],""]
-3:I[48016,["145","static/chunks/145-9c160ad5539e000f.js","931","static/chunks/app/page-7bb820bd6902dbf2.js"],""]
+3:I[48016,["145","static/chunks/145-9c160ad5539e000f.js","931","static/chunks/app/page-2322bcdc2ec71284.js"],""]
 4:I[5613,[],""]
 5:I[31778,[],""]
-0:["unBuvDqydg0yodtP5c3nQ", ... (rest of the long generated RSC payload line, otherwise unchanged)
+0:["S_8LZOnl2nyURq-NYnh2p", ... (rest of the long generated RSC payload line, otherwise unchanged)
 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
 1:null

View file

@@ -239,7 +239,7 @@ export const userSpendLogsCall = async (
 export const keyInfoCall = async (accessToken: String, keys: String[]) => {
   try {
-    let url = proxyBaseUrl ? `${proxyBaseUrl}/v2/key/info` : `/key/info`;
+    let url = proxyBaseUrl ? `${proxyBaseUrl}/v2/key/info` : `/v2/key/info`;
     const response = await fetch(url, {
       method: "POST",

View file

@@ -49,6 +49,14 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
   const [accessToken, setAccessToken] = useState<string | null>(null);
   const [userModels, setUserModels] = useState<string[]>([]);

+  // check if window is not undefined
+  if (typeof window !== "undefined") {
+    window.addEventListener('beforeunload', function() {
+      // Clear session storage
+      sessionStorage.clear();
+    });
+  }
+
   function formatUserRole(userRole: string) {
     if (!userRole) {
       return "Undefined Role";
@@ -70,6 +78,7 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
   // Moved useEffect inside the component and used a condition to run fetch only if the params are available
   useEffect(() => {
     if (token) {
       const decoded = jwtDecode(token) as { [key: string]: any };
       if (decoded) {
@@ -97,22 +106,22 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
       }
     }

     if (userID && accessToken && userRole && !data) {
-      const cachedData = localStorage.getItem("userData" + userID);
-      const cachedSpendData = localStorage.getItem("userSpendData" + userID);
-      const cachedUserModels = localStorage.getItem("userModels" + userID);
+      const cachedData = sessionStorage.getItem("userData" + userID);
+      const cachedSpendData = sessionStorage.getItem("userSpendData" + userID);
+      const cachedUserModels = sessionStorage.getItem("userModels" + userID);
       if (cachedData && cachedSpendData && cachedUserModels) {
         setData(JSON.parse(cachedData));
         setUserSpendData(JSON.parse(cachedSpendData));
         setUserModels(JSON.parse(cachedUserModels));
       } else {
         const fetchData = async () => {
           try {
             const response = await userInfoCall(accessToken, userID, userRole);
             setUserSpendData(response["user_info"]);
             setData(response["keys"]); // Assuming this is the correct path to your data
-            localStorage.setItem("userData" + userID, JSON.stringify(response["keys"]));
-            localStorage.setItem(
+            sessionStorage.setItem("userData" + userID, JSON.stringify(response["keys"]));
+            sessionStorage.setItem(
               "userSpendData" + userID,
               JSON.stringify(response["user_info"])
             );
@@ -126,7 +135,7 @@ const UserDashboard: React.FC<UserDashboardProps> = ({
           console.log("userModels:", userModels);

-          localStorage.setItem("userModels" + userID, JSON.stringify(available_model_names));
+          sessionStorage.setItem("userModels" + userID, JSON.stringify(available_model_names));