Merge branch 'main' into main

@@ -28,8 +28,9 @@ jobs:
 pip install "pytest==7.3.1"
 pip install "pytest-asyncio==0.21.1"
 pip install mypy
-pip install "google-generativeai>=0.3.2"
+pip install "google-generativeai==0.3.2"
-pip install "google-cloud-aiplatform>=1.38.0"
+pip install "google-cloud-aiplatform==1.43.0"
+pip install pyarrow
 pip install "boto3>=1.28.57"
 pip install "aioboto3>=12.3.0"
 pip install langchain

@@ -48,6 +49,7 @@ jobs:
 pip install argon2-cffi
 pip install "pytest-mock==3.12.0"
 pip install python-multipart
+pip install google-cloud-aiplatform
 - save_cache:
     paths:
       - ./venv

@@ -152,10 +154,11 @@ jobs:
 pip install "pytest-mock==3.12.0"
 pip install "pytest-asyncio==0.21.1"
 pip install mypy
-pip install "google-generativeai>=0.3.2"
+pip install "google-generativeai==0.3.2"
-pip install "google-cloud-aiplatform>=1.38.0"
+pip install "google-cloud-aiplatform==1.43.0"
-pip install "boto3>=1.28.57"
+pip install pyarrow
-pip install "aioboto3>=12.3.0"
+pip install "boto3==1.34.34"
+pip install "aioboto3==12.3.0"
 pip install langchain
 pip install "langfuse>=2.0.0"
 pip install numpydoc

@@ -7,8 +7,7 @@ baseten
 cohere
 redis
 anthropic
-boto3
 orjson
 pydantic
-google-cloud-aiplatform
+google-cloud-aiplatform==1.43.0
 redisvl==0.0.7 # semantic caching

.github/workflows/ghcr_deploy.yml  (vendored, 45 changed lines)

@@ -43,6 +43,13 @@ jobs:
 push: true
 file: Dockerfile.database
 tags: litellm/litellm-database:${{ github.event.inputs.tag || 'latest' }}
+-
+name: Build and push litellm-spend-logs image
+uses: docker/build-push-action@v5
+with:
+push: true
+file: ./litellm-js/spend-logs/Dockerfile
+tags: litellm/litellm-spend_logs:${{ github.event.inputs.tag || 'latest' }}
 
 build-and-push-image:
 runs-on: ubuntu-latest

@@ -120,6 +127,44 @@ jobs:
 tags: ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-database.outputs.tags }}-latest
 labels: ${{ steps.meta-database.outputs.labels }}
 platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
+
+build-and-push-image-spend-logs:
+runs-on: ubuntu-latest
+permissions:
+contents: read
+packages: write
+steps:
+- name: Checkout repository
+uses: actions/checkout@v4
+
+- name: Log in to the Container registry
+uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
+with:
+registry: ${{ env.REGISTRY }}
+username: ${{ github.actor }}
+password: ${{ secrets.GITHUB_TOKEN }}
+
+- name: Extract metadata (tags, labels) for spend-logs Dockerfile
+id: meta-spend-logs
+uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
+with:
+images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-spend_logs
+# Configure multi platform Docker builds
+- name: Set up QEMU
+uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
+- name: Set up Docker Buildx
+uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345
+
+- name: Build and push Database Docker image
+uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
+with:
+context: .
+file: ./litellm-js/spend-logs/Dockerfile
+push: true
+tags: ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }}, ${{ steps.meta-spend-logs.outputs.tags }}-latest
+labels: ${{ steps.meta-spend-logs.outputs.labels }}
+platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
+
 build-and-push-helm-chart:
 runs-on: ubuntu-latest
 steps:

@@ -1,8 +1,8 @@
 # Base image for building
-ARG LITELLM_BUILD_IMAGE=python:3.9
+ARG LITELLM_BUILD_IMAGE=python:3.11.8-slim
 
 # Runtime image
-ARG LITELLM_RUNTIME_IMAGE=python:3.9-slim
+ARG LITELLM_RUNTIME_IMAGE=python:3.11.8-slim
 # Builder stage
 FROM $LITELLM_BUILD_IMAGE as builder
 
@@ -70,5 +70,5 @@ EXPOSE 4000/tcp
 ENTRYPOINT ["litellm"]
 
 # Append "--detailed_debug" to the end of CMD to view detailed debug logs
-# CMD ["--port", "4000", "--config", "./proxy_server_config.yaml", "--run_gunicorn", "--detailed_debug"]
+# CMD ["--port", "4000", "--config", "./proxy_server_config.yaml"]
-CMD ["--port", "4000", "--config", "./proxy_server_config.yaml", "--run_gunicorn", "--num_workers", "4"]
+CMD ["--port", "4000", "--config", "./proxy_server_config.yaml"]

@@ -1,8 +1,8 @@
 # Base image for building
-ARG LITELLM_BUILD_IMAGE=python:3.9
+ARG LITELLM_BUILD_IMAGE=python:3.11.8-slim
 
 # Runtime image
-ARG LITELLM_RUNTIME_IMAGE=python:3.9-slim
+ARG LITELLM_RUNTIME_IMAGE=python:3.11.8-slim
 # Builder stage
 FROM $LITELLM_BUILD_IMAGE as builder
 
@@ -72,5 +72,5 @@ EXPOSE 4000/tcp
 ENTRYPOINT ["litellm"]
 
 # Append "--detailed_debug" to the end of CMD to view detailed debug logs
-# CMD ["--port", "4000","--run_gunicorn", "--detailed_debug"]
+# CMD ["--port", "4000", "--detailed_debug"]
-CMD ["--port", "4000", "--run_gunicorn"]
+CMD ["--port", "4000"]

@@ -31,11 +31,11 @@ LiteLLM manages:
 - Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)
 - Set Budgets & Rate limits per project, api key, model [OpenAI Proxy Server](https://docs.litellm.ai/docs/simple_proxy)
 
-**Stable Release**: v`1.30.2` 👈 Recommended stable version of proxy.
-
 [**Jump to OpenAI Proxy Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
 [**Jump to Supported LLM Providers**](https://github.com/BerriAI/litellm?tab=readme-ov-file#supported-provider-docs)
 
+🚨 **Stable Release:** v1.34.1
+
 Support for more providers. Missing a provider or LLM Platform, raise a [feature request](https://github.com/BerriAI/litellm/issues/new?assignees=&labels=enhancement&projects=&template=feature_request.yml&title=%5BFeature%5D%3A+).
 
 # Usage ([**Docs**](https://docs.litellm.ai/docs/))

deploy/kubernetes/kub.yaml  (new file, 55 lines)

@@ -0,0 +1,55 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: litellm-deployment
spec:
  replicas: 3
  selector:
    matchLabels:
      app: litellm
  template:
    metadata:
      labels:
        app: litellm
    spec:
      containers:
        - name: litellm-container
          image: ghcr.io/berriai/litellm:main-latest
          env:
            - name: AZURE_API_KEY
              value: "d6f****"
            - name: AZURE_API_BASE
              value: "https://openai
            - name: LITELLM_MASTER_KEY
              value: "sk-1234"
            - name: DATABASE_URL
              value: "postgresql://ishaan:*********""
          args:
            - "--config"
            - "/app/proxy_config.yaml" # Update the path to mount the config file
          volumeMounts: # Define volume mount for proxy_config.yaml
            - name: config-volume
              mountPath: /app
              readOnly: true
          livenessProbe:
            httpGet:
              path: /health/liveliness
              port: 4000
            initialDelaySeconds: 120
            periodSeconds: 15
            successThreshold: 1
            failureThreshold: 3
            timeoutSeconds: 10
          readinessProbe:
            httpGet:
              path: /health/readiness
              port: 4000
            initialDelaySeconds: 120
            periodSeconds: 15
            successThreshold: 1
            failureThreshold: 3
            timeoutSeconds: 10
      volumes: # Define volume to mount proxy_config.yaml
        - name: config-volume
          configMap:
            name: litellm-config

deploy/kubernetes/service.yaml  (new file, 12 lines)

@@ -0,0 +1,12 @@
apiVersion: v1
kind: Service
metadata:
  name: litellm-service
spec:
  selector:
    app: litellm
  ports:
    - protocol: TCP
      port: 4000
      targetPort: 4000
  type: LoadBalancer

@@ -76,7 +76,6 @@ Click on your personal dashboard link. Here's how you can find it 👇
 
 Oh! Looks like our request was made successfully. Let's click on it and see exactly what got sent to the LLM provider.
 
-<Image img={require('../../img/dashboard_log_row.png')} alt="Dashboard Log Row" />
 
 
 

@@ -41,6 +41,35 @@ response = completion(
 )
 ```
 
+## Additional information in metadata
+You can send some additional information to Athina by using the `metadata` field in completion. This can be useful for sending metadata about the request, such as the customer_id, prompt_slug, or any other information you want to track.
+
+```python
+#openai call with additional metadata
+response = completion(
+  model="gpt-3.5-turbo",
+  messages=[
+    {"role": "user", "content": "Hi 👋 - i'm openai"}
+  ],
+  metadata={
+    "environment": "staging",
+    "prompt_slug": "my_prompt_slug/v1"
+  }
+)
+```
+
+Following are the allowed fields in metadata, their types, and their descriptions:
+
+* `environment: Optional[str]` - Environment your app is running in (ex: production, staging, etc). This is useful for segmenting inference calls by environment.
+* `prompt_slug: Optional[str]` - Identifier for the prompt used for inference. This is useful for segmenting inference calls by prompt.
+* `customer_id: Optional[str]` - This is your customer ID. This is useful for segmenting inference calls by customer.
+* `customer_user_id: Optional[str]` - This is the end user ID. This is useful for segmenting inference calls by the end user.
+* `session_id: Optional[str]` - This is the session or conversation ID. This is used for grouping different inferences into a conversation or chain. [Read more](https://docs.athina.ai/logging/grouping_inferences)
+* `external_reference_id: Optional[str]` - This is useful if you want to associate your own internal identifier with the inference logged to Athina.
+* `context: Optional[Union[dict, str]]` - This is the context used as information for the prompt. For RAG applications, this is the "retrieved" data. You may log context as a string or as an object (dictionary).
+* `expected_response: Optional[str]` - This is the reference response to compare against for evaluation purposes. This is useful for segmenting inference calls by expected response.
+* `user_query: Optional[str]` - This is the user's query. For conversational applications, this is the user's last message.
+
 ## Support & Talk with Athina Team
 
 - [Schedule Demo 👋](https://cal.com/shiv-athina/30min)

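As an aside on the metadata fields documented in the hunk above, here is a minimal sketch of a call that sets several of them at once. It is an illustration, not part of the PR: the callback name follows litellm's Athina integration, and the env-var names, keys, and field values are placeholders.

```python
import os
import litellm
from litellm import completion

os.environ["ATHINA_API_KEY"] = "your-athina-api-key"  # placeholder; assumed env var for the Athina logger
os.environ["OPENAI_API_KEY"] = "your-openai-api-key"  # placeholder

litellm.success_callback = ["athina"]  # log successful calls to Athina

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    metadata={
        "environment": "production",         # segment calls by environment
        "prompt_slug": "capital_lookup/v1",  # identifier for the prompt
        "customer_id": "cust_123",           # your customer ID
        "session_id": "sess_456",            # groups inferences into one conversation
        "expected_response": "Paris",        # reference answer for evaluation
    },
)
print(response.choices[0].message.content)
```
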
@@ -60,11 +60,30 @@ export ANTHROPIC_API_KEY="your-api-key"
 
 ### 2. Start the proxy
 
+<Tabs>
+<TabItem value="cli" label="cli">
+
 ```bash
 $ litellm --model claude-3-opus-20240229
 
 # Server running on http://0.0.0.0:4000
 ```
+</TabItem>
+<TabItem value="config" label="config.yaml">
+
+```yaml
+model_list:
+  - model_name: claude-3 ### RECEIVED MODEL NAME ###
+    litellm_params: # all params accepted by litellm.completion() - https://docs.litellm.ai/docs/completion/input
+      model: claude-3-opus-20240229 ### MODEL NAME sent to `litellm.completion()` ###
+      api_key: "os.environ/ANTHROPIC_API_KEY" # does os.getenv("ANTHROPIC_API_KEY")
+```
+
+```bash
+litellm --config /path/to/config.yaml
+```
+</TabItem>
+</Tabs>
 
 ### 3. Test it
 
@@ -76,7 +95,7 @@ $ litellm --model claude-3-opus-20240229
 curl --location 'http://0.0.0.0:4000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data ' {
-"model": "gpt-3.5-turbo",
+"model": "claude-3",
 "messages": [
 {
 "role": "user",

@@ -97,7 +116,7 @@ client = openai.OpenAI(
 )
 
 # request sent to model set on litellm proxy, `litellm --model`
-response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
+response = client.chat.completions.create(model="claude-3", messages = [
 {
 "role": "user",
 "content": "this is a test request, write a short poem"

@@ -121,7 +140,7 @@ from langchain.schema import HumanMessage, SystemMessage
 
 chat = ChatOpenAI(
 openai_api_base="http://0.0.0.0:4000", # set openai_api_base to the LiteLLM Proxy
-model = "gpt-3.5-turbo",
+model = "claude-3",
 temperature=0.1
 )
 

@@ -238,7 +257,7 @@ resp = litellm.completion(
 print(f"\nResponse: {resp}")
 ```
 
-### Usage - "Assistant Pre-fill"
+## Usage - "Assistant Pre-fill"
 
 You can "put words in Claude's mouth" by including an `assistant` role message as the last item in the `messages` array.
 

@@ -271,8 +290,8 @@ Human: How do you say 'Hello' in German? Return your answer as a JSON object, li
 Assistant: {
 ```
 
-### Usage - "System" messages
+## Usage - "System" messages
-If you're using Anthropic's Claude 2.1 with Bedrock, `system` role messages are properly formatted for you.
+If you're using Anthropic's Claude 2.1, `system` role messages are properly formatted for you.
 
 ```python
 import os

@@ -20,7 +20,28 @@ os.environ["AWS_SECRET_ACCESS_KEY"] = ""
 os.environ["AWS_REGION_NAME"] = ""
 
 response = completion(
-            model="sagemaker/jumpstart-dft-meta-textgeneration-llama-2-7b",
+            model="sagemaker/<your-endpoint-name>",
+            messages=[{ "content": "Hello, how are you?","role": "user"}],
+            temperature=0.2,
+            max_tokens=80
+)
+```
+
+### Passing Inference Component Name
+
+If you have multiple models on an endpoint, you'll need to specify the individual model names, do this via `model_id`.
+
+```python
+import os
+from litellm import completion
+
+os.environ["AWS_ACCESS_KEY_ID"] = ""
+os.environ["AWS_SECRET_ACCESS_KEY"] = ""
+os.environ["AWS_REGION_NAME"] = ""
+
+response = completion(
+            model="sagemaker/<your-endpoint-name>",
+            model_id="<your-model-name>",
             messages=[{ "content": "Hello, how are you?","role": "user"}],
             temperature=0.2,
             max_tokens=80

@@ -2,6 +2,7 @@
 
 ## Pre-requisites
 * `pip install -q google-generativeai`
+* Get API Key - https://aistudio.google.com/
 
 # Gemini-Pro
 ## Sample Usage

@@ -97,6 +98,6 @@ print(content)
 | Model Name | Function Call | Required OS Variables |
 |------------------|--------------------------------------|-------------------------|
 | gemini-pro | `completion('gemini/gemini-pro', messages)` | `os.environ['GEMINI_API_KEY']` |
-| gemini-1.5-pro | `completion('gemini/gemini-pro', messages)` | `os.environ['GEMINI_API_KEY']` |
+| gemini-1.5-pro | `completion('gemini/gemini-1.5-pro', messages)` | `os.environ['GEMINI_API_KEY']` |
 | gemini-pro-vision | `completion('gemini/gemini-pro-vision', messages)` | `os.environ['GEMINI_API_KEY']` |
-| gemini-1.5-pro-vision | `completion('gemini/gemini-pro-vision', messages)` | `os.environ['GEMINI_API_KEY']` |
+| gemini-1.5-pro-vision | `completion('gemini/gemini-1.5-pro-vision', messages)` | `os.environ['GEMINI_API_KEY']` |

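To sanity-check the corrected table rows above, here is a minimal sketch of the `gemini-1.5-pro` call exactly as the table describes it. It is an illustration, not part of the diff; the API key is a placeholder.

```python
import os
from litellm import completion

os.environ["GEMINI_API_KEY"] = "your-gemini-api-key"  # placeholder

# Matches the corrected row: gemini-1.5-pro -> completion('gemini/gemini-1.5-pro', messages)
response = completion(
    model="gemini/gemini-1.5-pro",
    messages=[{"role": "user", "content": "Summarize this diff in one sentence."}],
)
print(response.choices[0].message.content)
```
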
@@ -1,61 +0,0 @@
-import Image from '@theme/IdealImage';
-
-# 🚨 Budget Alerting
-
-**Alerts when a project will exceed it’s planned limit**
-
-<Image img={require('../../img/budget_alerts.png')} />
-
-## Quick Start
-
-### 1. Setup Slack Alerting on your Proxy Config.yaml
-
-**Add Slack Webhook to your env**
-Get a slack webhook url from https://api.slack.com/messaging/webhooks
-
-Set `SLACK_WEBHOOK_URL` in your proxy env
-
-```shell
-export SLACK_WEBHOOK_URL="https://hooks.slack.com/services/<>/<>/<>"
-```
-
-**Update proxy config.yaml with slack alerting**
-
-Add `general_settings:alerting`
-```yaml
-model_list:
-    model_name: "azure-model"
-    litellm_params:
-        model: "azure/gpt-35-turbo"
-
-general_settings:
-    alerting: ["slack"]
-```
-
-
-Start proxy
-```bash
-$ litellm --config /path/to/config.yaml
-```
-
-### 2. Create API Key on Proxy Admin UI
-The Admin UI is found on `your-litellm-proxy-endpoint/ui`, example `http://localhost:4000/ui/`
-
-- Set a key name
-- Set a Soft Budget on when to get alerted
-
-<Image img={require('../../img/create_key.png')} />
-
-### 3. Test Slack Alerting on Admin UI
-After creating a key on the Admin UI, click on "Test Slack Alert" to send a test alert to your Slack channel
-<Image img={require('../../img/test_alert.png')} />
-
-### 4. Check Slack
-
-When the test alert works, you should expect to see this on your alerts slack channel
-
-<Image img={require('../../img/budget_alerts.png')} />

@@ -32,8 +32,9 @@ litellm_settings:
 cache: True # set cache responses to True, litellm defaults to using a redis cache
 ```
 
-#### [OPTIONAL] Step 1.5: Add redis namespaces
+#### [OPTIONAL] Step 1.5: Add redis namespaces, default ttl
 
+## Namespace
 If you want to create some folder for your keys, you can set a namespace, like this:
 
 ```yaml

@@ -50,6 +51,16 @@ and keys will be stored like:
 litellm_caching:<hash>
 ```
 
+## TTL
+
+```yaml
+litellm_settings:
+  cache: true
+  cache_params: # set cache params for redis
+    type: redis
+    ttl: 600 # will be cached on redis for 600s
+```
+
 #### Step 2: Add Redis Credentials to .env
 Set either `REDIS_URL` or the `REDIS_HOST` in your os environment, to enable caching.
 

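One way to see the new `ttl` setting behave end-to-end is to send the same request twice through the proxy; with the caching config above, the second identical call should be served from Redis until the 600s TTL expires. The sketch below is an illustration, not part of the PR — the master key, base URL, and model name are placeholders that must match your own proxy setup.

```python
import openai

# Points at a locally running LiteLLM proxy with redis caching enabled as configured above
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

messages = [{"role": "user", "content": "what llm are you"}]

for attempt in range(2):
    # The first call populates the Redis cache; the second identical call should hit it
    resp = client.chat.completions.create(model="gpt-3.5-turbo", messages=messages)
    print(f"attempt {attempt}: {resp.choices[0].message.content}")
```
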
@@ -1,7 +1,10 @@
+import Image from '@theme/IdealImage';
+
 # Modify / Reject Incoming Requests
 
 - Modify data before making llm api calls on proxy
 - Reject data before making llm api calls / before returning the response
+- Enforce 'user' param for all openai endpoint calls
 
 See a complete example with our [parallel request rate limiter](https://github.com/BerriAI/litellm/blob/main/litellm/proxy/hooks/parallel_request_limiter.py)
 

@@ -95,7 +98,7 @@ We might need to update the function schema in the future, to support multiple e
 
 :::
 
-See a complete example with our [Llama Guard content moderation hook](https://github.com/BerriAI/litellm/blob/main/enterprise/hooks/llama_guard.py)
+See a complete example with our [Llama Guard content moderation hook](https://github.com/BerriAI/litellm/blob/main/enterprise/enterprise_hooks/llm_guard.py)
 
 ```python
 from litellm.integrations.custom_logger import CustomLogger

@@ -173,3 +176,18 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
 ],
 }'
 ```
+
+## Advanced - Enforce 'user' param
+
+Set `enforce_user_param` to true, to require all calls to the openai endpoints to have the 'user' param.
+
+[**See Code**](https://github.com/BerriAI/litellm/blob/4777921a31c4c70e4d87b927cb233b6a09cd8b51/litellm/proxy/auth/auth_checks.py#L72)
+
+```yaml
+general_settings:
+  enforce_user_param: True
+```
+
+**Result**
+
+<Image img={require('../../img/end_user_enforcement.png')}/>

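For the `enforce_user_param` setting added above, here is a minimal sketch of a request that passes the check once enforcement is on — an illustration, not part of the PR; the master key, base URL, and model name are placeholders.

```python
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# With `enforce_user_param: True`, a request that omits `user` is rejected by the proxy
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "this is a test request, write a short poem"}],
    user="my-end-user-id",  # 👈 the param the proxy now requires
)
print(response.choices[0].message.content)
```
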
@@ -62,7 +62,6 @@ model_list:
 
 litellm_settings: # module level litellm settings - https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py
 drop_params: True
-set_verbose: True
 
 general_settings:
 master_key: sk-1234 # [OPTIONAL] Only use this if you to require all calls to contain this key (Authorization: Bearer sk-1234)

@@ -558,6 +557,16 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
 }'
 ```
 
+## Disable Swagger UI
+
+To disable the Swagger docs from the base url, set
+
+```env
+NO_DOCS="True"
+```
+
+in your environment, and restart the proxy.
+
 ## Configure DB Pool Limits + Connection Timeouts
 

@@ -593,6 +602,8 @@ general_settings:
 "disable_spend_logs": "boolean", # turn off writing each transaction to the db
 "disable_reset_budget": "boolean", # turn off reset budget scheduled task
 "enable_jwt_auth": "boolean", # allow proxy admin to auth in via jwt tokens with 'litellm_proxy_admin' in claims
+"enforce_user_param": "boolean", # requires all openai endpoint requests to have a 'user' param
+"allowed_routes": "list", # list of allowed proxy API routes - a user can access. (currently JWT-Auth only)
 "key_management_system": "google_kms", # either google_kms or azure_kms
 "master_key": "string",
 "database_url": "string",

@@ -103,7 +103,10 @@ RUN chmod +x entrypoint.sh
 EXPOSE 4000/tcp
 
 # Override the CMD instruction with your desired command and arguments
-CMD ["--port", "4000", "--config", "config.yaml", "--detailed_debug", "--run_gunicorn"]
+# WARNING: FOR PROD DO NOT USE `--detailed_debug` it slows down response times, instead use the following CMD
+# CMD ["--port", "4000", "--config", "config.yaml"]
+
+CMD ["--port", "4000", "--config", "config.yaml", "--detailed_debug"]
 ```
 
 </TabItem>

@@ -232,7 +235,6 @@ Your OpenAI proxy server is now running on `http://127.0.0.1:4000`.
 | [LiteLLM container + Redis](#litellm-container--redis) | + load balance across multiple litellm containers |
 | [LiteLLM Database container + PostgresDB + Redis](#litellm-database-container--postgresdb--redis) | + use Virtual Keys + Track Spend + load balance across multiple litellm containers |
 
-
 ## Deploy with Database
 ### Docker, Kubernetes, Helm Chart
 

@@ -474,25 +476,6 @@ docker run --name litellm-proxy \
 ghcr.io/berriai/litellm-database:main-latest --config your_config.yaml
 ```
 
-## Best Practices for Deploying to Production
-### 1. Switch of debug logs in production
-don't use [`--detailed-debug`, `--debug`](https://docs.litellm.ai/docs/proxy/debugging#detailed-debug) or `litellm.set_verbose=True`. We found using debug logs can add 5-10% latency per LLM API call
-
-### 2. Use `run_gunicorn` and `num_workers`
-
-Example setting `--run_gunicorn` and `--num_workers`
-```shell
-docker run ghcr.io/berriai/litellm-database:main-latest --run_gunicorn --num_workers 4
-```
-
-Why `Gunicorn`?
-- Gunicorn takes care of running multiple instances of your web application
-- Gunicorn is ideal for running litellm proxy on cluster of machines with Kubernetes
-
-Why `num_workers`?
-Setting `num_workers` to the number of CPUs available ensures optimal utilization of system resources by matching the number of worker processes to the available CPU cores.
-
-
 ## Advanced Deployment Settings
 
 ### Customization of the server root path

@@ -525,6 +508,57 @@ Provide an ssl certificate when starting litellm proxy server
 ## Platform-specific Guide
 
 <Tabs>
+<TabItem value="AWS EKS" label="AWS EKS - Kubernetes">
+
+### Kubernetes - Deploy on EKS
+
+Step 1. Create an EKS Cluster with the following spec
+
+```shell
+eksctl create cluster --name=litellm-cluster --region=us-west-2 --node-type=t2.small
+```
+
+Step 2. Mount litellm proxy config on kub cluster
+
+This will mount your local file called `proxy_config.yaml` on kubernetes cluster
+
+```shell
+kubectl create configmap litellm-config --from-file=proxy_config.yaml
+```
+
+Step 3. Apply `kub.yaml` and `service.yaml`
+Clone the following `kub.yaml` and `service.yaml` files and apply locally
+
+- Use this `kub.yaml` file - [litellm kub.yaml](https://github.com/BerriAI/litellm/blob/main/deploy/kubernetes/kub.yaml)
+
+- Use this `service.yaml` file - [litellm service.yaml](https://github.com/BerriAI/litellm/blob/main/deploy/kubernetes/service.yaml)
+
+Apply `kub.yaml`
+```
+kubectl apply -f kub.yaml
+```
+
+Apply `service.yaml` - creates an AWS load balancer to expose the proxy
+```
+kubectl apply -f service.yaml
+
+# service/litellm-service created
+```
+
+Step 4. Get Proxy Base URL
+
+```shell
+kubectl get services
+
+# litellm-service LoadBalancer 10.100.6.31 a472dc7c273fd47fd******.us-west-2.elb.amazonaws.com 4000:30374/TCP 63m
+```
+
+Proxy Base URL = `a472dc7c273fd47fd******.us-west-2.elb.amazonaws.com:4000`
+
+That's it, now you can start using LiteLLM Proxy
+
+</TabItem>
+
+
 <TabItem value="aws-stack" label="AWS Cloud Formation Stack">
 

@@ -12,9 +12,9 @@ Features here are behind a commercial license in our `/enterprise` folder. [**Se
 :::
 
 Features:
+- ✅ Content Moderation with LLM Guard
 - ✅ Content Moderation with LlamaGuard
 - ✅ Content Moderation with Google Text Moderations
-- ✅ Content Moderation with LLM Guard
 - ✅ Reject calls from Blocked User list
 - ✅ Reject calls (incoming / outgoing) with Banned Keywords (e.g. competitors)
 - ✅ Don't log/store specific requests (eg confidential LLM requests)

@@ -23,6 +23,71 @@ Features:
 
 
 ## Content Moderation
+### Content Moderation with LLM Guard
+
+Set the LLM Guard API Base in your environment
+
+```env
+LLM_GUARD_API_BASE = "http://0.0.0.0:8192" # deployed llm guard api
+```
+
+Add `llmguard_moderations` as a callback
+
+```yaml
+litellm_settings:
+  callbacks: ["llmguard_moderations"]
+```
+
+Now you can easily test it
+
+- Make a regular /chat/completion call
+
+- Check your proxy logs for any statement with `LLM Guard:`
+
+Expected results:
+
+```
+LLM Guard: Received response - {"sanitized_prompt": "hello world", "is_valid": true, "scanners": { "Regex": 0.0 }}
+```
+#### Turn on/off per key
+
+**1. Update config**
+```yaml
+litellm_settings:
+  callbacks: ["llmguard_moderations"]
+  llm_guard_mode: "key-specific"
+```
+
+**2. Create new key**
+
+```bash
+curl --location 'http://localhost:4000/key/generate' \
+  --header 'Authorization: Bearer sk-1234' \
+  --header 'Content-Type: application/json' \
+  --data '{
+    "models": ["fake-openai-endpoint"],
+    "permissions": {
+      "enable_llm_guard_check": true # 👈 KEY CHANGE
+    }
+  }'
+
+# Returns {..'key': 'my-new-key'}
+```
+
+**3. Test it!**
+
+```bash
+curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
+  --header 'Content-Type: application/json' \
+  --header 'Authorization: Bearer my-new-key' \ # 👈 TEST KEY
+  --data '{"model": "fake-openai-endpoint", "messages": [
+    {"role": "system", "content": "Be helpful"},
+    {"role": "user", "content": "What do you know?"}
+  ]
+  }'
+```
+
+
 ### Content Moderation with LlamaGuard
 
 Currently works with Sagemaker's LlamaGuard endpoint.

@@ -55,32 +120,7 @@ callbacks: ["llamaguard_moderations"]
 llamaguard_unsafe_content_categories: /path/to/llamaguard_prompt.txt
 ```
 
-### Content Moderation with LLM Guard
-
-Set the LLM Guard API Base in your environment
-
-```env
-LLM_GUARD_API_BASE = "http://0.0.0.0:4000"
-```
-
-Add `llmguard_moderations` as a callback
-
-```yaml
-litellm_settings:
-  callbacks: ["llmguard_moderations"]
-```
-
-Now you can easily test it
-
-- Make a regular /chat/completion call
-
-- Check your proxy logs for any statement with `LLM Guard:`
-
-Expected results:
-
-```
-LLM Guard: Received response - {"sanitized_prompt": "hello world", "is_valid": true, "scanners": { "Regex": 0.0 }}
-```
-
 ### Content Moderation with Google Text Moderation
 

docs/my-website/docs/proxy/grafana_metrics.md  (new file, 53 lines)

@@ -0,0 +1,53 @@
# Grafana, Prometheus metrics [BETA]

LiteLLM Exposes a `/metrics` endpoint for Prometheus to Poll

## Quick Start

If you're using the LiteLLM CLI with `litellm --config proxy_config.yaml` then you need to `pip install prometheus_client==0.20.0`. **This is already pre-installed on the litellm Docker image**

Add this to your proxy config.yaml
```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
litellm_settings:
  success_callback: ["prometheus"]
```

Start the proxy
```shell
litellm --config config.yaml --debug
```

Test Request
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
  --header 'Content-Type: application/json' \
  --data '{
    "model": "gpt-3.5-turbo",
    "messages": [
      {
        "role": "user",
        "content": "what llm are you"
      }
    ]
  }'
```

View Metrics on `/metrics`, Visit `http://localhost:4000/metrics`
```shell
http://localhost:4000/metrics

# <proxy_base_url>/metrics
```

## Metrics Tracked

| Metric Name | Description |
|----------------------|--------------------------------------|
| `litellm_requests_metric` | Number of requests made, per `"user", "key", "model"` |
| `litellm_spend_metric` | Total Spend, per `"user", "key", "model"` |
| `litellm_total_tokens` | input + output tokens per `"user", "key", "model"` |

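As a quick way to check the `/metrics` endpoint from the new doc above without a full Prometheus setup, here is a small sketch (not part of the PR) that scrapes it and prints only the litellm series listed in the table; the URL assumes the proxy from the Quick Start is running locally.

```python
import requests

# The proxy from the Quick Start above, running locally
metrics_text = requests.get("http://localhost:4000/metrics").text

# Print only the litellm_* series (litellm_requests_metric, litellm_spend_metric, litellm_total_tokens)
for line in metrics_text.splitlines():
    if line.startswith("litellm_"):
        print(line)
```
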
docs/my-website/docs/proxy/prod.md  (new file, 249 lines)

@@ -0,0 +1,249 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# ⚡ Best Practices for Production

Expected Performance in Production

1 LiteLLM Uvicorn Worker on Kubernetes

| Description | Value |
|--------------|-------|
| Avg latency | `50ms` |
| Median latency | `51ms` |
| `/chat/completions` Requests/second | `35` |
| `/chat/completions` Requests/minute | `2100` |
| `/chat/completions` Requests/hour | `126K` |

## 1. Switch off Debug Logging

Remove `set_verbose: True` from your config.yaml
```yaml
litellm_settings:
  set_verbose: True
```

You should only see the following level of details in logs on the proxy server
```shell
# INFO: 192.168.2.205:11774 - "POST /chat/completions HTTP/1.1" 200 OK
# INFO: 192.168.2.205:34717 - "POST /chat/completions HTTP/1.1" 200 OK
# INFO: 192.168.2.205:29734 - "POST /chat/completions HTTP/1.1" 200 OK
```

## 2. On Kubernetes - Use 1 Uvicorn worker [Suggested CMD]

Use this Docker `CMD`. This will start the proxy with 1 Uvicorn Async Worker

(Ensure that you're not setting `run_gunicorn` or `num_workers` in the CMD).
```shell
CMD ["--port", "4000", "--config", "./proxy_server_config.yaml"]
```

## 3. Batch write spend updates every 60s

The default proxy batch write is 10s. This is to make it easy to see spend when debugging locally.

In production, we recommend using a longer interval period of 60s. This reduces the number of connections used to make DB writes.

```yaml
general_settings:
  master_key: sk-1234
  proxy_batch_write_at: 5 # 👈 Frequency of batch writing logs to server (in seconds)
```

## 4. Move spend logs to separate server

Writing each spend log to the db can slow down your proxy. In testing we saw a 70% improvement in median response time, by moving writing spend logs to a separate server.

👉 [LiteLLM Spend Logs Server](https://github.com/BerriAI/litellm/tree/main/litellm-js/spend-logs)

**Spend Logs**
This is a log of the key, tokens, model, and latency for each call on the proxy.

[**Full Payload**](https://github.com/BerriAI/litellm/blob/8c9623a6bc4ad9da0a2dac64249a60ed8da719e8/litellm/proxy/utils.py#L1769)

**1. Start the spend logs server**

```bash
docker run -p 3000:3000 \
  -e DATABASE_URL="postgres://.." \
  ghcr.io/berriai/litellm-spend_logs:main-latest

# RUNNING on http://0.0.0.0:3000
```

**2. Connect to proxy**

Example litellm_config.yaml

```yaml
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

general_settings:
  master_key: sk-1234
  proxy_batch_write_at: 5 # 👈 Frequency of batch writing logs to server (in seconds)
```

Add `SPEND_LOGS_URL` as an environment variable when starting the proxy

```bash
docker run \
  -v $(pwd)/litellm_config.yaml:/app/config.yaml \
  -e DATABASE_URL="postgresql://.." \
  -e SPEND_LOGS_URL="http://host.docker.internal:3000" \ # 👈 KEY CHANGE
  -p 4000:4000 \
  ghcr.io/berriai/litellm:main-latest \
  --config /app/config.yaml --detailed_debug

# Running on http://0.0.0.0:4000
```

**3. Test Proxy!**

```bash
curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
  --header 'Content-Type: application/json' \
  --header 'Authorization: Bearer sk-1234' \
  --data '{
    "model": "fake-openai-endpoint",
    "messages": [
      {"role": "system", "content": "Be helpful"},
      {"role": "user", "content": "What do you know?"}
    ]
  }'
```

In your LiteLLM Spend Logs Server, you should see

**Expected Response**

```
Received and stored 1 logs. Total logs in memory: 1
...
Flushed 1 log to the DB.
```

### Machine Specification

A t2.micro should be sufficient to handle 1k logs / minute on this server.

This consumes at max 120MB, and <0.1 vCPU.

## 5. Switch off resetting budgets

Add this to your config.yaml. (Only spend per Key, User and Team will be tracked - spend per API Call will not be written to the LiteLLM Database)
```yaml
general_settings:
  disable_spend_logs: true
  disable_reset_budget: true
```

## 6. Switch off `litellm.telemetry`

Switch off all telemetry tracking done by litellm

```yaml
litellm_settings:
  telemetry: False
```

## Machine Specifications to Deploy LiteLLM

| Service | Spec | CPUs | Memory | Architecture | Version |
| --- | --- | --- | --- | --- | --- |
| Server | `t2.small` | `1vCPUs` | `8GB` | `x86` | - |
| Redis Cache | - | - | - | - | 7.0+ Redis Engine |

## Reference Kubernetes Deployment YAML

Reference Kubernetes `deployment.yaml` that was load tested by us

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: litellm-deployment
spec:
  replicas: 3
  selector:
    matchLabels:
      app: litellm
  template:
    metadata:
      labels:
        app: litellm
    spec:
      containers:
        - name: litellm-container
          image: ghcr.io/berriai/litellm:main-latest
          imagePullPolicy: Always
          env:
            - name: AZURE_API_KEY
              value: "d6******"
            - name: AZURE_API_BASE
              value: "https://ope******"
            - name: LITELLM_MASTER_KEY
              value: "sk-1234"
            - name: DATABASE_URL
              value: "po**********"
          args:
            - "--config"
            - "/app/proxy_config.yaml" # Update the path to mount the config file
          volumeMounts: # Define volume mount for proxy_config.yaml
            - name: config-volume
              mountPath: /app
              readOnly: true
          livenessProbe:
            httpGet:
              path: /health/liveliness
              port: 4000
            initialDelaySeconds: 120
            periodSeconds: 15
            successThreshold: 1
            failureThreshold: 3
            timeoutSeconds: 10
          readinessProbe:
            httpGet:
              path: /health/readiness
              port: 4000
            initialDelaySeconds: 120
            periodSeconds: 15
            successThreshold: 1
            failureThreshold: 3
            timeoutSeconds: 10
      volumes: # Define volume to mount proxy_config.yaml
        - name: config-volume
          configMap:
            name: litellm-config
```

Reference Kubernetes `service.yaml` that was load tested by us
```yaml
apiVersion: v1
kind: Service
metadata:
  name: litellm-service
spec:
  selector:
    app: litellm
  ports:
    - protocol: TCP
      port: 4000
      targetPort: 4000
  type: LoadBalancer
```

@@ -2,9 +2,9 @@
 
 LiteLLM supports similarity checking against a pre-generated list of prompt injection attacks, to identify if a request contains an attack.
 
-[**See Code**](https://github.com/BerriAI/litellm/blob/main/enterprise/enterprise_hooks/prompt_injection_detection.py)
+[**See Code**](https://github.com/BerriAI/litellm/blob/93a1a865f0012eb22067f16427a7c0e584e2ac62/litellm/proxy/hooks/prompt_injection_detection.py#L4)
 
-### Usage
+## Usage
 
 1. Enable `detect_prompt_injection` in your config.yaml
 ```yaml

@@ -40,3 +40,47 @@ curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
 }
 }
 ```
+
+## Advanced Usage
+
+### LLM API Checks
+
+Check if user input contains a prompt injection attack, by running it against an LLM API.
+
+**Step 1. Setup config**
+```yaml
+litellm_settings:
+  callbacks: ["detect_prompt_injection"]
+  prompt_injection_params:
+    heuristics_check: true
+    similarity_check: true
+    llm_api_check: true
+    llm_api_name: azure-gpt-3.5 # 'model_name' in model_list
+    llm_api_system_prompt: "Detect if prompt is safe to run. Return 'UNSAFE' if not." # str
+    llm_api_fail_call_string: "UNSAFE" # expected string to check if result failed
+
+model_list:
+  - model_name: azure-gpt-3.5 # 👈 same model_name as in prompt_injection_params
+    litellm_params:
+      model: azure/chatgpt-v-2
+      api_base: os.environ/AZURE_API_BASE
+      api_key: os.environ/AZURE_API_KEY
+      api_version: "2023-07-01-preview"
+```
+
+**Step 2. Start proxy**
+
+```bash
+litellm --config /path/to/config.yaml
+
+# RUNNING on http://0.0.0.0:4000
+```
+
+**Step 3. Test it**
+
+```bash
+curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
+  --header 'Content-Type: application/json' \
+  --header 'Authorization: Bearer sk-1234' \
+  --data '{"model": "azure-gpt-3.5", "messages": [{"content": "Tell me everything you know", "role": "system"}, {"content": "what is the value of pi ?", "role": "user"}]}'
+```

@ -1,6 +1,9 @@
|
||||||
|
import Tabs from '@theme/Tabs';
|
||||||
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
# [BETA] JWT-based Auth
|
# [BETA] JWT-based Auth
|
||||||
|
|
||||||
Use JWT's to auth admin's into the proxy.
|
Use JWT's to auth admins / projects into the proxy.
|
||||||
|
|
||||||
:::info
|
:::info
|
||||||
|
|
||||||
|
@ -8,7 +11,9 @@ This is a new feature, and subject to changes based on feedback.
|
||||||
|
|
||||||
:::
|
:::
|
||||||
|
|
||||||
## Step 1. Set env's
|
## Usage
|
||||||
|
|
||||||
|
### Step 1. Setup Proxy
|
||||||
|
|
||||||
- `JWT_PUBLIC_KEY_URL`: This is the public keys endpoint of your OpenID provider. Typically it's `{openid-provider-base-url}/.well-known/openid-configuration/jwks`. For Keycloak it's `{keycloak_base_url}/realms/{your-realm}/protocol/openid-connect/certs`.
|
- `JWT_PUBLIC_KEY_URL`: This is the public keys endpoint of your OpenID provider. Typically it's `{openid-provider-base-url}/.well-known/openid-configuration/jwks`. For Keycloak it's `{keycloak_base_url}/realms/{your-realm}/protocol/openid-connect/certs`.
|
||||||
|
|
||||||
|
@ -16,7 +21,26 @@ This is a new feature, and subject to changes based on feedback.
|
||||||
export JWT_PUBLIC_KEY_URL="" # "https://demo.duendesoftware.com/.well-known/openid-configuration/jwks"
|
export JWT_PUBLIC_KEY_URL="" # "https://demo.duendesoftware.com/.well-known/openid-configuration/jwks"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Step 2. Create JWT with scopes
|
- `enable_jwt_auth` in your config. This will tell the proxy to check if a token is a jwt token.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
general_settings:
|
||||||
|
master_key: sk-1234
|
||||||
|
enable_jwt_auth: True
|
||||||
|
|
||||||
|
model_list:
|
||||||
|
- model_name: azure-gpt-3.5
|
||||||
|
litellm_params:
|
||||||
|
model: azure/<your-deployment-name>
|
||||||
|
api_base: os.environ/AZURE_API_BASE
|
||||||
|
api_key: os.environ/AZURE_API_KEY
|
||||||
|
api_version: "2023-07-01-preview"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 2. Create JWT with scopes
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="admin" label="admin">
|
||||||
|
|
||||||
Create a client scope called `litellm_proxy_admin` in your OpenID provider (e.g. Keycloak).
|
Create a client scope called `litellm_proxy_admin` in your OpenID provider (e.g. Keycloak).
|
||||||
|
|
||||||
|
@ -32,8 +56,26 @@ curl --location ' 'https://demo.duendesoftware.com/connect/token'' \
|
||||||
--data-urlencode 'grant_type=password' \
|
--data-urlencode 'grant_type=password' \
|
||||||
--data-urlencode 'scope=litellm_proxy_admin' # 👈 grant this scope
|
--data-urlencode 'scope=litellm_proxy_admin' # 👈 grant this scope
|
||||||
```
|
```
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="project" label="project">
|
||||||
|
|
||||||
## Step 3. Create a proxy key with JWT
|
Create a JWT for your project on your OpenID provider (e.g. Keycloak).
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl --location ' 'https://demo.duendesoftware.com/connect/token'' \
|
||||||
|
--header 'Content-Type: application/x-www-form-urlencoded' \
|
||||||
|
--data-urlencode 'client_id={CLIENT_ID}' \ # 👈 project id
|
||||||
|
--data-urlencode 'client_secret={CLIENT_SECRET}' \
|
||||||
|
--data-urlencode 'grant_type=client_credential' \
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
### Step 3. Test your JWT
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="key" label="/key/generate">
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl --location '{proxy_base_url}/key/generate' \
|
curl --location '{proxy_base_url}/key/generate' \
|
||||||
|
@ -41,3 +83,132 @@ curl --location '{proxy_base_url}/key/generate' \
|
||||||
--header 'Content-Type: application/json' \
|
--header 'Content-Type: application/json' \
|
||||||
--data '{}'
|
--data '{}'
|
||||||
```
|
```
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="llm_call" label="/chat/completions">
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl --location 'http://0.0.0.0:4000/v1/chat/completions' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--header 'Authorization: Bearer eyJhbGciOiJSUzI1...' \
|
||||||
|
--data '{"model": "azure-gpt-3.5", "messages": [ { "role": "user", "content": "What's the weather like in Boston today?" } ]}'
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
## Advanced - Set Accepted JWT Scope Names
|
||||||
|
|
||||||
|
Change the string in JWT 'scopes', that litellm evaluates to see if a user has admin access.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
general_settings:
|
||||||
|
master_key: sk-1234
|
||||||
|
enable_jwt_auth: True
|
||||||
|
litellm_jwtauth:
|
||||||
|
admin_jwt_scope: "litellm-proxy-admin"
|
||||||
|
```
|
||||||
|
### JWT Scopes

Here's what scopes on JWT-Auth tokens look like.

**Can be a list**
```
scope: ["litellm-proxy-admin",...]
```

**Can be a space-separated string**
```
scope: "litellm-proxy-admin ..."
```
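Both forms carry the same information. As a rough sketch of how such a claim can be normalized before checking for the admin scope (illustrative only, not litellm's internal code):

```python
def has_scope(scope_claim, required: str = "litellm-proxy-admin") -> bool:
    """Accepts either a list of scopes or a space-separated scope string."""
    if isinstance(scope_claim, str):
        scopes = scope_claim.split()
    else:
        scopes = list(scope_claim or [])
    return required in scopes


assert has_scope(["litellm-proxy-admin", "email"])
assert has_scope("litellm-proxy-admin email")
assert not has_scope("openid profile")
```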
## Advanced - Allowed Routes

Configure which routes a JWT can access via the config.

By default:

- Admins: can access only management routes (`/team/*`, `/key/*`, `/user/*`)
- Teams: can access only openai routes (`/chat/completions`, etc.) + info routes (`/*/info`)

[**See Code**](https://github.com/BerriAI/litellm/blob/b204f0c01c703317d812a1553363ab0cb989d5b6/litellm/proxy/_types.py#L95)

**Admin Routes**
```yaml
general_settings:
  master_key: sk-1234
  enable_jwt_auth: True
  litellm_jwtauth:
    admin_jwt_scope: "litellm-proxy-admin"
    admin_allowed_routes: ["/v1/embeddings"]
```

**Team Routes**
```yaml
general_settings:
  master_key: sk-1234
  enable_jwt_auth: True
  litellm_jwtauth:
    ...
    team_jwt_scope: "litellm-team" # 👈 Set JWT Scope string
    team_allowed_routes: ["/v1/chat/completions"] # 👈 Set accepted routes
```
## Advanced - Caching Public Keys

Control how long public keys are cached for (in seconds).

```yaml
general_settings:
  master_key: sk-1234
  enable_jwt_auth: True
  litellm_jwtauth:
    admin_jwt_scope: "litellm-proxy-admin"
    admin_allowed_routes: ["/v1/embeddings"]
    public_key_ttl: 600 # 👈 KEY CHANGE
```
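The proxy fetches your provider's public keys to verify token signatures; `public_key_ttl` controls how long those keys are reused before being re-fetched. A rough illustration of the idea (not litellm's actual implementation); the JWKS URL is a placeholder you would swap for your provider's endpoint:

```python
import json
import time
import urllib.request

JWKS_URL = "https://your-openid-provider/.well-known/jwks.json"  # placeholder
PUBLIC_KEY_TTL = 600  # seconds, mirrors public_key_ttl above

_cache = {"keys": None, "fetched_at": 0.0}


def get_public_keys():
    """Return cached JWKS keys, re-fetching once the TTL has expired."""
    now = time.time()
    if _cache["keys"] is None or now - _cache["fetched_at"] > PUBLIC_KEY_TTL:
        with urllib.request.urlopen(JWKS_URL) as resp:
            _cache["keys"] = json.load(resp)
        _cache["fetched_at"] = now
    return _cache["keys"]
```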
## Advanced - Custom JWT Field

Set a custom field in which the team_id exists. By default, the 'client_id' field is checked.

```yaml
general_settings:
  master_key: sk-1234
  enable_jwt_auth: True
  litellm_jwtauth:
    team_id_jwt_field: "client_id" # 👈 KEY CHANGE
```
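For example, given a decoded token payload like the one below, the proxy would read the team id from whichever claim `team_id_jwt_field` names. The payload values here are made up for illustration:

```python
decoded_payload = {
    "sub": "user-123",
    "client_id": "litellm-test-client-id-new",  # 👈 read as team_id with the config above
    "scope": "litellm-team",
}

team_id_jwt_field = "client_id"  # from litellm_jwtauth in config.yaml
team_id = decoded_payload.get(team_id_jwt_field)
print(team_id)  # litellm-test-client-id-new
```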
## All Params

[**See Code**](https://github.com/BerriAI/litellm/blob/b204f0c01c703317d812a1553363ab0cb989d5b6/litellm/proxy/_types.py#L95)
## Advanced - Block Teams

To block all requests for a certain team id, use `/team/block`

**Block Team**

```bash
curl --location 'http://0.0.0.0:4000/team/block' \
--header 'Authorization: Bearer <admin-token>' \
--header 'Content-Type: application/json' \
--data '{
  "team_id": "litellm-test-client-id-new" # 👈 set team id
}'
```

**Unblock Team**

```bash
curl --location 'http://0.0.0.0:4000/team/unblock' \
--header 'Authorization: Bearer <admin-token>' \
--header 'Content-Type: application/json' \
--data '{
  "team_id": "litellm-test-client-id-new" # 👈 set team id
}'
```
@ -47,8 +47,9 @@ Your Proxy Swagger is available on the root of the Proxy: e.g.: `http://localhos

Set the following in your .env on the Proxy

```shell
UI_USERNAME=ishaan-litellm
LITELLM_MASTER_KEY="sk-1234" # this is your master key for using the proxy server
UI_PASSWORD=langchain
UI_USERNAME=ishaan-litellm # username to sign in on UI
UI_PASSWORD=langchain # password to sign in on UI
```

On accessing the LiteLLM UI, you will be prompted to enter your username, password
@ -1,14 +1,14 @@
# 🔑 Virtual Keys, Users
import Tabs from '@theme/Tabs';
Track Spend, Set budgets and create virtual keys for the proxy
import TabItem from '@theme/TabItem';

Grant others temporary access to your proxy, with keys that expire after a set duration.

# 🔑 Virtual Keys
Track Spend, and control model access via virtual keys for the proxy

:::info

- 🔑 [UI to Generate, Edit, Delete Keys (with SSO)](https://docs.litellm.ai/docs/proxy/ui)
- [Deploy LiteLLM Proxy with Key Management](https://docs.litellm.ai/docs/proxy/deploy#deploy-with-database)
- Dockerfile.database for LiteLLM Proxy + Key Management [here](https://github.com/BerriAI/litellm/blob/main/Dockerfile.database)
- [Dockerfile.database for LiteLLM Proxy + Key Management](https://github.com/BerriAI/litellm/blob/main/Dockerfile.database)

:::
@ -30,7 +30,7 @@ export DATABASE_URL=postgresql://<user>:<password>@<host>:<port>/<dbname>
```

You can then generate temporary keys by hitting the `/key/generate` endpoint.
You can then generate keys by hitting the `/key/generate` endpoint.

[**See code**](https://github.com/BerriAI/litellm/blob/7a669a36d2689c7f7890bc9c93e04ff3c2641299/litellm/proxy/proxy_server.py#L672)
@ -46,8 +46,8 @@ model_list:
      model: ollama/llama2

general_settings:
  master_key: sk-1234 # [OPTIONAL] if set all calls to proxy will require either this key or a valid generated token
  master_key: sk-1234
  database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>"
  database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>" # 👈 KEY CHANGE
```

**Step 2: Start litellm**
@ -56,62 +56,220 @@ general_settings:

```shell
litellm --config /path/to/config.yaml
```

**Step 3: Generate temporary keys**
**Step 3: Generate keys**

```shell
curl 'http://0.0.0.0:4000/key/generate' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"], "duration": "20m","metadata": {"user": "ishaan@berri.ai"}}'
--data-raw '{"models": ["gpt-3.5-turbo", "gpt-4"], "metadata": {"user": "ishaan@berri.ai"}}'
```
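The generated key (`sk-...`) can then be used like any OpenAI API key, pointed at the proxy. A minimal sketch with the `openai` Python client; the key and base URL below are placeholders:

```python
import openai

client = openai.OpenAI(
    api_key="sk-<generated-key>",    # key returned by /key/generate
    base_url="http://0.0.0.0:4000",  # your proxy
)

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
)
print(response)
```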
## /key/generate

### Request
```shell
curl 'http://0.0.0.0:4000/key/generate' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
  "models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
  "duration": "20m",
  "metadata": {"user": "ishaan@berri.ai"},
  "team_id": "core-infra",
  "max_budget": 10,
  "soft_budget": 5
}'
```

Request Params:

- `duration`: *Optional[str]* - Specify the length of time the token is valid for. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d").
- `key_alias`: *Optional[str]* - User defined key alias
- `team_id`: *Optional[str]* - The team id of the user
- `models`: *Optional[list]* - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)
- `aliases`: *Optional[dict]* - Any alias mappings, on top of anything in the config.yaml model list. - https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---upgradedowngrade-models
- `config`: *Optional[dict]* - any key-specific configs, overrides config in config.yaml
- `spend`: *Optional[int]* - Amount spent by key. Default is 0. Will be updated by proxy whenever key is used. https://docs.litellm.ai/docs/proxy/virtual_keys#managing-auth---tracking-spend
- `max_budget`: *Optional[float]* - Specify max budget for a given key.
- `soft_budget`: *Optional[float]* - Specify soft limit budget for a given key. Get Alerts when key hits its soft budget
- `model_max_budget`: *Optional[dict[str, float]]* - Specify max budget for each model, `model_max_budget={"gpt4": 0.5, "gpt-5": 0.01}`
- `max_parallel_requests`: *Optional[int]* - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.
- `metadata`: *Optional[dict]* - Metadata for key, store information for key. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }

### Response

```python
{
    "key": "sk-kdEXbIqZRwEeEiHwdg7sFA",  # Bearer token
    "expires": "2023-11-19T01:38:25.834000+00:00",  # datetime object
    "key_name": "sk-...7sFA",  # abbreviated key string, ONLY stored in db if `allow_user_auth: true` set - [see](./ui.md)
    ...
}
```

### Upgrade/Downgrade Models

## Advanced - Spend Tracking

Get spend per:

- key - via `/key/info` [Swagger](https://litellm-api.up.railway.app/#/key%20management/info_key_fn_key_info_get)
- user - via `/user/info` [Swagger](https://litellm-api.up.railway.app/#/user%20management/user_info_user_info_get)
- team - via `/team/info` [Swagger](https://litellm-api.up.railway.app/#/team%20management/team_info_team_info_get)
- ⏳ end-users - via `/end_user/info` - [Comment on this issue for end-user cost tracking](https://github.com/BerriAI/litellm/issues/2633)

**How is it calculated?**

The cost per model is stored [here](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json) and calculated by the [`completion_cost`](https://github.com/BerriAI/litellm/blob/db7974f9f216ee50b53c53120d1e3fc064173b60/litellm/utils.py#L3771) function.

**How is it tracked?**

Spend is automatically tracked for the key in the "LiteLLM_VerificationTokenTable". If the key has an attached 'user_id' or 'team_id', the spend for that user is tracked in the "LiteLLM_UserTable", and team in the "LiteLLM_TeamTable".

<Tabs>
<TabItem value="key-info" label="Key Spend">

You can get spend for a key by using the `/key/info` endpoint.

```bash
curl 'http://0.0.0.0:4000/key/info?key=<user-key>' \
     -X GET \
     -H 'Authorization: Bearer <your-master-key>'
```

This is automatically updated (in USD) when calls are made to /completions, /chat/completions, /embeddings using litellm's completion_cost() function. [**See Code**](https://github.com/BerriAI/litellm/blob/1a6ea20a0bb66491968907c2bfaabb7fe45fc064/litellm/utils.py#L1654).

**Sample response**

```python
{
    "key": "sk-tXL0wt5-lOOVK9sfY2UacA",
    "info": {
        "token": "sk-tXL0wt5-lOOVK9sfY2UacA",
        "spend": 0.0001065,  # 👈 SPEND
        "expires": "2023-11-24T23:19:11.131000Z",
        "models": [
            "gpt-3.5-turbo",
            "gpt-4",
            "claude-2"
        ],
        "aliases": {
            "mistral-7b": "gpt-3.5-turbo"
        },
        "config": {}
    }
}
```

</TabItem>
<TabItem value="user-info" label="User Spend">
|
||||||
|
|
||||||
|
**1. Create a user**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl --location 'http://localhost:4000/user/new' \
|
||||||
|
--header 'Authorization: Bearer <your-master-key>' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--data-raw '{user_email: "krrish@berri.ai"}'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Expected Response**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
{
|
||||||
|
...
|
||||||
|
"expires": "2023-12-22T09:53:13.861000Z",
|
||||||
|
"user_id": "my-unique-id", # 👈 unique id
|
||||||
|
"max_budget": 0.0
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**2. Create a key for that user**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl 'http://0.0.0.0:4000/key/generate' \
|
||||||
|
--header 'Authorization: Bearer <your-master-key>' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--data-raw '{"models": ["gpt-3.5-turbo", "gpt-4"], "user_id": "my-unique-id"}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Returns a key - `sk-...`.
|
||||||
|
|
||||||
|
**3. See spend for user**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl 'http://0.0.0.0:4000/user/info?user_id=my-unique-id' \
|
||||||
|
-X GET \
|
||||||
|
-H 'Authorization: Bearer <your-master-key>'
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected Response
|
||||||
|
|
||||||
|
```bash
|
||||||
|
{
|
||||||
|
...
|
||||||
|
"spend": 0 # 👈 SPEND
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="team-info" label="Team Spend">
|
||||||
|
|
||||||
|
Use teams, if you want keys to be owned by multiple people (e.g. for a production app).
|
||||||
|
|
||||||
|
**1. Create a team**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl --location 'http://localhost:4000/team/new' \
|
||||||
|
--header 'Authorization: Bearer <your-master-key>' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--data-raw '{"team_alias": "my-awesome-team"}'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Expected Response**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
{
|
||||||
|
...
|
||||||
|
"expires": "2023-12-22T09:53:13.861000Z",
|
||||||
|
"team_id": "my-unique-id", # 👈 unique id
|
||||||
|
"max_budget": 0.0
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**2. Create a key for that team**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl 'http://0.0.0.0:4000/key/generate' \
|
||||||
|
--header 'Authorization: Bearer <your-master-key>' \
|
||||||
|
--header 'Content-Type: application/json' \
|
||||||
|
--data-raw '{"models": ["gpt-3.5-turbo", "gpt-4"], "team_id": "my-unique-id"}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Returns a key - `sk-...`.
|
||||||
|
|
||||||
|
**3. See spend for team**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl 'http://0.0.0.0:4000/team/info?team_id=my-unique-id' \
|
||||||
|
-X GET \
|
||||||
|
-H 'Authorization: Bearer <your-master-key>'
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected Response
|
||||||
|
|
||||||
|
```bash
|
||||||
|
{
|
||||||
|
...
|
||||||
|
"spend": 0 # 👈 SPEND
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
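The spend values shown above come from litellm's `completion_cost()` helper (linked earlier). A minimal sketch of calculating the cost of a single response yourself, assuming `OPENAI_API_KEY` is set in your environment:

```python
import litellm
from litellm import completion_cost

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
)

# cost (in USD) computed from model_prices_and_context_window.json
cost = completion_cost(completion_response=response)
print(f"cost (USD): {cost}")
```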
## Advanced - Model Access

### Restrict models by `team_id`
`litellm-dev` can only access `azure-gpt-3.5`

**1. Create a team via `/team/new`**
```shell
curl --location 'http://localhost:4000/team/new' \
--header 'Authorization: Bearer <your-master-key>' \
--header 'Content-Type: application/json' \
--data-raw '{
  "team_alias": "litellm-dev",
  "models": ["azure-gpt-3.5"]
}'

# returns {...,"team_id": "my-unique-id"}
```

**2. Create a key for team**
```shell
curl --location 'http://localhost:4000/key/generate' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data-raw '{"team_id": "my-unique-id"}'
```

**3. Test it**
```shell
curl --location 'http://0.0.0.0:4000/chat/completions' \
--header 'Content-Type: application/json' \
--header 'Authorization: Bearer sk-qo992IjKOC2CHKZGRoJIGA' \
--data '{
  "model": "BEDROCK_GROUP",
  "messages": [
    {
      "role": "user",
      "content": "hi"
    }
  ]
}'
```

```shell
{"error":{"message":"Invalid model for team litellm-dev: BEDROCK_GROUP. Valid models for team are: ['azure-gpt-3.5']\n\n\nTraceback (most recent call last):\n File \"/Users/ishaanjaffer/Github/litellm/litellm/proxy/proxy_server.py\", line 2298, in chat_completion\n _is_valid_team_configs(\n File \"/Users/ishaanjaffer/Github/litellm/litellm/proxy/utils.py\", line 1296, in _is_valid_team_configs\n raise Exception(\nException: Invalid model for team litellm-dev: BEDROCK_GROUP. Valid models for team are: ['azure-gpt-3.5']\n\n","type":"None","param":"None","code":500}}
```
### Model Aliases

If a user is expected to use a given model (i.e. gpt-3.5), and you want to:
@ -189,419 +347,7 @@ curl --location 'http://localhost:4000/key/generate' \
|
||||||
"max_budget": 0,}'
|
"max_budget": 0,}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Advanced - Custom Auth
|
||||||
## /key/info
|
|
||||||
|
|
||||||
### Request
|
|
||||||
```shell
|
|
||||||
curl -X GET "http://0.0.0.0:4000/key/info?key=sk-02Wr4IAlN3NvPXvL5JVvDA" \
|
|
||||||
-H "Authorization: Bearer sk-1234"
|
|
||||||
```
|
|
||||||
|
|
||||||
Request Params:
|
|
||||||
- key: str - The key you want the info for
|
|
||||||
|
|
||||||
### Response
|
|
||||||
|
|
||||||
`token` is the hashed key (The DB stores the hashed key for security)
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"key": "sk-02Wr4IAlN3NvPXvL5JVvDA",
|
|
||||||
"info": {
|
|
||||||
"token": "80321a12d03412c527f2bd9db5fabd746abead2e1d50b435a534432fbaca9ef5",
|
|
||||||
"spend": 0.0,
|
|
||||||
"expires": "2024-01-18T23:52:09.125000+00:00",
|
|
||||||
"models": ["azure-gpt-3.5", "azure-embedding-model"],
|
|
||||||
"aliases": {},
|
|
||||||
"config": {},
|
|
||||||
"user_id": "ishaan2@berri.ai",
|
|
||||||
"team_id": "None",
|
|
||||||
"max_parallel_requests": null,
|
|
||||||
"metadata": {}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
## /key/update
|
|
||||||
|
|
||||||
### Request
|
|
||||||
```shell
|
|
||||||
curl 'http://0.0.0.0:4000/key/update' \
|
|
||||||
--header 'Authorization: Bearer <your-master-key>' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--data-raw '{
|
|
||||||
"key": "sk-kdEXbIqZRwEeEiHwdg7sFA",
|
|
||||||
"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
|
|
||||||
"metadata": {"user": "ishaan@berri.ai"},
|
|
||||||
"team_id": "core-infra"
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
Request Params:
|
|
||||||
- key: str - The key that needs to be updated.
|
|
||||||
|
|
||||||
- models: list or null (optional) - Specify the models a token has access to. If null, then the token has access to all models on the server.
|
|
||||||
|
|
||||||
- metadata: dict or null (optional) - Pass metadata for the updated token. If null, defaults to an empty dictionary.
|
|
||||||
|
|
||||||
- team_id: str or null (optional) - Specify the team_id for the associated key.
|
|
||||||
|
|
||||||
### Response
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"key": "sk-kdEXbIqZRwEeEiHwdg7sFA",
|
|
||||||
"models": ["gpt-3.5-turbo", "gpt-4", "claude-2"],
|
|
||||||
"metadata": {
|
|
||||||
"user": "ishaan@berri.ai"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## /key/delete
|
|
||||||
|
|
||||||
### Request
|
|
||||||
```shell
|
|
||||||
curl 'http://0.0.0.0:4000/key/delete' \
|
|
||||||
--header 'Authorization: Bearer <your-master-key>' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--data-raw '{
|
|
||||||
"keys": ["sk-kdEXbIqZRwEeEiHwdg7sFA"]
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
Request Params:
|
|
||||||
- keys: List[str] - List of keys to delete
|
|
||||||
|
|
||||||
### Response
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"deleted_keys": ["sk-kdEXbIqZRwEeEiHwdg7sFA"]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## /user/new
|
|
||||||
|
|
||||||
### Request
|
|
||||||
|
|
||||||
All [key/generate params supported](#keygenerate) for creating a user
|
|
||||||
```shell
|
|
||||||
curl 'http://0.0.0.0:4000/user/new' \
|
|
||||||
--header 'Authorization: Bearer sk-1234' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--data-raw '{
|
|
||||||
"user_id": "ishaan1",
|
|
||||||
"user_email": "ishaan@litellm.ai",
|
|
||||||
"user_role": "admin",
|
|
||||||
"team_id": "cto-team",
|
|
||||||
"max_budget": 20,
|
|
||||||
"budget_duration": "1h"
|
|
||||||
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
Request Params:
|
|
||||||
|
|
||||||
- user_id: str (optional - defaults to uuid) - The unique identifier for the user.
|
|
||||||
- user_email: str (optional - defaults to "") - The email address associated with the user.
|
|
||||||
- user_role: str (optional - defaults to "app_user") - The role assigned to the user. Can be "admin", "app_owner", "app_user"
|
|
||||||
|
|
||||||
**Possible `user_role` values**
|
|
||||||
```
|
|
||||||
"admin" - Maintaining the proxy and owning the overall budget
|
|
||||||
"app_owner" - employees maintaining the apps, each owner may own more than one app
|
|
||||||
"app_user" - users who know nothing about the proxy. These users get created when you pass `user` to /chat/completions
|
|
||||||
```
|
|
||||||
- team_id: str (optional - defaults to "") - The identifier for the team to which the user belongs.
|
|
||||||
- max_budget: float (optional - defaults to `null`) - The maximum budget allocated for the user. No budget checks done if `max_budget==null`
|
|
||||||
- budget_duration: str (optional - defaults to `null`) - The duration for which the budget is valid, e.g., "1h", "1d"
|
|
||||||
|
|
||||||
### Response
|
|
||||||
A key will be generated for the new user created
|
|
||||||
|
|
||||||
```shell
|
|
||||||
{
|
|
||||||
"models": [],
|
|
||||||
"spend": 0.0,
|
|
||||||
"max_budget": null,
|
|
||||||
"user_id": "ishaan1",
|
|
||||||
"team_id": null,
|
|
||||||
"max_parallel_requests": null,
|
|
||||||
"metadata": {},
|
|
||||||
"tpm_limit": null,
|
|
||||||
"rpm_limit": null,
|
|
||||||
"budget_duration": null,
|
|
||||||
"allowed_cache_controls": [],
|
|
||||||
"key_alias": null,
|
|
||||||
"duration": null,
|
|
||||||
"aliases": {},
|
|
||||||
"config": {},
|
|
||||||
"key": "sk-JflB33ucTqc2NYvNAgiBCA",
|
|
||||||
"key_name": null,
|
|
||||||
"expires": null
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
## /user/info
|
|
||||||
|
|
||||||
### Request
|
|
||||||
|
|
||||||
#### View all Users
|
|
||||||
If you're trying to view all users, we recommend using pagination with the following args
|
|
||||||
- `view_all=true`
|
|
||||||
- `page=0` Optional(int) min = 0, default=0
|
|
||||||
- `page_size=25` Optional(int) min = 1, default = 25
|
|
||||||
```shell
|
|
||||||
curl -X GET "http://0.0.0.0:4000/user/info?view_all=true&page=0&page_size=25" -H "Authorization: Bearer sk-1234"
|
|
||||||
```
|
|
||||||
|
|
||||||
#### View specific user_id
|
|
||||||
```shell
|
|
||||||
curl -X GET "http://0.0.0.0:4000/user/info?user_id=228da235-eef0-4c30-bf53-5d6ac0d278c2" -H "Authorization: Bearer sk-1234"
|
|
||||||
```
|
|
||||||
|
|
||||||
### Response
|
|
||||||
View user spend, budget, models, keys and teams
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"user_id": "228da235-eef0-4c30-bf53-5d6ac0d278c2",
|
|
||||||
"user_info": {
|
|
||||||
"user_id": "228da235-eef0-4c30-bf53-5d6ac0d278c2",
|
|
||||||
"team_id": null,
|
|
||||||
"teams": [],
|
|
||||||
"user_role": "app_user",
|
|
||||||
"max_budget": null,
|
|
||||||
"spend": 200000.0,
|
|
||||||
"user_email": null,
|
|
||||||
"models": [],
|
|
||||||
"max_parallel_requests": null,
|
|
||||||
"tpm_limit": null,
|
|
||||||
"rpm_limit": null,
|
|
||||||
"budget_duration": null,
|
|
||||||
"budget_reset_at": null,
|
|
||||||
"allowed_cache_controls": [],
|
|
||||||
"model_spend": {
|
|
||||||
"chatgpt-v-2": 200000
|
|
||||||
},
|
|
||||||
"model_max_budget": {}
|
|
||||||
},
|
|
||||||
"keys": [
|
|
||||||
{
|
|
||||||
"token": "16c337f9df00a0e6472627e39a2ed02e67bc9a8a760c983c4e9b8cad7954f3c0",
|
|
||||||
"key_name": null,
|
|
||||||
"key_alias": null,
|
|
||||||
"spend": 200000.0,
|
|
||||||
"expires": null,
|
|
||||||
"models": [],
|
|
||||||
"aliases": {},
|
|
||||||
"config": {},
|
|
||||||
"user_id": "228da235-eef0-4c30-bf53-5d6ac0d278c2",
|
|
||||||
"team_id": null,
|
|
||||||
"permissions": {},
|
|
||||||
"max_parallel_requests": null,
|
|
||||||
"metadata": {},
|
|
||||||
"tpm_limit": null,
|
|
||||||
"rpm_limit": null,
|
|
||||||
"max_budget": null,
|
|
||||||
"budget_duration": null,
|
|
||||||
"budget_reset_at": null,
|
|
||||||
"allowed_cache_controls": [],
|
|
||||||
"model_spend": {
|
|
||||||
"chatgpt-v-2": 200000
|
|
||||||
},
|
|
||||||
"model_max_budget": {}
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"teams": []
|
|
||||||
}
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
## Advanced
|
|
||||||
### Upperbound /key/generate params
|
|
||||||
Use this, if you need to control the upperbound that users can use for `max_budget`, `budget_duration` or any `key/generate` param per key.
|
|
||||||
|
|
||||||
Set `litellm_settings:upperbound_key_generate_params`:
|
|
||||||
```yaml
|
|
||||||
litellm_settings:
|
|
||||||
upperbound_key_generate_params:
|
|
||||||
max_budget: 100 # upperbound of $100, for all /key/generate requests
|
|
||||||
duration: "30d" # upperbound of 30 days for all /key/generate requests
|
|
||||||
```
|
|
||||||
|
|
||||||
** Expected Behavior **
|
|
||||||
|
|
||||||
- Send a `/key/generate` request with `max_budget=200`
|
|
||||||
- Key will be created with `max_budget=100` since 100 is the upper bound
|
|
||||||
|
|
||||||
### Default /key/generate params
|
|
||||||
Use this, if you need to control the default `max_budget` or any `key/generate` param per key.
|
|
||||||
|
|
||||||
When a `/key/generate` request does not specify `max_budget`, it will use the `max_budget` specified in `default_key_generate_params`
|
|
||||||
|
|
||||||
Set `litellm_settings:default_key_generate_params`:
|
|
||||||
```yaml
|
|
||||||
litellm_settings:
|
|
||||||
default_key_generate_params:
|
|
||||||
max_budget: 1.5000
|
|
||||||
models: ["azure-gpt-3.5"]
|
|
||||||
duration: # blank means `null`
|
|
||||||
metadata: {"setting":"default"}
|
|
||||||
team_id: "core-infra"
|
|
||||||
```
|
|
||||||
|
|
||||||
### Restrict models by `team_id`
|
|
||||||
`litellm-dev` can only access `azure-gpt-3.5`
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
litellm_settings:
|
|
||||||
default_team_settings:
|
|
||||||
- team_id: litellm-dev
|
|
||||||
models: ["azure-gpt-3.5"]
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Create key with team_id="litellm-dev"
|
|
||||||
```shell
|
|
||||||
curl --location 'http://localhost:4000/key/generate' \
|
|
||||||
--header 'Authorization: Bearer sk-1234' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--data-raw '{"team_id": "litellm-dev"}'
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Use Key to call invalid model - Fails
|
|
||||||
```shell
|
|
||||||
curl --location 'http://0.0.0.0:4000/chat/completions' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--header 'Authorization: Bearer sk-qo992IjKOC2CHKZGRoJIGA' \
|
|
||||||
--data '{
|
|
||||||
"model": "BEDROCK_GROUP",
|
|
||||||
"messages": [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "hi"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
```shell
|
|
||||||
{"error":{"message":"Invalid model for team litellm-dev: BEDROCK_GROUP. Valid models for team are: ['azure-gpt-3.5']\n\n\nTraceback (most recent call last):\n File \"/Users/ishaanjaffer/Github/litellm/litellm/proxy/proxy_server.py\", line 2298, in chat_completion\n _is_valid_team_configs(\n File \"/Users/ishaanjaffer/Github/litellm/litellm/proxy/utils.py\", line 1296, in _is_valid_team_configs\n raise Exception(\nException: Invalid model for team litellm-dev: BEDROCK_GROUP. Valid models for team are: ['azure-gpt-3.5']\n\n","type":"None","param":"None","code":500}}%
|
|
||||||
```
|
|
||||||
|
|
||||||
### Set Budgets - Per Key
|
|
||||||
|
|
||||||
Set `max_budget` in (USD $) param in the `key/generate` request. By default the `max_budget` is set to `null` and is not checked for keys
|
|
||||||
|
|
||||||
```shell
|
|
||||||
curl 'http://0.0.0.0:4000/key/generate' \
|
|
||||||
--header 'Authorization: Bearer <your-master-key>' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--data-raw '{
|
|
||||||
"metadata": {"user": "ishaan@berri.ai"},
|
|
||||||
"team_id": "core-infra",
|
|
||||||
"max_budget": 10,
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
#### Expected Behaviour
|
|
||||||
- Costs Per key get auto-populated in `LiteLLM_VerificationToken` Table
|
|
||||||
- After the key crosses it's `max_budget`, requests fail
|
|
||||||
|
|
||||||
Example Request to `/chat/completions` when key has crossed budget
|
|
||||||
|
|
||||||
```shell
|
|
||||||
curl --location 'http://0.0.0.0:4000/chat/completions' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--header 'Authorization: Bearer sk-ULl_IKCVFy2EZRzQB16RUA' \
|
|
||||||
--data ' {
|
|
||||||
"model": "azure-gpt-3.5",
|
|
||||||
"user": "e09b4da8-ed80-4b05-ac93-e16d9eb56fca",
|
|
||||||
"messages": [
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "respond in 50 lines"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
Expected Response from `/chat/completions` when key has crossed budget
|
|
||||||
```shell
|
|
||||||
{
|
|
||||||
"detail":"Authentication Error, ExceededTokenBudget: Current spend for token: 7.2e-05; Max Budget for Token: 2e-07"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
### Set Budgets - Per User
|
|
||||||
|
|
||||||
LiteLLM exposes a `/user/new` endpoint to create budgets for users, that persist across multiple keys.
|
|
||||||
|
|
||||||
This is documented in the swagger (live on your server root endpoint - e.g. `http://0.0.0.0:4000/`). Here's an example request.
|
|
||||||
|
|
||||||
```shell
|
|
||||||
curl --location 'http://localhost:4000/user/new' \
|
|
||||||
--header 'Authorization: Bearer <your-master-key>' \
|
|
||||||
--header 'Content-Type: application/json' \
|
|
||||||
--data-raw '{"models": ["azure-models"], "max_budget": 0, "user_id": "krrish3@berri.ai"}'
|
|
||||||
```
|
|
||||||
The request is a normal `/key/generate` request body + a `max_budget` field.
|
|
||||||
|
|
||||||
**Sample Response**
|
|
||||||
|
|
||||||
```shell
|
|
||||||
{
|
|
||||||
"key": "sk-YF2OxDbrgd1y2KgwxmEA2w",
|
|
||||||
"expires": "2023-12-22T09:53:13.861000Z",
|
|
||||||
"user_id": "krrish3@berri.ai",
|
|
||||||
"max_budget": 0.0
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Tracking Spend
|
|
||||||
|
|
||||||
You can get spend for a key by using the `/key/info` endpoint.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl 'http://0.0.0.0:4000/key/info?key=<user-key>' \
|
|
||||||
-X GET \
|
|
||||||
-H 'Authorization: Bearer <your-master-key>'
|
|
||||||
```
|
|
||||||
|
|
||||||
This is automatically updated (in USD) when calls are made to /completions, /chat/completions, /embeddings using litellm's completion_cost() function. [**See Code**](https://github.com/BerriAI/litellm/blob/1a6ea20a0bb66491968907c2bfaabb7fe45fc064/litellm/utils.py#L1654).
|
|
||||||
|
|
||||||
**Sample response**
|
|
||||||
|
|
||||||
```python
|
|
||||||
{
|
|
||||||
"key": "sk-tXL0wt5-lOOVK9sfY2UacA",
|
|
||||||
"info": {
|
|
||||||
"token": "sk-tXL0wt5-lOOVK9sfY2UacA",
|
|
||||||
"spend": 0.0001065,
|
|
||||||
"expires": "2023-11-24T23:19:11.131000Z",
|
|
||||||
"models": [
|
|
||||||
"gpt-3.5-turbo",
|
|
||||||
"gpt-4",
|
|
||||||
"claude-2"
|
|
||||||
],
|
|
||||||
"aliases": {
|
|
||||||
"mistral-7b": "gpt-3.5-turbo"
|
|
||||||
},
|
|
||||||
"config": {}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
### Custom Auth

You can now override the default api key auth.
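A rough sketch of what a custom auth function can look like. The module name, function signature, and the `general_settings: custom_auth:` wiring are assumptions based on litellm's custom-auth docs, so check them against your litellm version:

```python
# custom_auth.py
from fastapi import Request
from litellm.proxy._types import UserAPIKeyAuth


async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
    # accept only keys with a known prefix - replace with your own logic
    if api_key.startswith("my-custom-prefix-"):
        return UserAPIKeyAuth(api_key=api_key)
    raise Exception("Invalid API key")
```

You would then point the proxy at it from your config, analogous to `custom_key_generate` shown below (e.g. `general_settings: custom_auth: custom_auth.user_api_key_auth`).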
@ -738,3 +484,55 @@ litellm_settings:
general_settings:
  custom_key_generate: custom_auth.custom_generate_key_fn
```
## Upperbound /key/generate params

Use this, if you need to set default upperbounds for `max_budget`, `budget_duration` or any `key/generate` param per key.

Set `litellm_settings:upperbound_key_generate_params`:
```yaml
litellm_settings:
  upperbound_key_generate_params:
    max_budget: 100 # upperbound of $100, for all /key/generate requests
    duration: "30d" # upperbound of 30 days for all /key/generate requests
```

**Expected Behavior**

- Send a `/key/generate` request with `max_budget=200`
- Key will be created with `max_budget=100` since 100 is the upper bound
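For example, a sketch of that capping behaviour using the `requests` library. The master key and proxy URL are placeholders, and this assumes the `/key/generate` response echoes the stored `max_budget`:

```python
import requests

resp = requests.post(
    "http://0.0.0.0:4000/key/generate",
    headers={"Authorization": "Bearer sk-1234", "Content-Type": "application/json"},
    json={"max_budget": 200},  # above the configured upperbound of 100
)

print(resp.json().get("max_budget"))  # expected: 100.0, the configured upper bound
```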
## Default /key/generate params

Use this, if you need to control the default `max_budget` or any `key/generate` param per key.

When a `/key/generate` request does not specify `max_budget`, it will use the `max_budget` specified in `default_key_generate_params`

Set `litellm_settings:default_key_generate_params`:
```yaml
litellm_settings:
  default_key_generate_params:
    max_budget: 1.5000
    models: ["azure-gpt-3.5"]
    duration: # blank means `null`
    metadata: {"setting":"default"}
    team_id: "core-infra"
```
## Endpoints

### Keys

#### [**👉 API REFERENCE DOCS**](https://litellm-api.up.railway.app/#/key%20management/)

### Users

#### [**👉 API REFERENCE DOCS**](https://litellm-api.up.railway.app/#/user%20management/)

### Teams

#### [**👉 API REFERENCE DOCS**](https://litellm-api.up.railway.app/#/team%20management)
@ -442,6 +442,8 @@ If a call fails after num_retries, fall back to another model group.

If the error is a context window exceeded error, fall back to a larger model group (if given).

Fallbacks are done in-order - ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k"], will do 'gpt-3.5-turbo' first, then 'gpt-4', etc.
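A minimal sketch of wiring this up. It assumes the Router's `fallbacks` and `context_window_fallbacks` parameters and a simple OpenAI-only `model_list`; adapt the deployments to your own setup:

```python
import os
from litellm import Router

model_list = [
    {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo", "api_key": os.getenv("OPENAI_API_KEY")}},
    {"model_name": "gpt-4", "litellm_params": {"model": "gpt-4", "api_key": os.getenv("OPENAI_API_KEY")}},
]

router = Router(
    model_list=model_list,
    num_retries=3,
    fallbacks=[{"gpt-3.5-turbo": ["gpt-4"]}],                 # tried in order, after retries are exhausted
    context_window_fallbacks=[{"gpt-3.5-turbo": ["gpt-4"]}],  # used only for context window exceeded errors
)

response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
)
print(response)
```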
```python
from litellm import Router
```

@ -551,6 +553,156 @@ router = Router(model_list: Optional[list] = None,

```python
               cache_responses=True)
```
## Pre-Call Checks (Context Window)

Enable pre-call checks to filter out deployments with context window limit < messages for a call.

<Tabs>
<TabItem value="sdk" label="SDK">

**1. Enable pre-call checks**
```python
from litellm import Router
# ...
router = Router(model_list=model_list, enable_pre_call_checks=True) # 👈 Set to True
```
**2. (Azure-only) Set base model**

For azure deployments, set the base model. Pick the base model from [this list](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json), all the azure models start with `azure/`.

```python
model_list = [
    {
        "model_name": "gpt-3.5-turbo",  # model group name
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "azure/chatgpt-v-2",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
        },
        "model_info": {
            "base_model": "azure/gpt-35-turbo",  # 👈 SET BASE MODEL
        }
    },
    {
        "model_name": "gpt-3.5-turbo",  # model group name
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "gpt-3.5-turbo-1106",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    },
]
```
**3. Test it!**

```python
"""
- Give a gpt-3.5-turbo model group with different context windows (4k vs. 16k)
- Send a 5k prompt
- Assert it works
"""
from litellm import Router
import os

model_list = [
    {
        "model_name": "gpt-3.5-turbo",  # model group name
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "azure/chatgpt-v-2",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
        },
        "model_info": {
            "base_model": "azure/gpt-35-turbo",
        }
    },
    {
        "model_name": "gpt-3.5-turbo",  # model group name
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "gpt-3.5-turbo-1106",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    },
]

router = Router(model_list=model_list, enable_pre_call_checks=True)

text = "What is the meaning of 42?" * 5000

response = router.completion(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": text},
        {"role": "user", "content": "Who was Alexander?"},
    ],
)

print(f"response: {response}")
```
</TabItem>
<TabItem value="proxy" label="Proxy">
**1. Setup config**

For azure deployments, set the base model. Pick the base model from [this list](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json), all the azure models start with azure/.

```yaml
router_settings:
  enable_pre_call_checks: true # 1. Enable pre-call checks

model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: azure/chatgpt-v-2
      api_base: os.environ/AZURE_API_BASE
      api_key: os.environ/AZURE_API_KEY
      api_version: "2023-07-01-preview"
    model_info:
      base_model: azure/gpt-4-1106-preview # 2. 👈 (azure-only) SET BASE MODEL

  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo-1106
      api_key: os.environ/OPENAI_API_KEY
```

**2. Start proxy**

```bash
litellm --config /path/to/config.yaml

# RUNNING on http://0.0.0.0:4000
```
**3. Test it!**

```python
import openai
client = openai.OpenAI(
    api_key="anything",
    base_url="http://0.0.0.0:4000"
)

text = "What is the meaning of 42?" * 5000

# request sent to model set on litellm proxy, `litellm --model`
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": text},
        {"role": "user", "content": "Who was Alexander?"},
    ],
)

print(response)
```
</TabItem>
</Tabs>
## Caching across model groups

If you want to cache across 2 different model groups (e.g. azure deployments, and openai), use caching groups.
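A short sketch, assuming the Router's `caching_groups` parameter (a list of model-group tuples that should share cache entries) - check your litellm version before relying on the exact name:

```python
import os
from litellm import Router

model_list = [
    {"model_name": "openai-gpt-3.5", "litellm_params": {"model": "gpt-3.5-turbo", "api_key": os.getenv("OPENAI_API_KEY")}},
    {"model_name": "azure-gpt-3.5", "litellm_params": {"model": "azure/chatgpt-v-2", "api_key": os.getenv("AZURE_API_KEY"), "api_base": os.getenv("AZURE_API_BASE"), "api_version": os.getenv("AZURE_API_VERSION")}},
]

router = Router(
    model_list=model_list,
    cache_responses=True,
    caching_groups=[("openai-gpt-3.5", "azure-gpt-3.5")],  # responses cached for one group are reused for the other
)
```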
@ -95,5 +95,4 @@ completion_with_split_tests(
)
```

### A/B Testing Dashboard after running code - https://admin.litellm.ai/
<Image img={require('../../img/ab_test_logs.png')} />
95 docs/my-website/docs/tutorials/instructor.md Normal file

@ -0,0 +1,95 @@
# Instructor - Function Calling

Use LiteLLM Router with [jxnl's instructor library](https://github.com/jxnl/instructor) for function calling in prod.

## Usage

```python
import os

import litellm
from litellm import Router
import instructor
from pydantic import BaseModel

litellm.set_verbose = True # 👈 print DEBUG LOGS

client = instructor.patch(
    Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",  # openai model name
                "litellm_params": {  # params for litellm completion/embedding call - e.g.: https://github.com/BerriAI/litellm/blob/62a591f90c99120e1a51a8445f5c3752586868ea/litellm/router.py#L111
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
            }
        ]
    )
)


class UserDetail(BaseModel):
    name: str
    age: int


user = client.chat.completions.create(
    model="gpt-3.5-turbo",
    response_model=UserDetail,
    messages=[
        {"role": "user", "content": "Extract Jason is 25 years old"},
    ],
)

assert isinstance(user, UserDetail)
assert user.name == "Jason"
assert user.age == 25

print(f"user: {user}")
```

## Async Calls

```python
import os

import litellm
from litellm import Router
import instructor, asyncio
from pydantic import BaseModel

aclient = instructor.apatch(
    Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
            }
        ],
        default_litellm_params={"acompletion": True}, # 👈 IMPORTANT - tells litellm to route to async completion function.
    )
)


class UserExtract(BaseModel):
    name: str
    age: int


async def main():
    model = await aclient.chat.completions.create(
        model="gpt-3.5-turbo",
        response_model=UserExtract,
        messages=[
            {"role": "user", "content": "Extract jason is 25 years old"},
        ],
    )
    print(f"model: {model}")


asyncio.run(main())
```
Binary image files under docs/my-website/img changed in this diff; one new screenshot was added: docs/my-website/img/end_user_enforcement.png (180 KiB).
38
docs/my-website/package-lock.json
generated
|
@ -5561,12 +5561,12 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/body-parser": {
|
"node_modules/body-parser": {
|
||||||
"version": "1.20.1",
|
"version": "1.20.2",
|
||||||
"resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.1.tgz",
|
"resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.2.tgz",
|
||||||
"integrity": "sha512-jWi7abTbYwajOytWCQc37VulmWiRae5RyTpaCyDcS5/lMdtwSz5lOpDE67srw/HYe35f1z3fDQw+3txg7gNtWw==",
|
"integrity": "sha512-ml9pReCu3M61kGlqoTm2umSXTlRTuGTx0bfYj+uIUKKYycG5NtSbeetV3faSU6R7ajOPw0g/J1PvK4qNy7s5bA==",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"bytes": "3.1.2",
|
"bytes": "3.1.2",
|
||||||
"content-type": "~1.0.4",
|
"content-type": "~1.0.5",
|
||||||
"debug": "2.6.9",
|
"debug": "2.6.9",
|
||||||
"depd": "2.0.0",
|
"depd": "2.0.0",
|
||||||
"destroy": "1.2.0",
|
"destroy": "1.2.0",
|
||||||
|
@ -5574,7 +5574,7 @@
|
||||||
"iconv-lite": "0.4.24",
|
"iconv-lite": "0.4.24",
|
||||||
"on-finished": "2.4.1",
|
"on-finished": "2.4.1",
|
||||||
"qs": "6.11.0",
|
"qs": "6.11.0",
|
||||||
"raw-body": "2.5.1",
|
"raw-body": "2.5.2",
|
||||||
"type-is": "~1.6.18",
|
"type-is": "~1.6.18",
|
||||||
"unpipe": "1.0.0"
|
"unpipe": "1.0.0"
|
||||||
},
|
},
|
||||||
|
@ -6707,9 +6707,9 @@
|
||||||
"integrity": "sha512-ASFBup0Mz1uyiIjANan1jzLQami9z1PoYSZCiiYW2FczPbenXc45FZdBZLzOT+r6+iciuEModtmCti+hjaAk0A=="
|
"integrity": "sha512-ASFBup0Mz1uyiIjANan1jzLQami9z1PoYSZCiiYW2FczPbenXc45FZdBZLzOT+r6+iciuEModtmCti+hjaAk0A=="
|
||||||
},
|
},
|
||||||
"node_modules/cookie": {
|
"node_modules/cookie": {
|
||||||
"version": "0.5.0",
|
"version": "0.6.0",
|
||||||
"resolved": "https://registry.npmjs.org/cookie/-/cookie-0.5.0.tgz",
|
"resolved": "https://registry.npmjs.org/cookie/-/cookie-0.6.0.tgz",
|
||||||
"integrity": "sha512-YZ3GUyn/o8gfKJlnlX7g7xq4gyO6OSuhGPKaaGssGB2qgDUS0gPgtTvoyZLTt9Ab6dC4hfc9dV5arkvc/OCmrw==",
|
"integrity": "sha512-U71cyTamuh1CRNCfpGY6to28lxvNwPG4Guz/EVjgf3Jmzv0vlDp1atT9eS5dDjMYHucpHbWns6Lwf3BKz6svdw==",
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">= 0.6"
|
"node": ">= 0.6"
|
||||||
}
|
}
|
||||||
|
@ -10411,16 +10411,16 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/express": {
|
"node_modules/express": {
|
||||||
"version": "4.18.2",
|
"version": "4.19.2",
|
||||||
"resolved": "https://registry.npmjs.org/express/-/express-4.18.2.tgz",
|
"resolved": "https://registry.npmjs.org/express/-/express-4.19.2.tgz",
|
||||||
"integrity": "sha512-5/PsL6iGPdfQ/lKM1UuielYgv3BUoJfz1aUwU9vHZ+J7gyvwdQXFEBIEIaxeGf0GIcreATNyBExtalisDbuMqQ==",
|
"integrity": "sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"accepts": "~1.3.8",
|
"accepts": "~1.3.8",
|
||||||
"array-flatten": "1.1.1",
|
"array-flatten": "1.1.1",
|
||||||
"body-parser": "1.20.1",
|
"body-parser": "1.20.2",
|
||||||
"content-disposition": "0.5.4",
|
"content-disposition": "0.5.4",
|
||||||
"content-type": "~1.0.4",
|
"content-type": "~1.0.4",
|
||||||
"cookie": "0.5.0",
|
"cookie": "0.6.0",
|
||||||
"cookie-signature": "1.0.6",
|
"cookie-signature": "1.0.6",
|
||||||
"debug": "2.6.9",
|
"debug": "2.6.9",
|
||||||
"depd": "2.0.0",
|
"depd": "2.0.0",
|
||||||
|
@ -17016,9 +17016,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/raw-body": {
|
"node_modules/raw-body": {
|
||||||
"version": "2.5.1",
|
"version": "2.5.2",
|
||||||
"resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.5.1.tgz",
|
"resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.5.2.tgz",
|
||||||
"integrity": "sha512-qqJBtEyVgS0ZmPGdCFPWJ3FreoqvG4MVQln/kCgF7Olq95IbOp0/BWyMwbdtn4VTvkM8Y7khCQ2Xgk/tcrCXig==",
|
"integrity": "sha512-8zGqypfENjCIqGhgXToC8aB2r7YrBX+AQAfIPs/Mlk+BtPTztOvTS01NRW/3Eh60J+a48lt8qsCzirQ6loCVfA==",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"bytes": "3.1.2",
|
"bytes": "3.1.2",
|
||||||
"http-errors": "2.0.0",
|
"http-errors": "2.0.0",
|
||||||
|
@ -21554,9 +21554,9 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/webpack-dev-middleware": {
|
"node_modules/webpack-dev-middleware": {
|
||||||
"version": "5.3.3",
|
"version": "5.3.4",
|
||||||
"resolved": "https://registry.npmjs.org/webpack-dev-middleware/-/webpack-dev-middleware-5.3.3.tgz",
|
"resolved": "https://registry.npmjs.org/webpack-dev-middleware/-/webpack-dev-middleware-5.3.4.tgz",
|
||||||
"integrity": "sha512-hj5CYrY0bZLB+eTO+x/j67Pkrquiy7kWepMHmUMoPsmcUaeEnQJqFzHJOyxgWlq746/wUuA64p9ta34Kyb01pA==",
|
"integrity": "sha512-BVdTqhhs+0IfoeAf7EoH5WE+exCmqGerHfDM0IL096Px60Tq2Mn9MAbnaGUe6HiMa41KMCYF19gyzZmBcq/o4Q==",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"colorette": "^2.0.10",
|
"colorette": "^2.0.10",
|
||||||
"memfs": "^3.4.3",
|
"memfs": "^3.4.3",
|
||||||
|
|
|
@ -30,6 +30,7 @@ const sidebars = {
      items: [
        "proxy/quick_start",
        "proxy/deploy",
        "proxy/prod",
        "proxy/configs",
        {
          type: "link",
@ -42,7 +43,6 @@ const sidebars = {
        "proxy/users",
        "proxy/team_based_routing",
        "proxy/ui",
        "proxy/budget_alerts",
        "proxy/cost_tracking",
        "proxy/token_auth",
        {
@ -61,6 +61,7 @@ const sidebars = {
          label: "Logging, Alerting",
          items: ["proxy/logging", "proxy/alerting", "proxy/streaming_logging"],
        },
        "proxy/grafana_metrics",
        "proxy/call_hooks",
        "proxy/rules",
        "proxy/cli",
@ -180,8 +181,9 @@ const sidebars = {
      type: "category",
      label: "Tutorials",
      items: [
        "tutorials/azure_openai",
        'tutorials/azure_openai',
        "tutorials/oobabooga",
        'tutorials/instructor',
        'tutorials/oobabooga',
        "tutorials/gradio_integration",
        "tutorials/huggingface_codellama",
        "tutorials/huggingface_tutorial",
@ -3138,13 +3138,13 @@ bluebird@~3.4.1:
|
||||||
resolved "https://registry.npmjs.org/bluebird/-/bluebird-3.4.7.tgz"
|
resolved "https://registry.npmjs.org/bluebird/-/bluebird-3.4.7.tgz"
|
||||||
integrity sha512-iD3898SR7sWVRHbiQv+sHUtHnMvC1o3nW5rAcqnq3uOn07DSAppZYUkIGslDz6gXC7HfunPe7YVBgoEJASPcHA==
|
integrity sha512-iD3898SR7sWVRHbiQv+sHUtHnMvC1o3nW5rAcqnq3uOn07DSAppZYUkIGslDz6gXC7HfunPe7YVBgoEJASPcHA==
|
||||||
|
|
||||||
body-parser@1.20.1:
|
body-parser@1.20.2:
|
||||||
version "1.20.1"
|
version "1.20.2"
|
||||||
resolved "https://registry.npmjs.org/body-parser/-/body-parser-1.20.1.tgz"
|
resolved "https://registry.yarnpkg.com/body-parser/-/body-parser-1.20.2.tgz#6feb0e21c4724d06de7ff38da36dad4f57a747fd"
|
||||||
integrity sha512-jWi7abTbYwajOytWCQc37VulmWiRae5RyTpaCyDcS5/lMdtwSz5lOpDE67srw/HYe35f1z3fDQw+3txg7gNtWw==
|
integrity sha512-ml9pReCu3M61kGlqoTm2umSXTlRTuGTx0bfYj+uIUKKYycG5NtSbeetV3faSU6R7ajOPw0g/J1PvK4qNy7s5bA==
|
||||||
dependencies:
|
dependencies:
|
||||||
bytes "3.1.2"
|
bytes "3.1.2"
|
||||||
content-type "~1.0.4"
|
content-type "~1.0.5"
|
||||||
debug "2.6.9"
|
debug "2.6.9"
|
||||||
depd "2.0.0"
|
depd "2.0.0"
|
||||||
destroy "1.2.0"
|
destroy "1.2.0"
|
||||||
|
@ -3152,7 +3152,7 @@ body-parser@1.20.1:
|
||||||
iconv-lite "0.4.24"
|
iconv-lite "0.4.24"
|
||||||
on-finished "2.4.1"
|
on-finished "2.4.1"
|
||||||
qs "6.11.0"
|
qs "6.11.0"
|
||||||
raw-body "2.5.1"
|
raw-body "2.5.2"
|
||||||
type-is "~1.6.18"
|
type-is "~1.6.18"
|
||||||
unpipe "1.0.0"
|
unpipe "1.0.0"
|
||||||
|
|
||||||
|
@ -3921,7 +3921,7 @@ content-disposition@0.5.4:
|
||||||
dependencies:
|
dependencies:
|
||||||
safe-buffer "5.2.1"
|
safe-buffer "5.2.1"
|
||||||
|
|
||||||
content-type@~1.0.4:
|
content-type@~1.0.4, content-type@~1.0.5:
|
||||||
version "1.0.5"
|
version "1.0.5"
|
||||||
resolved "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz"
|
resolved "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz"
|
||||||
integrity sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==
|
integrity sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==
|
||||||
|
@ -3941,10 +3941,10 @@ cookie-signature@1.0.6:
|
||||||
resolved "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz"
|
resolved "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz"
|
||||||
integrity sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ==
|
integrity sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ==
|
||||||
|
|
||||||
cookie@0.5.0:
|
cookie@0.6.0:
|
||||||
version "0.5.0"
|
version "0.6.0"
|
||||||
resolved "https://registry.npmjs.org/cookie/-/cookie-0.5.0.tgz"
|
resolved "https://registry.yarnpkg.com/cookie/-/cookie-0.6.0.tgz#2798b04b071b0ecbff0dbb62a505a8efa4e19051"
|
||||||
integrity sha512-YZ3GUyn/o8gfKJlnlX7g7xq4gyO6OSuhGPKaaGssGB2qgDUS0gPgtTvoyZLTt9Ab6dC4hfc9dV5arkvc/OCmrw==
|
integrity sha512-U71cyTamuh1CRNCfpGY6to28lxvNwPG4Guz/EVjgf3Jmzv0vlDp1atT9eS5dDjMYHucpHbWns6Lwf3BKz6svdw==
|
||||||
|
|
||||||
copy-descriptor@^0.1.0:
|
copy-descriptor@^0.1.0:
|
||||||
version "0.1.1"
|
version "0.1.1"
|
||||||
|
@ -5325,16 +5325,16 @@ expand-template@^2.0.3:
|
||||||
integrity sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==
|
integrity sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==
|
||||||
|
|
||||||
express@^4.17.1, express@^4.17.3:
|
express@^4.17.1, express@^4.17.3:
|
||||||
version "4.18.2"
|
version "4.19.2"
|
||||||
resolved "https://registry.npmjs.org/express/-/express-4.18.2.tgz"
|
resolved "https://registry.yarnpkg.com/express/-/express-4.19.2.tgz#e25437827a3aa7f2a827bc8171bbbb664a356465"
|
||||||
integrity sha512-5/PsL6iGPdfQ/lKM1UuielYgv3BUoJfz1aUwU9vHZ+J7gyvwdQXFEBIEIaxeGf0GIcreATNyBExtalisDbuMqQ==
|
integrity sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==
|
||||||
dependencies:
|
dependencies:
|
||||||
accepts "~1.3.8"
|
accepts "~1.3.8"
|
||||||
array-flatten "1.1.1"
|
array-flatten "1.1.1"
|
||||||
body-parser "1.20.1"
|
body-parser "1.20.2"
|
||||||
content-disposition "0.5.4"
|
content-disposition "0.5.4"
|
||||||
content-type "~1.0.4"
|
content-type "~1.0.4"
|
||||||
cookie "0.5.0"
|
cookie "0.6.0"
|
||||||
cookie-signature "1.0.6"
|
cookie-signature "1.0.6"
|
||||||
debug "2.6.9"
|
debug "2.6.9"
|
||||||
depd "2.0.0"
|
depd "2.0.0"
|
||||||
|
@ -9924,10 +9924,10 @@ range-parser@^1.2.1, range-parser@~1.2.1:
|
||||||
resolved "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz"
|
resolved "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz"
|
||||||
integrity sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==
|
integrity sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==
|
||||||
|
|
||||||
raw-body@2.5.1:
|
raw-body@2.5.2:
|
||||||
version "2.5.1"
|
version "2.5.2"
|
||||||
resolved "https://registry.npmjs.org/raw-body/-/raw-body-2.5.1.tgz"
|
resolved "https://registry.yarnpkg.com/raw-body/-/raw-body-2.5.2.tgz#99febd83b90e08975087e8f1f9419a149366b68a"
|
||||||
integrity sha512-qqJBtEyVgS0ZmPGdCFPWJ3FreoqvG4MVQln/kCgF7Olq95IbOp0/BWyMwbdtn4VTvkM8Y7khCQ2Xgk/tcrCXig==
|
integrity sha512-8zGqypfENjCIqGhgXToC8aB2r7YrBX+AQAfIPs/Mlk+BtPTztOvTS01NRW/3Eh60J+a48lt8qsCzirQ6loCVfA==
|
||||||
dependencies:
|
dependencies:
|
||||||
bytes "3.1.2"
|
bytes "3.1.2"
|
||||||
http-errors "2.0.0"
|
http-errors "2.0.0"
|
||||||
|
@ -12406,9 +12406,9 @@ webpack-bundle-analyzer@^4.5.0:
|
||||||
ws "^7.3.1"
|
ws "^7.3.1"
|
||||||
|
|
||||||
webpack-dev-middleware@^5.3.1:
|
webpack-dev-middleware@^5.3.1:
|
||||||
version "5.3.3"
|
version "5.3.4"
|
||||||
resolved "https://registry.npmjs.org/webpack-dev-middleware/-/webpack-dev-middleware-5.3.3.tgz"
|
resolved "https://registry.yarnpkg.com/webpack-dev-middleware/-/webpack-dev-middleware-5.3.4.tgz#eb7b39281cbce10e104eb2b8bf2b63fce49a3517"
|
||||||
integrity sha512-hj5CYrY0bZLB+eTO+x/j67Pkrquiy7kWepMHmUMoPsmcUaeEnQJqFzHJOyxgWlq746/wUuA64p9ta34Kyb01pA==
|
integrity sha512-BVdTqhhs+0IfoeAf7EoH5WE+exCmqGerHfDM0IL096Px60Tq2Mn9MAbnaGUe6HiMa41KMCYF19gyzZmBcq/o4Q==
|
||||||
dependencies:
|
dependencies:
|
||||||
colorette "^2.0.10"
|
colorette "^2.0.10"
|
||||||
memfs "^3.4.3"
|
memfs "^3.4.3"
|
||||||
|
|
|
@@ -96,6 +96,8 @@ class _ENTERPRISE_GoogleTextModeration(CustomLogger):
     async def async_moderation_hook(
         self,
         data: dict,
+        user_api_key_dict: UserAPIKeyAuth,
+        call_type: Literal["completion", "embeddings", "image_generation"],
     ):
         """
         - Calls Google's Text Moderation API

@@ -99,6 +99,8 @@ class _ENTERPRISE_LlamaGuard(CustomLogger):
     async def async_moderation_hook(
         self,
         data: dict,
+        user_api_key_dict: UserAPIKeyAuth,
+        call_type: Literal["completion", "embeddings", "image_generation"],
     ):
         """
         - Calls the Llama Guard Endpoint
@@ -22,6 +22,7 @@ from litellm.utils import (
 )
 from datetime import datetime
 import aiohttp, asyncio
+from litellm.utils import get_formatted_prompt

 litellm.set_verbose = True

@@ -29,9 +30,12 @@ litellm.set_verbose = True
 class _ENTERPRISE_LLMGuard(CustomLogger):
     # Class variables or attributes
     def __init__(
-        self, mock_testing: bool = False, mock_redacted_text: Optional[dict] = None
+        self,
+        mock_testing: bool = False,
+        mock_redacted_text: Optional[dict] = None,
     ):
         self.mock_redacted_text = mock_redacted_text
+        self.llm_guard_mode = litellm.llm_guard_mode
         if mock_testing == True:  # for testing purposes only
             return
         self.llm_guard_api_base = litellm.get_secret("LLM_GUARD_API_BASE", None)

@@ -59,7 +63,7 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
             else:
                 # Make the first request to /analyze
                 analyze_url = f"{self.llm_guard_api_base}analyze/prompt"
-                verbose_proxy_logger.debug(f"Making request to: {analyze_url}")
+                verbose_proxy_logger.debug("Making request to: %s", analyze_url)
                 analyze_payload = {"prompt": text}
                 redacted_text = None
                 async with session.post(

@@ -72,7 +76,7 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
             if redacted_text is not None:
                 if (
                     redacted_text.get("is_valid", None) is not None
-                    and redacted_text["is_valid"] == "True"
+                    and redacted_text["is_valid"] != True
                 ):
                     raise HTTPException(
                         status_code=400,

@@ -91,9 +95,26 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
             traceback.print_exc()
             raise e

+    def should_proceed(self, user_api_key_dict: UserAPIKeyAuth) -> bool:
+        if self.llm_guard_mode == "key-specific":
+            # check if llm guard enabled for specific keys only
+            self.print_verbose(
+                f"user_api_key_dict.permissions: {user_api_key_dict.permissions}"
+            )
+            if (
+                user_api_key_dict.permissions.get("enable_llm_guard_check", False)
+                == True
+            ):
+                return True
+        elif self.llm_guard_mode == "all":
+            return True
+        return False
+
     async def async_moderation_hook(
         self,
         data: dict,
+        user_api_key_dict: UserAPIKeyAuth,
+        call_type: Literal["completion", "embeddings", "image_generation"],
     ):
         """
         - Calls the LLM Guard Endpoint

@@ -101,8 +122,33 @@ class _ENTERPRISE_LLMGuard(CustomLogger):
         - Use the sanitized prompt returned
         - LLM Guard can handle things like PII Masking, etc.
         """
+        self.print_verbose(
+            f"Inside LLM Guard Pre-Call Hook - llm_guard_mode={self.llm_guard_mode}"
+        )
+
+        _proceed = self.should_proceed(user_api_key_dict=user_api_key_dict)
+        if _proceed == False:
+            return
+
+        self.print_verbose("Makes LLM Guard Check")
+        try:
+            assert call_type in [
+                "completion",
+                "embeddings",
+                "image_generation",
+                "moderation",
+                "audio_transcription",
+            ]
+        except Exception as e:
+            self.print_verbose(
+                f"Call Type - {call_type}, not in accepted list - ['completion','embeddings','image_generation','moderation','audio_transcription']"
+            )
             return data
+
+        formatted_prompt = get_formatted_prompt(data=data, call_type=call_type)  # type: ignore
+        self.print_verbose(f"LLM Guard, formatted_prompt: {formatted_prompt}")
+        return await self.moderation_check(text=formatted_prompt)

     async def async_post_call_streaming_hook(
         self, user_api_key_dict: UserAPIKeyAuth, response: str
     ):
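The hunks above gate the LLM Guard moderation check on `litellm.llm_guard_mode`. As a quick illustration, here is a minimal, standalone Python sketch of that gating logic; it mirrors `should_proceed` from the diff, but the helper name `should_run_llm_guard` and the sample permission dicts are illustrative, not part of the source.

```python
# Sketch of the new gating behavior: "all" checks every request, while
# "key-specific" only checks keys whose permissions set enable_llm_guard_check=True.
def should_run_llm_guard(llm_guard_mode: str, key_permissions: dict) -> bool:
    if llm_guard_mode == "all":
        return True
    if llm_guard_mode == "key-specific":
        # permission name taken from the diff above
        return key_permissions.get("enable_llm_guard_check", False) is True
    return False


assert should_run_llm_guard("all", {}) is True
assert should_run_llm_guard("key-specific", {}) is False
assert should_run_llm_guard("key-specific", {"enable_llm_guard_check": True}) is True
```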
litellm-js/proxy/README.md  (new file, 8 lines)
@@ -0,0 +1,8 @@
```
npm install
npm run dev
```

```
npm run deploy
```

litellm-js/proxy/package.json  (new file, 14 lines)
@@ -0,0 +1,14 @@
{
  "scripts": {
    "dev": "wrangler dev src/index.ts",
    "deploy": "wrangler deploy --minify src/index.ts"
  },
  "dependencies": {
    "hono": "^4.1.4",
    "openai": "^4.29.2"
  },
  "devDependencies": {
    "@cloudflare/workers-types": "^4.20240208.0",
    "wrangler": "^3.32.0"
  }
}

litellm-js/proxy/src/index.ts  (new file, 59 lines)
@@ -0,0 +1,59 @@
import { Hono } from 'hono'
import { Context } from 'hono';
import { bearerAuth } from 'hono/bearer-auth'
import OpenAI from "openai";

const openai = new OpenAI({
  apiKey: "sk-1234",
  baseURL: "https://openai-endpoint.ishaanjaffer0324.workers.dev"
});

async function call_proxy() {
  const completion = await openai.chat.completions.create({
    messages: [{ role: "system", content: "You are a helpful assistant." }],
    model: "gpt-3.5-turbo",
  });

  return completion
}

const app = new Hono()

// Middleware for API Key Authentication
const apiKeyAuth = async (c: Context, next: Function) => {
  const apiKey = c.req.header('Authorization');
  if (!apiKey || apiKey !== 'Bearer sk-1234') {
    return c.text('Unauthorized', 401);
  }
  await next();
};

app.use('/*', apiKeyAuth)

app.get('/', (c) => {
  return c.text('Hello Hono!')
})

// Handler for chat completions
const chatCompletionHandler = async (c: Context) => {
  // Assuming your logic for handling chat completion goes here
  // For demonstration, just returning a simple JSON response
  const response = await call_proxy()
  return c.json(response);
};

// Register the above handler for different POST routes with the apiKeyAuth middleware
app.post('/v1/chat/completions', chatCompletionHandler);
app.post('/chat/completions', chatCompletionHandler);

// Example showing how you might handle dynamic segments within the URL
// Here, using ':model*' to capture the rest of the path as a parameter 'model'
app.post('/openai/deployments/:model*/chat/completions', chatCompletionHandler);

export default app

litellm-js/proxy/tsconfig.json  (new file, 16 lines)
@@ -0,0 +1,16 @@
{
  "compilerOptions": {
    "target": "ESNext",
    "module": "ESNext",
    "moduleResolution": "Bundler",
    "strict": true,
    "lib": [
      "ESNext"
    ],
    "types": [
      "@cloudflare/workers-types"
    ],
    "jsx": "react-jsx",
    "jsxImportSource": "hono/jsx"
  },
}

litellm-js/proxy/wrangler.toml  (new file, 18 lines)
@@ -0,0 +1,18 @@
name = "my-app"
compatibility_date = "2023-12-01"

# [vars]
# MY_VAR = "my-variable"

# [[kv_namespaces]]
# binding = "MY_KV_NAMESPACE"
# id = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

# [[r2_buckets]]
# binding = "MY_BUCKET"
# bucket_name = "my-bucket"

# [[d1_databases]]
# binding = "DB"
# database_name = "my-database"
# database_id = ""
litellm-js/spend-logs/Dockerfile  (new file, 26 lines)
@@ -0,0 +1,26 @@
# Use the specific Node.js v20.11.0 image
FROM node:20.11.0

# Set the working directory inside the container
WORKDIR /app

# Copy package.json and package-lock.json to the working directory
COPY ./litellm-js/spend-logs/package*.json ./

# Install dependencies
RUN npm install

# Install Prisma globally
RUN npm install -g prisma

# Copy the rest of the application code
COPY ./litellm-js/spend-logs .

# Generate Prisma client
RUN npx prisma generate

# Expose the port that the Node.js server will run on
EXPOSE 3000

# Command to run the Node.js app with npm run dev
CMD ["npm", "run", "dev"]

litellm-js/spend-logs/README.md  (new file, 8 lines)
@@ -0,0 +1,8 @@
```
npm install
npm run dev
```

```
open http://localhost:3000
```
litellm-js/spend-logs/package-lock.json  (generated, new file, 508 lines)
@@ -0,0 +1,508 @@
{
  "name": "spend-logs",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "dependencies": {
        "@hono/node-server": "^1.9.0",
        "hono": "^4.1.5"
      },
      "devDependencies": {
        "@types/node": "^20.11.17",
        "tsx": "^4.7.1"
      }
    },
    "node_modules/@esbuild/aix-ppc64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.19.12.tgz",
      "integrity": "sha512-bmoCYyWdEL3wDQIVbcyzRyeKLgk2WtWLTWz1ZIAZF/EGbNOwSA6ew3PftJ1PqMiOOGu0OyFMzG53L0zqIpPeNA==",
      "cpu": ["ppc64"], "os": ["aix"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/android-arm": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.19.12.tgz",
      "integrity": "sha512-qg/Lj1mu3CdQlDEEiWrlC4eaPZ1KztwGJ9B6J+/6G+/4ewxJg7gqj8eVYWvao1bXrqGiW2rsBZFSX3q2lcW05w==",
      "cpu": ["arm"], "os": ["android"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/android-arm64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.19.12.tgz",
      "integrity": "sha512-P0UVNGIienjZv3f5zq0DP3Nt2IE/3plFzuaS96vihvD0Hd6H/q4WXUGpCxD/E8YrSXfNyRPbpTq+T8ZQioSuPA==",
      "cpu": ["arm64"], "os": ["android"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/android-x64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.19.12.tgz",
      "integrity": "sha512-3k7ZoUW6Q6YqhdhIaq/WZ7HwBpnFBlW905Fa4s4qWJyiNOgT1dOqDiVAQFwBH7gBRZr17gLrlFCRzF6jFh7Kew==",
      "cpu": ["x64"], "os": ["android"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/darwin-arm64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.19.12.tgz",
      "integrity": "sha512-B6IeSgZgtEzGC42jsI+YYu9Z3HKRxp8ZT3cqhvliEHovq8HSX2YX8lNocDn79gCKJXOSaEot9MVYky7AKjCs8g==",
      "cpu": ["arm64"], "os": ["darwin"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/darwin-x64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.19.12.tgz",
      "integrity": "sha512-hKoVkKzFiToTgn+41qGhsUJXFlIjxI/jSYeZf3ugemDYZldIXIxhvwN6erJGlX4t5h417iFuheZ7l+YVn05N3A==",
      "cpu": ["x64"], "os": ["darwin"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/freebsd-arm64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.19.12.tgz",
      "integrity": "sha512-4aRvFIXmwAcDBw9AueDQ2YnGmz5L6obe5kmPT8Vd+/+x/JMVKCgdcRwH6APrbpNXsPz+K653Qg8HB/oXvXVukA==",
      "cpu": ["arm64"], "os": ["freebsd"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/freebsd-x64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.19.12.tgz",
      "integrity": "sha512-EYoXZ4d8xtBoVN7CEwWY2IN4ho76xjYXqSXMNccFSx2lgqOG/1TBPW0yPx1bJZk94qu3tX0fycJeeQsKovA8gg==",
      "cpu": ["x64"], "os": ["freebsd"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/linux-arm": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.19.12.tgz",
      "integrity": "sha512-J5jPms//KhSNv+LO1S1TX1UWp1ucM6N6XuL6ITdKWElCu8wXP72l9MM0zDTzzeikVyqFE6U8YAV9/tFyj0ti+w==",
      "cpu": ["arm"], "os": ["linux"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/linux-arm64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.19.12.tgz",
      "integrity": "sha512-EoTjyYyLuVPfdPLsGVVVC8a0p1BFFvtpQDB/YLEhaXyf/5bczaGeN15QkR+O4S5LeJ92Tqotve7i1jn35qwvdA==",
      "cpu": ["arm64"], "os": ["linux"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/linux-ia32": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.19.12.tgz",
      "integrity": "sha512-Thsa42rrP1+UIGaWz47uydHSBOgTUnwBwNq59khgIwktK6x60Hivfbux9iNR0eHCHzOLjLMLfUMLCypBkZXMHA==",
      "cpu": ["ia32"], "os": ["linux"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/linux-loong64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.19.12.tgz",
      "integrity": "sha512-LiXdXA0s3IqRRjm6rV6XaWATScKAXjI4R4LoDlvO7+yQqFdlr1Bax62sRwkVvRIrwXxvtYEHHI4dm50jAXkuAA==",
      "cpu": ["loong64"], "os": ["linux"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/linux-mips64el": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.19.12.tgz",
      "integrity": "sha512-fEnAuj5VGTanfJ07ff0gOA6IPsvrVHLVb6Lyd1g2/ed67oU1eFzL0r9WL7ZzscD+/N6i3dWumGE1Un4f7Amf+w==",
      "cpu": ["mips64el"], "os": ["linux"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/linux-ppc64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.19.12.tgz",
      "integrity": "sha512-nYJA2/QPimDQOh1rKWedNOe3Gfc8PabU7HT3iXWtNUbRzXS9+vgB0Fjaqr//XNbd82mCxHzik2qotuI89cfixg==",
      "cpu": ["ppc64"], "os": ["linux"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/linux-riscv64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.19.12.tgz",
      "integrity": "sha512-2MueBrlPQCw5dVJJpQdUYgeqIzDQgw3QtiAHUC4RBz9FXPrskyyU3VI1hw7C0BSKB9OduwSJ79FTCqtGMWqJHg==",
      "cpu": ["riscv64"], "os": ["linux"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/linux-s390x": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.19.12.tgz",
      "integrity": "sha512-+Pil1Nv3Umes4m3AZKqA2anfhJiVmNCYkPchwFJNEJN5QxmTs1uzyy4TvmDrCRNT2ApwSari7ZIgrPeUx4UZDg==",
      "cpu": ["s390x"], "os": ["linux"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/linux-x64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.19.12.tgz",
      "integrity": "sha512-B71g1QpxfwBvNrfyJdVDexenDIt1CiDN1TIXLbhOw0KhJzE78KIFGX6OJ9MrtC0oOqMWf+0xop4qEU8JrJTwCg==",
      "cpu": ["x64"], "os": ["linux"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/netbsd-x64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.19.12.tgz",
      "integrity": "sha512-3ltjQ7n1owJgFbuC61Oj++XhtzmymoCihNFgT84UAmJnxJfm4sYCiSLTXZtE00VWYpPMYc+ZQmB6xbSdVh0JWA==",
      "cpu": ["x64"], "os": ["netbsd"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/openbsd-x64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.19.12.tgz",
      "integrity": "sha512-RbrfTB9SWsr0kWmb9srfF+L933uMDdu9BIzdA7os2t0TXhCRjrQyCeOt6wVxr79CKD4c+p+YhCj31HBkYcXebw==",
      "cpu": ["x64"], "os": ["openbsd"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/sunos-x64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.19.12.tgz",
      "integrity": "sha512-HKjJwRrW8uWtCQnQOz9qcU3mUZhTUQvi56Q8DPTLLB+DawoiQdjsYq+j+D3s9I8VFtDr+F9CjgXKKC4ss89IeA==",
      "cpu": ["x64"], "os": ["sunos"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/win32-arm64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.19.12.tgz",
      "integrity": "sha512-URgtR1dJnmGvX864pn1B2YUYNzjmXkuJOIqG2HdU62MVS4EHpU2946OZoTMnRUHklGtJdJZ33QfzdjGACXhn1A==",
      "cpu": ["arm64"], "os": ["win32"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/win32-ia32": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.19.12.tgz",
      "integrity": "sha512-+ZOE6pUkMOJfmxmBZElNOx72NKpIa/HFOMGzu8fqzQJ5kgf6aTGrcJaFsNiVMH4JKpMipyK+7k0n2UXN7a8YKQ==",
      "cpu": ["ia32"], "os": ["win32"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@esbuild/win32-x64": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.19.12.tgz",
      "integrity": "sha512-T1QyPSDCyMXaO3pzBkF96E8xMkiRYbUEZADd29SyPGabqxMViNoii+NcK7eWJAEoU6RZyEm5lVSIjTmcdoB9HA==",
      "cpu": ["x64"], "os": ["win32"], "dev": true, "optional": true,
      "engines": { "node": ">=12" }
    },
    "node_modules/@hono/node-server": {
      "version": "1.9.0",
      "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.9.0.tgz",
      "integrity": "sha512-oJjk7WXBlENeHhWiMqSyxPIZ3Kmf5ZYxqdlcSIXyN8Rn50bNJsPl99G4POBS03Jxh56FdfRJ0SEnC8mAVIiavQ==",
      "engines": { "node": ">=18.14.1" }
    },
    "node_modules/@types/node": {
      "version": "20.11.30",
      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.11.30.tgz",
      "integrity": "sha512-dHM6ZxwlmuZaRmUPfv1p+KrdD1Dci04FbdEm/9wEMouFqxYoFl5aMkt0VMAUtYRQDyYvD41WJLukhq/ha3YuTw==",
      "dev": true,
      "dependencies": { "undici-types": "~5.26.4" }
    },
    "node_modules/esbuild": {
      "version": "0.19.12",
      "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.19.12.tgz",
      "integrity": "sha512-aARqgq8roFBj054KvQr5f1sFu0D65G+miZRCuJyJ0G13Zwx7vRar5Zhn2tkQNzIXcBrNVsv/8stehpj+GAjgbg==",
      "dev": true,
      "hasInstallScript": true,
      "bin": { "esbuild": "bin/esbuild" },
      "engines": { "node": ">=12" },
      "optionalDependencies": {
        "@esbuild/aix-ppc64": "0.19.12",
        "@esbuild/android-arm": "0.19.12",
        "@esbuild/android-arm64": "0.19.12",
        "@esbuild/android-x64": "0.19.12",
        "@esbuild/darwin-arm64": "0.19.12",
        "@esbuild/darwin-x64": "0.19.12",
        "@esbuild/freebsd-arm64": "0.19.12",
        "@esbuild/freebsd-x64": "0.19.12",
        "@esbuild/linux-arm": "0.19.12",
        "@esbuild/linux-arm64": "0.19.12",
        "@esbuild/linux-ia32": "0.19.12",
        "@esbuild/linux-loong64": "0.19.12",
        "@esbuild/linux-mips64el": "0.19.12",
        "@esbuild/linux-ppc64": "0.19.12",
        "@esbuild/linux-riscv64": "0.19.12",
        "@esbuild/linux-s390x": "0.19.12",
        "@esbuild/linux-x64": "0.19.12",
        "@esbuild/netbsd-x64": "0.19.12",
        "@esbuild/openbsd-x64": "0.19.12",
        "@esbuild/sunos-x64": "0.19.12",
        "@esbuild/win32-arm64": "0.19.12",
        "@esbuild/win32-ia32": "0.19.12",
        "@esbuild/win32-x64": "0.19.12"
      }
    },
    "node_modules/fsevents": {
      "version": "2.3.3",
      "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
      "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
      "dev": true, "hasInstallScript": true, "optional": true, "os": ["darwin"],
      "engines": { "node": "^8.16.0 || ^10.6.0 || >=11.0.0" }
    },
    "node_modules/get-tsconfig": {
      "version": "4.7.3",
      "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.7.3.tgz",
      "integrity": "sha512-ZvkrzoUA0PQZM6fy6+/Hce561s+faD1rsNwhnO5FelNjyy7EMGJ3Rz1AQ8GYDWjhRs/7dBLOEJvhK8MiEJOAFg==",
      "dev": true,
      "dependencies": { "resolve-pkg-maps": "^1.0.0" },
      "funding": { "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" }
    },
    "node_modules/hono": {
      "version": "4.1.5",
      "resolved": "https://registry.npmjs.org/hono/-/hono-4.1.5.tgz",
      "integrity": "sha512-3ChJiIoeCxvkt6vnkxJagplrt1YZg3NyNob7ssVeK2PUqEINp4q1F94HzFnvY9QE8asVmbW5kkTDlyWylfg2vg==",
      "engines": { "node": ">=16.0.0" }
    },
    "node_modules/resolve-pkg-maps": {
      "version": "1.0.0",
      "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
      "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
      "dev": true,
      "funding": { "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" }
    },
    "node_modules/tsx": {
      "version": "4.7.1",
      "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.7.1.tgz",
      "integrity": "sha512-8d6VuibXHtlN5E3zFkgY8u4DX7Y3Z27zvvPKVmLon/D4AjuKzarkUBTLDBgj9iTQ0hg5xM7c/mYiRVM+HETf0g==",
      "dev": true,
      "dependencies": { "esbuild": "~0.19.10", "get-tsconfig": "^4.7.2" },
      "bin": { "tsx": "dist/cli.mjs" },
      "engines": { "node": ">=18.0.0" },
      "optionalDependencies": { "fsevents": "~2.3.3" }
    },
    "node_modules/undici-types": {
      "version": "5.26.5",
      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
      "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
      "dev": true
    }
  }
}
litellm-js/spend-logs/package.json  (new file, 13 lines)
@@ -0,0 +1,13 @@
{
  "scripts": {
    "dev": "tsx watch src/index.ts"
  },
  "dependencies": {
    "@hono/node-server": "^1.9.0",
    "hono": "^4.1.5"
  },
  "devDependencies": {
    "@types/node": "^20.11.17",
    "tsx": "^4.7.1"
  }
}

litellm-js/spend-logs/schema.prisma  (new file, 29 lines)
@@ -0,0 +1,29 @@
generator client {
  provider = "prisma-client-js"
}

datasource client {
  provider = "postgresql"
  url      = env("DATABASE_URL")
}

model LiteLLM_SpendLogs {
  request_id         String   @id
  call_type          String
  api_key            String   @default("")
  spend              Float    @default(0.0)
  total_tokens       Int      @default(0)
  prompt_tokens      Int      @default(0)
  completion_tokens  Int      @default(0)
  startTime          DateTime
  endTime            DateTime
  model              String   @default("")
  api_base           String   @default("")
  user               String   @default("")
  metadata           Json     @default("{}")
  cache_hit          String   @default("")
  cache_key          String   @default("")
  request_tags       Json     @default("[]")
  team_id            String?
  end_user           String?
}

litellm-js/spend-logs/src/_types.ts  (new file, 32 lines)
@@ -0,0 +1,32 @@
export type LiteLLM_IncrementSpend = {
  key_transactions: Array<LiteLLM_IncrementObject>, // [{"key": spend},..]
  user_transactions: Array<LiteLLM_IncrementObject>,
  team_transactions: Array<LiteLLM_IncrementObject>,
  spend_logs_transactions: Array<LiteLLM_SpendLogs>
}

export type LiteLLM_IncrementObject = {
  key: string,
  spend: number
}

export type LiteLLM_SpendLogs = {
  request_id: string; // @id means it's a unique identifier
  call_type: string;
  api_key: string; // @default("") means it defaults to an empty string if not provided
  spend: number; // Float in Prisma corresponds to number in TypeScript
  total_tokens: number; // Int in Prisma corresponds to number in TypeScript
  prompt_tokens: number;
  completion_tokens: number;
  startTime: Date; // DateTime in Prisma corresponds to Date in TypeScript
  endTime: Date;
  model: string; // @default("") means it defaults to an empty string if not provided
  api_base: string;
  user: string;
  metadata: any; // Json type in Prisma is represented by any in TypeScript; could also use a more specific type if the structure of JSON is known
  cache_hit: string;
  cache_key: string;
  request_tags: any; // Similarly, this could be an array or a more specific type depending on the expected structure
  team_id?: string | null; // ? indicates it's optional and can be undefined, but could also be null if not provided
  end_user?: string | null;
};

litellm-js/spend-logs/src/index.ts  (new file, 84 lines)
@@ -0,0 +1,84 @@
import { serve } from '@hono/node-server'
import { Hono } from 'hono'
import { PrismaClient } from '@prisma/client'
import {LiteLLM_SpendLogs, LiteLLM_IncrementSpend, LiteLLM_IncrementObject} from './_types'

const app = new Hono()
const prisma = new PrismaClient()
// In-memory storage for logs
let spend_logs: LiteLLM_SpendLogs[] = [];
const key_logs: LiteLLM_IncrementObject[] = [];
const user_logs: LiteLLM_IncrementObject[] = [];
const transaction_logs: LiteLLM_IncrementObject[] = [];


app.get('/', (c) => {
  return c.text('Hello Hono!')
})

const MIN_LOGS = 1; // Minimum number of logs needed to initiate a flush
const FLUSH_INTERVAL = 5000; // Time in ms to wait before trying to flush again
const BATCH_SIZE = 100; // Preferred size of each batch to write to the database
const MAX_LOGS_PER_INTERVAL = 1000; // Maximum number of logs to flush in a single interval

const flushLogsToDb = async () => {
  if (spend_logs.length >= MIN_LOGS) {
    // Limit the logs to process in this interval to MAX_LOGS_PER_INTERVAL or less
    const logsToProcess = spend_logs.slice(0, MAX_LOGS_PER_INTERVAL);

    for (let i = 0; i < logsToProcess.length; i += BATCH_SIZE) {
      // Create subarray for current batch, ensuring it doesn't exceed the BATCH_SIZE
      const batch = logsToProcess.slice(i, i + BATCH_SIZE);

      // Convert datetime strings to Date objects
      const batchWithDates = batch.map(entry => ({
        ...entry,
        startTime: new Date(entry.startTime),
        endTime: new Date(entry.endTime),
        // Repeat for any other DateTime fields you may have
      }));

      await prisma.liteLLM_SpendLogs.createMany({
        data: batchWithDates,
      });

      console.log(`Flushed ${batch.length} logs to the DB.`);
    }

    // Remove the processed logs from spend_logs
    spend_logs = spend_logs.slice(logsToProcess.length);

    console.log(`${logsToProcess.length} logs processed. Remaining in queue: ${spend_logs.length}`);
  } else {
    // This will ensure it doesn't falsely claim "No logs to flush." when it's merely below the MIN_LOGS threshold.
    if(spend_logs.length > 0) {
      console.log(`Accumulating logs. Currently at ${spend_logs.length}, waiting for at least ${MIN_LOGS}.`);
    } else {
      console.log("No logs to flush.");
    }
  }
};

// Setup interval for attempting to flush the logs
setInterval(flushLogsToDb, FLUSH_INTERVAL);

// Route to receive log messages
app.post('/spend/update', async (c) => {
  const incomingLogs = await c.req.json<LiteLLM_SpendLogs[]>();

  spend_logs.push(...incomingLogs);

  console.log(`Received and stored ${incomingLogs.length} logs. Total logs in memory: ${spend_logs.length}`);

  return c.json({ message: `Successfully stored ${incomingLogs.length} logs` });
});


const port = 3000
console.log(`Server is running on port ${port}`)

serve({
  fetch: app.fetch,
  port
})
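The service above accepts batches of spend logs on `POST /spend/update` (port 3000) and flushes them to Postgres on a timer. A minimal client sketch, assuming the server is running locally; the field names follow the `LiteLLM_SpendLogs` type from `_types.ts`, while the sample values and the use of the `requests` library are purely illustrative:

```python
import datetime
import uuid

import requests  # assumes the spend-logs server above is running on localhost:3000

now = datetime.datetime.utcnow().isoformat()
log = {
    "request_id": str(uuid.uuid4()),
    "call_type": "acompletion",
    "api_key": "hashed-key-example",   # illustrative value
    "spend": 0.00042,
    "total_tokens": 120,
    "prompt_tokens": 100,
    "completion_tokens": 20,
    "startTime": now,
    "endTime": now,
    "model": "gpt-3.5-turbo",
    "api_base": "",
    "user": "",
    "metadata": {},
    "cache_hit": "False",
    "cache_key": "",
    "request_tags": [],
}

# The endpoint expects a JSON array; logs are buffered in memory and
# written to the DB in batches by flushLogsToDb.
resp = requests.post("http://localhost:3000/spend/update", json=[log])
print(resp.json())
```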
litellm-js/spend-logs/tsconfig.json  (new file, 13 lines)
@@ -0,0 +1,13 @@
{
  "compilerOptions": {
    "target": "ESNext",
    "module": "ESNext",
    "moduleResolution": "Bundler",
    "strict": true,
    "types": [
      "node"
    ],
    "jsx": "react-jsx",
    "jsxImportSource": "hono/jsx",
  }
}
@@ -1,6 +1,6 @@
 ### INIT VARIABLES ###
 import threading, requests, os
-from typing import Callable, List, Optional, Dict, Union, Any
+from typing import Callable, List, Optional, Dict, Union, Any, Literal
 from litellm.caching import Cache
 from litellm._logging import set_verbose, _turn_on_debug, verbose_logger
 from litellm.proxy._types import KeyManagementSystem, KeyManagementSettings

@@ -56,6 +56,7 @@ baseten_key: Optional[str] = None
 aleph_alpha_key: Optional[str] = None
 nlp_cloud_key: Optional[str] = None
 use_client: bool = False
+disable_streaming_logging: bool = False
 ### GUARDRAILS ###
 llamaguard_model_name: Optional[str] = None
 presidio_ad_hoc_recognizers: Optional[str] = None

@@ -63,6 +64,7 @@ google_moderation_confidence_threshold: Optional[float] = None
 llamaguard_unsafe_content_categories: Optional[str] = None
 blocked_user_list: Optional[Union[str, List]] = None
 banned_keywords_list: Optional[Union[str, List]] = None
+llm_guard_mode: Literal["all", "key-specific"] = "all"
 ##################
 logging: bool = True
 caching: bool = (

@@ -172,6 +174,7 @@ upperbound_key_generate_params: Optional[Dict] = None
 default_user_params: Optional[Dict] = None
 default_team_settings: Optional[List] = None
 max_user_budget: Optional[float] = None
+max_end_user_budget: Optional[float] = None
 #### RELIABILITY ####
 request_timeout: Optional[float] = 6000
 num_retries: Optional[int] = None  # per model endpoint
@@ -38,6 +38,9 @@ class BaseCache:
     async def async_get_cache(self, key, **kwargs):
         raise NotImplementedError

+    async def batch_cache_write(self, result, *args, **kwargs):
+        raise NotImplementedError
+
     async def disconnect(self):
         raise NotImplementedError

@@ -96,7 +99,9 @@ class InMemoryCache(BaseCache):
 class RedisCache(BaseCache):
     # if users don't provider one, use the default litellm cache

-    def __init__(self, host=None, port=None, password=None, **kwargs):
+    def __init__(
+        self, host=None, port=None, password=None, redis_flush_size=100, **kwargs
+    ):
         from ._redis import get_redis_client, get_redis_connection_pool

         redis_kwargs = {}

@@ -111,6 +116,10 @@ class RedisCache(BaseCache):
         self.redis_client = get_redis_client(**redis_kwargs)
         self.redis_kwargs = redis_kwargs
         self.async_redis_conn_pool = get_redis_connection_pool(**redis_kwargs)
+
+        # for high traffic, we store the redis results in memory and then batch write to redis
+        self.redis_batch_writing_buffer = []
+        self.redis_flush_size = redis_flush_size
         self.redis_version = "Unknown"
         try:
             self.redis_version = self.redis_client.info()["redis_version"]

@@ -161,8 +170,10 @@ class RedisCache(BaseCache):
             )
         except Exception as e:
             # NON blocking - notify users Redis is throwing an exception
-            print_verbose(
-                f"LiteLLM Redis Caching: async set() - Got exception from REDIS : {str(e)}"
+            verbose_logger.error(
+                "LiteLLM Redis Caching: async set() - Got exception from REDIS %s, Writing value=%s",
+                str(e),
+                value,
             )
             traceback.print_exc()

@@ -191,7 +202,27 @@ class RedisCache(BaseCache):
             # Optionally, you could process 'results' to make sure that all set operations were successful.
             return results
         except Exception as e:
-            print_verbose(f"Error occurred in pipeline write - {str(e)}")
+            verbose_logger.error(
+                "LiteLLM Redis Caching: async set_cache_pipeline() - Got exception from REDIS %s, Writing value=%s",
+                str(e),
+                cache_value,
+            )
+            traceback.print_exc()
+
+    async def batch_cache_write(self, key, value, **kwargs):
+        print_verbose(
+            f"in batch cache writing for redis buffer size={len(self.redis_batch_writing_buffer)}",
+        )
+        self.redis_batch_writing_buffer.append((key, value))
+        if len(self.redis_batch_writing_buffer) >= self.redis_flush_size:
+            await self.flush_cache_buffer()
+
+    async def flush_cache_buffer(self):
+        print_verbose(
+            f"flushing to redis....reached size of buffer {len(self.redis_batch_writing_buffer)}"
+        )
+        await self.async_set_cache_pipeline(self.redis_batch_writing_buffer)
+        self.redis_batch_writing_buffer = []

     def _get_cache_logic(self, cached_response: Any):
         """

@@ -287,6 +318,9 @@ class RedisCache(BaseCache):
     def flush_cache(self):
         self.redis_client.flushall()

+    def flushall(self):
+        self.redis_client.flushall()
+
     async def disconnect(self):
         await self.async_redis_conn_pool.disconnect(inuse_connections=True)

@@ -874,6 +908,7 @@ class Cache:
         port: Optional[str] = None,
         password: Optional[str] = None,
         namespace: Optional[str] = None,
+        ttl: Optional[float] = None,
         similarity_threshold: Optional[float] = None,
         supported_call_types: Optional[
             List[

@@ -908,6 +943,7 @@ class Cache:
         s3_path: Optional[str] = None,
         redis_semantic_cache_use_async=False,
         redis_semantic_cache_embedding_model="text-embedding-ada-002",
+        redis_flush_size=None,
         **kwargs,
     ):
         """

@@ -930,7 +966,9 @@ class Cache:
             None. Cache is set as a litellm param
         """
         if type == "redis":
-            self.cache: BaseCache = RedisCache(host, port, password, **kwargs)
+            self.cache: BaseCache = RedisCache(
+                host, port, password, redis_flush_size, **kwargs
+            )
         elif type == "redis-semantic":
             self.cache = RedisSemanticCache(
                 host,

@@ -967,6 +1005,8 @@ class Cache:
         self.supported_call_types = supported_call_types  # default to ["completion", "acompletion", "embedding", "aembedding"]
         self.type = type
         self.namespace = namespace
+        self.redis_flush_size = redis_flush_size
+        self.ttl = ttl

     def get_cache_key(self, *args, **kwargs):
         """

@@ -1206,6 +1246,9 @@ class Cache:
         if isinstance(result, OpenAIObject):
             result = result.model_dump_json()
+
+        ## DEFAULT TTL ##
+        if self.ttl is not None:
+            kwargs["ttl"] = self.ttl
         ## Get Cache-Controls ##
         if kwargs.get("cache", None) is not None and isinstance(
             kwargs.get("cache"), dict

@@ -1213,6 +1256,7 @@ class Cache:
         for k, v in kwargs.get("cache").items():
             if k == "ttl":
                 kwargs["ttl"] = v
+
         cached_data = {"timestamp": time.time(), "response": result}
         return cache_key, cached_data, kwargs
     else:

@@ -1246,6 +1290,10 @@ class Cache:
         Async implementation of add_cache
         """
         try:
+            if self.type == "redis" and self.redis_flush_size is not None:
+                # high traffic - fill in results in memory and then flush
+                await self.batch_cache_write(result, *args, **kwargs)
+            else:
                 cache_key, cached_data, kwargs = self._add_cache_logic(
                     result=result, *args, **kwargs
                 )

@@ -1287,6 +1335,12 @@ class Cache:
             print_verbose(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
             traceback.print_exc()

+    async def batch_cache_write(self, result, *args, **kwargs):
+        cache_key, cached_data, kwargs = self._add_cache_logic(
+            result=result, *args, **kwargs
+        )
+        await self.cache.batch_cache_write(cache_key, cached_data, **kwargs)
+
     async def ping(self):
         if hasattr(self.cache, "ping"):
             return await self.cache.ping()
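The caching hunks above add two new `Cache` options: `redis_flush_size` (buffer async writes in memory and flush them to Redis in one pipeline once the buffer fills) and a default `ttl`. A minimal usage sketch; the parameter names come from the diff, while the connection values and the `litellm.cache = Cache(...)` setup pattern are illustrative assumptions:

```python
import litellm
from litellm.caching import Cache

# Buffered Redis writes for high-traffic setups: cached responses are collected
# in memory and flushed via a Redis pipeline once 100 entries accumulate.
litellm.cache = Cache(
    type="redis",
    host="localhost",      # illustrative connection values
    port="6379",
    password="",
    redis_flush_size=100,  # flush buffer size (diff default for RedisCache is 100)
    ttl=600,               # default TTL in seconds, applied when no per-call ttl is given
)
```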
@@ -10,7 +10,7 @@ class AthinaLogger:
             "Content-Type": "application/json"
         }
         self.athina_logging_url = "https://log.athina.ai/api/v1/log/inference"
-        self.additional_keys = ["environment", "prompt_slug", "customer_id", "customer_user_id", "session_id", "external_reference_id", "context", "expected_response"]
+        self.additional_keys = ["environment", "prompt_slug", "customer_id", "customer_user_id", "session_id", "external_reference_id", "context", "expected_response", "user_query"]

     def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
         import requests

@@ -32,8 +32,6 @@ class AthinaLogger:

         if "messages" in kwargs:
             data["prompt"] = kwargs.get("messages", None)
-            if kwargs.get("messages") and len(kwargs.get("messages")) > 0:
-                data["user_query"] = kwargs.get("messages")[0].get("content", None)

         # Directly add tools or functions if present
         optional_params = kwargs.get("optional_params", {})
@@ -72,7 +72,12 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
     ):
         pass

-    async def async_moderation_hook(self, data: dict):
+    async def async_moderation_hook(
+        self,
+        data: dict,
+        user_api_key_dict: UserAPIKeyAuth,
+        call_type: Literal["completion", "embeddings", "image_generation"],
+    ):
         pass

     async def async_post_call_streaming_hook(
@@ -246,13 +246,13 @@ class LangFuseLogger:
         metadata_tags = metadata.get("tags", [])
         tags = metadata_tags

-        generation_name = metadata.get("generation_name", None)
-        if generation_name is None:
-            # just log `litellm-{call_type}` as the generation name
-            generation_name = f"litellm-{kwargs.get('call_type', 'completion')}"
+        trace_name = metadata.get("trace_name", None)
+        if trace_name is None:
+            # just log `litellm-{call_type}` as the trace name
+            trace_name = f"litellm-{kwargs.get('call_type', 'completion')}"

         trace_params = {
-            "name": generation_name,
+            "name": trace_name,
             "input": input,
             "user_id": metadata.get("trace_user_id", user_id),
             "id": metadata.get("trace_id", None),

@@ -311,6 +311,11 @@ class LangFuseLogger:
             "completion_tokens": response_obj["usage"]["completion_tokens"],
             "total_cost": cost if supports_costs else None,
         }
+        generation_name = metadata.get("generation_name", None)
+        if generation_name is None:
+            # just log `litellm-{call_type}` as the generation name
+            generation_name = f"litellm-{kwargs.get('call_type', 'completion')}"
+
         generation_params = {
             "name": generation_name,
             "id": metadata.get("generation_id", generation_id),
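With the Langfuse change above, `trace_name` in request metadata names the trace while `generation_name` names the generation inside it, instead of one key doing both. A hedged usage sketch; the `success_callback` setup and model choice are assumptions about typical LiteLLM + Langfuse usage, only the metadata keys come from the diff:

```python
import litellm

litellm.success_callback = ["langfuse"]  # assumes Langfuse credentials are configured

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
    metadata={
        "trace_name": "my-app-request",     # labels the Langfuse trace
        "generation_name": "draft-answer",  # labels the generation within the trace
        "tags": ["example"],
    },
)
```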
@@ -131,18 +131,24 @@ def completion(
             )
         else:
             # Separate system prompt from rest of message
-            system_prompt_idx: Optional[int] = None
+            system_prompt_indices = []
+            system_prompt = ""
             for idx, message in enumerate(messages):
                 if message["role"] == "system":
-                    optional_params["system"] = message["content"]
-                    system_prompt_idx = idx
-                    break
-            if system_prompt_idx is not None:
-                messages.pop(system_prompt_idx)
+                    system_prompt += message["content"]
+                    system_prompt_indices.append(idx)
+            if len(system_prompt_indices) > 0:
+                for idx in reversed(system_prompt_indices):
+                    messages.pop(idx)
+            if len(system_prompt) > 0:
+                optional_params["system"] = system_prompt
             # Format rest of message according to anthropic guidelines
-            messages = prompt_factory(
-                model=model, messages=messages, custom_llm_provider="anthropic"
-            )
+            try:
+                messages = prompt_factory(
+                    model=model, messages=messages, custom_llm_provider="anthropic"
+                )
+            except Exception as e:
+                raise AnthropicError(status_code=400, message=str(e))

         ## Load Config
         config = litellm.AnthropicConfig.get_config()

@@ -295,7 +301,7 @@ def completion(
             )
             streaming_choice.delta = delta_obj
             streaming_model_response.choices = [streaming_choice]
-            completion_stream = model_response_iterator(
+            completion_stream = ModelResponseIterator(
                 model_response=streaming_model_response
             )
             print_verbose(

@@ -324,8 +330,30 @@ def completion(
         return model_response


-def model_response_iterator(model_response):
-    yield model_response
+class ModelResponseIterator:
+    def __init__(self, model_response):
+        self.model_response = model_response
+        self.is_done = False
+
+    # Sync iterator
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        if self.is_done:
+            raise StopIteration
+        self.is_done = True
+        return self.model_response
+
+    # Async iterator
+    def __aiter__(self):
+        return self
+
+    async def __anext__(self):
+        if self.is_done:
+            raise StopAsyncIteration
+        self.is_done = True
+        return self.model_response


 def embedding():
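The first hunk above changes Anthropic system-prompt handling: every `system` message is now concatenated into one `system` parameter and all of them are removed from `messages` (previously only the first was used). A standalone Python sketch of that exact logic with made-up message content, not the litellm function itself:

```python
messages = [
    {"role": "system", "content": "You are terse."},
    {"role": "user", "content": "Hi"},
    {"role": "system", "content": " Answer in French."},
]

system_prompt = ""
system_prompt_indices = []
for idx, message in enumerate(messages):
    if message["role"] == "system":
        system_prompt += message["content"]
        system_prompt_indices.append(idx)

# Pop from the end so the earlier indices stay valid while removing.
for idx in reversed(system_prompt_indices):
    messages.pop(idx)

assert system_prompt == "You are terse. Answer in French."
assert messages == [{"role": "user", "content": "Hi"}]
```

Popping in reverse order is the reason the diff iterates `reversed(system_prompt_indices)`: removing index 0 first would shift every later system-message index and pop the wrong elements.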
@@ -11,6 +11,7 @@ from .prompt_templates.factory import (
     construct_tool_use_system_prompt,
     extract_between_tags,
     parse_xml_params,
+    contains_tag,
 )
 import httpx

@@ -78,11 +79,13 @@ class AmazonTitanConfig:

 class AmazonAnthropicClaude3Config:
     """
-    Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude
+    Reference:
+        https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude
+        https://docs.anthropic.com/claude/docs/models-overview#model-comparison

     Supported Params for the Amazon / Anthropic Claude 3 models:

-    - `max_tokens` Required (integer) max tokens,
+    - `max_tokens` Required (integer) max tokens. Default is 4096
     - `anthropic_version` Required (string) version of anthropic for bedrock - e.g. "bedrock-2023-05-31"
     - `system` Optional (string) the system prompt, conversion from openai format to this is handled in factory.py
     - `temperature` Optional (float) The amount of randomness injected into the response

@@ -91,7 +94,7 @@ class AmazonAnthropicClaude3Config:
     - `stop_sequences` Optional (List[str]) Custom text sequences that cause the model to stop generating
     """

-    max_tokens: Optional[int] = litellm.max_tokens
+    max_tokens: Optional[int] = 4096  # Opus, Sonnet, and Haiku default
    anthropic_version: Optional[str] = "bedrock-2023-05-31"
     system: Optional[str] = None
     temperature: Optional[float] = None

@@ -128,7 +131,15 @@ class AmazonAnthropicClaude3Config:
     }

     def get_supported_openai_params(self):
-        return ["max_tokens", "tools", "tool_choice", "stream"]
+        return [
+            "max_tokens",
+            "tools",
+            "tool_choice",
+            "stream",
+            "stop",
+            "temperature",
+            "top_p",
+        ]

     def map_openai_params(self, non_default_params: dict, optional_params: dict):
         for param, value in non_default_params.items():
@ -679,6 +690,7 @@ def completion(
|
||||||
timeout=None,
|
timeout=None,
|
||||||
):
|
):
|
||||||
exception_mapping_worked = False
|
exception_mapping_worked = False
|
||||||
|
_is_function_call = False
|
||||||
try:
|
try:
|
||||||
# pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
|
# pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
|
||||||
aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
|
aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
|
||||||
|
@ -727,8 +739,10 @@ def completion(
|
||||||
system_messages.append(message["content"])
|
system_messages.append(message["content"])
|
||||||
system_prompt_idx.append(idx)
|
system_prompt_idx.append(idx)
|
||||||
if len(system_prompt_idx) > 0:
|
if len(system_prompt_idx) > 0:
|
||||||
inference_params["system"] = '\n'.join(system_messages)
|
inference_params["system"] = "\n".join(system_messages)
|
||||||
messages = [i for j, i in enumerate(messages) if j not in system_prompt_idx]
|
messages = [
|
||||||
|
i for j, i in enumerate(messages) if j not in system_prompt_idx
|
||||||
|
]
|
||||||
# Format rest of message according to anthropic guidelines
|
# Format rest of message according to anthropic guidelines
|
||||||
messages = prompt_factory(
|
messages = prompt_factory(
|
||||||
model=model, messages=messages, custom_llm_provider="anthropic"
|
model=model, messages=messages, custom_llm_provider="anthropic"
|
||||||
|
@ -742,6 +756,7 @@ def completion(
|
||||||
inference_params[k] = v
|
inference_params[k] = v
|
||||||
## Handle Tool Calling
|
## Handle Tool Calling
|
||||||
if "tools" in inference_params:
|
if "tools" in inference_params:
|
||||||
|
_is_function_call = True
|
||||||
tool_calling_system_prompt = construct_tool_use_system_prompt(
|
tool_calling_system_prompt = construct_tool_use_system_prompt(
|
||||||
tools=inference_params["tools"]
|
tools=inference_params["tools"]
|
||||||
)
|
)
|
||||||
|
@ -823,7 +838,7 @@ def completion(
|
||||||
## COMPLETION CALL
|
## COMPLETION CALL
|
||||||
accept = "application/json"
|
accept = "application/json"
|
||||||
contentType = "application/json"
|
contentType = "application/json"
|
||||||
if stream == True:
|
if stream == True and _is_function_call == False:
|
||||||
if provider == "ai21":
|
if provider == "ai21":
|
||||||
## LOGGING
|
## LOGGING
|
||||||
request_str = f"""
|
request_str = f"""
|
||||||
|
@ -918,7 +933,9 @@ def completion(
|
||||||
elif provider == "anthropic":
|
elif provider == "anthropic":
|
||||||
if model.startswith("anthropic.claude-3"):
|
if model.startswith("anthropic.claude-3"):
|
||||||
outputText = response_body.get("content")[0].get("text", None)
|
outputText = response_body.get("content")[0].get("text", None)
|
||||||
if "<invoke>" in outputText: # OUTPUT PARSE FUNCTION CALL
|
if outputText is not None and contains_tag(
|
||||||
|
"invoke", outputText
|
||||||
|
): # OUTPUT PARSE FUNCTION CALL
|
||||||
function_name = extract_between_tags("tool_name", outputText)[0]
|
function_name = extract_between_tags("tool_name", outputText)[0]
|
||||||
function_arguments_str = extract_between_tags("invoke", outputText)[
|
function_arguments_str = extract_between_tags("invoke", outputText)[
|
||||||
0
|
0
|
||||||
|
@ -941,6 +958,56 @@ def completion(
|
||||||
content=None,
|
content=None,
|
||||||
)
|
)
|
||||||
model_response.choices[0].message = _message # type: ignore
|
model_response.choices[0].message = _message # type: ignore
|
||||||
|
if _is_function_call == True and stream is not None and stream == True:
|
||||||
|
print_verbose(
|
||||||
|
f"INSIDE BEDROCK STREAMING TOOL CALLING CONDITION BLOCK"
|
||||||
|
)
|
||||||
|
# return an iterator
|
||||||
|
streaming_model_response = ModelResponse(stream=True)
|
||||||
|
streaming_model_response.choices[0].finish_reason = (
|
||||||
|
model_response.choices[0].finish_reason
|
||||||
|
)
|
||||||
|
# streaming_model_response.choices = [litellm.utils.StreamingChoices()]
|
||||||
|
streaming_choice = litellm.utils.StreamingChoices()
|
||||||
|
streaming_choice.index = model_response.choices[0].index
|
||||||
|
_tool_calls = []
|
||||||
|
print_verbose(
|
||||||
|
f"type of model_response.choices[0]: {type(model_response.choices[0])}"
|
||||||
|
)
|
||||||
|
print_verbose(f"type of streaming_choice: {type(streaming_choice)}")
|
||||||
|
if isinstance(model_response.choices[0], litellm.Choices):
|
||||||
|
if getattr(
|
||||||
|
model_response.choices[0].message, "tool_calls", None
|
||||||
|
) is not None and isinstance(
|
||||||
|
model_response.choices[0].message.tool_calls, list
|
||||||
|
):
|
||||||
|
for tool_call in model_response.choices[
|
||||||
|
0
|
||||||
|
].message.tool_calls:
|
||||||
|
_tool_call = {**tool_call.dict(), "index": 0}
|
||||||
|
_tool_calls.append(_tool_call)
|
||||||
|
delta_obj = litellm.utils.Delta(
|
||||||
|
content=getattr(
|
||||||
|
model_response.choices[0].message, "content", None
|
||||||
|
),
|
||||||
|
role=model_response.choices[0].message.role,
|
||||||
|
tool_calls=_tool_calls,
|
||||||
|
)
|
||||||
|
streaming_choice.delta = delta_obj
|
||||||
|
streaming_model_response.choices = [streaming_choice]
|
||||||
|
completion_stream = model_response_iterator(
|
||||||
|
model_response=streaming_model_response
|
||||||
|
)
|
||||||
|
print_verbose(
|
||||||
|
f"Returns anthropic CustomStreamWrapper with 'cached_response' streaming object"
|
||||||
|
)
|
||||||
|
return litellm.CustomStreamWrapper(
|
||||||
|
completion_stream=completion_stream,
|
||||||
|
model=model,
|
||||||
|
custom_llm_provider="cached_response",
|
||||||
|
logging_obj=logging_obj,
|
||||||
|
)
|
||||||
|
|
||||||
model_response["finish_reason"] = response_body["stop_reason"]
|
model_response["finish_reason"] = response_body["stop_reason"]
|
||||||
_usage = litellm.Usage(
|
_usage = litellm.Usage(
|
||||||
prompt_tokens=response_body["usage"]["input_tokens"],
|
prompt_tokens=response_body["usage"]["input_tokens"],
|
||||||
|
@ -1029,6 +1096,10 @@ def completion(
|
||||||
raise BedrockError(status_code=500, message=traceback.format_exc())
|
raise BedrockError(status_code=500, message=traceback.format_exc())
|
||||||
|
|
||||||
|
|
||||||
|
async def model_response_iterator(model_response):
|
||||||
|
yield model_response
|
||||||
|
|
||||||
|
|
||||||
def _embedding_func_single(
|
def _embedding_func_single(
|
||||||
model: str,
|
model: str,
|
||||||
input: str,
|
input: str,
|
||||||
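With the expanded get_supported_openai_params list above, a Bedrock Claude 3 call can now pass stop, temperature, and top_p and have them mapped onto the Bedrock request body. A minimal usage sketch (assumes AWS credentials and region are configured in the environment; the model identifier follows the standard Bedrock naming):

import litellm

# temperature / top_p / stop are now listed as supported OpenAI params for the
# Amazon Anthropic Claude 3 config and get translated into the Bedrock payload
response = litellm.completion(
    model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    messages=[{"role": "user", "content": "Name three primary colors."}],
    temperature=0.2,
    top_p=0.9,
    stop=["\n\n"],
    max_tokens=256,
)
print(response.choices[0].message.content)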

litellm/llms/custom_httpx/httpx_handler.py (new file, 38 lines)
@@ -0,0 +1,38 @@
+from typing import Optional
+import httpx
+
+
+class HTTPHandler:
+    def __init__(self, concurrent_limit=1000):
+        # Create a client with a connection pool
+        self.client = httpx.AsyncClient(
+            limits=httpx.Limits(
+                max_connections=concurrent_limit,
+                max_keepalive_connections=concurrent_limit,
+            )
+        )
+
+    async def close(self):
+        # Close the client when you're done with it
+        await self.client.aclose()
+
+    async def get(
+        self, url: str, params: Optional[dict] = None, headers: Optional[dict] = None
+    ):
+        response = await self.client.get(url, params=params, headers=headers)
+        return response
+
+    async def post(
+        self,
+        url: str,
+        data: Optional[dict] = None,
+        params: Optional[dict] = None,
+        headers: Optional[dict] = None,
+    ):
+        try:
+            response = await self.client.post(
+                url, data=data, params=params, headers=headers
+            )
+            return response
+        except Exception as e:
+            raise e
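A minimal usage sketch of the new HTTPHandler wrapper (the URL is a placeholder; note that despite the plain method names, get() and post() are coroutines backed by httpx.AsyncClient and must be awaited):

import asyncio

from litellm.llms.custom_httpx.httpx_handler import HTTPHandler


async def main():
    handler = HTTPHandler(concurrent_limit=100)
    try:
        response = await handler.post(
            "https://example.com/api", data={"hello": "world"}
        )
        print(response.status_code)
    finally:
        await handler.close()  # release the pooled connections


asyncio.run(main())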
@@ -118,7 +118,7 @@ def completion(
     logger_fn=None,
 ):
     try:
-        import google.generativeai as genai
+        import google.generativeai as genai # type: ignore
     except:
         raise Exception(
             "Importing google.generativeai failed, please run 'pip install -q google-generativeai"

@@ -308,7 +308,7 @@ async def async_completion(
     messages,
     encoding,
 ):
-    import google.generativeai as genai
+    import google.generativeai as genai # type: ignore

     response = await _model.generate_content_async(
         contents=prompt,
@@ -68,9 +68,9 @@ class OllamaConfig:
     repeat_last_n: Optional[int] = None
     repeat_penalty: Optional[float] = None
     temperature: Optional[float] = None
-    stop: Optional[
-        list
-    ] = None # stop is a list based on this - https://github.com/jmorganca/ollama/pull/442
+    stop: Optional[list] = (
+        None # stop is a list based on this - https://github.com/jmorganca/ollama/pull/442
+    )
     tfs_z: Optional[float] = None
     num_predict: Optional[int] = None
     top_k: Optional[int] = None

@@ -344,9 +344,9 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):


 async def ollama_aembeddings(
-    api_base="http://localhost:11434",
-    model="llama2",
-    prompt="Why is the sky blue?",
+    api_base: str,
+    model: str,
+    prompts: list,
     optional_params=None,
     logging_obj=None,
     model_response=None,

@@ -365,6 +365,11 @@ async def ollama_aembeddings(
     ): # completion(top_k=3) > cohere_config(top_k=3) <- allows for dynamic variables to be passed in
         optional_params[k] = v

+    total_input_tokens = 0
+    output_data = []
+    timeout = aiohttp.ClientTimeout(total=litellm.request_timeout) # 10 minutes
+    async with aiohttp.ClientSession(timeout=timeout) as session:
+        for idx, prompt in enumerate(prompts):
             data = {
                 "model": model,
                 "prompt": prompt,

@@ -373,12 +378,14 @@ async def ollama_aembeddings(
             logging_obj.pre_call(
                 input=None,
                 api_key=None,
-                additional_args={"api_base": url, "complete_input_dict": data, "headers": {}},
+                additional_args={
+                    "api_base": url,
+                    "complete_input_dict": data,
+                    "headers": {},
+                },
             )
-    timeout = aiohttp.ClientTimeout(total=litellm.request_timeout) # 10 minutes
-    async with aiohttp.ClientSession(timeout=timeout) as session:
-        response = await session.post(url, json=data)

+            response = await session.post(url, json=data)
             if response.status != 200:
                 text = await response.text()
                 raise OllamaError(status_code=response.status, message=text)

@@ -395,21 +402,19 @@ async def ollama_aembeddings(
             )

             response_json = await response.json()
-            embeddings = response_json["embedding"]
-            ## RESPONSE OBJECT
-            output_data = []
-            for idx, embedding in enumerate(embeddings):
+            embeddings: list[float] = response_json["embedding"]
             output_data.append(
-                {"object": "embedding", "index": idx, "embedding": embedding}
+                {"object": "embedding", "index": idx, "embedding": embeddings}
             )

+            input_tokens = len(encoding.encode(prompt))
+            total_input_tokens += input_tokens

     model_response["object"] = "list"
     model_response["data"] = output_data
     model_response["model"] = model

-    input_tokens = len(encoding.encode(prompt))

     model_response["usage"] = {
-        "prompt_tokens": input_tokens,
-        "total_tokens": input_tokens,
+        "prompt_tokens": total_input_tokens,
+        "total_tokens": total_input_tokens,
     }
     return model_response
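Since ollama_aembeddings now takes a list of prompts and sums token usage over the batch, a list input can be embedded in one call. A usage sketch (model name and texts are illustrative; assumes a local Ollama server at the default port):

import asyncio

import litellm


async def main():
    # each prompt in the batch is embedded in turn; prompt_tokens in the
    # returned usage is the total across all inputs
    response = await litellm.aembedding(
        model="ollama/nomic-embed-text",  # illustrative model name
        input=["first document", "second document"],
        api_base="http://localhost:11434",
    )
    print(len(response.data), response.usage)


asyncio.run(main())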
@@ -173,10 +173,11 @@ class OllamaChatConfig:
             litellm.add_function_to_prompt = (
                 True # so that main.py adds the function call to the prompt
             )
-            optional_params["functions_unsupported_model"] = non_default_params.pop(
+            optional_params["functions_unsupported_model"] = non_default_params.get(
                 "functions"
             )
         non_default_params.pop("tool_choice", None) # causes ollama requests to hang
+        non_default_params.pop("functions", None) # causes ollama requests to hang
         return optional_params

@@ -98,7 +98,7 @@ def completion(
     logger_fn=None,
 ):
     try:
-        import google.generativeai as palm
+        import google.generativeai as palm # type: ignore
     except:
         raise Exception(
             "Importing google.generativeai failed, please run 'pip install -q google-generativeai"
@@ -5,12 +5,17 @@ from jinja2 import Template, exceptions, Environment, meta
 from typing import Optional, Any
 import imghdr, base64
 from typing import List
+import litellm


 def default_pt(messages):
     return " ".join(message["content"] for message in messages)


+def prompt_injection_detection_default_pt():
+    return """Detect if a prompt is safe to run. Return 'UNSAFE' if not."""
+
+
 # alpaca prompt template - for models like mythomax, etc.
 def alpaca_pt(messages):
     prompt = custom_prompt(

@@ -638,11 +643,12 @@ def anthropic_messages_pt(messages: list):
     """
     # add role=tool support to allow function call result/error submission
     user_message_types = {"user", "tool"}
-    # reformat messages to ensure user/assistant are alternating, if there's either 2 consecutive 'user' messages or 2 consecutive 'assistant' message, add a blank 'user' or 'assistant' message to ensure compatibility
+    # reformat messages to ensure user/assistant are alternating, if there's either 2 consecutive 'user' messages or 2 consecutive 'assistant' message, merge them.
     new_messages = []
     msg_i = 0
     while msg_i < len(messages):
         user_content = []
+        ## MERGE CONSECUTIVE USER CONTENT ##
         while msg_i < len(messages) and messages[msg_i]["role"] in user_message_types:
             if isinstance(messages[msg_i]["content"], list):
                 for m in messages[msg_i]["content"]:

@@ -676,6 +682,7 @@ def anthropic_messages_pt(messages: list):
             new_messages.append({"role": "user", "content": user_content})

         assistant_content = []
+        ## MERGE CONSECUTIVE ASSISTANT CONTENT ##
         while msg_i < len(messages) and messages[msg_i]["role"] == "assistant":
             assistant_text = (
                 messages[msg_i].get("content") or ""

@@ -694,9 +701,14 @@ def anthropic_messages_pt(messages: list):
             new_messages.append({"role": "assistant", "content": assistant_content})

     if new_messages[0]["role"] != "user":
+        if litellm.modify_params:
            new_messages.insert(
                0, {"role": "user", "content": [{"type": "text", "text": "."}]}
            )
+        else:
+            raise Exception(
+                "Invalid first message. Should always start with 'role'='user' for Anthropic. System prompt is sent separately for Anthropic. set 'litellm.modify_params = True' or 'litellm_settings:modify_params = True' on proxy, to insert a placeholder user message - '.' as the first message, "
+            )

     if new_messages[-1]["role"] == "assistant":
         for content in new_messages[-1]["content"]:
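A small illustration of the merge behaviour described by the updated comment, sketched under the assumption that anthropic_messages_pt is importable from litellm.llms.prompt_templates.factory:

import litellm
from litellm.llms.prompt_templates.factory import anthropic_messages_pt

litellm.modify_params = True  # allow the placeholder "." user turn to be inserted

messages = [
    {"role": "assistant", "content": "hello"},  # conversation starts with assistant
    {"role": "user", "content": "first"},
    {"role": "user", "content": "second"},      # two consecutive user turns get merged
]

print(anthropic_messages_pt(messages))
# Expected shape: user (".") / assistant ("hello") / user ("first" + "second"),
# i.e. strictly alternating roles as the Anthropic messages API requires.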
@@ -714,17 +726,23 @@ def extract_between_tags(tag: str, string: str, strip: bool = False) -> List[str
         ext_list = [e.strip() for e in ext_list]
     return ext_list


 def contains_tag(tag: str, string: str) -> bool:
     return bool(re.search(f"<{tag}>(.+?)</{tag}>", string, re.DOTALL))


 def parse_xml_params(xml_content):
     root = ET.fromstring(xml_content)
     params = {}
     for child in root.findall(".//parameters/*"):
+        try:
+            # Attempt to decode the element's text as JSON
+            params[child.tag] = json.loads(child.text)
+        except json.JSONDecodeError:
+            # If JSON decoding fails, use the original text
            params[child.tag] = child.text
     return params


 ###


@@ -917,7 +935,7 @@ def gemini_text_image_pt(messages: list):
     }
     """
     try:
-        import google.generativeai as genai
+        import google.generativeai as genai # type: ignore
     except:
         raise Exception(
             "Importing google.generativeai failed, please run 'pip install -q google-generativeai"

@@ -958,9 +976,7 @@ def azure_text_pt(messages: list):

 # Function call template
 def function_call_prompt(messages: list, functions: list):
-    function_prompt = (
-        """Produce JSON OUTPUT ONLY! Adhere to this format {"name": "function_name", "arguments":{"argument_name": "argument_value"}} The following functions are available to you:"""
-    )
+    function_prompt = """Produce JSON OUTPUT ONLY! Adhere to this format {"name": "function_name", "arguments":{"argument_name": "argument_value"}} The following functions are available to you:"""
     for function in functions:
         function_prompt += f"""\n{function}\n"""

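The effect of the new JSON-decoding fallback in parse_xml_params, sketched with the same logic reproduced standalone (tag names and values are illustrative):

import json
import xml.etree.ElementTree as ET


def parse_xml_params(xml_content):
    # mirrors the patched helper: try JSON first, fall back to raw text
    root = ET.fromstring(xml_content)
    params = {}
    for child in root.findall(".//parameters/*"):
        try:
            params[child.tag] = json.loads(child.text)
        except json.JSONDecodeError:
            params[child.tag] = child.text
    return params


xml = '<invoke><parameters><count>3</count><tags>["a", "b"]</tags><name>Bob</name></parameters></invoke>'
print(parse_xml_params(xml))
# {'count': 3, 'tags': ['a', 'b'], 'name': 'Bob'}
# numbers and JSON arrays are decoded into Python types; plain text stays a string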
@@ -166,6 +166,7 @@ def completion(
     aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
     aws_access_key_id = optional_params.pop("aws_access_key_id", None)
     aws_region_name = optional_params.pop("aws_region_name", None)
+    model_id = optional_params.pop("model_id", None)

     if aws_access_key_id != None:
         # uses auth params passed to completion

@@ -245,15 +246,28 @@ def completion(
                 model=model,
                 logging_obj=logging_obj,
                 data=data,
+                model_id=model_id,
+                aws_secret_access_key=aws_secret_access_key,
+                aws_access_key_id=aws_access_key_id,
+                aws_region_name=aws_region_name,
             )
             return response

+        if model_id is not None:
+            response = client.invoke_endpoint_with_response_stream(
+                EndpointName=model,
+                InferenceComponentName=model_id,
+                ContentType="application/json",
+                Body=data,
+                CustomAttributes="accept_eula=true",
+            )
+        else:
            response = client.invoke_endpoint_with_response_stream(
                EndpointName=model,
                ContentType="application/json",
                Body=data,
                CustomAttributes="accept_eula=true",
            )

         return response["Body"]
     elif acompletion == True:
         _data = {"inputs": prompt, "parameters": inference_params}

@@ -264,10 +278,44 @@ def completion(
             model=model,
             logging_obj=logging_obj,
             data=_data,
+            model_id=model_id,
+            aws_secret_access_key=aws_secret_access_key,
+            aws_access_key_id=aws_access_key_id,
+            aws_region_name=aws_region_name,
         )
     data = json.dumps({"inputs": prompt, "parameters": inference_params}).encode(
         "utf-8"
     )
+    ## COMPLETION CALL
+    try:
+        if model_id is not None:
+            ## LOGGING
+            request_str = f"""
+            response = client.invoke_endpoint(
+                EndpointName={model},
+                InferenceComponentName={model_id},
+                ContentType="application/json",
+                Body={data},
+                CustomAttributes="accept_eula=true",
+            )
+            """ # type: ignore
+            logging_obj.pre_call(
+                input=prompt,
+                api_key="",
+                additional_args={
+                    "complete_input_dict": data,
+                    "request_str": request_str,
+                    "hf_model_name": hf_model_name,
+                },
+            )
+            response = client.invoke_endpoint(
+                EndpointName=model,
+                InferenceComponentName=model_id,
+                ContentType="application/json",
+                Body=data,
+                CustomAttributes="accept_eula=true",
+            )
+        else:
            ## LOGGING
            request_str = f"""
            response = client.invoke_endpoint(

@@ -286,8 +334,6 @@ def completion(
                 "hf_model_name": hf_model_name,
             },
         )
-        ## COMPLETION CALL
-        try:
            response = client.invoke_endpoint(
                EndpointName=model,
                ContentType="application/json",

@@ -303,6 +349,8 @@ def completion(
         error_message = (
             getattr(e, "response", {}).get("Error", {}).get("Message", str(e))
         )
+        if "Inference Component Name header is required" in error_message:
+            error_message += "\n pass in via `litellm.completion(..., model_id={InferenceComponentName})`"
         raise SagemakerError(status_code=status_code, message=error_message)

     response = response["Body"].read().decode("utf8")

@@ -357,8 +405,12 @@ async def async_streaming(
     encoding,
     model_response: ModelResponse,
     model: str,
+    model_id: Optional[str],
     logging_obj: Any,
     data,
+    aws_secret_access_key: Optional[str],
+    aws_access_key_id: Optional[str],
+    aws_region_name: Optional[str],
 ):
     """
     Use aioboto3

@@ -367,11 +419,6 @@ async def async_streaming(

     session = aioboto3.Session()

-    # pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
-    aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
-    aws_access_key_id = optional_params.pop("aws_access_key_id", None)
-    aws_region_name = optional_params.pop("aws_region_name", None)

     if aws_access_key_id != None:
         # uses auth params passed to completion
         # aws_access_key_id is not None, assume user is trying to auth using litellm.completion

@@ -398,6 +445,15 @@ async def async_streaming(

     async with _client as client:
         try:
+            if model_id is not None:
+                response = await client.invoke_endpoint_with_response_stream(
+                    EndpointName=model,
+                    InferenceComponentName=model_id,
+                    ContentType="application/json",
+                    Body=data,
+                    CustomAttributes="accept_eula=true",
+                )
+            else:
                response = await client.invoke_endpoint_with_response_stream(
                    EndpointName=model,
                    ContentType="application/json",

@@ -418,6 +474,10 @@ async def async_completion(
     model: str,
     logging_obj: Any,
     data: dict,
+    model_id: Optional[str],
+    aws_secret_access_key: Optional[str],
+    aws_access_key_id: Optional[str],
+    aws_region_name: Optional[str],
 ):
     """
     Use aioboto3

@@ -426,11 +486,6 @@ async def async_completion(

     session = aioboto3.Session()

-    # pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
-    aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
-    aws_access_key_id = optional_params.pop("aws_access_key_id", None)
-    aws_region_name = optional_params.pop("aws_region_name", None)

     if aws_access_key_id != None:
         # uses auth params passed to completion
         # aws_access_key_id is not None, assume user is trying to auth using litellm.completion

@@ -456,6 +511,35 @@ async def async_completion(
     )

     async with _client as client:
+        encoded_data = json.dumps(data).encode("utf-8")
+        try:
+            if model_id is not None:
+                ## LOGGING
+                request_str = f"""
+                response = client.invoke_endpoint(
+                    EndpointName={model},
+                    InferenceComponentName={model_id},
+                    ContentType="application/json",
+                    Body={data},
+                    CustomAttributes="accept_eula=true",
+                )
+                """ # type: ignore
+                logging_obj.pre_call(
+                    input=data["inputs"],
+                    api_key="",
+                    additional_args={
+                        "complete_input_dict": data,
+                        "request_str": request_str,
+                    },
+                )
+                response = await client.invoke_endpoint(
+                    EndpointName=model,
+                    InferenceComponentName=model_id,
+                    ContentType="application/json",
+                    Body=encoded_data,
+                    CustomAttributes="accept_eula=true",
+                )
+            else:
                ## LOGGING
                request_str = f"""
                response = client.invoke_endpoint(

@@ -473,8 +557,6 @@ async def async_completion(
                 "request_str": request_str,
             },
         )
-        encoded_data = json.dumps(data).encode("utf-8")
-        try:
            response = await client.invoke_endpoint(
                EndpointName=model,
                ContentType="application/json",

@@ -482,7 +564,10 @@ async def async_completion(
                 CustomAttributes="accept_eula=true",
             )
         except Exception as e:
-            raise SagemakerError(status_code=500, message=f"{str(e)}")
+            error_message = f"{str(e)}"
+            if "Inference Component Name header is required" in error_message:
+                error_message += "\n pass in via `litellm.completion(..., model_id={InferenceComponentName})`"
+            raise SagemakerError(status_code=500, message=error_message)
         response = await response["Body"].read()
         response = response.decode("utf8")
         ## LOGGING
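A usage sketch of the new model_id parameter for SageMaker endpoints that host multiple inference components; the endpoint and component names below are placeholders, and AWS credentials are assumed to come from the environment:

import litellm

# EndpointName comes from the model string; the InferenceComponentName header
# is supplied via model_id, matching the hint added to the error message above
response = litellm.completion(
    model="sagemaker/my-llama2-endpoint",        # placeholder endpoint name
    model_id="my-llama2-inference-component",    # placeholder InferenceComponentName
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
print(response.choices[0].message.content)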
@@ -289,11 +289,11 @@ def completion(
             Part,
             GenerationConfig,
         )
-        from google.cloud import aiplatform
+        from google.cloud import aiplatform # type: ignore
         from google.protobuf import json_format # type: ignore
         from google.protobuf.struct_pb2 import Value # type: ignore
-        from google.cloud.aiplatform_v1beta1.types import content as gapic_content_types
-        import google.auth
+        from google.cloud.aiplatform_v1beta1.types import content as gapic_content_types # type: ignore
+        import google.auth # type: ignore

         ## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
         print_verbose(

@@ -783,7 +783,7 @@ async def async_completion(
         """
         Vertex AI Model Garden
         """
-        from google.cloud import aiplatform
+        from google.cloud import aiplatform # type: ignore

         ## LOGGING
         logging_obj.pre_call(

@@ -969,7 +969,7 @@ async def async_streaming(
         )
         response = llm_model.predict_streaming_async(prompt, **optional_params)
     elif mode == "custom":
-        from google.cloud import aiplatform
+        from google.cloud import aiplatform # type: ignore

         stream = optional_params.pop("stream", None)

@@ -1059,7 +1059,7 @@ def embedding(
         )

     from vertexai.language_models import TextEmbeddingModel
-    import google.auth
+    import google.auth # type: ignore

     ## Load credentials with the correct quota project ref: https://github.com/googleapis/python-aiplatform/issues/2557#issuecomment-1709284744
     try:
@@ -115,27 +115,57 @@ class LiteLLM:
         default_headers: Optional[Mapping[str, str]] = None,
     ):
         self.params = locals()
-        self.chat = Chat(self.params)
+        self.chat = Chat(self.params, router_obj=None)


 class Chat:
-    def __init__(self, params):
+    def __init__(self, params, router_obj: Optional[Any]):
         self.params = params
-        self.completions = Completions(self.params)
+        if self.params.get("acompletion", False) == True:
+            self.params.pop("acompletion")
+            self.completions: Union[AsyncCompletions, Completions] = AsyncCompletions(
+                self.params, router_obj=router_obj
+            )
+        else:
+            self.completions = Completions(self.params, router_obj=router_obj)


 class Completions:
-    def __init__(self, params):
+    def __init__(self, params, router_obj: Optional[Any]):
         self.params = params
+        self.router_obj = router_obj

     def create(self, messages, model=None, **kwargs):
         for k, v in kwargs.items():
             self.params[k] = v
         model = model or self.params.get("model")
+        if self.router_obj is not None:
+            response = self.router_obj.completion(
+                model=model, messages=messages, **self.params
+            )
+        else:
            response = completion(model=model, messages=messages, **self.params)
         return response


+class AsyncCompletions:
+    def __init__(self, params, router_obj: Optional[Any]):
+        self.params = params
+        self.router_obj = router_obj
+
+    async def create(self, messages, model=None, **kwargs):
+        for k, v in kwargs.items():
+            self.params[k] = v
+        model = model or self.params.get("model")
+        if self.router_obj is not None:
+            response = await self.router_obj.acompletion(
+                model=model, messages=messages, **self.params
+            )
+        else:
+            response = await acompletion(model=model, messages=messages, **self.params)
+        return response
+
+
 @client
 async def acompletion(
     model: str,

@@ -571,6 +601,7 @@ def completion(
         "ttl",
         "cache",
         "no-log",
+        "base_model",
     ]
     default_params = openai_params + litellm_params
     non_default_params = {

@@ -639,7 +670,7 @@ def completion(
         elif (
             input_cost_per_second is not None
         ): # time based pricing just needs cost in place
-            output_cost_per_second = output_cost_per_second or 0.0
+            output_cost_per_second = output_cost_per_second
             litellm.register_model(
                 {
                     f"{custom_llm_provider}/{model}": {

@@ -1752,7 +1783,11 @@ def completion(
                 timeout=timeout,
             )

-            if "stream" in optional_params and optional_params["stream"] == True:
+            if (
+                "stream" in optional_params
+                and optional_params["stream"] == True
+                and not isinstance(response, CustomStreamWrapper)
+            ):
                 # don't try to access stream object,
                 if "ai21" in model:
                     response = CustomStreamWrapper(

@@ -2754,28 +2789,25 @@ def embedding(
                 model_response=EmbeddingResponse(),
             )
         elif custom_llm_provider == "ollama":
-            ollama_input = None
-            if isinstance(input, list) and len(input) > 1:
-                raise litellm.BadRequestError(
-                    message=f"Ollama Embeddings don't support batch embeddings",
-                    model=model, # type: ignore
-                    llm_provider="ollama", # type: ignore
-                )
-            if isinstance(input, list) and len(input) == 1:
-                ollama_input = "".join(input[0])
-            elif isinstance(input, str):
-                ollama_input = input
-            else:
+            api_base = (
+                litellm.api_base
+                or api_base
+                or get_secret("OLLAMA_API_BASE")
+                or "http://localhost:11434"
+            )
+            if isinstance(input, str):
+                input = [input]
+            if not all(isinstance(item, str) for item in input):
                 raise litellm.BadRequestError(
                     message=f"Invalid input for ollama embeddings. input={input}",
                     model=model, # type: ignore
                     llm_provider="ollama", # type: ignore
                 )
-            if aembedding == True:
+            if aembedding:
                 response = ollama.ollama_aembeddings(
+                    api_base=api_base,
                     model=model,
-                    prompt=ollama_input,
+                    prompts=input,
                     encoding=encoding,
                     logging_obj=logging,
                     optional_params=optional_params,
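A minimal sketch of the updated client wrapper: when the Chat layer sees acompletion=True in its params it hands back AsyncCompletions, whose create() must be awaited. This assumes the LiteLLM constructor accepts an acompletion flag (it is only read from self.params in the patched Chat); the model name and key below are placeholders:

import asyncio

from litellm import LiteLLM


async def main():
    # acompletion=True is an assumption about the constructor surface;
    # the flag is what Chat.__init__ checks to choose AsyncCompletions
    client = LiteLLM(api_key="sk-...", acompletion=True)
    response = await client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello!"}],
    )
    print(response.choices[0].message.content)


asyncio.run(main())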
[Regenerated LiteLLM Admin UI build artifacts: the minified webpack runtime chunk, the prerendered index.html, and the RSC payload. The only substantive changes are the rebuilt asset references: CSS 68a21c6e6697f7ca.css -> f8da5a6a5b29d249.css, webpack-3b0d290a8fe6941d.js -> webpack-d1ad37b1875df240.js, fd9d1056-a85b2c176012d8e5.js -> fd9d1056-a507ee9e75a3be72.js, 69-e1b183dda365ec86.js -> 69-589b47e7a69d316f.js, page chunks 730-1411b729a1c79695.js / page-df9015da04018cc1.js -> 798-4baed68da0c5497d.js / page-a5a04da2a9356785.js, and buildId tXZFkeqtgh-goIRVbw_9q -> DptMjzo5xd96cx0b56k4u. Minified contents omitted.]
|
||||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||||
1:null
|
1:null
|
||||||
|
|
|
@ -1,21 +1,20 @@
|
||||||
model_list:
|
model_list:
|
||||||
- model_name: fake_openai
|
- model_name: fake-openai-endpoint
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: openai/my-fake-model
|
model: openai/my-fake-model
|
||||||
api_key: my-fake-key
|
api_key: my-fake-key
|
||||||
api_base: http://0.0.0.0:8080
|
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||||
- model_name: gpt-3.5-turbo
|
|
||||||
litellm_params:
|
|
||||||
model: gpt-3.5-turbo-1106
|
|
||||||
api_key: os.environ/OPENAI_API_KEY
|
|
||||||
|
|
||||||
litellm_settings:
|
litellm_settings:
|
||||||
cache: true
|
max_budget: 600020
|
||||||
cache_params:
|
budget_duration: 30d
|
||||||
type: redis
|
|
||||||
callbacks: ["batch_redis_requests"]
|
|
||||||
# success_callbacks: ["langfuse"]
|
|
||||||
|
|
||||||
general_settings:
|
general_settings:
|
||||||
master_key: sk-1234
|
master_key: sk-1234
|
||||||
database_url: "postgresql://neondb_owner:hz8tyUlJ5ivV@ep-cool-sunset-a5ywubeh.us-east-2.aws.neon.tech/neondb?sslmode=require"
|
proxy_batch_write_at: 60 # 👈 Frequency of batch writing logs to server (in seconds)
|
||||||
|
enable_jwt_auth: True
|
||||||
|
alerting: ["slack"]
|
||||||
|
litellm_jwtauth:
|
||||||
|
admin_jwt_scope: "litellm_proxy_admin"
|
||||||
|
team_jwt_scope: "litellm_team"
|
||||||
|
public_key_ttl: 600
|
|
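The new litellm_jwtauth block under general_settings maps onto the LiteLLM_JWTAuth model added further down in this change. A minimal sketch of loading and validating it, assuming a config file path and a hypothetical load_jwt_auth_settings helper (not the proxy's actual startup code):

    # Hypothetical helper: validate general_settings.litellm_jwtauth against LiteLLM_JWTAuth.
    import yaml
    from litellm.proxy._types import LiteLLM_JWTAuth

    def load_jwt_auth_settings(config_path: str) -> LiteLLM_JWTAuth:
        with open(config_path) as f:
            config = yaml.safe_load(f)
        jwt_settings = config.get("general_settings", {}).get("litellm_jwtauth", {}) or {}
        # Unknown keys raise ValueError (see the LiteLLM_JWTAuth.__init__ added below).
        return LiteLLM_JWTAuth(**jwt_settings)

    # For this config: admin_jwt_scope="litellm_proxy_admin",
    # team_jwt_scope="litellm_team", public_key_ttl=600.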
@ -1,4 +1,5 @@
|
||||||
from pydantic import BaseModel, Extra, Field, root_validator, Json
|
from pydantic import BaseModel, Extra, Field, root_validator, Json, validator
|
||||||
|
from dataclasses import fields
|
||||||
import enum
|
import enum
|
||||||
from typing import Optional, List, Union, Dict, Literal, Any
|
from typing import Optional, List, Union, Dict, Literal, Any
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
@ -14,11 +15,6 @@ def hash_token(token: str):
|
||||||
return hashed_token
|
return hashed_token
|
||||||
|
|
||||||
|
|
||||||
class LiteLLMProxyRoles(enum.Enum):
|
|
||||||
PROXY_ADMIN = "litellm_proxy_admin"
|
|
||||||
USER = "litellm_user"
|
|
||||||
|
|
||||||
|
|
||||||
class LiteLLMBase(BaseModel):
|
class LiteLLMBase(BaseModel):
|
||||||
"""
|
"""
|
||||||
Implements default functions, all pydantic objects should have.
|
Implements default functions, all pydantic objects should have.
|
||||||
|
@ -42,6 +38,135 @@ class LiteLLMBase(BaseModel):
|
||||||
protected_namespaces = ()
|
protected_namespaces = ()
|
||||||
|
|
||||||
|
|
||||||
|
class LiteLLMRoutes(enum.Enum):
|
||||||
|
openai_routes: List = [ # chat completions
|
||||||
|
"/openai/deployments/{model}/chat/completions",
|
||||||
|
"/chat/completions",
|
||||||
|
"/v1/chat/completions",
|
||||||
|
# completions
|
||||||
|
"/openai/deployments/{model}/completions",
|
||||||
|
"/completions",
|
||||||
|
"/v1/completions",
|
||||||
|
# embeddings
|
||||||
|
"/openai/deployments/{model}/embeddings",
|
||||||
|
"/embeddings",
|
||||||
|
"/v1/embeddings",
|
||||||
|
# image generation
|
||||||
|
"/images/generations",
|
||||||
|
"/v1/images/generations",
|
||||||
|
# audio transcription
|
||||||
|
"/audio/transcriptions",
|
||||||
|
"/v1/audio/transcriptions",
|
||||||
|
# moderations
|
||||||
|
"/moderations",
|
||||||
|
"/v1/moderations",
|
||||||
|
# models
|
||||||
|
"/models",
|
||||||
|
"/v1/models",
|
||||||
|
]
|
||||||
|
|
||||||
|
info_routes: List = ["/key/info", "/team/info", "/user/info", "/model/info"]
|
||||||
|
|
||||||
|
management_routes: List = [ # key
|
||||||
|
"/key/generate",
|
||||||
|
"/key/update",
|
||||||
|
"/key/delete",
|
||||||
|
"/key/info",
|
||||||
|
# user
|
||||||
|
"/user/new",
|
||||||
|
"/user/update",
|
||||||
|
"/user/delete",
|
||||||
|
"/user/info",
|
||||||
|
# team
|
||||||
|
"/team/new",
|
||||||
|
"/team/update",
|
||||||
|
"/team/delete",
|
||||||
|
"/team/info",
|
||||||
|
"/team/block",
|
||||||
|
"/team/unblock",
|
||||||
|
# model
|
||||||
|
"/model/new",
|
||||||
|
"/model/update",
|
||||||
|
"/model/delete",
|
||||||
|
"/model/info",
|
||||||
|
]
|
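The LiteLLMRoutes groups above are plain lists hung off an Enum, so route checks read directly from .value. A small illustration (the incoming path is made up):

    from litellm.proxy._types import LiteLLMRoutes

    route = "/v1/chat/completions"  # example incoming request path
    if route in LiteLLMRoutes.openai_routes.value:
        print("LLM API route")
    elif route in LiteLLMRoutes.management_routes.value:
        print("management route")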
||||||
|
|
||||||
|
|
||||||
|
class LiteLLM_JWTAuth(LiteLLMBase):
|
||||||
|
"""
|
||||||
|
A class to define the roles and permissions for a LiteLLM Proxy w/ JWT Auth.
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
- admin_jwt_scope: The JWT scope required for proxy admin roles.
|
||||||
|
- admin_allowed_routes: list of allowed routes for proxy admin roles.
|
||||||
|
- team_jwt_scope: The JWT scope required for proxy team roles.
|
||||||
|
- team_id_jwt_field: The field in the JWT token that stores the team ID. Default - `client_id`.
|
||||||
|
- team_allowed_routes: list of allowed routes for proxy team roles.
|
||||||
|
- end_user_id_jwt_field: Default - `sub`. The field in the JWT token that stores the end-user ID. Turn this off by setting to `None`. Enables end-user cost tracking.
|
||||||
|
- public_key_ttl: Default - 600s. TTL for caching public JWT keys.
|
||||||
|
|
||||||
|
See `auth_checks.py` for the specific routes
|
||||||
|
"""
|
||||||
|
|
||||||
|
admin_jwt_scope: str = "litellm_proxy_admin"
|
||||||
|
admin_allowed_routes: List[
|
||||||
|
Literal["openai_routes", "info_routes", "management_routes"]
|
||||||
|
] = ["management_routes"]
|
||||||
|
team_jwt_scope: str = "litellm_team"
|
||||||
|
team_id_jwt_field: str = "client_id"
|
||||||
|
team_allowed_routes: List[
|
||||||
|
Literal["openai_routes", "info_routes", "management_routes"]
|
||||||
|
] = ["openai_routes", "info_routes"]
|
||||||
|
end_user_id_jwt_field: Optional[str] = "sub"
|
||||||
|
public_key_ttl: float = 600
|
||||||
|
|
||||||
|
def __init__(self, **kwargs: Any) -> None:
|
||||||
|
# get the attribute names for this Pydantic model
|
||||||
|
allowed_keys = self.__annotations__.keys()
|
||||||
|
|
||||||
|
invalid_keys = set(kwargs.keys()) - allowed_keys
|
||||||
|
|
||||||
|
if invalid_keys:
|
||||||
|
raise ValueError(
|
||||||
|
f"Invalid arguments provided: {', '.join(invalid_keys)}. Allowed arguments are: {', '.join(allowed_keys)}."
|
||||||
|
)
|
||||||
|
|
||||||
|
super().__init__(**kwargs)
|
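Because __init__ compares the incoming keys against the model's annotated fields, a typo in the config's litellm_jwtauth block fails loudly instead of being silently dropped. A quick, self-contained illustration:

    from litellm.proxy._types import LiteLLM_JWTAuth

    cfg = LiteLLM_JWTAuth(team_id_jwt_field="azp", public_key_ttl=300)
    print(cfg.admin_allowed_routes)  # ["management_routes"] by default

    try:
        LiteLLM_JWTAuth(team_jwt_scop="litellm_team")  # misspelled key
    except ValueError as e:
        print(e)  # Invalid arguments provided: team_jwt_scop. Allowed arguments are: ...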
||||||
|
|
||||||
|
|
||||||
|
class LiteLLMPromptInjectionParams(LiteLLMBase):
|
||||||
|
heuristics_check: bool = False
|
||||||
|
vector_db_check: bool = False
|
||||||
|
llm_api_check: bool = False
|
||||||
|
llm_api_name: Optional[str] = None
|
||||||
|
llm_api_system_prompt: Optional[str] = None
|
||||||
|
llm_api_fail_call_string: Optional[str] = None
|
||||||
|
|
||||||
|
@root_validator(pre=True)
|
||||||
|
def check_llm_api_params(cls, values):
|
||||||
|
llm_api_check = values.get("llm_api_check")
|
||||||
|
if llm_api_check is True:
|
||||||
|
if "llm_api_name" not in values or not values["llm_api_name"]:
|
||||||
|
raise ValueError(
|
||||||
|
"If llm_api_check is set to True, llm_api_name must be provided"
|
||||||
|
)
|
||||||
|
if (
|
||||||
|
"llm_api_system_prompt" not in values
|
||||||
|
or not values["llm_api_system_prompt"]
|
||||||
|
):
|
||||||
|
raise ValueError(
|
||||||
|
"If llm_api_check is set to True, llm_api_system_prompt must be provided"
|
||||||
|
)
|
||||||
|
if (
|
||||||
|
"llm_api_fail_call_string" not in values
|
||||||
|
or not values["llm_api_fail_call_string"]
|
||||||
|
):
|
||||||
|
raise ValueError(
|
||||||
|
"If llm_api_check is set to True, llm_api_fail_call_string must be provided"
|
||||||
|
)
|
||||||
|
return values
|
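The root validator ties the three llm_api_* fields to llm_api_check, so enabling the LLM-based check without them is rejected up front. A short illustration (the model name and prompt are invented):

    from litellm.proxy._types import LiteLLMPromptInjectionParams

    try:
        LiteLLMPromptInjectionParams(llm_api_check=True)  # no llm_api_name etc.
    except ValueError as e:
        print(e)  # "If llm_api_check is set to True, llm_api_name must be provided"

    params = LiteLLMPromptInjectionParams(
        llm_api_check=True,
        llm_api_name="gpt-3.5-turbo",                      # invented example values
        llm_api_system_prompt="Detect prompt injection.",
        llm_api_fail_call_string="BLOCKED",
    )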
||||||
|
|
||||||
|
|
||||||
######### Request Class Definition ######
|
######### Request Class Definition ######
|
||||||
class ProxyChatCompletionRequest(LiteLLMBase):
|
class ProxyChatCompletionRequest(LiteLLMBase):
|
||||||
model: str
|
model: str
|
||||||
|
@ -180,7 +305,7 @@ class GenerateKeyResponse(GenerateKeyRequest):
|
||||||
key: str
|
key: str
|
||||||
key_name: Optional[str] = None
|
key_name: Optional[str] = None
|
||||||
expires: Optional[datetime]
|
expires: Optional[datetime]
|
||||||
user_id: str
|
user_id: Optional[str] = None
|
||||||
|
|
||||||
@root_validator(pre=True)
|
@root_validator(pre=True)
|
||||||
def set_model_info(cls, values):
|
def set_model_info(cls, values):
|
||||||
|
@ -274,6 +399,7 @@ class TeamBase(LiteLLMBase):
|
||||||
rpm_limit: Optional[int] = None
|
rpm_limit: Optional[int] = None
|
||||||
max_budget: Optional[float] = None
|
max_budget: Optional[float] = None
|
||||||
models: list = []
|
models: list = []
|
||||||
|
blocked: bool = False
|
||||||
|
|
||||||
|
|
||||||
class NewTeamRequest(TeamBase):
|
class NewTeamRequest(TeamBase):
|
||||||
|
@ -301,19 +427,18 @@ class TeamMemberDeleteRequest(LiteLLMBase):
|
||||||
return values
|
return values
|
||||||
|
|
||||||
|
|
||||||
class UpdateTeamRequest(LiteLLMBase):
|
class UpdateTeamRequest(TeamBase):
|
||||||
team_id: str # required
|
team_id: str # required
|
||||||
team_alias: Optional[str] = None
|
|
||||||
admins: Optional[list] = None
|
|
||||||
members: Optional[list] = None
|
|
||||||
members_with_roles: Optional[List[Member]] = None
|
|
||||||
metadata: Optional[dict] = None
|
|
||||||
|
|
||||||
|
|
||||||
class DeleteTeamRequest(LiteLLMBase):
|
class DeleteTeamRequest(LiteLLMBase):
|
||||||
team_ids: List[str] # required
|
team_ids: List[str] # required
|
||||||
|
|
||||||
|
|
||||||
|
class BlockTeamRequest(LiteLLMBase):
|
||||||
|
team_id: str # required
|
||||||
|
|
||||||
|
|
||||||
class LiteLLM_TeamTable(TeamBase):
|
class LiteLLM_TeamTable(TeamBase):
|
||||||
spend: Optional[float] = None
|
spend: Optional[float] = None
|
||||||
max_parallel_requests: Optional[int] = None
|
max_parallel_requests: Optional[int] = None
|
||||||
|
@ -498,6 +623,9 @@ class ConfigGeneralSettings(LiteLLMBase):
|
||||||
ui_access_mode: Optional[Literal["admin_only", "all"]] = Field(
|
ui_access_mode: Optional[Literal["admin_only", "all"]] = Field(
|
||||||
"all", description="Control access to the Proxy UI"
|
"all", description="Control access to the Proxy UI"
|
||||||
)
|
)
|
||||||
|
allowed_routes: Optional[List] = Field(
|
||||||
|
None, description="Proxy API Endpoints you want users to be able to access"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class ConfigYAML(LiteLLMBase):
|
class ConfigYAML(LiteLLMBase):
|
||||||
|
@ -565,6 +693,8 @@ class LiteLLM_VerificationTokenView(LiteLLM_VerificationToken):
|
||||||
team_tpm_limit: Optional[int] = None
|
team_tpm_limit: Optional[int] = None
|
||||||
team_rpm_limit: Optional[int] = None
|
team_rpm_limit: Optional[int] = None
|
||||||
team_max_budget: Optional[float] = None
|
team_max_budget: Optional[float] = None
|
||||||
|
team_models: List = []
|
||||||
|
team_blocked: bool = False
|
||||||
soft_budget: Optional[float] = None
|
soft_budget: Optional[float] = None
|
||||||
team_model_aliases: Optional[Dict] = None
|
team_model_aliases: Optional[Dict] = None
|
||||||
|
|
||||||
|
|
|
@ -8,45 +8,160 @@ Run checks for:
|
||||||
2. If user is in budget
|
2. If user is in budget
|
||||||
3. If end_user ('user' passed to /chat/completions, /embeddings endpoint) is in budget
|
3. If end_user ('user' passed to /chat/completions, /embeddings endpoint) is in budget
|
||||||
"""
|
"""
|
||||||
from litellm.proxy._types import LiteLLM_UserTable, LiteLLM_EndUserTable
|
from litellm.proxy._types import (
|
||||||
from typing import Optional
|
LiteLLM_UserTable,
|
||||||
|
LiteLLM_EndUserTable,
|
||||||
|
LiteLLM_JWTAuth,
|
||||||
|
LiteLLM_TeamTable,
|
||||||
|
LiteLLMRoutes,
|
||||||
|
)
|
||||||
|
from typing import Optional, Literal, Union
|
||||||
from litellm.proxy.utils import PrismaClient
|
from litellm.proxy.utils import PrismaClient
|
||||||
from litellm.caching import DualCache
|
from litellm.caching import DualCache
|
||||||
|
import litellm
|
||||||
|
|
||||||
|
all_routes = LiteLLMRoutes.openai_routes.value + LiteLLMRoutes.management_routes.value
|
||||||
|
|
||||||
|
|
||||||
def common_checks(
|
def common_checks(
|
||||||
request_body: dict,
|
request_body: dict,
|
||||||
user_object: LiteLLM_UserTable,
|
team_object: LiteLLM_TeamTable,
|
||||||
end_user_object: Optional[LiteLLM_EndUserTable],
|
end_user_object: Optional[LiteLLM_EndUserTable],
|
||||||
|
global_proxy_spend: Optional[float],
|
||||||
|
general_settings: dict,
|
||||||
|
route: str,
|
||||||
) -> bool:
|
) -> bool:
|
||||||
|
"""
|
||||||
|
Common checks across jwt + key-based auth.
|
||||||
|
|
||||||
|
1. If team is blocked
|
||||||
|
2. If team can call model
|
||||||
|
3. If team is in budget
|
||||||
|
4. If end_user ('user' passed to /chat/completions, /embeddings endpoint) is in budget
|
||||||
|
5. [OPTIONAL] If 'enforce_end_user' enabled - did developer pass in 'user' param for openai endpoints
|
||||||
|
6. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget
|
||||||
|
"""
|
||||||
_model = request_body.get("model", None)
|
_model = request_body.get("model", None)
|
||||||
# 1. If user can call model
|
if team_object.blocked == True:
|
||||||
|
raise Exception(
|
||||||
|
f"Team={team_object.team_id} is blocked. Update via `/team/unblock` if your admin."
|
||||||
|
)
|
||||||
|
# 2. If user can call model
|
||||||
if (
|
if (
|
||||||
_model is not None
|
_model is not None
|
||||||
and len(user_object.models) > 0
|
and len(team_object.models) > 0
|
||||||
and _model not in user_object.models
|
and _model not in team_object.models
|
||||||
):
|
):
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"User={user_object.user_id} not allowed to call model={_model}. Allowed user models = {user_object.models}"
|
f"Team={team_object.team_id} not allowed to call model={_model}. Allowed team models = {team_object.models}"
|
||||||
)
|
)
|
||||||
# 2. If user is in budget
|
# 3. If team is in budget
|
||||||
if (
|
if (
|
||||||
user_object.max_budget is not None
|
team_object.max_budget is not None
|
||||||
and user_object.spend > user_object.max_budget
|
and team_object.spend is not None
|
||||||
|
and team_object.spend > team_object.max_budget
|
||||||
):
|
):
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"User={user_object.user_id} over budget. Spend={user_object.spend}, Budget={user_object.max_budget}"
|
f"Team={team_object.team_id} over budget. Spend={team_object.spend}, Budget={team_object.max_budget}"
|
||||||
)
|
)
|
||||||
# 3. If end_user ('user' passed to /chat/completions, /embeddings endpoint) is in budget
|
# 4. If end_user ('user' passed to /chat/completions, /embeddings endpoint) is in budget
|
||||||
if end_user_object is not None and end_user_object.litellm_budget_table is not None:
|
if end_user_object is not None and end_user_object.litellm_budget_table is not None:
|
||||||
end_user_budget = end_user_object.litellm_budget_table.max_budget
|
end_user_budget = end_user_object.litellm_budget_table.max_budget
|
||||||
if end_user_budget is not None and end_user_object.spend > end_user_budget:
|
if end_user_budget is not None and end_user_object.spend > end_user_budget:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"End User={end_user_object.user_id} over budget. Spend={end_user_object.spend}, Budget={end_user_budget}"
|
f"ExceededBudget: End User={end_user_object.user_id} over budget. Spend={end_user_object.spend}, Budget={end_user_budget}"
|
||||||
|
)
|
||||||
|
# 5. [OPTIONAL] If 'enforce_user_param' enabled - did developer pass in 'user' param for openai endpoints
|
||||||
|
if (
|
||||||
|
general_settings.get("enforce_user_param", None) is not None
|
||||||
|
and general_settings["enforce_user_param"] == True
|
||||||
|
):
|
||||||
|
if route in LiteLLMRoutes.openai_routes.value and "user" not in request_body:
|
||||||
|
raise Exception(
|
||||||
|
f"'user' param not passed in. 'enforce_user_param'={general_settings['enforce_user_param']}"
|
||||||
|
)
|
||||||
|
# 6. [OPTIONAL] If 'litellm.max_budget' is set (>0), is proxy under budget
|
||||||
|
if litellm.max_budget > 0 and global_proxy_spend is not None:
|
||||||
|
if global_proxy_spend > litellm.max_budget:
|
||||||
|
raise Exception(
|
||||||
|
f"ExceededBudget: LiteLLM Proxy has exceeded its budget. Current spend: {global_proxy_spend}; Max Budget: {litellm.max_budget}"
|
||||||
)
|
)
|
||||||
return True
|
return True
|
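common_checks now takes the team (not the user) plus the route, the general_settings dict, and the proxy-wide spend, and either returns True or raises with a descriptive message. A hedged usage sketch; the team object and request are fabricated, and the import path assumes auth_checks.py lives under litellm/proxy/auth/:

    from litellm.proxy._types import LiteLLM_TeamTable
    from litellm.proxy.auth.auth_checks import common_checks

    team = LiteLLM_TeamTable(
        team_id="team-1", models=["gpt-3.5-turbo"], max_budget=50.0, spend=12.5
    )

    common_checks(
        request_body={"model": "gpt-3.5-turbo", "user": "end-user-1"},
        team_object=team,
        end_user_object=None,            # no per-end-user budget to enforce
        global_proxy_spend=None,         # skips the proxy-wide budget check
        general_settings={"enforce_user_param": True},
        route="/v1/chat/completions",
    )  # True; raises if the team is blocked, over budget, or the model is not allowed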
||||||
|
|
||||||
|
|
||||||
|
def _allowed_routes_check(user_route: str, allowed_routes: list) -> bool:
|
||||||
|
for allowed_route in allowed_routes:
|
||||||
|
if (
|
||||||
|
allowed_route == LiteLLMRoutes.openai_routes.name
|
||||||
|
and user_route in LiteLLMRoutes.openai_routes.value
|
||||||
|
):
|
||||||
|
return True
|
||||||
|
elif (
|
||||||
|
allowed_route == LiteLLMRoutes.info_routes.name
|
||||||
|
and user_route in LiteLLMRoutes.info_routes.value
|
||||||
|
):
|
||||||
|
return True
|
||||||
|
elif (
|
||||||
|
allowed_route == LiteLLMRoutes.management_routes.name
|
||||||
|
and user_route in LiteLLMRoutes.management_routes.value
|
||||||
|
):
|
||||||
|
return True
|
||||||
|
elif allowed_route == user_route:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def allowed_routes_check(
|
||||||
|
user_role: Literal["proxy_admin", "team"],
|
||||||
|
user_route: str,
|
||||||
|
litellm_proxy_roles: LiteLLM_JWTAuth,
|
||||||
|
) -> bool:
|
||||||
|
"""
|
||||||
|
Check if user -> not admin - allowed to access these routes
|
||||||
|
"""
|
||||||
|
|
||||||
|
if user_role == "proxy_admin":
|
||||||
|
if litellm_proxy_roles.admin_allowed_routes is None:
|
||||||
|
is_allowed = _allowed_routes_check(
|
||||||
|
user_route=user_route, allowed_routes=["management_routes"]
|
||||||
|
)
|
||||||
|
return is_allowed
|
||||||
|
elif litellm_proxy_roles.admin_allowed_routes is not None:
|
||||||
|
is_allowed = _allowed_routes_check(
|
||||||
|
user_route=user_route,
|
||||||
|
allowed_routes=litellm_proxy_roles.admin_allowed_routes,
|
||||||
|
)
|
||||||
|
return is_allowed
|
||||||
|
|
||||||
|
elif user_role == "team":
|
||||||
|
if litellm_proxy_roles.team_allowed_routes is None:
|
||||||
|
"""
|
||||||
|
By default allow a team to call openai + info routes
|
||||||
|
"""
|
||||||
|
is_allowed = _allowed_routes_check(
|
||||||
|
user_route=user_route, allowed_routes=["openai_routes", "info_routes"]
|
||||||
|
)
|
||||||
|
return is_allowed
|
||||||
|
elif litellm_proxy_roles.team_allowed_routes is not None:
|
||||||
|
is_allowed = _allowed_routes_check(
|
||||||
|
user_route=user_route,
|
||||||
|
allowed_routes=litellm_proxy_roles.team_allowed_routes,
|
||||||
|
)
|
||||||
|
return is_allowed
|
||||||
|
return False
|
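allowed_routes_check resolves the JWT role against either named route groups or literal paths, falling back to sensible defaults when nothing is configured. With the default LiteLLM_JWTAuth settings (same import-path assumption as above):

    from litellm.proxy._types import LiteLLM_JWTAuth
    from litellm.proxy.auth.auth_checks import allowed_routes_check

    jwt_auth = LiteLLM_JWTAuth()  # team_allowed_routes -> ["openai_routes", "info_routes"]

    allowed_routes_check("team", "/v1/chat/completions", jwt_auth)   # True
    allowed_routes_check("team", "/key/generate", jwt_auth)          # False
    allowed_routes_check("proxy_admin", "/key/generate", jwt_auth)   # True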
||||||
|
|
||||||
|
|
||||||
|
def get_actual_routes(allowed_routes: list) -> list:
|
||||||
|
actual_routes: list = []
|
||||||
|
for route_name in allowed_routes:
|
||||||
|
try:
|
||||||
|
route_value = LiteLLMRoutes[route_name].value
|
||||||
|
actual_routes = actual_routes + route_value
|
||||||
|
except KeyError:
|
||||||
|
actual_routes.append(route_name)
|
||||||
|
return actual_routes
|
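get_actual_routes expands group names into their member paths and keeps unknown entries as literal routes, for example:

    from litellm.proxy.auth.auth_checks import get_actual_routes

    print(get_actual_routes(["info_routes", "/custom/health"]))
    # ["/key/info", "/team/info", "/user/info", "/model/info", "/custom/health"]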
||||||
|
|
||||||
|
|
||||||
async def get_end_user_object(
|
async def get_end_user_object(
|
||||||
end_user_id: Optional[str],
|
end_user_id: Optional[str],
|
||||||
prisma_client: Optional[PrismaClient],
|
prisma_client: Optional[PrismaClient],
|
||||||
|
@ -82,3 +197,75 @@ async def get_end_user_object(
|
||||||
return LiteLLM_EndUserTable(**response.dict())
|
return LiteLLM_EndUserTable(**response.dict())
|
||||||
except Exception as e: # if end-user not in db
|
except Exception as e: # if end-user not in db
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
async def get_user_object(self, user_id: str) -> LiteLLM_UserTable:
|
||||||
|
"""
|
||||||
|
- Check if user id in proxy User Table
|
||||||
|
- if valid, return LiteLLM_UserTable object with defined limits
|
||||||
|
- if not, then raise an error
|
||||||
|
"""
|
||||||
|
if self.prisma_client is None:
|
||||||
|
raise Exception(
|
||||||
|
"No DB Connected. See - https://docs.litellm.ai/docs/proxy/virtual_keys"
|
||||||
|
)
|
||||||
|
|
||||||
|
# check if in cache
|
||||||
|
cached_user_obj = self.user_api_key_cache.async_get_cache(key=user_id)
|
||||||
|
if cached_user_obj is not None:
|
||||||
|
if isinstance(cached_user_obj, dict):
|
||||||
|
return LiteLLM_UserTable(**cached_user_obj)
|
||||||
|
elif isinstance(cached_user_obj, LiteLLM_UserTable):
|
||||||
|
return cached_user_obj
|
||||||
|
# else, check db
|
||||||
|
try:
|
||||||
|
response = await self.prisma_client.db.litellm_usertable.find_unique(
|
||||||
|
where={"user_id": user_id}
|
||||||
|
)
|
||||||
|
|
||||||
|
if response is None:
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
return LiteLLM_UserTable(**response.dict())
|
||||||
|
except Exception as e:
|
||||||
|
raise Exception(
|
||||||
|
f"User doesn't exist in db. User={user_id}. Create user via `/user/new` call."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def get_team_object(
|
||||||
|
team_id: str,
|
||||||
|
prisma_client: Optional[PrismaClient],
|
||||||
|
user_api_key_cache: DualCache,
|
||||||
|
) -> LiteLLM_TeamTable:
|
||||||
|
"""
|
||||||
|
- Check if team id in proxy Team Table
|
||||||
|
- if valid, return LiteLLM_TeamTable object with defined limits
|
||||||
|
- if not, then raise an error
|
||||||
|
"""
|
||||||
|
if prisma_client is None:
|
||||||
|
raise Exception(
|
||||||
|
"No DB Connected. See - https://docs.litellm.ai/docs/proxy/virtual_keys"
|
||||||
|
)
|
||||||
|
|
||||||
|
# check if in cache
|
||||||
|
cached_team_obj = user_api_key_cache.async_get_cache(key=team_id)
|
||||||
|
if cached_team_obj is not None:
|
||||||
|
if isinstance(cached_team_obj, dict):
|
||||||
|
return LiteLLM_TeamTable(**cached_team_obj)
|
||||||
|
elif isinstance(cached_team_obj, LiteLLM_TeamTable):
|
||||||
|
return cached_team_obj
|
||||||
|
# else, check db
|
||||||
|
try:
|
||||||
|
response = await prisma_client.db.litellm_teamtable.find_unique(
|
||||||
|
where={"team_id": team_id}
|
||||||
|
)
|
||||||
|
|
||||||
|
if response is None:
|
||||||
|
raise Exception
|
||||||
|
|
||||||
|
return LiteLLM_TeamTable(**response.dict())
|
||||||
|
except Exception as e:
|
||||||
|
raise Exception(
|
||||||
|
f"Team doesn't exist in db. Team={team_id}. Create team via `/team/new` call."
|
||||||
|
)
|
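get_team_object is a coroutine that consults the in-memory cache before hitting Prisma and raises if the team is unknown. A sketch of calling it, where the prisma_client comes from proxy startup and is only referenced here (same import-path assumption as above):

    import asyncio
    from litellm.caching import DualCache
    from litellm.proxy.auth.auth_checks import get_team_object

    async def lookup_team(team_id: str, prisma_client):
        return await get_team_object(
            team_id=team_id,
            prisma_client=prisma_client,       # set up during proxy startup
            user_api_key_cache=DualCache(),
        )

    # asyncio.run(lookup_team("team-1", prisma_client))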
||||||
|
|
|
@ -6,50 +6,17 @@ Currently only supports admin.
|
||||||
JWT token must have 'litellm_proxy_admin' in scope.
|
JWT token must have 'litellm_proxy_admin' in scope.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import httpx
|
|
||||||
import jwt
|
import jwt
|
||||||
from jwt.algorithms import RSAAlgorithm
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
from litellm.caching import DualCache
|
from litellm.caching import DualCache
|
||||||
from litellm.proxy._types import LiteLLMProxyRoles, LiteLLM_UserTable
|
from litellm._logging import verbose_proxy_logger
|
||||||
|
from litellm.proxy._types import LiteLLM_JWTAuth, LiteLLM_UserTable
|
||||||
from litellm.proxy.utils import PrismaClient
|
from litellm.proxy.utils import PrismaClient
|
||||||
|
from litellm.llms.custom_httpx.httpx_handler import HTTPHandler
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
class HTTPHandler:
|
|
||||||
def __init__(self, concurrent_limit=1000):
|
|
||||||
# Create a client with a connection pool
|
|
||||||
self.client = httpx.AsyncClient(
|
|
||||||
limits=httpx.Limits(
|
|
||||||
max_connections=concurrent_limit,
|
|
||||||
max_keepalive_connections=concurrent_limit,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
async def close(self):
|
|
||||||
# Close the client when you're done with it
|
|
||||||
await self.client.aclose()
|
|
||||||
|
|
||||||
async def get(
|
|
||||||
self, url: str, params: Optional[dict] = None, headers: Optional[dict] = None
|
|
||||||
):
|
|
||||||
response = await self.client.get(url, params=params, headers=headers)
|
|
||||||
return response
|
|
||||||
|
|
||||||
async def post(
|
|
||||||
self,
|
|
||||||
url: str,
|
|
||||||
data: Optional[dict] = None,
|
|
||||||
params: Optional[dict] = None,
|
|
||||||
headers: Optional[dict] = None,
|
|
||||||
):
|
|
||||||
response = await self.client.post(
|
|
||||||
url, data=data, params=params, headers=headers
|
|
||||||
)
|
|
||||||
return response
|
|
||||||
|
|
||||||
|
|
||||||
class JWTHandler:
|
class JWTHandler:
|
||||||
"""
|
"""
|
||||||
- treat the sub id passed in as the user id
|
- treat the sub id passed in as the user id
|
||||||
|
@ -67,105 +34,131 @@ class JWTHandler:
|
||||||
self.http_handler = HTTPHandler()
|
self.http_handler = HTTPHandler()
|
||||||
|
|
||||||
def update_environment(
|
def update_environment(
|
||||||
self, prisma_client: Optional[PrismaClient], user_api_key_cache: DualCache
|
self,
|
||||||
|
prisma_client: Optional[PrismaClient],
|
||||||
|
user_api_key_cache: DualCache,
|
||||||
|
litellm_jwtauth: LiteLLM_JWTAuth,
|
||||||
) -> None:
|
) -> None:
|
||||||
self.prisma_client = prisma_client
|
self.prisma_client = prisma_client
|
||||||
self.user_api_key_cache = user_api_key_cache
|
self.user_api_key_cache = user_api_key_cache
|
||||||
|
self.litellm_jwtauth = litellm_jwtauth
|
||||||
|
|
||||||
def is_jwt(self, token: str):
|
def is_jwt(self, token: str):
|
||||||
parts = token.split(".")
|
parts = token.split(".")
|
||||||
return len(parts) == 3
|
return len(parts) == 3
|
||||||
|
|
||||||
def is_admin(self, scopes: list) -> bool:
|
def is_admin(self, scopes: list) -> bool:
|
||||||
if LiteLLMProxyRoles.PROXY_ADMIN.value in scopes:
|
if self.litellm_jwtauth.admin_jwt_scope in scopes:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def get_user_id(self, token: dict, default_value: str) -> str:
|
def is_team(self, scopes: list) -> bool:
|
||||||
|
if self.litellm_jwtauth.team_jwt_scope in scopes:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def get_end_user_id(self, token: dict, default_value: Optional[str]) -> str:
|
||||||
try:
|
try:
|
||||||
user_id = token["sub"]
|
if self.litellm_jwtauth.end_user_id_jwt_field is not None:
|
||||||
|
user_id = token[self.litellm_jwtauth.end_user_id_jwt_field]
|
||||||
|
else:
|
||||||
|
user_id = None
|
||||||
except KeyError:
|
except KeyError:
|
||||||
user_id = default_value
|
user_id = default_value
|
||||||
return user_id
|
return user_id
|
||||||
|
|
||||||
def get_team_id(self, token: dict, default_value: Optional[str]) -> Optional[str]:
|
def get_team_id(self, token: dict, default_value: Optional[str]) -> Optional[str]:
|
||||||
try:
|
try:
|
||||||
team_id = token["azp"]
|
team_id = token[self.litellm_jwtauth.team_id_jwt_field]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
team_id = default_value
|
team_id = default_value
|
||||||
return team_id
|
return team_id
|
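With the hard-coded 'sub'/'azp' claims replaced by configurable fields, claim extraction now follows litellm_jwtauth. A toy example with the defaults; the JWTHandler constructor arguments are not shown in this hunk, so a bare JWTHandler() is an assumption, and the import path assumes handle_jwt.py sits under litellm/proxy/auth/:

    from litellm.caching import DualCache
    from litellm.proxy._types import LiteLLM_JWTAuth
    from litellm.proxy.auth.handle_jwt import JWTHandler

    jwt_handler = JWTHandler()
    jwt_handler.update_environment(
        prisma_client=None,
        user_api_key_cache=DualCache(),
        litellm_jwtauth=LiteLLM_JWTAuth(),  # defaults: sub / client_id / litellm_team
    )

    claims = {"sub": "user-123", "client_id": "team-abc", "scope": "litellm_team"}
    print(jwt_handler.get_end_user_id(claims, default_value=None))  # user-123
    print(jwt_handler.get_team_id(claims, default_value=None))      # team-abc
    print(jwt_handler.is_team(jwt_handler.get_scopes(claims)))      # True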
||||||
|
|
||||||
async def get_user_object(self, user_id: str) -> LiteLLM_UserTable:
|
|
||||||
"""
|
|
||||||
- Check if user id in proxy User Table
|
|
||||||
- if valid, return LiteLLM_UserTable object with defined limits
|
|
||||||
- if not, then raise an error
|
|
||||||
"""
|
|
||||||
if self.prisma_client is None:
|
|
||||||
raise Exception(
|
|
||||||
"No DB Connected. See - https://docs.litellm.ai/docs/proxy/virtual_keys"
|
|
||||||
)
|
|
||||||
|
|
||||||
# check if in cache
|
|
||||||
cached_user_obj = self.user_api_key_cache.async_get_cache(key=user_id)
|
|
||||||
if cached_user_obj is not None:
|
|
||||||
if isinstance(cached_user_obj, dict):
|
|
||||||
return LiteLLM_UserTable(**cached_user_obj)
|
|
||||||
elif isinstance(cached_user_obj, LiteLLM_UserTable):
|
|
||||||
return cached_user_obj
|
|
||||||
# else, check db
|
|
||||||
try:
|
|
||||||
response = await self.prisma_client.db.litellm_usertable.find_unique(
|
|
||||||
where={"user_id": user_id}
|
|
||||||
)
|
|
||||||
|
|
||||||
if response is None:
|
|
||||||
raise Exception
|
|
||||||
|
|
||||||
return LiteLLM_UserTable(**response.dict())
|
|
||||||
except Exception as e:
|
|
||||||
raise Exception(
|
|
||||||
f"User doesn't exist in db. User={user_id}. Create user via `/user/new` call."
|
|
||||||
)
|
|
||||||
|
|
||||||
def get_scopes(self, token: dict) -> list:
|
def get_scopes(self, token: dict) -> list:
|
||||||
try:
|
try:
|
||||||
|
if isinstance(token["scope"], str):
|
||||||
# Assuming the scopes are stored in 'scope' claim and are space-separated
|
# Assuming the scopes are stored in 'scope' claim and are space-separated
|
||||||
scopes = token["scope"].split()
|
scopes = token["scope"].split()
|
||||||
|
elif isinstance(token["scope"], list):
|
||||||
|
scopes = token["scope"]
|
||||||
|
else:
|
||||||
|
raise Exception(
|
||||||
|
f"Unmapped scope type - {type(token['scope'])}. Supported types - list, str."
|
||||||
|
)
|
||||||
except KeyError:
|
except KeyError:
|
||||||
scopes = []
|
scopes = []
|
||||||
return scopes
|
return scopes
|
||||||
|
|
||||||
async def auth_jwt(self, token: str) -> dict:
|
async def get_public_key(self, kid: Optional[str]) -> dict:
|
||||||
keys_url = os.getenv("JWT_PUBLIC_KEY_URL")
|
keys_url = os.getenv("JWT_PUBLIC_KEY_URL")
|
||||||
|
|
||||||
if keys_url is None:
|
if keys_url is None:
|
||||||
raise Exception("Missing JWT Public Key URL from environment.")
|
raise Exception("Missing JWT Public Key URL from environment.")
|
||||||
|
|
||||||
|
cached_keys = await self.user_api_key_cache.async_get_cache(
|
||||||
|
"litellm_jwt_auth_keys"
|
||||||
|
)
|
||||||
|
if cached_keys is None:
|
||||||
response = await self.http_handler.get(keys_url)
|
response = await self.http_handler.get(keys_url)
|
||||||
|
|
||||||
keys = response.json()["keys"]
|
keys = response.json()["keys"]
|
||||||
|
|
||||||
header = jwt.get_unverified_header(token)
|
await self.user_api_key_cache.async_set_cache(
|
||||||
kid = header["kid"]
|
key="litellm_jwt_auth_keys",
|
||||||
|
value=keys,
|
||||||
|
ttl=self.litellm_jwtauth.public_key_ttl, # cache for 10 mins
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
keys = cached_keys
|
||||||
|
|
||||||
|
public_key: Optional[dict] = None
|
||||||
|
|
||||||
|
if len(keys) == 1:
|
||||||
|
if kid is None or key["kid"] == kid:
|
||||||
|
public_key = keys[0]
|
||||||
|
elif len(keys) > 1:
|
||||||
for key in keys:
|
for key in keys:
|
||||||
if key["kid"] == kid:
|
if kid is not None and key["kid"] == kid:
|
||||||
jwk = {
|
public_key = key
|
||||||
"kty": key["kty"],
|
|
||||||
"kid": key["kid"],
|
if public_key is None:
|
||||||
"n": key["n"],
|
raise Exception(
|
||||||
"e": key["e"],
|
f"No matching public key found. kid={kid}, keys_url={keys_url}, cached_keys={cached_keys}"
|
||||||
}
|
)
|
||||||
public_key = RSAAlgorithm.from_jwk(json.dumps(jwk))
|
|
||||||
|
return public_key
|
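get_public_key now pulls the JWKS from the URL in the environment and caches the key set, so the identity provider is only re-queried after public_key_ttl expires. A brief sketch (the JWKS URL is a placeholder):

    import os

    # Any OIDC provider's JWKS endpoint works here; this URL is illustrative only.
    os.environ["JWT_PUBLIC_KEY_URL"] = "https://idp.example.com/realms/litellm/protocol/openid-connect/certs"

    # First call fetches the key set and caches it under "litellm_jwt_auth_keys"
    # for litellm_jwtauth.public_key_ttl seconds (600 by default); later calls hit the cache.
    # public_key = await jwt_handler.get_public_key(kid=None)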
||||||
|
|
||||||
|
async def auth_jwt(self, token: str) -> dict:
|
||||||
|
from jwt.algorithms import RSAAlgorithm
|
||||||
|
|
||||||
|
header = jwt.get_unverified_header(token)
|
||||||
|
|
||||||
|
verbose_proxy_logger.debug("header: %s", header)
|
||||||
|
|
||||||
|
kid = header.get("kid", None)
|
||||||
|
|
||||||
|
public_key = await self.get_public_key(kid=kid)
|
||||||
|
|
||||||
|
if public_key is not None and isinstance(public_key, dict):
|
||||||
|
jwk = {}
|
||||||
|
if "kty" in public_key:
|
||||||
|
jwk["kty"] = public_key["kty"]
|
||||||
|
if "kid" in public_key:
|
||||||
|
jwk["kid"] = public_key["kid"]
|
||||||
|
if "n" in public_key:
|
||||||
|
jwk["n"] = public_key["n"]
|
||||||
|
if "e" in public_key:
|
||||||
|
jwk["e"] = public_key["e"]
|
||||||
|
|
||||||
|
public_key_rsa = RSAAlgorithm.from_jwk(json.dumps(jwk))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# decode the token using the public key
|
# decode the token using the public key
|
||||||
payload = jwt.decode(
|
payload = jwt.decode(
|
||||||
token,
|
token,
|
||||||
public_key, # type: ignore
|
public_key_rsa, # type: ignore
|
||||||
algorithms=["RS256"],
|
algorithms=["RS256"],
|
||||||
audience="account",
|
options={"verify_aud": False},
|
||||||
)
|
)
|
||||||
return payload
|
return payload
|
||||||
|
|
||||||
|
|