Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 19:24:27 +00:00

Merge branch 'BerriAI:main' into ollama-image-handling

commit c44970c813
102 changed files with 1394 additions and 393 deletions
.gitignore (vendored, 4 changes)

@@ -1,5 +1,6 @@
 .venv
 .env
+litellm/proxy/myenv/*
 litellm_uuid.txt
 __pycache__/
 *.pyc
@@ -52,3 +53,6 @@ litellm/proxy/_new_secret_config.yaml
 litellm/proxy/_new_secret_config.yaml
 litellm/proxy/_super_secret_config.yaml
 litellm/proxy/_super_secret_config.yaml
+litellm/proxy/myenv/bin/activate
+litellm/proxy/myenv/bin/Activate.ps1
+myenv/*
@@ -16,11 +16,11 @@ repos:
         name: Check if files match
         entry: python3 ci_cd/check_files_match.py
         language: system
-  - repo: local
-    hooks:
-      - id: mypy
-        name: mypy
-        entry: python3 -m mypy --ignore-missing-imports
-        language: system
-        types: [python]
-        files: ^litellm/
+  # - repo: local
+  #   hooks:
+  #     - id: mypy
+  #       name: mypy
+  #       entry: python3 -m mypy --ignore-missing-imports
+  #       language: system
+  #       types: [python]
+  #       files: ^litellm/
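Most of the Python changes in this merge add `# type: ignore` markers to third-party imports so the files pass a strict mypy run. A minimal sketch of what the marker does (the module and URL here are illustrative, not from this diff):

# Sketch: silencing mypy on an import that ships without bundled type stubs.
# Without the marker, a strict mypy run may report something like:
#   error: Library stubs not installed for "requests"
import requests  # type: ignore

# The marker only suppresses type errors reported on this line;
# runtime behavior is completely unchanged.
resp = requests.get("https://example.com")
print(resp.status_code)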
BIN  deploy/azure_resource_manager/azure_marketplace.zip (new file)
Binary file not shown.

@@ -0,0 +1,15 @@
+{
+  "$schema": "https://schema.management.azure.com/schemas/0.1.2-preview/CreateUIDefinition.MultiVm.json#",
+  "handler": "Microsoft.Azure.CreateUIDef",
+  "version": "0.1.2-preview",
+  "parameters": {
+    "config": {
+      "isWizard": false,
+      "basics": { }
+    },
+    "basics": [ ],
+    "steps": [ ],
+    "outputs": { },
+    "resourceTypes": [ ]
+  }
+}
@@ -0,0 +1,63 @@
+{
+  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
+  "contentVersion": "1.0.0.0",
+  "parameters": {
+    "imageName": {
+      "type": "string",
+      "defaultValue": "ghcr.io/berriai/litellm:main-latest"
+    },
+    "containerName": {
+      "type": "string",
+      "defaultValue": "litellm-container"
+    },
+    "dnsLabelName": {
+      "type": "string",
+      "defaultValue": "litellm"
+    },
+    "portNumber": {
+      "type": "int",
+      "defaultValue": 4000
+    }
+  },
+  "resources": [
+    {
+      "type": "Microsoft.ContainerInstance/containerGroups",
+      "apiVersion": "2021-03-01",
+      "name": "[parameters('containerName')]",
+      "location": "[resourceGroup().location]",
+      "properties": {
+        "containers": [
+          {
+            "name": "[parameters('containerName')]",
+            "properties": {
+              "image": "[parameters('imageName')]",
+              "resources": {
+                "requests": {
+                  "cpu": 1,
+                  "memoryInGB": 2
+                }
+              },
+              "ports": [
+                {
+                  "port": "[parameters('portNumber')]"
+                }
+              ]
+            }
+          }
+        ],
+        "osType": "Linux",
+        "restartPolicy": "Always",
+        "ipAddress": {
+          "type": "Public",
+          "ports": [
+            {
+              "protocol": "tcp",
+              "port": "[parameters('portNumber')]"
+            }
+          ],
+          "dnsNameLabel": "[parameters('dnsLabelName')]"
+        }
+      }
+    }
+  ]
+}
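The ARM template above deploys into an existing resource group; a minimal deployment sketch driving the Azure CLI from Python (the resource-group name, template path, and parameter value are placeholders, not part of this diff):

import subprocess

# Sketch: deploy the new ARM template with `az deployment group create`.
# "litellm-rg" and "path/to/azuredeploy.json" are hypothetical placeholders.
subprocess.run(
    [
        "az", "deployment", "group", "create",
        "--resource-group", "litellm-rg",
        "--template-file", "path/to/azuredeploy.json",
        "--parameters", "dnsLabelName=my-litellm",
    ],
    check=True,  # raise CalledProcessError if the deployment fails
)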
deploy/azure_resource_manager/main.bicep (new file, 42 lines)
@@ -0,0 +1,42 @@
+param imageName string = 'ghcr.io/berriai/litellm:main-latest'
+param containerName string = 'litellm-container'
+param dnsLabelName string = 'litellm'
+param portNumber int = 4000
+
+resource containerGroupName 'Microsoft.ContainerInstance/containerGroups@2021-03-01' = {
+  name: containerName
+  location: resourceGroup().location
+  properties: {
+    containers: [
+      {
+        name: containerName
+        properties: {
+          image: imageName
+          resources: {
+            requests: {
+              cpu: 1
+              memoryInGB: 2
+            }
+          }
+          ports: [
+            {
+              port: portNumber
+            }
+          ]
+        }
+      }
+    ]
+    osType: 'Linux'
+    restartPolicy: 'Always'
+    ipAddress: {
+      type: 'Public'
+      ports: [
+        {
+          protocol: 'tcp'
+          port: portNumber
+        }
+      ]
+      dnsNameLabel: dnsLabelName
+    }
+  }
+}
@@ -83,6 +83,7 @@ def completion(
     top_p: Optional[float] = None,
     n: Optional[int] = None,
     stream: Optional[bool] = None,
+    stream_options: Optional[dict] = None,
     stop=None,
     max_tokens: Optional[int] = None,
     presence_penalty: Optional[float] = None,

@@ -139,6 +140,10 @@ def completion(
 - `stream`: *boolean or null (optional)* - If set to true, it sends partial message deltas. Tokens will be sent as they become available, with the stream terminated by a [DONE] message.
 
+- `stream_options` *dict or null (optional)* - Options for streaming response. Only set this when you set `stream: true`.
+
+- `include_usage` *boolean (optional)* - If set, an additional chunk will be streamed before the data: [DONE] message. The usage field on this chunk shows the token usage statistics for the entire request, and the choices field will always be an empty array. All other chunks will also include a usage field, but with a null value.
+
 - `stop`: *string/ array/ null (optional)* - Up to 4 sequences where the API will stop generating further tokens.
 
 - `max_tokens`: *integer (optional)* - The maximum number of tokens to generate in the chat completion.
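A short usage sketch of the `stream_options` parameter documented above (the model name and prompt are illustrative):

import litellm

# Stream a completion and request a final usage chunk via include_usage.
response = litellm.completion(
    model="gpt-3.5-turbo",  # illustrative model
    messages=[{"role": "user", "content": "Say hi"}],
    stream=True,
    stream_options={"include_usage": True},
)
for chunk in response:
    # The last chunk before [DONE] carries `usage`; its `choices` list is empty.
    print(chunk)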
@@ -291,7 +291,7 @@ def _create_clickhouse_aggregate_tables(client=None, table_names=[]):
 
 
 def _forecast_daily_cost(data: list):
-    import requests
+    import requests  # type: ignore
     from datetime import datetime, timedelta
 
     if len(data) == 0:
@@ -10,8 +10,8 @@
 # s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation
 import os
 import inspect
-import redis, litellm
-import redis.asyncio as async_redis
+import redis, litellm  # type: ignore
+import redis.asyncio as async_redis  # type: ignore
 from typing import List, Optional

@@ -10,7 +10,7 @@
 import os, json, time
 import litellm
 from litellm.utils import ModelResponse
-import requests, threading
+import requests, threading  # type: ignore
 from typing import Optional, Union, Literal
 

@@ -1,7 +1,6 @@
 #### What this does ####
 # On success + failure, log events to aispend.io
 import dotenv, os
-import requests
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -4,18 +4,30 @@ import datetime
 class AthinaLogger:
     def __init__(self):
         import os
 
         self.athina_api_key = os.getenv("ATHINA_API_KEY")
         self.headers = {
             "athina-api-key": self.athina_api_key,
-            "Content-Type": "application/json"
+            "Content-Type": "application/json",
         }
         self.athina_logging_url = "https://log.athina.ai/api/v1/log/inference"
-        self.additional_keys = ["environment", "prompt_slug", "customer_id", "customer_user_id", "session_id", "external_reference_id", "context", "expected_response", "user_query"]
+        self.additional_keys = [
+            "environment",
+            "prompt_slug",
+            "customer_id",
+            "customer_user_id",
+            "session_id",
+            "external_reference_id",
+            "context",
+            "expected_response",
+            "user_query",
+        ]
 
     def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
-        import requests
+        import requests  # type: ignore
         import json
         import traceback
 
         try:
             response_json = response_obj.model_dump() if response_obj else {}
             data = {

@@ -23,19 +35,30 @@ class AthinaLogger:
                 "request": kwargs,
                 "response": response_json,
                 "prompt_tokens": response_json.get("usage", {}).get("prompt_tokens"),
-                "completion_tokens": response_json.get("usage", {}).get("completion_tokens"),
+                "completion_tokens": response_json.get("usage", {}).get(
+                    "completion_tokens"
+                ),
                 "total_tokens": response_json.get("usage", {}).get("total_tokens"),
             }
 
-            if type(end_time) == datetime.datetime and type(start_time) == datetime.datetime:
-                data["response_time"] = int((end_time - start_time).total_seconds() * 1000)
+            if (
+                type(end_time) == datetime.datetime
+                and type(start_time) == datetime.datetime
+            ):
+                data["response_time"] = int(
+                    (end_time - start_time).total_seconds() * 1000
+                )
 
             if "messages" in kwargs:
                 data["prompt"] = kwargs.get("messages", None)
 
             # Directly add tools or functions if present
             optional_params = kwargs.get("optional_params", {})
-            data.update((k, v) for k, v in optional_params.items() if k in ["tools", "functions"])
+            data.update(
+                (k, v)
+                for k, v in optional_params.items()
+                if k in ["tools", "functions"]
+            )
 
             # Add additional metadata keys
             metadata = kwargs.get("litellm_params", {}).get("metadata", {})

@@ -44,11 +67,19 @@ class AthinaLogger:
             if key in metadata:
                 data[key] = metadata[key]
 
-            response = requests.post(self.athina_logging_url, headers=self.headers, data=json.dumps(data, default=str))
+            response = requests.post(
+                self.athina_logging_url,
+                headers=self.headers,
+                data=json.dumps(data, default=str),
+            )
             if response.status_code != 200:
-                print_verbose(f"Athina Logger Error - {response.text}, {response.status_code}")
+                print_verbose(
+                    f"Athina Logger Error - {response.text}, {response.status_code}"
+                )
             else:
                 print_verbose(f"Athina Logger Succeeded - {response.text}")
         except Exception as e:
-            print_verbose(f"Athina Logger Error - {e}, Stack trace: {traceback.format_exc()}")
+            print_verbose(
+                f"Athina Logger Error - {e}, Stack trace: {traceback.format_exc()}"
+            )
             pass
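For context, the Athina logger reformatted above is normally enabled through litellm's callback list; a minimal sketch, assuming the "athina" callback string and a placeholder API key:

import os
import litellm

os.environ["ATHINA_API_KEY"] = "sk-..."  # placeholder; read by AthinaLogger.__init__

# Register the logger; litellm then invokes AthinaLogger.log_event on each success.
litellm.success_callback = ["athina"]

litellm.completion(
    model="gpt-3.5-turbo",  # illustrative model
    messages=[{"role": "user", "content": "ping"}],
)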
@@ -1,7 +1,7 @@
 #### What this does ####
 # On success + failure, log events to aispend.io
 import dotenv, os
-import requests
+import requests  # type: ignore
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback

@@ -3,7 +3,6 @@
 #### What this does ####
 # On success, logs events to Promptlayer
 import dotenv, os
-import requests
 
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.caching import DualCache

@@ -1,7 +1,6 @@
 #### What this does ####
 # On success, logs events to Promptlayer
 import dotenv, os
-import requests
 
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.caching import DualCache

@@ -2,7 +2,7 @@
 # On success + failure, log events to Supabase
 
 import dotenv, os
-import requests
+import requests  # type: ignore
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback

@@ -2,7 +2,7 @@
 # On success + failure, log events to Supabase
 
 import dotenv, os
-import requests
+import requests  # type: ignore
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -1,15 +1,17 @@
-import requests
+import requests  # type: ignore
 import json
 import traceback
 from datetime import datetime, timezone
 
+
 class GreenscaleLogger:
     def __init__(self):
         import os
+
         self.greenscale_api_key = os.getenv("GREENSCALE_API_KEY")
         self.headers = {
             "api-key": self.greenscale_api_key,
-            "Content-Type": "application/json"
+            "Content-Type": "application/json",
         }
         self.greenscale_logging_url = os.getenv("GREENSCALE_ENDPOINT")
 

@@ -19,13 +21,18 @@ class GreenscaleLogger:
             data = {
                 "modelId": kwargs.get("model"),
                 "inputTokenCount": response_json.get("usage", {}).get("prompt_tokens"),
-                "outputTokenCount": response_json.get("usage", {}).get("completion_tokens"),
+                "outputTokenCount": response_json.get("usage", {}).get(
+                    "completion_tokens"
+                ),
             }
-            data["timestamp"] = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
+            data["timestamp"] = datetime.now(timezone.utc).strftime(
+                "%Y-%m-%dT%H:%M:%SZ"
+            )
 
             if type(end_time) == datetime and type(start_time) == datetime:
-                data["invocationLatency"] = int((end_time - start_time).total_seconds() * 1000)
+                data["invocationLatency"] = int(
+                    (end_time - start_time).total_seconds() * 1000
+                )
 
             # Add additional metadata keys to tags
             tags = []

@@ -37,15 +44,25 @@ class GreenscaleLogger:
             elif key == "greenscale_application":
                 data["application"] = value
             else:
-                tags.append({"key": key.replace("greenscale_", ""), "value": str(value)})
+                tags.append(
+                    {"key": key.replace("greenscale_", ""), "value": str(value)}
+                )
 
         data["tags"] = tags
 
-        response = requests.post(self.greenscale_logging_url, headers=self.headers, data=json.dumps(data, default=str))
+        response = requests.post(
+            self.greenscale_logging_url,
+            headers=self.headers,
+            data=json.dumps(data, default=str),
+        )
         if response.status_code != 200:
-            print_verbose(f"Greenscale Logger Error - {response.text}, {response.status_code}")
+            print_verbose(
+                f"Greenscale Logger Error - {response.text}, {response.status_code}"
+            )
         else:
             print_verbose(f"Greenscale Logger Succeeded - {response.text}")
     except Exception as e:
-        print_verbose(f"Greenscale Logger Error - {e}, Stack trace: {traceback.format_exc()}")
+        print_verbose(
+            f"Greenscale Logger Error - {e}, Stack trace: {traceback.format_exc()}"
+        )
        pass
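The Greenscale logger reads its two environment variables in __init__ above; a minimal enablement sketch, assuming the "greenscale" callback string (the values are placeholders):

import os
import litellm

# Both env vars are read by GreenscaleLogger.__init__; values here are placeholders.
os.environ["GREENSCALE_API_KEY"] = "gs-..."
os.environ["GREENSCALE_ENDPOINT"] = "https://logging.example.greenscale.ai"  # placeholder URL

litellm.success_callback = ["greenscale"]  # registers the logger shown above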
@@ -1,7 +1,7 @@
 #### What this does ####
 # On success, logs events to Helicone
 import dotenv, os
-import requests
+import requests  # type: ignore
 import litellm
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
@@ -1,15 +1,14 @@
 #### What this does ####
 # On success, logs events to Langsmith
-import dotenv, os
-import requests
-import requests
+import dotenv, os  # type: ignore
+import requests  # type: ignore
 from datetime import datetime
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 import asyncio
 import types
-from pydantic import BaseModel
+from pydantic import BaseModel  # type: ignore
 
 
 def is_serializable(value):

@@ -79,8 +78,6 @@ class LangsmithLogger:
         except:
             response_obj = response_obj.dict()  # type: ignore
 
-        print(f"response_obj: {response_obj}")
-
         data = {
             "name": run_name,
             "run_type": "llm",  # this should always be llm, since litellm always logs llm calls. Langsmith allow us to log "chain"

@@ -90,7 +87,6 @@ class LangsmithLogger:
             "start_time": start_time,
             "end_time": end_time,
         }
-        print(f"data: {data}")
 
         response = requests.post(
             "https://api.smith.langchain.com/runs",
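The Langsmith logger above posts runs to api.smith.langchain.com; a minimal enablement sketch, assuming the "langsmith" callback string (the API key is a placeholder):

import os
import litellm

os.environ["LANGSMITH_API_KEY"] = "ls-..."  # placeholder

# Route successful calls through LangsmithLogger.
litellm.success_callback = ["langsmith"]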
@@ -2,7 +2,6 @@
 ## On Success events log cost to OpenMeter - https://github.com/BerriAI/litellm/issues/1268
 
 import dotenv, os, json
-import requests
 import litellm
 
 dotenv.load_dotenv()  # Loading env variables using dotenv

@@ -60,7 +59,7 @@ class OpenMeterLogger(CustomLogger):
             "total_tokens": response_obj["usage"].get("total_tokens"),
         }
 
-        subject = kwargs.get("user", None),  # end-user passed in via 'user' param
+        subject = (kwargs.get("user", None),)  # end-user passed in via 'user' param
         if not subject:
             raise Exception("OpenMeter: user is required")
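Worth noting about the `subject` change above: the trailing comma was already making `subject` a one-element tuple, and the new parenthesized form only makes that explicit. A non-empty tuple is truthy, so the guard below it never fires, even when no user was passed:

# A 1-tuple containing None is still truthy, so this guard is a no-op.
subject = (None,)
if not subject:
    raise Exception("OpenMeter: user is required")  # never reached
print(bool(subject))  # True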
@@ -3,7 +3,7 @@
 # On success, log events to Prometheus
 
 import dotenv, os
-import requests
+import requests  # type: ignore
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback

@@ -19,7 +19,6 @@ class PrometheusLogger:
         **kwargs,
     ):
         try:
-            print(f"in init prometheus metrics")
             from prometheus_client import Counter
 
             self.litellm_llm_api_failed_requests_metric = Counter(

@@ -4,7 +4,7 @@
 
 
 import dotenv, os
-import requests
+import requests  # type: ignore
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback

@@ -183,7 +183,6 @@ class PrometheusServicesLogger:
         )
 
     async def async_service_failure_hook(self, payload: ServiceLoggerPayload):
-        print(f"received error payload: {payload.error}")
         if self.mock_testing:
             self.mock_testing_failure_calls += 1
@@ -1,12 +1,13 @@
 #### What this does ####
 # On success, logs events to Promptlayer
 import dotenv, os
-import requests
+import requests  # type: ignore
+from pydantic import BaseModel
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 
 
 class PromptLayerLogger:
     # Class variables or attributes
     def __init__(self):

@@ -32,7 +33,11 @@ class PromptLayerLogger:
             tags = kwargs["litellm_params"]["metadata"]["pl_tags"]
 
             # Remove "pl_tags" from metadata
-            metadata = {k:v for k, v in kwargs["litellm_params"]["metadata"].items() if k != "pl_tags"}
+            metadata = {
+                k: v
+                for k, v in kwargs["litellm_params"]["metadata"].items()
+                if k != "pl_tags"
+            }
 
         print_verbose(
             f"Prompt Layer Logging - Enters logging function for model kwargs: {new_kwargs}\n, response: {response_obj}"
@@ -2,7 +2,6 @@
 # On success + failure, log events to Supabase
 
 import dotenv, os
-import requests
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback

@@ -2,7 +2,7 @@
 # On success + failure, log events to Supabase
 
 import dotenv, os
-import requests
+import requests  # type: ignore
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
|
|||
import os, types, traceback
|
||||
import json
|
||||
from enum import Enum
|
||||
import requests
|
||||
import time, httpx
|
||||
import requests # type: ignore
|
||||
import time, httpx # type: ignore
|
||||
from typing import Callable, Optional
|
||||
from litellm.utils import ModelResponse, Choices, Message
|
||||
import litellm
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
import os, types
|
||||
import json
|
||||
from enum import Enum
|
||||
import requests
|
||||
import requests # type: ignore
|
||||
import time
|
||||
from typing import Callable, Optional
|
||||
import litellm
|
||||
from litellm.utils import ModelResponse, Choices, Message, Usage
|
||||
import httpx
|
||||
import httpx # type: ignore
|
||||
|
||||
|
||||
class AlephAlphaError(Exception):
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import os, types
|
||||
import json
|
||||
from enum import Enum
|
||||
import requests, copy
|
||||
import requests, copy # type: ignore
|
||||
import time
|
||||
from typing import Callable, Optional, List
|
||||
from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
|
||||
|
@ -9,7 +9,7 @@ import litellm
|
|||
from .prompt_templates.factory import prompt_factory, custom_prompt
|
||||
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
|
||||
from .base import BaseLLM
|
||||
import httpx
|
||||
import httpx # type: ignore
|
||||
|
||||
|
||||
class AnthropicConstants(Enum):
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from typing import Optional, Union, Any
|
||||
from typing import Optional, Union, Any, Literal
|
||||
import types, requests
|
||||
from .base import BaseLLM
|
||||
from litellm.utils import (
|
||||
|
@ -12,7 +12,7 @@ from litellm.utils import (
|
|||
from typing import Callable, Optional, BinaryIO
|
||||
from litellm import OpenAIConfig
|
||||
import litellm, json
|
||||
import httpx
|
||||
import httpx # type: ignore
|
||||
from .custom_httpx.azure_dall_e_2 import CustomHTTPTransport, AsyncCustomHTTPTransport
|
||||
from openai import AzureOpenAI, AsyncAzureOpenAI
|
||||
import uuid
|
||||
|
@ -952,6 +952,81 @@ class AzureChatCompletion(BaseLLM):
|
|||
)
|
||||
raise e
|
||||
|
||||
def get_headers(
|
||||
self,
|
||||
model: Optional[str],
|
||||
api_key: str,
|
||||
api_base: str,
|
||||
api_version: str,
|
||||
timeout: float,
|
||||
mode: str,
|
||||
messages: Optional[list] = None,
|
||||
input: Optional[list] = None,
|
||||
prompt: Optional[str] = None,
|
||||
) -> dict:
|
||||
client_session = litellm.client_session or httpx.Client(
|
||||
transport=CustomHTTPTransport(), # handle dall-e-2 calls
|
||||
)
|
||||
if "gateway.ai.cloudflare.com" in api_base:
|
||||
## build base url - assume api base includes resource name
|
||||
if not api_base.endswith("/"):
|
||||
api_base += "/"
|
||||
api_base += f"{model}"
|
||||
client = AzureOpenAI(
|
||||
base_url=api_base,
|
||||
api_version=api_version,
|
||||
api_key=api_key,
|
||||
timeout=timeout,
|
||||
http_client=client_session,
|
||||
)
|
||||
model = None
|
||||
# cloudflare ai gateway, needs model=None
|
||||
else:
|
||||
client = AzureOpenAI(
|
||||
api_version=api_version,
|
||||
azure_endpoint=api_base,
|
||||
api_key=api_key,
|
||||
timeout=timeout,
|
||||
http_client=client_session,
|
||||
)
|
||||
|
||||
# only run this check if it's not cloudflare ai gateway
|
||||
if model is None and mode != "image_generation":
|
||||
raise Exception("model is not set")
|
||||
|
||||
completion = None
|
||||
|
||||
if messages is None:
|
||||
messages = [{"role": "user", "content": "Hey"}]
|
||||
try:
|
||||
completion = client.chat.completions.with_raw_response.create(
|
||||
model=model, # type: ignore
|
||||
messages=messages, # type: ignore
|
||||
)
|
||||
except Exception as e:
|
||||
raise e
|
||||
response = {}
|
||||
|
||||
if completion is None or not hasattr(completion, "headers"):
|
||||
raise Exception("invalid completion response")
|
||||
|
||||
if (
|
||||
completion.headers.get("x-ratelimit-remaining-requests", None) is not None
|
||||
): # not provided for dall-e requests
|
||||
response["x-ratelimit-remaining-requests"] = completion.headers[
|
||||
"x-ratelimit-remaining-requests"
|
||||
]
|
||||
|
||||
if completion.headers.get("x-ratelimit-remaining-tokens", None) is not None:
|
||||
response["x-ratelimit-remaining-tokens"] = completion.headers[
|
||||
"x-ratelimit-remaining-tokens"
|
||||
]
|
||||
|
||||
if completion.headers.get("x-ms-region", None) is not None:
|
||||
response["x-ms-region"] = completion.headers["x-ms-region"]
|
||||
|
||||
return response
|
||||
|
||||
async def ahealth_check(
|
||||
self,
|
||||
model: Optional[str],
|
||||
|
@ -963,7 +1038,7 @@ class AzureChatCompletion(BaseLLM):
|
|||
messages: Optional[list] = None,
|
||||
input: Optional[list] = None,
|
||||
prompt: Optional[str] = None,
|
||||
):
|
||||
) -> dict:
|
||||
client_session = litellm.aclient_session or httpx.AsyncClient(
|
||||
transport=AsyncCustomHTTPTransport(), # handle dall-e-2 calls
|
||||
)
|
||||
|
@ -1040,4 +1115,8 @@ class AzureChatCompletion(BaseLLM):
|
|||
response["x-ratelimit-remaining-tokens"] = completion.headers[
|
||||
"x-ratelimit-remaining-tokens"
|
||||
]
|
||||
|
||||
if completion.headers.get("x-ms-region", None) is not None:
|
||||
response["x-ms-region"] = completion.headers["x-ms-region"]
|
||||
|
||||
return response
|
||||
|
|
|
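A small sketch of how the header dict returned by the new `get_headers` / `ahealth_check` code above might be consumed. `azure_chat_completion` is assumed to be an AzureChatCompletion instance, and all argument values are placeholders, not from this diff:

# Hypothetical call; every argument value below is a placeholder.
headers = azure_chat_completion.get_headers(
    model="azure-gpt-35",
    api_key="sk-...",
    api_base="https://my-resource.openai.azure.com",
    api_version="2023-07-01-preview",
    timeout=10.0,
    mode="chat",
)
remaining = headers.get("x-ratelimit-remaining-requests")
if remaining is not None and int(remaining) < 5:
    print("close to the Azure rate limit; backing off")
print(headers.get("x-ms-region"))  # region that served the request, when present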
@@ -1,5 +1,5 @@
 from typing import Optional, Union, Any
-import types, requests
+import types, requests  # type: ignore
 from .base import BaseLLM
 from litellm.utils import (
     ModelResponse,

@@ -1,7 +1,7 @@
 import os
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable
 from litellm.utils import ModelResponse, Usage
@@ -163,10 +163,9 @@ class AmazonAnthropicClaude3Config:
             "stop",
             "temperature",
             "top_p",
-            "extra_headers"
+            "extra_headers",
         ]
-
 
     def map_openai_params(self, non_default_params: dict, optional_params: dict):
         for param, value in non_default_params.items():
             if param == "max_tokens":

@@ -534,10 +533,12 @@ class AmazonStabilityConfig:
 
 def add_custom_header(headers):
     """Closure to capture the headers and add them."""
+
     def callback(request, **kwargs):
         """Actual callback function that Boto3 will call."""
         for header_name, header_value in headers.items():
             request.headers.add_header(header_name, header_value)
+
     return callback
 
 

@@ -672,7 +673,9 @@ def init_bedrock_client(
         config=config,
     )
     if extra_headers:
-        client.meta.events.register('before-sign.bedrock-runtime.*', add_custom_header(extra_headers))
+        client.meta.events.register(
+            "before-sign.bedrock-runtime.*", add_custom_header(extra_headers)
+        )
 
     return client

@@ -1224,7 +1227,7 @@ def _embedding_func_single(
         "input_type", "search_document"
     )  # aws bedrock example default - https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=cohere.embed-english-v3
     data = {"texts": [input], **inference_params}  # type: ignore
-    body = json.dumps(data).encode("utf-8")
+    body = json.dumps(data).encode("utf-8")  # type: ignore
     ## LOGGING
     request_str = f"""
     response = client.invoke_model(

@@ -1416,7 +1419,7 @@ def image_generation(
     ## LOGGING
     request_str = f"""
     response = client.invoke_model(
-        body={body},
+        body={body},  # type: ignore
         modelId={modelId},
         accept="application/json",
         contentType="application/json",

@@ -1,11 +1,11 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 import litellm
-import httpx
+import httpx  # type: ignore
 from litellm.utils import ModelResponse, Usage
 from .prompt_templates.factory import prompt_factory, custom_prompt

@@ -1,12 +1,12 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time, traceback
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Choices, Message, Usage
 import litellm
-import httpx
+import httpx  # type: ignore
 
 
 class CohereError(Exception):

@@ -1,12 +1,12 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time, traceback
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Choices, Message, Usage
 import litellm
-import httpx
+import httpx  # type: ignore
 from .prompt_templates.factory import cohere_message_pt

@@ -1,7 +1,7 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time, traceback
 from typing import Callable, Optional, List
 from litellm.utils import ModelResponse, Choices, Message, Usage

@@ -1,7 +1,7 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 import litellm
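The Bedrock `add_custom_header` closure and `init_bedrock_client` registration earlier in this diff follow boto3's event-hook pattern; isolated as a sketch (region and header values are illustrative, not from this diff):

import boto3

# Register a callback that runs before SigV4 signing and injects extra headers.
client = boto3.client("bedrock-runtime", region_name="us-east-1")  # illustrative region
extra_headers = {"X-Custom-Tenant": "team-a"}  # hypothetical header
client.meta.events.register(
    "before-sign.bedrock-runtime.*", add_custom_header(extra_headers)
)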
@@ -1,10 +1,10 @@
 from itertools import chain
-import requests, types, time
+import requests, types, time  # type: ignore
 import json, uuid
 import traceback
 from typing import Optional
 import litellm
-import httpx, aiohttp, asyncio
+import httpx, aiohttp, asyncio  # type: ignore
 from .prompt_templates.factory import prompt_factory, custom_prompt
 

@@ -245,7 +245,10 @@ def get_ollama_response(
             tool_calls=[
                 {
                     "id": f"call_{str(uuid.uuid4())}",
-                    "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                    "function": {
+                        "name": function_call["name"],
+                        "arguments": json.dumps(function_call["arguments"]),
+                    },
                     "type": "function",
                 }
             ],

@@ -257,7 +260,9 @@ def get_ollama_response(
     model_response["created"] = int(time.time())
     model_response["model"] = "ollama/" + model
     prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt, disallowed_special=())))  # type: ignore
-    completion_tokens = response_json.get("eval_count", len(response_json.get("message",dict()).get("content", "")))
+    completion_tokens = response_json.get(
+        "eval_count", len(response_json.get("message", dict()).get("content", ""))
+    )
     model_response["usage"] = litellm.Usage(
         prompt_tokens=prompt_tokens,
         completion_tokens=completion_tokens,

@@ -298,7 +303,10 @@ def ollama_completion_stream(url, data, logging_obj):
                     tool_calls=[
                         {
                             "id": f"call_{str(uuid.uuid4())}",
-                            "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                            "function": {
+                                "name": function_call["name"],
+                                "arguments": json.dumps(function_call["arguments"]),
+                            },
                             "type": "function",
                         }
                     ],

@@ -341,7 +349,8 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_obj):
                 [
                     chunk.choices[0].delta.content
                     async for chunk in streamwrapper
-                    if chunk.choices[0].delta.content]
+                    if chunk.choices[0].delta.content
+                ]
             )
             function_call = json.loads(response_content)
             delta = litellm.utils.Delta(

@@ -349,7 +358,10 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_obj):
                 tool_calls=[
                     {
                         "id": f"call_{str(uuid.uuid4())}",
-                        "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                        "function": {
+                            "name": function_call["name"],
+                            "arguments": json.dumps(function_call["arguments"]),
+                        },
                         "type": "function",
                     }
                 ],

@@ -398,7 +410,10 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
                 tool_calls=[
                     {
                         "id": f"call_{str(uuid.uuid4())}",
-                        "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                        "function": {
+                            "name": function_call["name"],
+                            "arguments": json.dumps(function_call["arguments"]),
+                        },
                         "type": "function",
                     }
                 ],

@@ -412,7 +427,10 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
         model_response["created"] = int(time.time())
         model_response["model"] = "ollama/" + data["model"]
         prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(data["prompt"], disallowed_special=())))  # type: ignore
-        completion_tokens = response_json.get("eval_count", len(response_json.get("message",dict()).get("content", "")))
+        completion_tokens = response_json.get(
+            "eval_count",
+            len(response_json.get("message", dict()).get("content", "")),
+        )
         model_response["usage"] = litellm.Usage(
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,

@@ -500,6 +518,7 @@ async def ollama_aembeddings(
     }
     return model_response
 
+
 def ollama_embeddings(
     api_base: str,
     model: str,

@@ -517,5 +536,6 @@ def ollama_embeddings(
             optional_params,
             logging_obj,
             model_response,
-            encoding)
+            encoding,
+        )
     )
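The repeated tool_calls blocks in the Ollama handlers above all build the same OpenAI-style structure; isolated as a standalone sketch (the function payload is illustrative):

import json
import uuid

function_call = {"name": "get_weather", "arguments": {"city": "Paris"}}  # illustrative
tool_call = {
    "id": f"call_{str(uuid.uuid4())}",  # synthetic id, since Ollama doesn't return one
    "function": {
        "name": function_call["name"],
        # OpenAI-compatible clients expect `arguments` as a JSON *string*, not a dict.
        "arguments": json.dumps(function_call["arguments"]),
    },
    "type": "function",
}
print(tool_call["function"]["arguments"])  # '{"city": "Paris"}'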
@@ -1,7 +1,7 @@
 import os
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Usage

@@ -22,7 +22,6 @@ from litellm.utils import (
     TextCompletionResponse,
 )
 from typing import Callable, Optional
-import aiohttp, requests
 import litellm
 from .prompt_templates.factory import prompt_factory, custom_prompt
 from openai import OpenAI, AsyncOpenAI

@@ -531,6 +530,7 @@ class OpenAIChatCompletion(BaseLLM):
                 model=model,
                 custom_llm_provider="openai",
                 logging_obj=logging_obj,
+                stream_options=data.get("stream_options", None),
             )
             return streamwrapper
 

@@ -580,6 +580,7 @@ class OpenAIChatCompletion(BaseLLM):
                 model=model,
                 custom_llm_provider="openai",
                 logging_obj=logging_obj,
+                stream_options=data.get("stream_options", None),
             )
             return streamwrapper
         except (

@@ -1,7 +1,7 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 import litellm
@@ -981,7 +981,7 @@ def anthropic_messages_pt(messages: list):
     # add role=tool support to allow function call result/error submission
     user_message_types = {"user", "tool", "function"}
     # reformat messages to ensure user/assistant are alternating, if there's either 2 consecutive 'user' messages or 2 consecutive 'assistant' message, merge them.
-    new_messages = []
+    new_messages: list = []
     msg_i = 0
     tool_use_param = False
     while msg_i < len(messages):

@@ -1,11 +1,11 @@
 import os, types
 import json
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Usage
 import litellm
-import httpx
+import httpx  # type: ignore
 from .prompt_templates.factory import prompt_factory, custom_prompt

@@ -1,14 +1,14 @@
 import os, types, traceback
 from enum import Enum
 import json
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional, Any
 import litellm
 from litellm.utils import ModelResponse, EmbeddingResponse, get_secret, Usage
 import sys
 from copy import deepcopy
-import httpx
+import httpx  # type: ignore
 from .prompt_templates.factory import prompt_factory, custom_prompt

@@ -295,7 +295,7 @@ def completion(
             EndpointName={model},
             InferenceComponentName={model_id},
             ContentType="application/json",
-            Body={data},
+            Body={data},  # type: ignore
             CustomAttributes="accept_eula=true",
         )
     """  # type: ignore

@@ -321,7 +321,7 @@ def completion(
         response = client.invoke_endpoint(
             EndpointName={model},
             ContentType="application/json",
-            Body={data},
+            Body={data},  # type: ignore
             CustomAttributes="accept_eula=true",
         )
     """  # type: ignore

@@ -688,7 +688,7 @@ def embedding(
         response = client.invoke_endpoint(
             EndpointName={model},
             ContentType="application/json",
-            Body={data},
+            Body={data},  # type: ignore
             CustomAttributes="accept_eula=true",
         )"""  # type: ignore
     logging_obj.pre_call(
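The logged SageMaker request strings above mirror the underlying boto3 call; a standalone sketch (region, endpoint name, and payload are placeholders, not from this diff):

import json
import boto3

client = boto3.client("sagemaker-runtime", region_name="us-east-1")  # illustrative region
response = client.invoke_endpoint(
    EndpointName="my-llm-endpoint",  # placeholder endpoint
    ContentType="application/json",
    Body=json.dumps({"inputs": "Hello"}),  # placeholder payload
    CustomAttributes="accept_eula=true",
)
print(response["Body"].read().decode("utf-8"))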
@@ -6,11 +6,11 @@ Reference: https://docs.together.ai/docs/openai-api-compatibility
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 import litellm
-import httpx
+import httpx  # type: ignore
 from litellm.utils import ModelResponse, Usage
 from .prompt_templates.factory import prompt_factory, custom_prompt
 

@@ -1,12 +1,12 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional, Union, List
 from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
 import litellm, uuid
-import httpx, inspect
+import httpx, inspect  # type: ignore
 
 
 class VertexAIError(Exception):

@@ -3,7 +3,7 @@
 import os, types
 import json
 from enum import Enum
-import requests, copy
+import requests, copy  # type: ignore
 import time, uuid
 from typing import Callable, Optional, List
 from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper

@@ -17,7 +17,7 @@ from .prompt_templates.factory import (
     extract_between_tags,
     parse_xml_params,
 )
-import httpx
+import httpx  # type: ignore
 
 
 class VertexAIError(Exception):

@@ -1,8 +1,8 @@
 import os
 import json
 from enum import Enum
-import requests
-import time, httpx
+import requests  # type: ignore
+import time, httpx  # type: ignore
 from typing import Callable, Any
 from litellm.utils import ModelResponse, Usage
 from .prompt_templates.factory import prompt_factory, custom_prompt

@@ -3,8 +3,8 @@ import json, types, time  # noqa: E401
 from contextlib import contextmanager
 from typing import Callable, Dict, Optional, Any, Union, List
 
-import httpx
-import requests
+import httpx  # type: ignore
+import requests  # type: ignore
 import litellm
 from litellm.utils import ModelResponse, get_secret, Usage
 

@@ -187,6 +187,7 @@ async def acompletion(
     top_p: Optional[float] = None,
     n: Optional[int] = None,
     stream: Optional[bool] = None,
+    stream_options: Optional[dict] = None,
     stop=None,
     max_tokens: Optional[int] = None,
     presence_penalty: Optional[float] = None,

@@ -206,6 +207,7 @@ async def acompletion(
     api_version: Optional[str] = None,
     api_key: Optional[str] = None,
     model_list: Optional[list] = None,  # pass in a list of api_base,keys, etc.
+    extra_headers: Optional[dict] = None,
     # Optional liteLLM function params
     **kwargs,
 ):

@@ -223,6 +225,7 @@ async def acompletion(
         top_p (float, optional): The top-p parameter for nucleus sampling (default is 1.0).
         n (int, optional): The number of completions to generate (default is 1).
         stream (bool, optional): If True, return a streaming response (default is False).
+        stream_options (dict, optional): A dictionary containing options for the streaming response. Only use this if stream is True.
         stop(string/list, optional): - Up to 4 sequences where the LLM API will stop generating further tokens.
         max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity).
         presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far.

@@ -260,6 +263,7 @@ async def acompletion(
         "top_p": top_p,
         "n": n,
         "stream": stream,
+        "stream_options": stream_options,
         "stop": stop,
         "max_tokens": max_tokens,
         "presence_penalty": presence_penalty,

@@ -457,6 +461,7 @@ def completion(
     top_p: Optional[float] = None,
     n: Optional[int] = None,
     stream: Optional[bool] = None,
+    stream_options: Optional[dict] = None,
     stop=None,
     max_tokens: Optional[int] = None,
     presence_penalty: Optional[float] = None,

@@ -496,6 +501,7 @@ def completion(
         top_p (float, optional): The top-p parameter for nucleus sampling (default is 1.0).
         n (int, optional): The number of completions to generate (default is 1).
         stream (bool, optional): If True, return a streaming response (default is False).
+        stream_options (dict, optional): A dictionary containing options for the streaming response. Only set this when you set stream: true.
         stop(string/list, optional): - Up to 4 sequences where the LLM API will stop generating further tokens.
         max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity).
         presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far.

@@ -573,6 +579,7 @@ def completion(
         "top_p",
         "n",
         "stream",
+        "stream_options",
         "stop",
         "max_tokens",
         "presence_penalty",

@@ -648,6 +655,8 @@ def completion(
         "base_model",
         "stream_timeout",
         "supports_system_message",
+        "region_name",
+        "allowed_model_region",
     ]
     default_params = openai_params + litellm_params
     non_default_params = {

@@ -783,6 +792,7 @@ def completion(
             top_p=top_p,
             n=n,
             stream=stream,
+            stream_options=stream_options,
             stop=stop,
             max_tokens=max_tokens,
             presence_penalty=presence_penalty,

@@ -2716,6 +2726,8 @@ def embedding(
         "ttl",
         "cache",
         "no-log",
+        "region_name",
+        "allowed_model_region",
     ]
     default_params = openai_params + litellm_params
     non_default_params = {

@@ -3589,6 +3601,8 @@ def image_generation(
         "caching_groups",
         "ttl",
         "cache",
+        "region_name",
+        "allowed_model_region",
     ]
     default_params = openai_params + litellm_params
     non_default_params = {
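Tying the main.py changes together, a short sketch of the async path with the newly threaded-through parameters (the model and header values are illustrative, not from this diff):

import asyncio
import litellm

async def main():
    response = await litellm.acompletion(
        model="gpt-3.5-turbo",  # illustrative model
        messages=[{"role": "user", "content": "Say hi"}],
        stream=True,
        stream_options={"include_usage": True},  # new param plumbed through above
        extra_headers={"X-Request-Source": "docs-example"},  # hypothetical header
    )
    async for chunk in response:
        print(chunk)

asyncio.run(main())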
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1 +0,0 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{11837:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_12bbc4', '__Inter_Fallback_12bbc4'",fontStyle:"normal"},className:"__className_12bbc4"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=11837)}),_N_E=n.O()}]);

@@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_c23dc8', '__Inter_Fallback_c23dc8'",fontStyle:"normal"},className:"__className_c23dc8"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=87421)}),_N_E=n.O()}]);
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{70377:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(70377)}),_N_E=e.O()}]);
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);
@@ -1 +1 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/7de0c97d470f519f.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/a1602eb39f799143.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
[Regenerated dashboard build artifacts: litellm/proxy/_experimental/out/index.html and out/index.txt. Minified Next.js output only — the build ID changes from "OcLXYgLcgQyjMd6bH1bqU" to "K8KXTbmuI2ArWjjdMi2iq", with updated chunk hashes and CSS filenames; no hand-written changes.]
@ -458,6 +458,27 @@ class UpdateUserRequest(GenerateRequestBase):
        return values


class NewEndUserRequest(LiteLLMBase):
    user_id: str
    alias: Optional[str] = None  # human-friendly alias
    blocked: bool = False  # allow/disallow requests for this end-user
    max_budget: Optional[float] = None
    budget_id: Optional[str] = None  # give either a budget_id or max_budget
    allowed_model_region: Optional[Literal["eu"]] = (
        None  # require all user requests to use models in this specific region
    )
    default_model: Optional[str] = (
        None  # if no equivalent model in allowed region - default all requests to this model
    )

    @root_validator(pre=True)
    def check_user_info(cls, values):
        if values.get("max_budget") is not None and values.get("budget_id") is not None:
            raise ValueError("Set either 'max_budget' or 'budget_id', not both.")

        return values


class Member(LiteLLMBase):
    role: Literal["admin", "user"]
    user_id: Optional[str] = None
@ -494,6 +515,8 @@ class NewTeamRequest(TeamBase):

class GlobalEndUsersSpend(LiteLLMBase):
    api_key: Optional[str] = None
    startTime: Optional[datetime] = None
    endTime: Optional[datetime] = None


class TeamMemberAddRequest(LiteLLMBase):
@ -836,6 +859,7 @@ class UserAPIKeyAuth(

    api_key: Optional[str] = None
    user_role: Optional[Literal["proxy_admin", "app_owner", "app_user"]] = None
    allowed_model_region: Optional[Literal["eu"]] = None

    @root_validator(pre=True)
    def check_api_key(cls, values):
@ -881,6 +905,8 @@ class LiteLLM_EndUserTable(LiteLLMBase):
    blocked: bool
    alias: Optional[str] = None
    spend: float = 0.0
    allowed_model_region: Optional[Literal["eu"]] = None
    default_model: Optional[str] = None
    litellm_budget_table: Optional[LiteLLM_BudgetTable] = None

    @root_validator(pre=True)
@ -208,7 +208,9 @@ async def get_end_user_object(
        return None

    # check if in cache
    cached_user_obj = user_api_key_cache.async_get_cache(key=end_user_id)
    cached_user_obj = await user_api_key_cache.async_get_cache(  # async_get_cache is a coroutine and must be awaited
        key="end_user_id:{}".format(end_user_id)
    )
    if cached_user_obj is not None:
        if isinstance(cached_user_obj, dict):
            return LiteLLM_EndUserTable(**cached_user_obj)
@ -223,7 +225,14 @@ async def get_end_user_object(
        if response is None:
            raise Exception

        return LiteLLM_EndUserTable(**response.dict())
        # save the end-user object to cache
        await user_api_key_cache.async_set_cache(
            key="end_user_id:{}".format(end_user_id), value=response
        )

        _response = LiteLLM_EndUserTable(**response.dict())

        return _response
    except Exception as e:  # if end-user not in db
        return None
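A quick hedged sketch of the cache-key convention this hunk introduces: end-user rows are now stored under a namespaced `end_user_id:{id}` key rather than the bare id, so they cannot collide with API-key entries in the same `DualCache`. Illustrative only; `end_user_id` and the row dict are stand-ins.

```
from litellm.caching import DualCache

cache = DualCache()

async def cache_end_user(end_user_id: str, end_user_row: dict):
    # namespaced key - mirrors the convention used in get_end_user_object above
    key = "end_user_id:{}".format(end_user_id)
    await cache.async_set_cache(key=key, value=end_user_row)
    return await cache.async_get_cache(key=key)
```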
@ -252,7 +252,7 @@ def run_server(
    if model and "ollama" in model and api_base is None:
        run_ollama_serve()
    if test_async is True:
        import requests, concurrent, time
        import requests, concurrent, time  # type: ignore

        api_base = f"http://{host}:{port}"

@ -418,7 +418,7 @@ def run_server(
        read from there and save it to os.env['DATABASE_URL']
        """
        try:
            import yaml, asyncio
            import yaml, asyncio  # type: ignore
        except:
            raise ImportError(
                "yaml needs to be imported. Run - `pip install 'litellm[proxy]'`"
@ -30,7 +30,7 @@ sys.path.insert(
try:
    import fastapi
    import backoff
    import yaml
    import yaml  # type: ignore
    import orjson
    import logging
    from apscheduler.schedulers.asyncio import AsyncIOScheduler
@ -231,6 +231,11 @@ class SpecialModelNames(enum.Enum):
    all_team_models = "all-team-models"


class CommonProxyErrors(enum.Enum):
    db_not_connected_error = "DB not connected"
    no_llm_router = "No models configured on proxy"


@app.exception_handler(ProxyException)
async def openai_exception_handler(request: Request, exc: ProxyException):
    # NOTE: DO NOT MODIFY THIS, its crucial to map to Openai exceptions
@ -467,10 +472,6 @@ async def user_api_key_auth(
                prisma_client=prisma_client,
                user_api_key_cache=user_api_key_cache,
            )
            # save the end-user object to cache
            await user_api_key_cache.async_set_cache(
                key=end_user_id, value=end_user_object
            )

        global_proxy_spend = None
        if litellm.max_budget > 0:  # user set proxy max budget
@ -952,13 +953,16 @@ async def user_api_key_auth(

            _end_user_object = None
            if "user" in request_data:
                _id = "end_user_id:{}".format(request_data["user"])
                _end_user_object = await user_api_key_cache.async_get_cache(key=_id)
                if _end_user_object is not None:
                    _end_user_object = LiteLLM_EndUserTable(**_end_user_object)
                _end_user_object = await get_end_user_object(
                    end_user_id=request_data["user"],
                    prisma_client=prisma_client,
                    user_api_key_cache=user_api_key_cache,
                )

            global_proxy_spend = None
            if litellm.max_budget > 0:  # user set proxy max budget
            if (
                litellm.max_budget > 0 and prisma_client is not None
            ):  # user set proxy max budget
                # check cache
                global_proxy_spend = await user_api_key_cache.async_get_cache(
                    key="{}:spend".format(litellm_proxy_admin_name)
@ -1011,6 +1015,12 @@ async def user_api_key_auth(
            )
            valid_token_dict = _get_pydantic_json_dict(valid_token)
            valid_token_dict.pop("token", None)

            if _end_user_object is not None:
                valid_token_dict["allowed_model_region"] = (
                    _end_user_object.allowed_model_region
                )

            """
            asyncio create task to update the user api key cache with the user db table as well
@ -1035,10 +1045,7 @@ async def user_api_key_auth(
                # check if user can access this route
                query_params = request.query_params
                key = query_params.get("key")
                if (
                    key is not None
                    and prisma_client.hash_token(token=key) != api_key
                ):
                if key is not None and hash_token(token=key) != api_key:
                    raise HTTPException(
                        status_code=status.HTTP_403_FORBIDDEN,
                        detail="user not allowed to access this key's info",
@ -1091,6 +1098,7 @@ async def user_api_key_auth(
        # sso/login, ui/login, /key functions and /user functions
        # this will never be allowed to call /chat/completions
        token_team = getattr(valid_token, "team_id", None)

        if token_team is not None and token_team == "litellm-dashboard":
            # this token is only used for managing the ui
            allowed_routes = [
@ -3612,6 +3620,10 @@ async def chat_completion(
            **data,
        }  # add the team-specific configs to the completion call

        ### END-USER SPECIFIC PARAMS ###
        if user_api_key_dict.allowed_model_region is not None:
            data["allowed_model_region"] = user_api_key_dict.allowed_model_region

        global user_temperature, user_request_timeout, user_max_tokens, user_api_base
        # override with user settings, these are params passed via cli
        if user_temperature:
@ -3719,6 +3731,7 @@ async def chat_completion(
                "x-litellm-model-id": model_id,
                "x-litellm-cache-key": cache_key,
                "x-litellm-model-api-base": api_base,
                "x-litellm-version": version,
            }
            selected_data_generator = select_data_generator(
                response=response,
@ -3734,6 +3747,7 @@ async def chat_completion(
        fastapi_response.headers["x-litellm-model-id"] = model_id
        fastapi_response.headers["x-litellm-cache-key"] = cache_key
        fastapi_response.headers["x-litellm-model-api-base"] = api_base
        fastapi_response.headers["x-litellm-version"] = version

        ### CALL HOOKS ### - modify outgoing data
        response = await proxy_logging_obj.post_call_success_hook(
@ -3890,14 +3904,10 @@ async def completion(
            },
        )

        if hasattr(response, "_hidden_params"):
            model_id = response._hidden_params.get("model_id", None) or ""
            original_response = (
                response._hidden_params.get("original_response", None) or ""
            )
        else:
            model_id = ""
            original_response = ""
        hidden_params = getattr(response, "_hidden_params", {}) or {}
        model_id = hidden_params.get("model_id", None) or ""
        cache_key = hidden_params.get("cache_key", None) or ""
        api_base = hidden_params.get("api_base", None) or ""

        verbose_proxy_logger.debug("final response: %s", response)
        if (
@ -3905,6 +3915,9 @@ async def completion(
        ):  # use generate_responses to stream responses
            custom_headers = {
                "x-litellm-model-id": model_id,
                "x-litellm-cache-key": cache_key,
                "x-litellm-model-api-base": api_base,
                "x-litellm-version": version,
            }
            selected_data_generator = select_data_generator(
                response=response,
@ -3919,6 +3932,10 @@ async def completion(
            )

        fastapi_response.headers["x-litellm-model-id"] = model_id
        fastapi_response.headers["x-litellm-cache-key"] = cache_key
        fastapi_response.headers["x-litellm-model-api-base"] = api_base
        fastapi_response.headers["x-litellm-version"] = version

        return response
    except Exception as e:
        data["litellm_status"] = "fail"  # used for alerting
@ -3958,6 +3975,7 @@ async def completion(
)  # azure compatible endpoint
async def embeddings(
    request: Request,
    fastapi_response: Response,
    model: Optional[str] = None,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
@ -4104,6 +4122,17 @@ async def embeddings(
        ### ALERTING ###
        data["litellm_status"] = "success"  # used for alerting

        ### RESPONSE HEADERS ###
        hidden_params = getattr(response, "_hidden_params", {}) or {}
        model_id = hidden_params.get("model_id", None) or ""
        cache_key = hidden_params.get("cache_key", None) or ""
        api_base = hidden_params.get("api_base", None) or ""

        fastapi_response.headers["x-litellm-model-id"] = model_id
        fastapi_response.headers["x-litellm-cache-key"] = cache_key
        fastapi_response.headers["x-litellm-model-api-base"] = api_base
        fastapi_response.headers["x-litellm-version"] = version

        return response
    except Exception as e:
        data["litellm_status"] = "fail"  # used for alerting
@ -4142,6 +4171,7 @@ async def embeddings(
)
async def image_generation(
    request: Request,
    fastapi_response: Response,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    global proxy_logging_obj
@ -4261,6 +4291,17 @@ async def image_generation(
        ### ALERTING ###
        data["litellm_status"] = "success"  # used for alerting

        ### RESPONSE HEADERS ###
        hidden_params = getattr(response, "_hidden_params", {}) or {}
        model_id = hidden_params.get("model_id", None) or ""
        cache_key = hidden_params.get("cache_key", None) or ""
        api_base = hidden_params.get("api_base", None) or ""

        fastapi_response.headers["x-litellm-model-id"] = model_id
        fastapi_response.headers["x-litellm-cache-key"] = cache_key
        fastapi_response.headers["x-litellm-model-api-base"] = api_base
        fastapi_response.headers["x-litellm-version"] = version

        return response
    except Exception as e:
        data["litellm_status"] = "fail"  # used for alerting
@ -4297,6 +4338,7 @@ async def image_generation(
)
async def audio_transcriptions(
    request: Request,
    fastapi_response: Response,
    file: UploadFile = File(...),
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
@ -4441,6 +4483,18 @@ async def audio_transcriptions(

        ### ALERTING ###
        data["litellm_status"] = "success"  # used for alerting

        ### RESPONSE HEADERS ###
        hidden_params = getattr(response, "_hidden_params", {}) or {}
        model_id = hidden_params.get("model_id", None) or ""
        cache_key = hidden_params.get("cache_key", None) or ""
        api_base = hidden_params.get("api_base", None) or ""

        fastapi_response.headers["x-litellm-model-id"] = model_id
        fastapi_response.headers["x-litellm-cache-key"] = cache_key
        fastapi_response.headers["x-litellm-model-api-base"] = api_base
        fastapi_response.headers["x-litellm-version"] = version

        return response
    except Exception as e:
        data["litellm_status"] = "fail"  # used for alerting
@ -4480,6 +4534,7 @@ async def audio_transcriptions(
)
async def moderations(
    request: Request,
    fastapi_response: Response,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
@ -4604,6 +4659,17 @@ async def moderations(
        ### ALERTING ###
        data["litellm_status"] = "success"  # used for alerting

        ### RESPONSE HEADERS ###
        hidden_params = getattr(response, "_hidden_params", {}) or {}
        model_id = hidden_params.get("model_id", None) or ""
        cache_key = hidden_params.get("cache_key", None) or ""
        api_base = hidden_params.get("api_base", None) or ""

        fastapi_response.headers["x-litellm-model-id"] = model_id
        fastapi_response.headers["x-litellm-cache-key"] = cache_key
        fastapi_response.headers["x-litellm-model-api-base"] = api_base
        fastapi_response.headers["x-litellm-version"] = version

        return response
    except Exception as e:
        data["litellm_status"] = "fail"  # used for alerting
@ -5809,35 +5875,38 @@ async def global_spend_end_users(data: Optional[GlobalEndUsersSpend] = None):
    if prisma_client is None:
        raise HTTPException(status_code=500, detail={"error": "No db connected"})

    if data is None:
        sql_query = f"""SELECT * FROM "Last30dTopEndUsersSpend";"""

        response = await prisma_client.db.query_raw(query=sql_query)
    else:
        """
        Gets the top 100 end-users for a given api key
        """
        current_date = datetime.now()
        past_date = current_date - timedelta(days=30)
        response = await prisma_client.db.litellm_spendlogs.group_by(  # type: ignore
            by=["end_user"],
            where={
                "AND": [{"startTime": {"gte": past_date}}, {"api_key": data.api_key}]  # type: ignore
            },
            sum={"spend": True},
            order={"_sum": {"spend": "desc"}},  # type: ignore
            take=100,
            count=True,
    startTime = None
    endTime = None
    selected_api_key = None
    if data is not None:
        startTime = data.startTime
        endTime = data.endTime
        selected_api_key = data.api_key

    startTime = startTime or datetime.now() - timedelta(days=30)
    endTime = endTime or datetime.now()

    sql_query = """
SELECT end_user, COUNT(*) AS total_count, SUM(spend) AS total_spend
FROM "LiteLLM_SpendLogs"
WHERE "startTime" >= $1::timestamp
  AND "startTime" < $2::timestamp
  AND (
    CASE
      WHEN $3::TEXT IS NULL THEN TRUE
      ELSE api_key = $3
    END
  )
GROUP BY end_user
ORDER BY total_spend DESC
LIMIT 100
    """
    response = await prisma_client.db.query_raw(
        sql_query, startTime, endTime, selected_api_key
    )
    if response is not None and isinstance(response, list):
        new_response = []
        for r in response:
            new_r = r
            new_r["total_spend"] = r["_sum"]["spend"]
            new_r["total_count"] = r["_count"]["_all"]
            new_r.pop("_sum")
            new_r.pop("_count")
            new_response.append(new_r)

    return response
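For reference, a hedged sketch of calling the reworked spend report with the new optional filters; the endpoint path, server URL, and key are placeholders, and the body fields mirror `GlobalEndUsersSpend` above (all optional):

```
import requests
from datetime import datetime, timedelta

resp = requests.post(
    "http://0.0.0.0:4000/global/spend/end_users",  # assumed route for global_spend_end_users
    headers={"Authorization": "Bearer sk-1234"},
    json={
        "api_key": None,  # or a specific key to scope the report
        "startTime": (datetime.now() - timedelta(days=7)).isoformat(),
        "endTime": datetime.now().isoformat(),
    },
)
print(resp.json())  # [{"end_user": ..., "total_count": ..., "total_spend": ...}, ...]
```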
@ -5883,7 +5952,7 @@ async def global_predict_spend_logs(request: Request):
    return _forecast_daily_cost(data)


#### USER MANAGEMENT ####
#### INTERNAL USER MANAGEMENT ####
@router.post(
    "/user/new",
    tags=["user management"],
@ -6376,6 +6445,43 @@ async def user_get_requests():
    )


@router.get(
    "/user/get_users",
    tags=["user management"],
    dependencies=[Depends(user_api_key_auth)],
)
async def get_users(
    role: str = fastapi.Query(
        default=None,
        description="Either 'proxy_admin', 'proxy_viewer', 'app_owner', 'app_user'",
    )
):
    """
    [BETA] This could change without notice. Give feedback - https://github.com/BerriAI/litellm/issues

    Get all users who are a specific `user_role`.

    Used by the UI to populate the user lists.

    Currently - admin-only endpoint.
    """
    global prisma_client

    if prisma_client is None:
        raise HTTPException(
            status_code=500,
            detail={"error": f"No db connected. prisma client={prisma_client}"},
        )
    all_users = await prisma_client.get_data(
        table_name="user", query_type="find_all", key_val={"user_role": role}
    )

    return all_users


#### END-USER MANAGEMENT ####


@router.post(
    "/end_user/block",
    tags=["End User Management"],
@ -6466,38 +6572,140 @@ async def unblock_user(data: BlockUsers):
    return {"blocked_users": litellm.blocked_user_list}


@router.get(
    "/user/get_users",
    tags=["user management"],
@router.post(
    "/end_user/new",
    tags=["End User Management"],
    dependencies=[Depends(user_api_key_auth)],
)
async def get_users(
    role: str = fastapi.Query(
        default=None,
        description="Either 'proxy_admin', 'proxy_viewer', 'app_owner', 'app_user'",
    )
async def new_end_user(
    data: NewEndUserRequest,
    user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
    """
    [BETA] This could change without notice. Give feedback - https://github.com/BerriAI/litellm/issues
    [TODO] Needs to be implemented.

    Get all users who are a specific `user_role`.
    Allow creating a new end-user

    Used by the UI to populate the user lists.
    - Allow specifying allowed regions
    - Allow specifying default model

    Currently - admin-only endpoint.
    Example curl:
    ```
    curl --location 'http://0.0.0.0:4000/end_user/new' \
        --header 'Authorization: Bearer sk-1234' \
        --header 'Content-Type: application/json' \
        --data '{
        "user_id" : "ishaan-jaff-3",  <- specific customer
        "allowed_model_region": "eu",  <- set region for models
        "default_model": "azure/gpt-3.5-turbo-eu"  <- all calls from this user, use this model?
        }'

    # return end-user object
    ```
    """
    global prisma_client
    global prisma_client, llm_router
    """
    Validation:
    - check if default model exists
    - create budget object if not already created

    - Add user to end user table

    Return
    - end-user object
    - currently allowed models
    """
    if prisma_client is None:
        raise HTTPException(
            status_code=500,
            detail={"error": f"No db connected. prisma client={prisma_client}"},
        )
    all_users = await prisma_client.get_data(
        table_name="user", query_type="find_all", key_val={"user_role": role}
            detail={"error": CommonProxyErrors.db_not_connected_error.value},
        )

    return all_users
    ## VALIDATION ##
    if data.default_model is not None:
        if llm_router is None:
            raise HTTPException(
                status_code=422, detail={"error": CommonProxyErrors.no_llm_router.value}
            )
        elif data.default_model not in llm_router.get_model_names():
            raise HTTPException(
                status_code=422,
                detail={
                    "error": "Default Model not on proxy. Configure via `/model/new` or config.yaml. Default_model={}, proxy_model_names={}".format(
                        data.default_model, set(llm_router.get_model_names())
                    )
                },
            )

    new_end_user_obj: Dict = {}

    ## CREATE BUDGET ## if set
    if data.max_budget is not None:
        budget_record = await prisma_client.db.litellm_budgettable.create(
            data={
                "max_budget": data.max_budget,
                "created_by": user_api_key_dict.user_id or litellm_proxy_admin_name,  # type: ignore
                "updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
            }
        )

        new_end_user_obj["budget_id"] = budget_record.budget_id
    elif data.budget_id is not None:
        new_end_user_obj["budget_id"] = data.budget_id

    _user_data = data.dict(exclude_none=True)

    for k, v in _user_data.items():
        if k != "max_budget" and k != "budget_id":
            new_end_user_obj[k] = v

    ## WRITE TO DB ##
    end_user_record = await prisma_client.db.litellm_endusertable.create(
        data=new_end_user_obj  # type: ignore
    )

    return end_user_record


@router.post(
    "/end_user/info",
    tags=["End User Management"],
    dependencies=[Depends(user_api_key_auth)],
)
async def end_user_info():
    """
    [TODO] Needs to be implemented.
    """
    pass


@router.post(
    "/end_user/update",
    tags=["End User Management"],
    dependencies=[Depends(user_api_key_auth)],
)
async def update_end_user():
    """
    [TODO] Needs to be implemented.
    """
    pass


@router.post(
    "/end_user/delete",
    tags=["End User Management"],
    dependencies=[Depends(user_api_key_auth)],
)
async def delete_end_user():
    """
    [TODO] Needs to be implemented.
    """
    pass


#### TEAM MANAGEMENT ####
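To make the new flow concrete, here is a hedged sketch of registering an end-user and then attributing a request to them via the OpenAI-compatible `user` param; the server URL, key, and ids are placeholders:

```
import requests

BASE, KEY = "http://0.0.0.0:4000", "sk-1234"

# 1. create the end-user (fields from NewEndUserRequest above)
requests.post(
    f"{BASE}/end_user/new",
    headers={"Authorization": f"Bearer {KEY}"},
    json={"user_id": "customer-1", "allowed_model_region": "eu", "max_budget": 10.0},
)

# 2. requests that pass user="customer-1" now pick up the end-user's
#    allowed_model_region during user_api_key_auth
requests.post(
    f"{BASE}/chat/completions",
    headers={"Authorization": f"Bearer {KEY}"},
    json={
        "model": "gpt-3.5-turbo",
        "user": "customer-1",
        "messages": [{"role": "user", "content": "hi"}],
    },
)
```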
@ -150,6 +150,8 @@ model LiteLLM_EndUserTable {
  user_id String @id
  alias String? // admin-facing alias
  spend Float @default(0.0)
  allowed_model_region String? // require all user requests to use models in this specific region
  default_model String? // use along with 'allowed_model_region'. if no available model in region, default to this model.
  budget_id String?
  litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
  blocked Boolean @default(false)
@ -526,7 +526,7 @@ class PrismaClient:
        finally:
            os.chdir(original_dir)
        # Now you can import the Prisma Client
        from prisma import Prisma  # type: ignore
        from prisma import Prisma

        self.db = Prisma()  # Client to connect to Prisma db
@ -1689,12 +1689,12 @@ def get_instance_fn(value: str, config_file_path: Optional[str] = None) -> Any:
            module_file_path = os.path.join(directory, *module_name.split("."))
            module_file_path += ".py"

            spec = importlib.util.spec_from_file_location(module_name, module_file_path)
            spec = importlib.util.spec_from_file_location(module_name, module_file_path)  # type: ignore
            if spec is None:
                raise ImportError(
                    f"Could not find a module specification for {module_file_path}"
                )
            module = importlib.util.module_from_spec(spec)
            module = importlib.util.module_from_spec(spec)  # type: ignore
            spec.loader.exec_module(module)  # type: ignore
        else:
            # Dynamically import the module
@ -32,6 +32,7 @@ from litellm.utils import (
    CustomStreamWrapper,
    get_utc_datetime,
    calculate_max_parallel_requests,
    _is_region_eu,
)
import copy
from litellm._logging import verbose_router_logger
@ -1999,7 +2000,11 @@ class Router:
        # user can pass vars directly or they can pass os.environ/AZURE_API_KEY, in which case we will read the env
        # we do this here because we init clients for Azure, OpenAI and we need to set the right key
        api_key = litellm_params.get("api_key") or default_api_key
        if api_key and api_key.startswith("os.environ/"):
        if (
            api_key
            and isinstance(api_key, str)
            and api_key.startswith("os.environ/")
        ):
            api_key_env_name = api_key.replace("os.environ/", "")
            api_key = litellm.get_secret(api_key_env_name)
            litellm_params["api_key"] = api_key
@ -2023,6 +2028,7 @@ class Router:
        if (
            is_azure_ai_studio_model == True
            and api_base is not None
            and isinstance(api_base, str)
            and not api_base.endswith("/v1/")
        ):
            # check if it ends with a trailing slash
@ -2103,13 +2109,14 @@ class Router:
                organization = litellm.get_secret(organization_env_name)
                litellm_params["organization"] = organization

        if "azure" in model_name:
            if api_base is None:
        if "azure" in model_name and isinstance(api_key, str):
            if api_base is None or not isinstance(api_base, str):
                raise ValueError(
                    f"api_base is required for Azure OpenAI. Set it on your config. Model - {model}"
                )
            if api_version is None:
                api_version = "2023-07-01-preview"

            if "gateway.ai.cloudflare.com" in api_base:
                if not api_base.endswith("/"):
                    api_base += "/"
@ -2532,7 +2539,7 @@ class Router:
            self.default_deployment = deployment.to_json(exclude_none=True)

        # Azure GPT-Vision Enhancements, users can pass os.environ/
        data_sources = deployment.litellm_params.get("dataSources", [])
        data_sources = deployment.litellm_params.get("dataSources", []) or []

        for data_source in data_sources:
            params = data_source.get("parameters", {})
@ -2549,6 +2556,22 @@ class Router:
        # init OpenAI, Azure clients
        self.set_client(model=deployment.to_json(exclude_none=True))

        # set region (if azure model)
        try:
            if "azure" in deployment.litellm_params.model:
                region = litellm.utils.get_model_region(
                    litellm_params=deployment.litellm_params, mode=None
                )

                deployment.litellm_params.region_name = region
        except Exception as e:
            verbose_router_logger.error(
                "Unable to get the region for azure model - {}, {}".format(
                    deployment.litellm_params.model, str(e)
                )
            )
            pass  # [NON-BLOCKING]

        return deployment

    def add_deployment(self, deployment: Deployment) -> Optional[Deployment]:
@ -2820,14 +2843,17 @@ class Router:
        model: str,
        healthy_deployments: List,
        messages: List[Dict[str, str]],
        allowed_model_region: Optional[Literal["eu"]] = None,
    ):
        """
        Filter out model in model group, if:

        - model context window < message length
        - filter models above rpm limits
        - if region given, filter out models not in that region / unknown region
        - [TODO] function call and model doesn't support function calling
        """

        verbose_router_logger.debug(
            f"Starting Pre-call checks for deployments in model={model}"
        )
@ -2878,9 +2904,9 @@ class Router:
            except Exception as e:
                verbose_router_logger.debug("An error occurs - {}".format(str(e)))

            ## RPM CHECK ##
            _litellm_params = deployment.get("litellm_params", {})
            model_id = deployment.get("model_info", {}).get("id", "")
            ## RPM CHECK ##
            ### get local router cache ###
            current_request_cache_local = (
                self.cache.get_cache(key=model_id, local_only=True) or 0
@ -2908,6 +2934,28 @@ class Router:
                        _rate_limit_error = True
                        continue

            ## REGION CHECK ##
            if allowed_model_region is not None:
                if _litellm_params.get("region_name") is not None and isinstance(
                    _litellm_params["region_name"], str
                ):
                    # check if in allowed_model_region
                    if (
                        _is_region_eu(model_region=_litellm_params["region_name"])
                        == False
                    ):
                        invalid_model_indices.append(idx)
                        continue
                else:
                    verbose_router_logger.debug(
                        "Filtering out model - {}, as model_region=None, and allowed_model_region={}".format(
                            model_id, allowed_model_region
                        )
                    )
                    # filter out since region unknown, and user wants to filter for specific region
                    invalid_model_indices.append(idx)
                    continue

        if len(invalid_model_indices) == len(_returned_deployments):
            """
            - no healthy deployments available b/c context window checks or rate limit error
@ -3047,8 +3095,29 @@ class Router:

        # filter pre-call checks
        if self.enable_pre_call_checks and messages is not None:
            _allowed_model_region = (
                request_kwargs.get("allowed_model_region")
                if request_kwargs is not None
                else None
            )

            if _allowed_model_region == "eu":
                healthy_deployments = self._pre_call_checks(
                    model=model, healthy_deployments=healthy_deployments, messages=messages
                    model=model,
                    healthy_deployments=healthy_deployments,
                    messages=messages,
                    allowed_model_region=_allowed_model_region,
                )
            else:
                verbose_router_logger.debug(
                    "Ignoring given 'allowed_model_region'={}. Only 'eu' is allowed".format(
                        _allowed_model_region
                    )
                )
                healthy_deployments = self._pre_call_checks(
                    model=model,
                    healthy_deployments=healthy_deployments,
                    messages=messages,
                )

        if len(healthy_deployments) == 0:
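A hedged usage sketch for the region-aware pre-call check (model names, endpoints, and region string are placeholders): the router only applies the region filter when a request carries `allowed_model_region="eu"`, which the proxy sets from the end-user object.

```
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "azure/gpt-35-turbo",
                "api_base": "https://my-eu-endpoint.openai.azure.com/",
                "api_key": "os.environ/AZURE_EUROPE_API_KEY",
                "region_name": "francecentral",  # marks this deployment as EU
            },
        },
    ],
    enable_pre_call_checks=True,  # required for _pre_call_checks to run at all
)
```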
@ -6,7 +6,7 @@
#   - use litellm.success + failure callbacks to log when a request completed
#   - in get_available_deployment, for a given model group name -> pick based on traffic

import dotenv, os, requests, random
import dotenv, os, requests, random  # type: ignore
from typing import Optional

dotenv.load_dotenv()  # Loading env variables using dotenv
@ -1,7 +1,7 @@
#### What this does ####
# picks based on response time (for streaming, this is time to first token)
from pydantic import BaseModel, Extra, Field, root_validator
import dotenv, os, requests, random
import dotenv, os, requests, random  # type: ignore
from typing import Optional, Union, List, Dict
from datetime import datetime, timedelta
import random
@ -1,7 +1,7 @@
#### What this does ####
# picks based on response time (for streaming, this is time to first token)
from pydantic import BaseModel, Extra, Field, root_validator
import dotenv, os, requests, random
from pydantic import BaseModel, Extra, Field, root_validator  # type: ignore
import dotenv, os, requests, random  # type: ignore
from typing import Optional, Union, List, Dict
from datetime import datetime, timedelta
import random
@ -1,5 +1,6 @@
import pytest
from litellm import acompletion
from litellm import completion


def test_acompletion_params():
@ -7,17 +8,29 @@ def test_acompletion_params():
    from litellm.types.completion import CompletionRequest

    acompletion_params_odict = inspect.signature(acompletion).parameters
    acompletion_params = {name: param.annotation for name, param in acompletion_params_odict.items()}
    completion_params = {field_name: field_type for field_name, field_type in CompletionRequest.__annotations__.items()}
    completion_params_dict = inspect.signature(completion).parameters

    # remove kwargs
    acompletion_params.pop("kwargs", None)
    acompletion_params = {
        name: param.annotation for name, param in acompletion_params_odict.items()
    }
    completion_params = {
        name: param.annotation for name, param in completion_params_dict.items()
    }

    keys_acompletion = set(acompletion_params.keys())
    keys_completion = set(completion_params.keys())

    print(keys_acompletion)
    print("\n\n\n")
    print(keys_completion)

    print("diff=", keys_completion - keys_acompletion)

    # Assert that the parameters are the same
    if keys_acompletion != keys_completion:
        pytest.fail("The parameters of the acompletion function and the CompletionRequest class are not the same.")
        pytest.fail(
            "The parameters of the litellm.acompletion function and litellm.completion are not the same."
        )


# test_acompletion_params()
@ -231,14 +231,17 @@ def test_cost_bedrock_pricing():
    assert cost == predicted_cost


@pytest.mark.skip(reason="AWS disabled our access")
def test_cost_bedrock_pricing_actual_calls():
    litellm.set_verbose = True
    model = "anthropic.claude-instant-v1"
    messages = [{"role": "user", "content": "Hey, how's it going?"}]
    response = litellm.completion(model=model, messages=messages)
    assert response._hidden_params["region_name"] is not None
    response = litellm.completion(
        model=model, messages=messages, mock_response="hello cool one"
    )

    print("response", response)
    cost = litellm.completion_cost(
        model="bedrock/anthropic.claude-instant-v1",
        completion_response=response,
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
    )
@ -16,7 +16,7 @@ litellm.set_verbose = True
model_alias_map = {"good-model": "anyscale/meta-llama/Llama-2-7b-chat-hf"}


def test_model_alias_map():
def test_model_alias_map(caplog):
    try:
        litellm.model_alias_map = model_alias_map
        response = completion(
@ -27,9 +27,15 @@ def test_model_alias_map():
            max_tokens=10,
        )
        print(response.model)

        captured_logs = [rec.levelname for rec in caplog.records]

        for log in captured_logs:
            assert "ERROR" not in log

        assert "Llama-2-7b-chat-hf" in response.model
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


test_model_alias_map()
# test_model_alias_map()
@ -1501,6 +1501,37 @@ def test_openai_chat_completion_complete_response_call():


# test_openai_chat_completion_complete_response_call()
def test_openai_stream_options_call():
    litellm.set_verbose = False
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "system", "content": "say GM - we're going to make it "}],
        stream=True,
        stream_options={"include_usage": True},
        max_tokens=10,
    )
    usage = None
    chunks = []
    for chunk in response:
        print("chunk: ", chunk)
        chunks.append(chunk)

    last_chunk = chunks[-1]
    print("last chunk: ", last_chunk)

    """
    Assert that:
    - Last Chunk includes Usage
    - All chunks prior to last chunk have usage=None
    """

    assert last_chunk.usage is not None
    assert last_chunk.usage.total_tokens > 0
    assert last_chunk.usage.prompt_tokens > 0
    assert last_chunk.usage.completion_tokens > 0

    # assert all non last chunks have usage=None
    assert all(chunk.usage is None for chunk in chunks[:-1])


def test_openai_text_completion_call():
@ -123,6 +123,8 @@ class GenericLiteLLMParams(BaseModel):
    )
    max_retries: Optional[int] = None
    organization: Optional[str] = None  # for openai orgs
    ## UNIFIED PROJECT/REGION ##
    region_name: Optional[str] = None
    ## VERTEX AI ##
    vertex_project: Optional[str] = None
    vertex_location: Optional[str] = None
@ -150,6 +152,8 @@ class GenericLiteLLMParams(BaseModel):
            None  # timeout when making stream=True calls, if str, pass in as os.environ/
        ),
        organization: Optional[str] = None,  # for openai orgs
        ## UNIFIED PROJECT/REGION ##
        region_name: Optional[str] = None,
        ## VERTEX AI ##
        vertex_project: Optional[str] = None,
        vertex_location: Optional[str] = None,
111
litellm/utils.py
@ -14,7 +14,7 @@ import subprocess, os
from os.path import abspath, join, dirname
import litellm, openai
import itertools
import random, uuid, requests
import random, uuid, requests  # type: ignore
from functools import wraps
import datetime, time
import tiktoken
@ -36,7 +36,7 @@ import litellm._service_logger  # for storing API inputs, outputs, and metadata

try:
    # this works in python 3.8
    import pkg_resources
    import pkg_resources  # type: ignore

    filename = pkg_resources.resource_filename(__name__, "llms/tokenizers")
# try:
@ -612,6 +612,7 @@ class ModelResponse(OpenAIObject):
        system_fingerprint=None,
        usage=None,
        stream=None,
        stream_options=None,
        response_ms=None,
        hidden_params=None,
        **params,
@ -658,6 +659,12 @@ class ModelResponse(OpenAIObject):
            usage = usage
        elif stream is None or stream == False:
            usage = Usage()
        elif (
            stream == True
            and stream_options is not None
            and stream_options.get("include_usage") == True
        ):
            usage = Usage()
        if hidden_params:
            self._hidden_params = hidden_params
@ -4161,8 +4168,30 @@ def cost_per_token(
        model_with_provider_and_region in model_cost_ref
    ):  # use region based pricing, if it's available
        model_with_provider = model_with_provider_and_region
    if model_with_provider in model_cost_ref:

    model_without_prefix = model
    model_parts = model.split("/")
    if len(model_parts) > 1:
        model_without_prefix = model_parts[1]
    else:
        model_without_prefix = model
    """
    Code block that formats model to lookup in litellm.model_cost
    Option1. model = "bedrock/ap-northeast-1/anthropic.claude-instant-v1". This is the most accurate since it is region based. Should always be option 1
    Option2. model = "openai/gpt-4" - model = provider/model
    Option3. model = "anthropic.claude-3" - model = model
    """
    if (
        model_with_provider in model_cost_ref
    ):  # Option 2. use model with provider, model = "openai/gpt-4"
        model = model_with_provider
    elif model in model_cost_ref:  # Option 1. use model passed, model="gpt-4"
        model = model
    elif (
        model_without_prefix in model_cost_ref
    ):  # Option 3. if user passed model="bedrock/anthropic.claude-3", use model="anthropic.claude-3"
        model = model_without_prefix

    # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
    print_verbose(f"Looking up model={model} in model_cost_map")
    if model in model_cost_ref:
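A hedged example of the lookup shapes the comment block above describes (token counts are arbitrary; returned values depend on the bundled cost map):

```
import litellm

# Option 2 style: "provider/model"
prompt_cost, completion_cost = litellm.cost_per_token(
    model="openai/gpt-4", prompt_tokens=100, completion_tokens=50
)

# Option 3 style: the "bedrock/" prefix is stripped before the lookup
prompt_cost, completion_cost = litellm.cost_per_token(
    model="bedrock/anthropic.claude-instant-v1", prompt_tokens=100, completion_tokens=50
)
print(prompt_cost, completion_cost)
```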
@ -4817,6 +4846,7 @@ def get_optional_params(
    top_p=None,
    n=None,
    stream=False,
    stream_options=None,
    stop=None,
    max_tokens=None,
    presence_penalty=None,
@ -4886,6 +4916,7 @@ def get_optional_params(
        "top_p": None,
        "n": None,
        "stream": None,
        "stream_options": None,
        "stop": None,
        "max_tokens": None,
        "presence_penalty": None,
@ -5757,6 +5788,8 @@ def get_optional_params(
        optional_params["n"] = n
    if stream is not None:
        optional_params["stream"] = stream
    if stream_options is not None:
        optional_params["stream_options"] = stream_options
    if stop is not None:
        optional_params["stop"] = stop
    if max_tokens is not None:
@ -5844,6 +5877,40 @@ def calculate_max_parallel_requests(
    return None


def _is_region_eu(model_region: str) -> bool:
    EU_Regions = ["europe", "sweden", "switzerland", "france", "uk"]
    for region in EU_Regions:
        if region in model_region.lower():  # check the region being iterated, not a hardcoded "europe"
            return True
    return False


def get_model_region(
    litellm_params: LiteLLM_Params, mode: Optional[str]
) -> Optional[str]:
    """
    Pass the litellm params for an azure model, and get back the region
    """
    if (
        "azure" in litellm_params.model
        and isinstance(litellm_params.api_key, str)
        and isinstance(litellm_params.api_base, str)
    ):
        _model = litellm_params.model.replace("azure/", "")
        response: dict = litellm.AzureChatCompletion().get_headers(
            model=_model,
            api_key=litellm_params.api_key,
            api_base=litellm_params.api_base,
            api_version=litellm_params.api_version or "2023-07-01-preview",
            timeout=10,
            mode=mode or "chat",
        )

        region: Optional[str] = response.get("x-ms-region", None)
        return region
    return None


def get_api_base(model: str, optional_params: dict) -> Optional[str]:
    """
    Returns the api base used for calling the model.
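A quick hedged illustration of the helper pair, assuming the corrected substring loop above (the region strings are example Azure region names):

```
from litellm.utils import _is_region_eu

assert _is_region_eu("francecentral") is True   # matches "france"
assert _is_region_eu("swedencentral") is True   # matches "sweden"
assert _is_region_eu("eastus") is False         # not an EU region
```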
@ -5878,6 +5945,8 @@ def get_api_base(model: str, optional_params: dict) -> Optional[str]:
    if _optional_params.api_base is not None:
        return _optional_params.api_base

    if litellm.model_alias_map and model in litellm.model_alias_map:
        model = litellm.model_alias_map[model]
    try:
        model, custom_llm_provider, dynamic_api_key, dynamic_api_base = (
            get_llm_provider(
@ -6027,6 +6096,7 @@ def get_supported_openai_params(model: str, custom_llm_provider: str):
        "top_p",
        "n",
        "stream",
        "stream_options",
        "stop",
        "max_tokens",
        "presence_penalty",
@ -7732,11 +7802,11 @@ def _calculate_retry_after(
        try:
            retry_after = int(retry_header)
        except Exception:
            retry_date_tuple = email.utils.parsedate_tz(retry_header)
            retry_date_tuple = email.utils.parsedate_tz(retry_header)  # type: ignore
            if retry_date_tuple is None:
                retry_after = -1
            else:
                retry_date = email.utils.mktime_tz(retry_date_tuple)
                retry_date = email.utils.mktime_tz(retry_date_tuple)  # type: ignore
                retry_after = int(retry_date - time.time())
    else:
        retry_after = -1
@ -9423,7 +9493,9 @@ def get_secret(
    else:
        secret = os.environ.get(secret_name)
        try:
            secret_value_as_bool = ast.literal_eval(secret) if secret is not None else None
            secret_value_as_bool = (
                ast.literal_eval(secret) if secret is not None else None
            )
            if isinstance(secret_value_as_bool, bool):
                return secret_value_as_bool
            else:
@ -9442,7 +9514,12 @@ def get_secret(
# replicate/anthropic/cohere
class CustomStreamWrapper:
    def __init__(
        self, completion_stream, model, custom_llm_provider=None, logging_obj=None
        self,
        completion_stream,
        model,
        custom_llm_provider=None,
        logging_obj=None,
        stream_options=None,
    ):
        self.model = model
        self.custom_llm_provider = custom_llm_provider
@ -9468,6 +9545,7 @@ class CustomStreamWrapper:
        self.response_id = None
        self.logging_loop = None
        self.rules = Rules()
        self.stream_options = stream_options

    def __iter__(self):
        return self
@ -9908,6 +9986,7 @@ class CustomStreamWrapper:
            is_finished = False
            finish_reason = None
            logprobs = None
            usage = None
            original_chunk = None  # this is used for function/tool calling
            if len(str_line.choices) > 0:
                if (
@ -9942,12 +10021,15 @@ class CustomStreamWrapper:
                else:
                    logprobs = None

            usage = getattr(str_line, "usage", None)

            return {
                "text": text,
                "is_finished": is_finished,
                "finish_reason": finish_reason,
                "logprobs": logprobs,
                "original_chunk": str_line,
                "usage": usage,
            }
        except Exception as e:
            traceback.print_exc()
@ -10250,7 +10332,9 @@ class CustomStreamWrapper:
            raise e

    def model_response_creator(self):
        model_response = ModelResponse(stream=True, model=self.model)
        model_response = ModelResponse(
            stream=True, model=self.model, stream_options=self.stream_options
        )
        if self.response_id is not None:
            model_response.id = self.response_id
        else:
@ -10570,6 +10654,12 @@ class CustomStreamWrapper:
                if response_obj["logprobs"] is not None:
                    model_response.choices[0].logprobs = response_obj["logprobs"]

                if (
                    self.stream_options is not None
                    and self.stream_options["include_usage"] == True
                ):
                    model_response.usage = response_obj["usage"]

                model_response.model = self.model
                print_verbose(
                    f"model_response finish reason 3: {self.received_finish_reason}; response_obj={response_obj}"
@ -10657,6 +10747,11 @@ class CustomStreamWrapper:
                    except Exception as e:
                        model_response.choices[0].delta = Delta()
                else:
                    if (
                        self.stream_options is not None
                        and self.stream_options["include_usage"] == True
                    ):
                        return model_response
                    return
            print_verbose(
                f"model_response.choices[0].delta: {model_response.choices[0].delta}; completion_obj: {completion_obj}"
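Taken together, these CustomStreamWrapper changes implement OpenAI's `stream_options={"include_usage": True}` contract: content chunks carry `usage=None`, and a final usage-bearing chunk follows the last delta (the accompanying test above asserts exactly this). A hedged consumer sketch:

```
import litellm

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    stream=True,
    stream_options={"include_usage": True},
)

total_tokens = None
for chunk in response:
    if chunk.usage is not None:  # only the final chunk carries usage
        total_tokens = chunk.usage.total_tokens
print("total tokens:", total_tokens)
```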
@ -1,4 +1,9 @@
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: azure/gpt-35-turbo
      api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
      api_key: os.environ/AZURE_EUROPE_API_KEY
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: azure/chatgpt-v-2
@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.36.3"
version = "1.36.4"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "1.36.3"
version = "1.36.4"
version_files = [
    "pyproject.toml:^version"
]
@@ -150,6 +150,8 @@ model LiteLLM_EndUserTable {
   user_id String @id
   alias String? // admin-facing alias
   spend Float @default(0.0)
+  allowed_model_region String? // require all user requests to use models in this specific region
+  default_model String? // use along with 'allowed_model_region'. if no available model in region, default to this model.
   budget_id String?
   litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
   blocked Boolean @default(false)

tests/test_end_users.py (new file, 173 lines)
@@ -0,0 +1,173 @@
+# What is this?
+## Unit tests for the /end_users/* endpoints
+import pytest
+import asyncio
+import aiohttp
+import time
+import uuid
+from openai import AsyncOpenAI
+from typing import Optional
+
+"""
+- `/end_user/new`
+- `/end_user/info`
+"""
+
+
+async def chat_completion_with_headers(session, key, model="gpt-4"):
+    url = "http://0.0.0.0:4000/chat/completions"
+    headers = {
+        "Authorization": f"Bearer {key}",
+        "Content-Type": "application/json",
+    }
+    data = {
+        "model": model,
+        "messages": [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "Hello!"},
+        ],
+    }
+
+    async with session.post(url, headers=headers, json=data) as response:
+        status = response.status
+        response_text = await response.text()
+
+        print(response_text)
+        print()
+
+        if status != 200:
+            raise Exception(f"Request did not return a 200 status code: {status}")
+
+        response_header_check(
+            response
+        )  # calling the function to check response headers
+
+        raw_headers = response.raw_headers
+        raw_headers_json = {}
+
+        for (
+            item
+        ) in (
+            response.raw_headers
+        ):  # ((b'date', b'Fri, 19 Apr 2024 21:17:29 GMT'), (), )
+            raw_headers_json[item[0].decode("utf-8")] = item[1].decode("utf-8")
+
+        return raw_headers_json
+
+
+async def generate_key(
+    session,
+    i,
+    budget=None,
+    budget_duration=None,
+    models=["azure-models", "gpt-4", "dall-e-3"],
+    max_parallel_requests: Optional[int] = None,
+    user_id: Optional[str] = None,
+    team_id: Optional[str] = None,
+    calling_key="sk-1234",
+):
+    url = "http://0.0.0.0:4000/key/generate"
+    headers = {
+        "Authorization": f"Bearer {calling_key}",
+        "Content-Type": "application/json",
+    }
+    data = {
+        "models": models,
+        "aliases": {"mistral-7b": "gpt-3.5-turbo"},
+        "duration": None,
+        "max_budget": budget,
+        "budget_duration": budget_duration,
+        "max_parallel_requests": max_parallel_requests,
+        "user_id": user_id,
+        "team_id": team_id,
+    }
+
+    print(f"data: {data}")
+
+    async with session.post(url, headers=headers, json=data) as response:
+        status = response.status
+        response_text = await response.text()
+
+        print(f"Response {i} (Status code: {status}):")
+        print(response_text)
+        print()
+
+        if status != 200:
+            raise Exception(f"Request {i} did not return a 200 status code: {status}")
+
+        return await response.json()
+
+
+async def new_end_user(
+    session, i, user_id=str(uuid.uuid4()), model_region=None, default_model=None
+):
+    url = "http://0.0.0.0:4000/end_user/new"
+    headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
+    data = {
+        "user_id": user_id,
+        "allowed_model_region": model_region,
+        "default_model": default_model,
+    }
+
+    async with session.post(url, headers=headers, json=data) as response:
+        status = response.status
+        response_text = await response.text()
+
+        print(f"Response {i} (Status code: {status}):")
+        print(response_text)
+        print()
+
+        if status != 200:
+            raise Exception(f"Request {i} did not return a 200 status code: {status}")
+
+        return await response.json()
+
+
+@pytest.mark.asyncio
+async def test_end_user_new():
+    """
+    Make 20 parallel calls to /user/new. Assert all worked.
+    """
+    async with aiohttp.ClientSession() as session:
+        tasks = [new_end_user(session, i, str(uuid.uuid4())) for i in range(1, 11)]
+        await asyncio.gather(*tasks)
+
+
+@pytest.mark.asyncio
+async def test_end_user_specific_region():
+    """
+    - Specify region user can make calls in
+    - Make a generic call
+    - assert returned api base is for model in region
+
+    Repeat 3 times
+    """
+    key: str = ""
+    ## CREATE USER ##
+    async with aiohttp.ClientSession() as session:
+        end_user_obj = await new_end_user(
+            session=session,
+            i=0,
+            user_id=str(uuid.uuid4()),
+            model_region="eu",
+        )
+
+        ## MAKE CALL ##
+        key_gen = await generate_key(session=session, i=0, models=["gpt-3.5-turbo"])
+
+        key = key_gen["key"]
+
+        for _ in range(3):
+            client = AsyncOpenAI(api_key=key, base_url="http://0.0.0.0:4000")
+
+            print("SENDING USER PARAM - {}".format(end_user_obj["user_id"]))
+            result = await client.chat.completions.with_raw_response.create(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": "Hey!"}],
+                user=end_user_obj["user_id"],
+            )
+
+            assert (
+                result.headers.get("x-litellm-model-api-base")
+                == "https://my-endpoint-europe-berri-992.openai.azure.com/"
+            )
File diff suppressed because one or more lines are too long
@@ -0,0 +1 @@
+self.__BUILD_MANIFEST={__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/_error":["static/chunks/pages/_error-d6107f1aac0c574c.js"],sortedPages:["/_app","/_error"]},self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();

@@ -0,0 +1 @@
+self.__SSG_MANIFEST=new Set([]);self.__SSG_MANIFEST_CB&&self.__SSG_MANIFEST_CB()
File diff suppressed because one or more lines are too long
@@ -0,0 +1 @@
+(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_c23dc8', '__Inter_Fallback_c23dc8'",fontStyle:"normal"},className:"__className_c23dc8"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=87421)}),_N_E=n.O()}]);
File diff suppressed because one or more lines are too long
@@ -0,0 +1 @@
+(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);

@@ -0,0 +1 @@
+!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/a1602eb39f799143.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
-<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d85d62a2bbfac48f.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d85d62a2bbfac48f.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/7de0c97d470f519f.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[18889,[\"936\",\"static/chunks/2f6dbc85-17d29013b8ff3da5.js\",\"319\",\"static/chunks/319-4467f3d35ad11cf1.js\",\"931\",\"static/chunks/app/page-f32196ae7cd3d914.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/7de0c97d470f519f.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"OcLXYgLcgQyjMd6bH1bqU\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
+<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-5b257e1ab47d4b4a.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-5b257e1ab47d4b4a.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/a1602eb39f799143.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[25539,[\"936\",\"static/chunks/2f6dbc85-17d29013b8ff3da5.js\",\"566\",\"static/chunks/566-ccd699ab19124658.js\",\"931\",\"static/chunks/app/page-c804e862b63be987.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/a1602eb39f799143.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"K8KXTbmuI2ArWjjdMi2iq\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
@@ -1,7 +1,7 @@
 2:I[77831,[],""]
-3:I[18889,["936","static/chunks/2f6dbc85-17d29013b8ff3da5.js","319","static/chunks/319-4467f3d35ad11cf1.js","931","static/chunks/app/page-f32196ae7cd3d914.js"],""]
+3:I[25539,["936","static/chunks/2f6dbc85-17d29013b8ff3da5.js","566","static/chunks/566-ccd699ab19124658.js","931","static/chunks/app/page-c804e862b63be987.js"],""]
 4:I[5613,[],""]
 5:I[31778,[],""]
-0:["OcLXYgLcgQyjMd6bH1bqU",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/7de0c97d470f519f.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
+0:["K8KXTbmuI2ArWjjdMi2iq",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/a1602eb39f799143.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
 1:null

@@ -189,6 +189,7 @@ const CreateKeyPage = () => {
           userRole={userRole}
+          token={token}
           accessToken={accessToken}
           keys={keys}
         />
       )}
     </div>

@@ -786,7 +786,9 @@ export const adminTopKeysCall = async (accessToken: String) => {

 export const adminTopEndUsersCall = async (
   accessToken: String,
-  keyToken: String | null
+  keyToken: String | null,
+  startTime: String | undefined,
+  endTime: String | undefined
 ) => {
   try {
     let url = proxyBaseUrl
@@ -795,8 +797,11 @@ export const adminTopEndUsersCall = async (

     let body = "";
     if (keyToken) {
-      body = JSON.stringify({ api_key: keyToken });
+      body = JSON.stringify({ api_key: keyToken, startTime: startTime, endTime: endTime });
+    } else {
+      body = JSON.stringify({ startTime: startTime, endTime: endTime });
     }

     //message.info("Making top end users request");

     // Define requestOptions with body as an optional property
@@ -815,9 +820,7 @@ export const adminTopEndUsersCall = async (
       },
     };

-    if (keyToken) {
-      requestOptions.body = JSON.stringify({ api_key: keyToken });
-    }
+    requestOptions.body = body;

     const response = await fetch(url, requestOptions);
     if (!response.ok) {
@@ -341,7 +341,7 @@ const Settings: React.FC<SettingsPageProps> = ({
   return (
     <div className="w-full mx-4">
       <Grid numItems={1} className="gap-2 p-8 w-full mt-2">
-        <Callout title="Presidio PII / Guardrails Coming Soon" color="sky">
+        <Callout title="[UI] Presidio PII + Guardrails Coming Soon. https://docs.litellm.ai/docs/proxy/pii_masking" color="sky">

         </Callout>
         <TabGroup>
@@ -353,9 +353,6 @@ const Settings: React.FC<SettingsPageProps> = ({
           <TabPanel>

             <Card >
-              <Text>
-                Presidio + Guardrails coming soon
-              </Text>
               <Table>
                 <TableHead>
                   <TableRow>
Some files were not shown because too many files have changed in this diff.