Merge branch 'BerriAI:main' into ollama-image-handling

Commit c44970c813 by frob, 2024-05-09 20:25:30 +02:00, committed by GitHub
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)
102 changed files with 1394 additions and 393 deletions

.gitignore (vendored)

@@ -1,5 +1,6 @@
 .venv
 .env
+litellm/proxy/myenv/*
 litellm_uuid.txt
 __pycache__/
 *.pyc
@@ -52,3 +53,6 @@ litellm/proxy/_new_secret_config.yaml
 litellm/proxy/_new_secret_config.yaml
 litellm/proxy/_super_secret_config.yaml
 litellm/proxy/_super_secret_config.yaml
+litellm/proxy/myenv/bin/activate
+litellm/proxy/myenv/bin/Activate.ps1
+myenv/*

@@ -16,11 +16,11 @@ repos:
         name: Check if files match
         entry: python3 ci_cd/check_files_match.py
         language: system
-  - repo: local
-    hooks:
-      - id: mypy
-        name: mypy
-        entry: python3 -m mypy --ignore-missing-imports
-        language: system
-        types: [python]
-        files: ^litellm/
+  # - repo: local
+  #   hooks:
+  #     - id: mypy
+  #       name: mypy
+  #       entry: python3 -m mypy --ignore-missing-imports
+  #       language: system
+  #       types: [python]
+  #       files: ^litellm/

Binary file not shown.

@@ -0,0 +1,15 @@
{
"$schema": "https://schema.management.azure.com/schemas/0.1.2-preview/CreateUIDefinition.MultiVm.json#",
"handler": "Microsoft.Azure.CreateUIDef",
"version": "0.1.2-preview",
"parameters": {
"config": {
"isWizard": false,
"basics": { }
},
"basics": [ ],
"steps": [ ],
"outputs": { },
"resourceTypes": [ ]
}
}

@@ -0,0 +1,63 @@
{
"$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"parameters": {
"imageName": {
"type": "string",
"defaultValue": "ghcr.io/berriai/litellm:main-latest"
},
"containerName": {
"type": "string",
"defaultValue": "litellm-container"
},
"dnsLabelName": {
"type": "string",
"defaultValue": "litellm"
},
"portNumber": {
"type": "int",
"defaultValue": 4000
}
},
"resources": [
{
"type": "Microsoft.ContainerInstance/containerGroups",
"apiVersion": "2021-03-01",
"name": "[parameters('containerName')]",
"location": "[resourceGroup().location]",
"properties": {
"containers": [
{
"name": "[parameters('containerName')]",
"properties": {
"image": "[parameters('imageName')]",
"resources": {
"requests": {
"cpu": 1,
"memoryInGB": 2
}
},
"ports": [
{
"port": "[parameters('portNumber')]"
}
]
}
}
],
"osType": "Linux",
"restartPolicy": "Always",
"ipAddress": {
"type": "Public",
"ports": [
{
"protocol": "tcp",
"port": "[parameters('portNumber')]"
}
],
"dnsNameLabel": "[parameters('dnsLabelName')]"
}
}
}
]
}

@@ -0,0 +1,42 @@
param imageName string = 'ghcr.io/berriai/litellm:main-latest'
param containerName string = 'litellm-container'
param dnsLabelName string = 'litellm'
param portNumber int = 4000
resource containerGroupName 'Microsoft.ContainerInstance/containerGroups@2021-03-01' = {
name: containerName
location: resourceGroup().location
properties: {
containers: [
{
name: containerName
properties: {
image: imageName
resources: {
requests: {
cpu: 1
memoryInGB: 2
}
}
ports: [
{
port: portNumber
}
]
}
}
]
osType: 'Linux'
restartPolicy: 'Always'
ipAddress: {
type: 'Public'
ports: [
{
protocol: 'tcp'
port: portNumber
}
]
dnsNameLabel: dnsLabelName
}
}
}
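Both templates above run the ghcr.io/berriai/litellm:main-latest image behind a public DNS label on port 4000. As a rough sketch of how a deployed instance would be called, assuming the default `litellm` DNS label, an ACI region of eastus, and a proxy master key (all placeholders, not part of the templates), the proxy exposes an OpenAI-compatible API:

    import openai

    # Placeholder endpoint: <dnsLabelName>.<region>.azurecontainer.io on the template's port.
    client = openai.OpenAI(
        base_url="http://litellm.eastus.azurecontainer.io:4000",
        api_key="sk-placeholder-master-key",  # whatever key the proxy is configured with
    )

    resp = client.chat.completions.create(
        model="gpt-3.5-turbo",  # placeholder; any model routed by the proxy config
        messages=[{"role": "user", "content": "ping"}],
    )
    print(resp.choices[0].message.content)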

@@ -83,6 +83,7 @@ def completion(
     top_p: Optional[float] = None,
     n: Optional[int] = None,
     stream: Optional[bool] = None,
+    stream_options: Optional[dict] = None,
     stop=None,
     max_tokens: Optional[int] = None,
     presence_penalty: Optional[float] = None,
@@ -139,6 +140,10 @@ def completion(
 - `stream`: *boolean or null (optional)* - If set to true, it sends partial message deltas. Tokens will be sent as they become available, with the stream terminated by a [DONE] message.
+- `stream_options`: *dict or null (optional)* - Options for streaming response. Only set this when you set `stream: true`.
+
+  - `include_usage`: *boolean (optional)* - If set, an additional chunk will be streamed before the data: [DONE] message. The usage field on this chunk shows the token usage statistics for the entire request, and the choices field will always be an empty array. All other chunks will also include a usage field, but with a null value.
+
 - `stop`: *string/ array/ null (optional)* - Up to 4 sequences where the API will stop generating further tokens.
 - `max_tokens`: *integer (optional)* - The maximum number of tokens to generate in the chat completion.
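The new `stream_options` parameter mirrors the OpenAI option of the same name. A minimal sketch of how it would be passed through `litellm.completion` (the model name is a placeholder):

    import litellm

    # Ask the provider to append one final chunk whose `usage` field covers the
    # whole request; per the docs above, `choices` is empty on that chunk.
    response = litellm.completion(
        model="gpt-3.5-turbo",  # placeholder model
        messages=[{"role": "user", "content": "Say hi"}],
        stream=True,
        stream_options={"include_usage": True},
    )

    for chunk in response:
        print(chunk)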

@@ -291,7 +291,7 @@ def _create_clickhouse_aggregate_tables(client=None, table_names=[]):
 def _forecast_daily_cost(data: list):
-    import requests
+    import requests  # type: ignore
     from datetime import datetime, timedelta
 
     if len(data) == 0:

@@ -10,8 +10,8 @@
 # s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation
 import os
 import inspect
-import redis, litellm
-import redis.asyncio as async_redis
+import redis, litellm  # type: ignore
+import redis.asyncio as async_redis  # type: ignore
 from typing import List, Optional

@@ -10,7 +10,7 @@
 import os, json, time
 import litellm
 from litellm.utils import ModelResponse
-import requests, threading
+import requests, threading  # type: ignore
 from typing import Optional, Union, Literal

@@ -1,7 +1,6 @@
 #### What this does ####
 # On success + failure, log events to aispend.io
 import dotenv, os
-import requests
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback

@@ -4,18 +4,30 @@ import datetime
 class AthinaLogger:
     def __init__(self):
         import os
 
         self.athina_api_key = os.getenv("ATHINA_API_KEY")
         self.headers = {
             "athina-api-key": self.athina_api_key,
-            "Content-Type": "application/json"
+            "Content-Type": "application/json",
         }
         self.athina_logging_url = "https://log.athina.ai/api/v1/log/inference"
-        self.additional_keys = ["environment", "prompt_slug", "customer_id", "customer_user_id", "session_id", "external_reference_id", "context", "expected_response", "user_query"]
+        self.additional_keys = [
+            "environment",
+            "prompt_slug",
+            "customer_id",
+            "customer_user_id",
+            "session_id",
+            "external_reference_id",
+            "context",
+            "expected_response",
+            "user_query",
+        ]
 
     def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
-        import requests
+        import requests  # type: ignore
         import json
         import traceback
 
         try:
             response_json = response_obj.model_dump() if response_obj else {}
             data = {
@@ -23,32 +35,51 @@ class AthinaLogger:
                 "request": kwargs,
                 "response": response_json,
                 "prompt_tokens": response_json.get("usage", {}).get("prompt_tokens"),
-                "completion_tokens": response_json.get("usage", {}).get("completion_tokens"),
+                "completion_tokens": response_json.get("usage", {}).get(
+                    "completion_tokens"
+                ),
                 "total_tokens": response_json.get("usage", {}).get("total_tokens"),
             }
 
-            if type(end_time) == datetime.datetime and type(start_time) == datetime.datetime:
-                data["response_time"] = int((end_time - start_time).total_seconds() * 1000)
+            if (
+                type(end_time) == datetime.datetime
+                and type(start_time) == datetime.datetime
+            ):
+                data["response_time"] = int(
+                    (end_time - start_time).total_seconds() * 1000
+                )
 
             if "messages" in kwargs:
                 data["prompt"] = kwargs.get("messages", None)
 
             # Directly add tools or functions if present
             optional_params = kwargs.get("optional_params", {})
-            data.update((k, v) for k, v in optional_params.items() if k in ["tools", "functions"])
+            data.update(
+                (k, v)
+                for k, v in optional_params.items()
+                if k in ["tools", "functions"]
+            )
 
             # Add additional metadata keys
             metadata = kwargs.get("litellm_params", {}).get("metadata", {})
             if metadata:
                 for key in self.additional_keys:
                     if key in metadata:
                         data[key] = metadata[key]
 
-            response = requests.post(self.athina_logging_url, headers=self.headers, data=json.dumps(data, default=str))
+            response = requests.post(
+                self.athina_logging_url,
+                headers=self.headers,
+                data=json.dumps(data, default=str),
+            )
             if response.status_code != 200:
-                print_verbose(f"Athina Logger Error - {response.text}, {response.status_code}")
+                print_verbose(
+                    f"Athina Logger Error - {response.text}, {response.status_code}"
+                )
             else:
                 print_verbose(f"Athina Logger Succeeded - {response.text}")
         except Exception as e:
-            print_verbose(f"Athina Logger Error - {e}, Stack trace: {traceback.format_exc()}")
+            print_verbose(
+                f"Athina Logger Error - {e}, Stack trace: {traceback.format_exc()}"
+            )
             pass
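For context, AthinaLogger is driven through LiteLLM's callback mechanism rather than called directly. A hedged sketch of enabling it (the model name and key are placeholders; it assumes the "athina" callback name and the ATHINA_API_KEY variable read in __init__ above):

    import os
    import litellm

    os.environ["ATHINA_API_KEY"] = "placeholder-athina-key"  # read by AthinaLogger.__init__

    # Register Athina so successful completions get logged via log_event above.
    litellm.success_callback = ["athina"]

    litellm.completion(
        model="gpt-3.5-turbo",  # placeholder model
        messages=[{"role": "user", "content": "Hello"}],
        metadata={"environment": "staging"},  # picked up via additional_keys
    )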

@@ -1,7 +1,7 @@
 #### What this does ####
 # On success + failure, log events to aispend.io
 import dotenv, os
-import requests
+import requests  # type: ignore
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback

@@ -3,7 +3,6 @@
 #### What this does ####
 # On success, logs events to Promptlayer
 import dotenv, os
-import requests
 
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.caching import DualCache

@@ -1,7 +1,6 @@
 #### What this does ####
 # On success, logs events to Promptlayer
 import dotenv, os
-import requests
 
 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.caching import DualCache

@@ -2,7 +2,7 @@
 # On success + failure, log events to Supabase
 import dotenv, os
-import requests
+import requests  # type: ignore
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback

@@ -2,7 +2,7 @@
 # On success + failure, log events to Supabase
 import dotenv, os
-import requests
+import requests  # type: ignore
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback

@@ -1,15 +1,17 @@
-import requests
+import requests  # type: ignore
 import json
 import traceback
 from datetime import datetime, timezone
 
 
 class GreenscaleLogger:
     def __init__(self):
         import os
 
         self.greenscale_api_key = os.getenv("GREENSCALE_API_KEY")
         self.headers = {
             "api-key": self.greenscale_api_key,
-            "Content-Type": "application/json"
+            "Content-Type": "application/json",
         }
         self.greenscale_logging_url = os.getenv("GREENSCALE_ENDPOINT")
@@ -19,33 +21,48 @@ class GreenscaleLogger:
             data = {
                 "modelId": kwargs.get("model"),
                 "inputTokenCount": response_json.get("usage", {}).get("prompt_tokens"),
-                "outputTokenCount": response_json.get("usage", {}).get("completion_tokens"),
+                "outputTokenCount": response_json.get("usage", {}).get(
+                    "completion_tokens"
+                ),
             }
-            data["timestamp"] = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
-            if type(end_time) == datetime and type(start_time) == datetime:
-                data["invocationLatency"] = int((end_time - start_time).total_seconds() * 1000)
+            data["timestamp"] = datetime.now(timezone.utc).strftime(
+                "%Y-%m-%dT%H:%M:%SZ"
+            )
+
+            if type(end_time) == datetime and type(start_time) == datetime:
+                data["invocationLatency"] = int(
+                    (end_time - start_time).total_seconds() * 1000
+                )
 
             # Add additional metadata keys to tags
             tags = []
             metadata = kwargs.get("litellm_params", {}).get("metadata", {})
             for key, value in metadata.items():
                 if key.startswith("greenscale"):
                     if key == "greenscale_project":
                         data["project"] = value
                     elif key == "greenscale_application":
                         data["application"] = value
                     else:
-                        tags.append({"key": key.replace("greenscale_", ""), "value": str(value)})
+                        tags.append(
+                            {"key": key.replace("greenscale_", ""), "value": str(value)}
+                        )
 
             data["tags"] = tags
 
-            response = requests.post(self.greenscale_logging_url, headers=self.headers, data=json.dumps(data, default=str))
+            response = requests.post(
+                self.greenscale_logging_url,
+                headers=self.headers,
+                data=json.dumps(data, default=str),
+            )
             if response.status_code != 200:
-                print_verbose(f"Greenscale Logger Error - {response.text}, {response.status_code}")
+                print_verbose(
+                    f"Greenscale Logger Error - {response.text}, {response.status_code}"
+                )
             else:
                 print_verbose(f"Greenscale Logger Succeeded - {response.text}")
         except Exception as e:
-            print_verbose(f"Greenscale Logger Error - {e}, Stack trace: {traceback.format_exc()}")
+            print_verbose(
+                f"Greenscale Logger Error - {e}, Stack trace: {traceback.format_exc()}"
+            )
             pass

@@ -1,7 +1,7 @@
 #### What this does ####
 # On success, logs events to Helicone
 import dotenv, os
-import requests
+import requests  # type: ignore
 import litellm
 
 dotenv.load_dotenv()  # Loading env variables using dotenv

@@ -1,15 +1,14 @@
 #### What this does ####
 # On success, logs events to Langsmith
-import dotenv, os
-import requests
-import requests
+import dotenv, os  # type: ignore
+import requests  # type: ignore
 from datetime import datetime
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 import asyncio
 import types
-from pydantic import BaseModel
+from pydantic import BaseModel  # type: ignore
 
 
 def is_serializable(value):
@@ -79,8 +78,6 @@ class LangsmithLogger:
         except:
             response_obj = response_obj.dict()  # type: ignore
-        print(f"response_obj: {response_obj}")
 
         data = {
             "name": run_name,
             "run_type": "llm",  # this should always be llm, since litellm always logs llm calls. Langsmith allow us to log "chain"
@@ -90,7 +87,6 @@ class LangsmithLogger:
             "start_time": start_time,
             "end_time": end_time,
         }
-        print(f"data: {data}")
 
         response = requests.post(
             "https://api.smith.langchain.com/runs",

@@ -2,7 +2,6 @@
 ## On Success events log cost to OpenMeter - https://github.com/BerriAI/litellm/issues/1268
 import dotenv, os, json
-import requests
 import litellm
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
@@ -60,7 +59,7 @@ class OpenMeterLogger(CustomLogger):
             "total_tokens": response_obj["usage"].get("total_tokens"),
         }
 
-        subject = kwargs.get("user", None),  # end-user passed in via 'user' param
+        subject = (kwargs.get("user", None),)  # end-user passed in via 'user' param
         if not subject:
             raise Exception("OpenMeter: user is required")

@@ -3,7 +3,7 @@
 # On success, log events to Prometheus
 import dotenv, os
-import requests
+import requests  # type: ignore
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -19,7 +19,6 @@ class PrometheusLogger:
         **kwargs,
     ):
         try:
-            print(f"in init prometheus metrics")
             from prometheus_client import Counter
 
             self.litellm_llm_api_failed_requests_metric = Counter(

@@ -4,7 +4,7 @@
 import dotenv, os
-import requests
+import requests  # type: ignore
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -183,7 +183,6 @@ class PrometheusServicesLogger:
         )
 
     async def async_service_failure_hook(self, payload: ServiceLoggerPayload):
-        print(f"received error payload: {payload.error}")
         if self.mock_testing:
             self.mock_testing_failure_calls += 1

@@ -1,12 +1,13 @@
 #### What this does ####
 # On success, logs events to Promptlayer
 import dotenv, os
-import requests
+import requests  # type: ignore
 from pydantic import BaseModel
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 
 
 class PromptLayerLogger:
     # Class variables or attributes
     def __init__(self):
@@ -32,7 +33,11 @@ class PromptLayerLogger:
             tags = kwargs["litellm_params"]["metadata"]["pl_tags"]
 
             # Remove "pl_tags" from metadata
-            metadata = {k:v for k, v in kwargs["litellm_params"]["metadata"].items() if k != "pl_tags"}
+            metadata = {
+                k: v
+                for k, v in kwargs["litellm_params"]["metadata"].items()
+                if k != "pl_tags"
+            }
 
         print_verbose(
             f"Prompt Layer Logging - Enters logging function for model kwargs: {new_kwargs}\n, response: {response_obj}"

@@ -2,7 +2,6 @@
 # On success + failure, log events to Supabase
 import dotenv, os
-import requests
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback

@@ -2,7 +2,7 @@
 # On success + failure, log events to Supabase
 import dotenv, os
-import requests
+import requests  # type: ignore
 
 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback

@@ -1,8 +1,8 @@
 import os, types, traceback
 import json
 from enum import Enum
-import requests
-import time, httpx
+import requests  # type: ignore
+import time, httpx  # type: ignore
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Choices, Message
 import litellm

@@ -1,12 +1,12 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 import litellm
 from litellm.utils import ModelResponse, Choices, Message, Usage
-import httpx
+import httpx  # type: ignore
 
 
 class AlephAlphaError(Exception):

@@ -1,7 +1,7 @@
 import os, types
 import json
 from enum import Enum
-import requests, copy
+import requests, copy  # type: ignore
 import time
 from typing import Callable, Optional, List
 from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
@@ -9,7 +9,7 @@ import litellm
 from .prompt_templates.factory import prompt_factory, custom_prompt
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
 from .base import BaseLLM
-import httpx
+import httpx  # type: ignore
 
 
 class AnthropicConstants(Enum):

@@ -1,4 +1,4 @@
-from typing import Optional, Union, Any
+from typing import Optional, Union, Any, Literal
 import types, requests
 from .base import BaseLLM
 from litellm.utils import (
@@ -12,7 +12,7 @@ from litellm.utils import (
 from typing import Callable, Optional, BinaryIO
 from litellm import OpenAIConfig
 import litellm, json
-import httpx
+import httpx  # type: ignore
 from .custom_httpx.azure_dall_e_2 import CustomHTTPTransport, AsyncCustomHTTPTransport
 from openai import AzureOpenAI, AsyncAzureOpenAI
 import uuid
@@ -952,6 +952,81 @@ class AzureChatCompletion(BaseLLM):
             )
             raise e
def get_headers(
self,
model: Optional[str],
api_key: str,
api_base: str,
api_version: str,
timeout: float,
mode: str,
messages: Optional[list] = None,
input: Optional[list] = None,
prompt: Optional[str] = None,
) -> dict:
client_session = litellm.client_session or httpx.Client(
transport=CustomHTTPTransport(), # handle dall-e-2 calls
)
if "gateway.ai.cloudflare.com" in api_base:
## build base url - assume api base includes resource name
if not api_base.endswith("/"):
api_base += "/"
api_base += f"{model}"
client = AzureOpenAI(
base_url=api_base,
api_version=api_version,
api_key=api_key,
timeout=timeout,
http_client=client_session,
)
model = None
# cloudflare ai gateway, needs model=None
else:
client = AzureOpenAI(
api_version=api_version,
azure_endpoint=api_base,
api_key=api_key,
timeout=timeout,
http_client=client_session,
)
# only run this check if it's not cloudflare ai gateway
if model is None and mode != "image_generation":
raise Exception("model is not set")
completion = None
if messages is None:
messages = [{"role": "user", "content": "Hey"}]
try:
completion = client.chat.completions.with_raw_response.create(
model=model, # type: ignore
messages=messages, # type: ignore
)
except Exception as e:
raise e
response = {}
if completion is None or not hasattr(completion, "headers"):
raise Exception("invalid completion response")
if (
completion.headers.get("x-ratelimit-remaining-requests", None) is not None
): # not provided for dall-e requests
response["x-ratelimit-remaining-requests"] = completion.headers[
"x-ratelimit-remaining-requests"
]
if completion.headers.get("x-ratelimit-remaining-tokens", None) is not None:
response["x-ratelimit-remaining-tokens"] = completion.headers[
"x-ratelimit-remaining-tokens"
]
if completion.headers.get("x-ms-region", None) is not None:
response["x-ms-region"] = completion.headers["x-ms-region"]
return response
     async def ahealth_check(
         self,
         model: Optional[str],
@@ -963,7 +1038,7 @@ class AzureChatCompletion(BaseLLM):
         messages: Optional[list] = None,
         input: Optional[list] = None,
         prompt: Optional[str] = None,
-    ):
+    ) -> dict:
         client_session = litellm.aclient_session or httpx.AsyncClient(
             transport=AsyncCustomHTTPTransport(),  # handle dall-e-2 calls
         )
@@ -1040,4 +1115,8 @@ class AzureChatCompletion(BaseLLM):
             response["x-ratelimit-remaining-tokens"] = completion.headers[
                 "x-ratelimit-remaining-tokens"
             ]
+        if completion.headers.get("x-ms-region", None) is not None:
+            response["x-ms-region"] = completion.headers["x-ms-region"]
+
         return response

@@ -1,5 +1,5 @@
 from typing import Optional, Union, Any
-import types, requests
+import types, requests  # type: ignore
 from .base import BaseLLM
 from litellm.utils import (
     ModelResponse,

@@ -1,7 +1,7 @@
 import os
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable
 from litellm.utils import ModelResponse, Usage

@@ -163,10 +163,9 @@ class AmazonAnthropicClaude3Config:
             "stop",
             "temperature",
             "top_p",
-            "extra_headers"
+            "extra_headers",
         ]
 
-
     def map_openai_params(self, non_default_params: dict, optional_params: dict):
         for param, value in non_default_params.items():
             if param == "max_tokens":
@@ -534,10 +533,12 @@ class AmazonStabilityConfig:
 def add_custom_header(headers):
     """Closure to capture the headers and add them."""
+
     def callback(request, **kwargs):
         """Actual callback function that Boto3 will call."""
         for header_name, header_value in headers.items():
             request.headers.add_header(header_name, header_value)
+
     return callback
@@ -672,7 +673,9 @@ def init_bedrock_client(
             config=config,
         )
     if extra_headers:
-        client.meta.events.register('before-sign.bedrock-runtime.*', add_custom_header(extra_headers))
+        client.meta.events.register(
+            "before-sign.bedrock-runtime.*", add_custom_header(extra_headers)
+        )
 
     return client
@@ -1224,7 +1227,7 @@ def _embedding_func_single(
         "input_type", "search_document"
     )  # aws bedrock example default - https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=cohere.embed-english-v3
     data = {"texts": [input], **inference_params}  # type: ignore
-    body = json.dumps(data).encode("utf-8")
+    body = json.dumps(data).encode("utf-8")  # type: ignore
     ## LOGGING
     request_str = f"""
     response = client.invoke_model(
@@ -1416,7 +1419,7 @@ def image_generation(
     ## LOGGING
     request_str = f"""
     response = client.invoke_model(
-        body={body},
+        body={body},  # type: ignore
         modelId={modelId},
         accept="application/json",
         contentType="application/json",
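The `add_custom_header` closure in the hunk above is a standard boto3 event hook. A standalone sketch of the same pattern, with a placeholder region and header (not taken from the commit):

    import boto3

    def add_custom_header(headers):
        """Return a boto3 event handler that injects the given headers."""
        def callback(request, **kwargs):
            for header_name, header_value in headers.items():
                request.headers.add_header(header_name, header_value)
        return callback

    client = boto3.client("bedrock-runtime", region_name="us-east-1")  # placeholder region
    # Run the callback right before each bedrock-runtime request is signed.
    client.meta.events.register(
        "before-sign.bedrock-runtime.*", add_custom_header({"x-example-header": "value"})
    )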

@@ -1,11 +1,11 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 import litellm
-import httpx
+import httpx  # type: ignore
 from litellm.utils import ModelResponse, Usage
 from .prompt_templates.factory import prompt_factory, custom_prompt

@@ -1,12 +1,12 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time, traceback
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Choices, Message, Usage
 import litellm
-import httpx
+import httpx  # type: ignore
 
 
 class CohereError(Exception):

@@ -1,12 +1,12 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time, traceback
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Choices, Message, Usage
 import litellm
-import httpx
+import httpx  # type: ignore
 from .prompt_templates.factory import cohere_message_pt

@@ -1,7 +1,7 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time, traceback
 from typing import Callable, Optional, List
 from litellm.utils import ModelResponse, Choices, Message, Usage

@@ -1,7 +1,7 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 import litellm

@@ -1,10 +1,10 @@
 from itertools import chain
-import requests, types, time
+import requests, types, time  # type: ignore
 import json, uuid
 import traceback
 from typing import Optional
 import litellm
-import httpx, aiohttp, asyncio
+import httpx, aiohttp, asyncio  # type: ignore
 from .prompt_templates.factory import prompt_factory, custom_prompt
@@ -245,7 +245,10 @@ def get_ollama_response(
             tool_calls=[
                 {
                     "id": f"call_{str(uuid.uuid4())}",
-                    "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                    "function": {
+                        "name": function_call["name"],
+                        "arguments": json.dumps(function_call["arguments"]),
+                    },
                     "type": "function",
                 }
             ],
@@ -257,7 +260,9 @@ def get_ollama_response(
     model_response["created"] = int(time.time())
     model_response["model"] = "ollama/" + model
     prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt, disallowed_special=())))  # type: ignore
-    completion_tokens = response_json.get("eval_count", len(response_json.get("message",dict()).get("content", "")))
+    completion_tokens = response_json.get(
+        "eval_count", len(response_json.get("message", dict()).get("content", ""))
+    )
     model_response["usage"] = litellm.Usage(
         prompt_tokens=prompt_tokens,
         completion_tokens=completion_tokens,
@@ -298,7 +303,10 @@ def ollama_completion_stream(url, data, logging_obj):
             tool_calls=[
                 {
                     "id": f"call_{str(uuid.uuid4())}",
-                    "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                    "function": {
+                        "name": function_call["name"],
+                        "arguments": json.dumps(function_call["arguments"]),
+                    },
                     "type": "function",
                 }
             ],
@@ -339,9 +347,10 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob
         first_chunk_content = first_chunk.choices[0].delta.content or ""
         response_content = first_chunk_content + "".join(
             [
                 chunk.choices[0].delta.content
                 async for chunk in streamwrapper
-                if chunk.choices[0].delta.content]
+                if chunk.choices[0].delta.content
+            ]
         )
         function_call = json.loads(response_content)
         delta = litellm.utils.Delta(
@@ -349,7 +358,10 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob
             tool_calls=[
                 {
                     "id": f"call_{str(uuid.uuid4())}",
-                    "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                    "function": {
+                        "name": function_call["name"],
+                        "arguments": json.dumps(function_call["arguments"]),
+                    },
                     "type": "function",
                 }
             ],
@@ -398,7 +410,10 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
             tool_calls=[
                 {
                     "id": f"call_{str(uuid.uuid4())}",
-                    "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                    "function": {
+                        "name": function_call["name"],
+                        "arguments": json.dumps(function_call["arguments"]),
+                    },
                     "type": "function",
                 }
             ],
@@ -412,7 +427,10 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
         model_response["created"] = int(time.time())
         model_response["model"] = "ollama/" + data["model"]
         prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(data["prompt"], disallowed_special=())))  # type: ignore
-        completion_tokens = response_json.get("eval_count", len(response_json.get("message",dict()).get("content", "")))
+        completion_tokens = response_json.get(
+            "eval_count",
+            len(response_json.get("message", dict()).get("content", "")),
+        )
         model_response["usage"] = litellm.Usage(
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
@@ -500,6 +518,7 @@ async def ollama_aembeddings(
         }
     return model_response
 
+
 def ollama_embeddings(
     api_base: str,
     model: str,
@@ -517,5 +536,6 @@ def ollama_embeddings(
             optional_params,
             logging_obj,
             model_response,
-            encoding)
+            encoding,
+        )
     )
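For readers following the repeated hunks above: each handler takes the function call Ollama returns as JSON and wraps it into an OpenAI-style `tool_calls` entry. A rough sketch of that wrapping with a made-up function call (name and arguments are placeholders):

    import json
    import uuid

    # Placeholder payload in the shape litellm parses out of the Ollama response.
    function_call = {"name": "get_weather", "arguments": {"city": "Paris"}}

    tool_call = {
        "id": f"call_{str(uuid.uuid4())}",
        "function": {
            "name": function_call["name"],
            # arguments are re-serialized to a JSON string, matching the OpenAI format
            "arguments": json.dumps(function_call["arguments"]),
        },
        "type": "function",
    }
    print(tool_call)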

@@ -1,7 +1,7 @@
 import os
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Usage

@@ -22,7 +22,6 @@ from litellm.utils import (
     TextCompletionResponse,
 )
 from typing import Callable, Optional
-import aiohttp, requests
 import litellm
 from .prompt_templates.factory import prompt_factory, custom_prompt
 from openai import OpenAI, AsyncOpenAI
@@ -531,6 +530,7 @@ class OpenAIChatCompletion(BaseLLM):
                 model=model,
                 custom_llm_provider="openai",
                 logging_obj=logging_obj,
+                stream_options=data.get("stream_options", None),
             )
             return streamwrapper
@@ -580,6 +580,7 @@ class OpenAIChatCompletion(BaseLLM):
                 model=model,
                 custom_llm_provider="openai",
                 logging_obj=logging_obj,
+                stream_options=data.get("stream_options", None),
             )
             return streamwrapper
         except (

@@ -1,7 +1,7 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 import litellm

@@ -981,7 +981,7 @@ def anthropic_messages_pt(messages: list):
     # add role=tool support to allow function call result/error submission
     user_message_types = {"user", "tool", "function"}
     # reformat messages to ensure user/assistant are alternating, if there's either 2 consecutive 'user' messages or 2 consecutive 'assistant' message, merge them.
-    new_messages = []
+    new_messages: list = []
     msg_i = 0
     tool_use_param = False
     while msg_i < len(messages):

@@ -1,11 +1,11 @@
 import os, types
 import json
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Usage
 import litellm
-import httpx
+import httpx  # type: ignore
 from .prompt_templates.factory import prompt_factory, custom_prompt

@@ -1,14 +1,14 @@
 import os, types, traceback
 from enum import Enum
 import json
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional, Any
 import litellm
 from litellm.utils import ModelResponse, EmbeddingResponse, get_secret, Usage
 import sys
 from copy import deepcopy
-import httpx
+import httpx  # type: ignore
 from .prompt_templates.factory import prompt_factory, custom_prompt
@@ -295,7 +295,7 @@ def completion(
             EndpointName={model},
             InferenceComponentName={model_id},
             ContentType="application/json",
-            Body={data},
+            Body={data},  # type: ignore
             CustomAttributes="accept_eula=true",
         )
         """  # type: ignore
@@ -321,7 +321,7 @@ def completion(
         response = client.invoke_endpoint(
             EndpointName={model},
             ContentType="application/json",
-            Body={data},
+            Body={data},  # type: ignore
             CustomAttributes="accept_eula=true",
         )
         """  # type: ignore
@@ -688,7 +688,7 @@ def embedding(
     response = client.invoke_endpoint(
         EndpointName={model},
         ContentType="application/json",
-        Body={data},
+        Body={data},  # type: ignore
         CustomAttributes="accept_eula=true",
    )"""  # type: ignore
    logging_obj.pre_call(

@@ -6,11 +6,11 @@ Reference: https://docs.together.ai/docs/openai-api-compatibility
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 import litellm
-import httpx
+import httpx  # type: ignore
 from litellm.utils import ModelResponse, Usage
 from .prompt_templates.factory import prompt_factory, custom_prompt

@@ -1,12 +1,12 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional, Union, List
 from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
 import litellm, uuid
-import httpx, inspect
+import httpx, inspect  # type: ignore
 
 
 class VertexAIError(Exception):

@@ -3,7 +3,7 @@
 import os, types
 import json
 from enum import Enum
-import requests, copy
+import requests, copy  # type: ignore
 import time, uuid
 from typing import Callable, Optional, List
 from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
@@ -17,7 +17,7 @@ from .prompt_templates.factory import (
     extract_between_tags,
     parse_xml_params,
 )
-import httpx
+import httpx  # type: ignore
 
 
 class VertexAIError(Exception):

@@ -1,8 +1,8 @@
 import os
 import json
 from enum import Enum
-import requests
-import time, httpx
+import requests  # type: ignore
+import time, httpx  # type: ignore
 from typing import Callable, Any
 from litellm.utils import ModelResponse, Usage
 from .prompt_templates.factory import prompt_factory, custom_prompt

@@ -3,8 +3,8 @@ import json, types, time  # noqa: E401
 from contextlib import contextmanager
 from typing import Callable, Dict, Optional, Any, Union, List
-import httpx
-import requests
+import httpx  # type: ignore
+import requests  # type: ignore
 import litellm
 from litellm.utils import ModelResponse, get_secret, Usage

@@ -187,6 +187,7 @@ async def acompletion(
     top_p: Optional[float] = None,
     n: Optional[int] = None,
     stream: Optional[bool] = None,
+    stream_options: Optional[dict] = None,
     stop=None,
     max_tokens: Optional[int] = None,
     presence_penalty: Optional[float] = None,
@@ -206,6 +207,7 @@ async def acompletion(
     api_version: Optional[str] = None,
     api_key: Optional[str] = None,
     model_list: Optional[list] = None,  # pass in a list of api_base,keys, etc.
+    extra_headers: Optional[dict] = None,
     # Optional liteLLM function params
     **kwargs,
 ):
@@ -223,6 +225,7 @@ async def acompletion(
         top_p (float, optional): The top-p parameter for nucleus sampling (default is 1.0).
         n (int, optional): The number of completions to generate (default is 1).
         stream (bool, optional): If True, return a streaming response (default is False).
+        stream_options (dict, optional): A dictionary containing options for the streaming response. Only use this if stream is True.
         stop(string/list, optional): - Up to 4 sequences where the LLM API will stop generating further tokens.
         max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity).
         presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far.
@@ -260,6 +263,7 @@ async def acompletion(
         "top_p": top_p,
         "n": n,
         "stream": stream,
+        "stream_options": stream_options,
         "stop": stop,
         "max_tokens": max_tokens,
         "presence_penalty": presence_penalty,
@@ -457,6 +461,7 @@ def completion(
     top_p: Optional[float] = None,
     n: Optional[int] = None,
     stream: Optional[bool] = None,
+    stream_options: Optional[dict] = None,
     stop=None,
     max_tokens: Optional[int] = None,
     presence_penalty: Optional[float] = None,
@@ -496,6 +501,7 @@ def completion(
         top_p (float, optional): The top-p parameter for nucleus sampling (default is 1.0).
         n (int, optional): The number of completions to generate (default is 1).
         stream (bool, optional): If True, return a streaming response (default is False).
+        stream_options (dict, optional): A dictionary containing options for the streaming response. Only set this when you set stream: true.
         stop(string/list, optional): - Up to 4 sequences where the LLM API will stop generating further tokens.
         max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity).
         presence_penalty (float, optional): It is used to penalize new tokens based on their existence in the text so far.
@@ -573,6 +579,7 @@ def completion(
         "top_p",
         "n",
         "stream",
+        "stream_options",
         "stop",
         "max_tokens",
         "presence_penalty",
@@ -648,6 +655,8 @@ def completion(
         "base_model",
         "stream_timeout",
         "supports_system_message",
+        "region_name",
+        "allowed_model_region",
     ]
     default_params = openai_params + litellm_params
     non_default_params = {
@@ -783,6 +792,7 @@ def completion(
             top_p=top_p,
             n=n,
             stream=stream,
+            stream_options=stream_options,
             stop=stop,
             max_tokens=max_tokens,
             presence_penalty=presence_penalty,
@@ -2716,6 +2726,8 @@ def embedding(
         "ttl",
         "cache",
         "no-log",
+        "region_name",
+        "allowed_model_region",
     ]
     default_params = openai_params + litellm_params
     non_default_params = {
@@ -3589,6 +3601,8 @@ def image_generation(
         "caching_groups",
         "ttl",
         "cache",
+        "region_name",
+        "allowed_model_region",
     ]
     default_params = openai_params + litellm_params
     non_default_params = {
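Taken together, these hunks thread `stream_options` from the public `completion`/`acompletion` signatures down to the provider call. A hedged async sketch of the new parameter (model name is a placeholder; only OpenAI-compatible backends are expected to honor it):

    import asyncio
    import litellm

    async def main():
        stream = await litellm.acompletion(
            model="gpt-3.5-turbo",  # placeholder model
            messages=[{"role": "user", "content": "Count to three"}],
            stream=True,
            stream_options={"include_usage": True},  # forwarded via the new parameter
        )
        async for chunk in stream:
            print(chunk)

    asyncio.run(main())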

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@@ -1 +0,0 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{11837:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_12bbc4', '__Inter_Fallback_12bbc4'",fontStyle:"normal"},className:"__className_12bbc4"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=11837)}),_N_E=n.O()}]);

@@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_c23dc8', '__Inter_Fallback_c23dc8'",fontStyle:"normal"},className:"__className_c23dc8"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=87421)}),_N_E=n.O()}]);

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@@ -1 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{70377:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(70377)}),_N_E=e.O()}]); (self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);

@@ -1 +1 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/7de0c97d470f519f.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}(); !function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/a1602eb39f799143.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var 
o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d85d62a2bbfac48f.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d85d62a2bbfac48f.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/7de0c97d470f519f.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[18889,[\"936\",\"static/chunks/2f6dbc85-17d29013b8ff3da5.js\",\"319\",\"static/chunks/319-4467f3d35ad11cf1.js\",\"931\",\"static/chunks/app/page-f32196ae7cd3d914.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/7de0c97d470f519f.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"OcLXYgLcgQyjMd6bH1bqU\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html> <!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-5b257e1ab47d4b4a.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-5b257e1ab47d4b4a.js" crossorigin="" 
async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/a1602eb39f799143.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[25539,[\"936\",\"static/chunks/2f6dbc85-17d29013b8ff3da5.js\",\"566\",\"static/chunks/566-ccd699ab19124658.js\",\"931\",\"static/chunks/app/page-c804e862b63be987.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/a1602eb39f799143.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"K8KXTbmuI2ArWjjdMi2iq\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin 
UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[18889,["936","static/chunks/2f6dbc85-17d29013b8ff3da5.js","319","static/chunks/319-4467f3d35ad11cf1.js","931","static/chunks/app/page-f32196ae7cd3d914.js"],""] 3:I[25539,["936","static/chunks/2f6dbc85-17d29013b8ff3da5.js","566","static/chunks/566-ccd699ab19124658.js","931","static/chunks/app/page-c804e862b63be987.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["OcLXYgLcgQyjMd6bH1bqU",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/7de0c97d470f519f.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 0:["K8KXTbmuI2ArWjjdMi2iq",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/a1602eb39f799143.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@ -458,6 +458,27 @@ class UpdateUserRequest(GenerateRequestBase):
return values return values
class NewEndUserRequest(LiteLLMBase):
user_id: str
alias: Optional[str] = None # human-friendly alias
blocked: bool = False # allow/disallow requests for this end-user
max_budget: Optional[float] = None
budget_id: Optional[str] = None # give either a budget_id or max_budget
allowed_model_region: Optional[Literal["eu"]] = (
None # require all user requests to use models in this specific region
)
default_model: Optional[str] = (
None # if no equivalent model in allowed region - default all requests to this model
)
@root_validator(pre=True)
def check_user_info(cls, values):
if values.get("max_budget") is not None and values.get("budget_id") is not None:
raise ValueError("Set either 'max_budget' or 'budget_id', not both.")
return values
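
A minimal sketch of how this mutual-exclusion check behaves, using a standalone pydantic model rather than importing the proxy types. The class below only mirrors the budget fields of NewEndUserRequest for illustration; it is not the proxy's actual class, and it assumes pydantic v1-style validators as used in the diff.

```
from typing import Optional
from pydantic import BaseModel, root_validator


class EndUserRequestSketch(BaseModel):
    # Mirrors the budget fields on NewEndUserRequest, for illustration only.
    user_id: str
    max_budget: Optional[float] = None
    budget_id: Optional[str] = None

    @root_validator(pre=True)
    def check_user_info(cls, values):
        # Reject requests that set both a per-user budget and a shared budget id.
        if values.get("max_budget") is not None and values.get("budget_id") is not None:
            raise ValueError("Set either 'max_budget' or 'budget_id', not both.")
        return values


EndUserRequestSketch(user_id="u1", max_budget=10.0)    # ok
EndUserRequestSketch(user_id="u2", budget_id="b-123")  # ok
try:
    EndUserRequestSketch(user_id="u3", max_budget=10.0, budget_id="b-123")
except ValueError as e:  # pydantic wraps the ValueError in a ValidationError
    print(e)
```
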
class Member(LiteLLMBase): class Member(LiteLLMBase):
role: Literal["admin", "user"] role: Literal["admin", "user"]
user_id: Optional[str] = None user_id: Optional[str] = None
@ -494,6 +515,8 @@ class NewTeamRequest(TeamBase):
class GlobalEndUsersSpend(LiteLLMBase): class GlobalEndUsersSpend(LiteLLMBase):
api_key: Optional[str] = None api_key: Optional[str] = None
startTime: Optional[datetime] = None
endTime: Optional[datetime] = None
class TeamMemberAddRequest(LiteLLMBase): class TeamMemberAddRequest(LiteLLMBase):
@ -836,6 +859,7 @@ class UserAPIKeyAuth(
api_key: Optional[str] = None api_key: Optional[str] = None
user_role: Optional[Literal["proxy_admin", "app_owner", "app_user"]] = None user_role: Optional[Literal["proxy_admin", "app_owner", "app_user"]] = None
allowed_model_region: Optional[Literal["eu"]] = None
@root_validator(pre=True) @root_validator(pre=True)
def check_api_key(cls, values): def check_api_key(cls, values):
@ -881,6 +905,8 @@ class LiteLLM_EndUserTable(LiteLLMBase):
blocked: bool blocked: bool
alias: Optional[str] = None alias: Optional[str] = None
spend: float = 0.0 spend: float = 0.0
allowed_model_region: Optional[Literal["eu"]] = None
default_model: Optional[str] = None
litellm_budget_table: Optional[LiteLLM_BudgetTable] = None litellm_budget_table: Optional[LiteLLM_BudgetTable] = None
@root_validator(pre=True) @root_validator(pre=True)

View file

@ -208,7 +208,9 @@ async def get_end_user_object(
return None return None
# check if in cache # check if in cache
cached_user_obj = user_api_key_cache.async_get_cache(key=end_user_id) cached_user_obj = user_api_key_cache.async_get_cache(
key="end_user_id:{}".format(end_user_id)
)
if cached_user_obj is not None: if cached_user_obj is not None:
if isinstance(cached_user_obj, dict): if isinstance(cached_user_obj, dict):
return LiteLLM_EndUserTable(**cached_user_obj) return LiteLLM_EndUserTable(**cached_user_obj)
@ -223,7 +225,14 @@ async def get_end_user_object(
if response is None: if response is None:
raise Exception raise Exception
return LiteLLM_EndUserTable(**response.dict()) # save the end-user object to cache
await user_api_key_cache.async_set_cache(
key="end_user_id:{}".format(end_user_id), value=response
)
_response = LiteLLM_EndUserTable(**response.dict())
return _response
except Exception as e: # if end-user not in db except Exception as e: # if end-user not in db
return None return None
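
A small, self-contained sketch of the read-through pattern introduced here: end-user records are now cached under an "end_user_id:{id}" key, checked before the DB, and written back after a DB hit. The in-memory dict and fake DB below are stand-ins for the proxy's user_api_key_cache and Prisma client, not the real objects.

```
import asyncio
from typing import Optional

_cache: dict = {}  # stand-in for user_api_key_cache
_db = {"ishaan": {"user_id": "ishaan", "blocked": False, "allowed_model_region": "eu"}}


async def get_end_user_object_sketch(end_user_id: str) -> Optional[dict]:
    cache_key = "end_user_id:{}".format(end_user_id)

    # 1. check the cache first
    cached = _cache.get(cache_key)
    if cached is not None:
        return cached

    # 2. fall back to the database
    record = _db.get(end_user_id)
    if record is None:
        return None  # unknown end-user -> caller treats this as "no end-user object"

    # 3. write back to the cache so the next request skips the DB
    _cache[cache_key] = record
    return record


print(asyncio.run(get_end_user_object_sketch("ishaan")))
```
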

View file

@ -252,7 +252,7 @@ def run_server(
if model and "ollama" in model and api_base is None: if model and "ollama" in model and api_base is None:
run_ollama_serve() run_ollama_serve()
if test_async is True: if test_async is True:
import requests, concurrent, time import requests, concurrent, time # type: ignore
api_base = f"http://{host}:{port}" api_base = f"http://{host}:{port}"
@ -418,7 +418,7 @@ def run_server(
read from there and save it to os.env['DATABASE_URL'] read from there and save it to os.env['DATABASE_URL']
""" """
try: try:
import yaml, asyncio import yaml, asyncio # type: ignore
except: except:
raise ImportError( raise ImportError(
"yaml needs to be imported. Run - `pip install 'litellm[proxy]'`" "yaml needs to be imported. Run - `pip install 'litellm[proxy]'`"

View file

@ -30,7 +30,7 @@ sys.path.insert(
try: try:
import fastapi import fastapi
import backoff import backoff
import yaml import yaml # type: ignore
import orjson import orjson
import logging import logging
from apscheduler.schedulers.asyncio import AsyncIOScheduler from apscheduler.schedulers.asyncio import AsyncIOScheduler
@ -231,6 +231,11 @@ class SpecialModelNames(enum.Enum):
all_team_models = "all-team-models" all_team_models = "all-team-models"
class CommonProxyErrors(enum.Enum):
db_not_connected_error = "DB not connected"
no_llm_router = "No models configured on proxy"
@app.exception_handler(ProxyException) @app.exception_handler(ProxyException)
async def openai_exception_handler(request: Request, exc: ProxyException): async def openai_exception_handler(request: Request, exc: ProxyException):
# NOTE: DO NOT MODIFY THIS, its crucial to map to Openai exceptions # NOTE: DO NOT MODIFY THIS, its crucial to map to Openai exceptions
@ -467,10 +472,6 @@ async def user_api_key_auth(
prisma_client=prisma_client, prisma_client=prisma_client,
user_api_key_cache=user_api_key_cache, user_api_key_cache=user_api_key_cache,
) )
# save the end-user object to cache
await user_api_key_cache.async_set_cache(
key=end_user_id, value=end_user_object
)
global_proxy_spend = None global_proxy_spend = None
if litellm.max_budget > 0: # user set proxy max budget if litellm.max_budget > 0: # user set proxy max budget
@ -952,13 +953,16 @@ async def user_api_key_auth(
_end_user_object = None _end_user_object = None
if "user" in request_data: if "user" in request_data:
_id = "end_user_id:{}".format(request_data["user"]) _end_user_object = await get_end_user_object(
_end_user_object = await user_api_key_cache.async_get_cache(key=_id) end_user_id=request_data["user"],
if _end_user_object is not None: prisma_client=prisma_client,
_end_user_object = LiteLLM_EndUserTable(**_end_user_object) user_api_key_cache=user_api_key_cache,
)
global_proxy_spend = None global_proxy_spend = None
if litellm.max_budget > 0: # user set proxy max budget if (
litellm.max_budget > 0 and prisma_client is not None
): # user set proxy max budget
# check cache # check cache
global_proxy_spend = await user_api_key_cache.async_get_cache( global_proxy_spend = await user_api_key_cache.async_get_cache(
key="{}:spend".format(litellm_proxy_admin_name) key="{}:spend".format(litellm_proxy_admin_name)
@ -1011,6 +1015,12 @@ async def user_api_key_auth(
) )
valid_token_dict = _get_pydantic_json_dict(valid_token) valid_token_dict = _get_pydantic_json_dict(valid_token)
valid_token_dict.pop("token", None) valid_token_dict.pop("token", None)
if _end_user_object is not None:
valid_token_dict["allowed_model_region"] = (
_end_user_object.allowed_model_region
)
""" """
asyncio create task to update the user api key cache with the user db table as well asyncio create task to update the user api key cache with the user db table as well
@ -1035,10 +1045,7 @@ async def user_api_key_auth(
# check if user can access this route # check if user can access this route
query_params = request.query_params query_params = request.query_params
key = query_params.get("key") key = query_params.get("key")
if ( if key is not None and hash_token(token=key) != api_key:
key is not None
and prisma_client.hash_token(token=key) != api_key
):
raise HTTPException( raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN, status_code=status.HTTP_403_FORBIDDEN,
detail="user not allowed to access this key's info", detail="user not allowed to access this key's info",
@ -1091,6 +1098,7 @@ async def user_api_key_auth(
# sso/login, ui/login, /key functions and /user functions # sso/login, ui/login, /key functions and /user functions
# this will never be allowed to call /chat/completions # this will never be allowed to call /chat/completions
token_team = getattr(valid_token, "team_id", None) token_team = getattr(valid_token, "team_id", None)
if token_team is not None and token_team == "litellm-dashboard": if token_team is not None and token_team == "litellm-dashboard":
# this token is only used for managing the ui # this token is only used for managing the ui
allowed_routes = [ allowed_routes = [
@ -3612,6 +3620,10 @@ async def chat_completion(
**data, **data,
} # add the team-specific configs to the completion call } # add the team-specific configs to the completion call
### END-USER SPECIFIC PARAMS ###
if user_api_key_dict.allowed_model_region is not None:
data["allowed_model_region"] = user_api_key_dict.allowed_model_region
global user_temperature, user_request_timeout, user_max_tokens, user_api_base global user_temperature, user_request_timeout, user_max_tokens, user_api_base
# override with user settings, these are params passed via cli # override with user settings, these are params passed via cli
if user_temperature: if user_temperature:
@ -3719,6 +3731,7 @@ async def chat_completion(
"x-litellm-model-id": model_id, "x-litellm-model-id": model_id,
"x-litellm-cache-key": cache_key, "x-litellm-cache-key": cache_key,
"x-litellm-model-api-base": api_base, "x-litellm-model-api-base": api_base,
"x-litellm-version": version,
} }
selected_data_generator = select_data_generator( selected_data_generator = select_data_generator(
response=response, response=response,
@ -3734,6 +3747,7 @@ async def chat_completion(
fastapi_response.headers["x-litellm-model-id"] = model_id fastapi_response.headers["x-litellm-model-id"] = model_id
fastapi_response.headers["x-litellm-cache-key"] = cache_key fastapi_response.headers["x-litellm-cache-key"] = cache_key
fastapi_response.headers["x-litellm-model-api-base"] = api_base fastapi_response.headers["x-litellm-model-api-base"] = api_base
fastapi_response.headers["x-litellm-version"] = version
### CALL HOOKS ### - modify outgoing data ### CALL HOOKS ### - modify outgoing data
response = await proxy_logging_obj.post_call_success_hook( response = await proxy_logging_obj.post_call_success_hook(
@ -3890,14 +3904,10 @@ async def completion(
}, },
) )
if hasattr(response, "_hidden_params"): hidden_params = getattr(response, "_hidden_params", {}) or {}
model_id = response._hidden_params.get("model_id", None) or "" model_id = hidden_params.get("model_id", None) or ""
original_response = ( cache_key = hidden_params.get("cache_key", None) or ""
response._hidden_params.get("original_response", None) or "" api_base = hidden_params.get("api_base", None) or ""
)
else:
model_id = ""
original_response = ""
verbose_proxy_logger.debug("final response: %s", response) verbose_proxy_logger.debug("final response: %s", response)
if ( if (
@ -3905,6 +3915,9 @@ async def completion(
): # use generate_responses to stream responses ): # use generate_responses to stream responses
custom_headers = { custom_headers = {
"x-litellm-model-id": model_id, "x-litellm-model-id": model_id,
"x-litellm-cache-key": cache_key,
"x-litellm-model-api-base": api_base,
"x-litellm-version": version,
} }
selected_data_generator = select_data_generator( selected_data_generator = select_data_generator(
response=response, response=response,
@ -3919,6 +3932,10 @@ async def completion(
) )
fastapi_response.headers["x-litellm-model-id"] = model_id fastapi_response.headers["x-litellm-model-id"] = model_id
fastapi_response.headers["x-litellm-cache-key"] = cache_key
fastapi_response.headers["x-litellm-model-api-base"] = api_base
fastapi_response.headers["x-litellm-version"] = version
return response return response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting data["litellm_status"] = "fail" # used for alerting
@ -3958,6 +3975,7 @@ async def completion(
) # azure compatible endpoint ) # azure compatible endpoint
async def embeddings( async def embeddings(
request: Request, request: Request,
fastapi_response: Response,
model: Optional[str] = None, model: Optional[str] = None,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
): ):
@ -4104,6 +4122,17 @@ async def embeddings(
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting data["litellm_status"] = "success" # used for alerting
### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {}
model_id = hidden_params.get("model_id", None) or ""
cache_key = hidden_params.get("cache_key", None) or ""
api_base = hidden_params.get("api_base", None) or ""
fastapi_response.headers["x-litellm-model-id"] = model_id
fastapi_response.headers["x-litellm-cache-key"] = cache_key
fastapi_response.headers["x-litellm-model-api-base"] = api_base
fastapi_response.headers["x-litellm-version"] = version
return response return response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting data["litellm_status"] = "fail" # used for alerting
@ -4142,6 +4171,7 @@ async def embeddings(
) )
async def image_generation( async def image_generation(
request: Request, request: Request,
fastapi_response: Response,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
): ):
global proxy_logging_obj global proxy_logging_obj
@ -4261,6 +4291,17 @@ async def image_generation(
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting data["litellm_status"] = "success" # used for alerting
### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {}
model_id = hidden_params.get("model_id", None) or ""
cache_key = hidden_params.get("cache_key", None) or ""
api_base = hidden_params.get("api_base", None) or ""
fastapi_response.headers["x-litellm-model-id"] = model_id
fastapi_response.headers["x-litellm-cache-key"] = cache_key
fastapi_response.headers["x-litellm-model-api-base"] = api_base
fastapi_response.headers["x-litellm-version"] = version
return response return response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting data["litellm_status"] = "fail" # used for alerting
@ -4297,6 +4338,7 @@ async def image_generation(
) )
async def audio_transcriptions( async def audio_transcriptions(
request: Request, request: Request,
fastapi_response: Response,
file: UploadFile = File(...), file: UploadFile = File(...),
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
): ):
@ -4441,6 +4483,18 @@ async def audio_transcriptions(
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting data["litellm_status"] = "success" # used for alerting
### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {}
model_id = hidden_params.get("model_id", None) or ""
cache_key = hidden_params.get("cache_key", None) or ""
api_base = hidden_params.get("api_base", None) or ""
fastapi_response.headers["x-litellm-model-id"] = model_id
fastapi_response.headers["x-litellm-cache-key"] = cache_key
fastapi_response.headers["x-litellm-model-api-base"] = api_base
fastapi_response.headers["x-litellm-version"] = version
return response return response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting data["litellm_status"] = "fail" # used for alerting
@ -4480,6 +4534,7 @@ async def audio_transcriptions(
) )
async def moderations( async def moderations(
request: Request, request: Request,
fastapi_response: Response,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
): ):
""" """
@ -4604,6 +4659,17 @@ async def moderations(
### ALERTING ### ### ALERTING ###
data["litellm_status"] = "success" # used for alerting data["litellm_status"] = "success" # used for alerting
### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {}
model_id = hidden_params.get("model_id", None) or ""
cache_key = hidden_params.get("cache_key", None) or ""
api_base = hidden_params.get("api_base", None) or ""
fastapi_response.headers["x-litellm-model-id"] = model_id
fastapi_response.headers["x-litellm-cache-key"] = cache_key
fastapi_response.headers["x-litellm-model-api-base"] = api_base
fastapi_response.headers["x-litellm-version"] = version
return response return response
except Exception as e: except Exception as e:
data["litellm_status"] = "fail" # used for alerting data["litellm_status"] = "fail" # used for alerting
@ -5809,35 +5875,38 @@ async def global_spend_end_users(data: Optional[GlobalEndUsersSpend] = None):
if prisma_client is None: if prisma_client is None:
raise HTTPException(status_code=500, detail={"error": "No db connected"}) raise HTTPException(status_code=500, detail={"error": "No db connected"})
if data is None: """
sql_query = f"""SELECT * FROM "Last30dTopEndUsersSpend";""" Gets the top 100 end-users for a given api key
"""
startTime = None
endTime = None
selected_api_key = None
if data is not None:
startTime = data.startTime
endTime = data.endTime
selected_api_key = data.api_key
response = await prisma_client.db.query_raw(query=sql_query) startTime = startTime or datetime.now() - timedelta(days=30)
else: endTime = endTime or datetime.now()
"""
Gets the top 100 end-users for a given api key sql_query = """
""" SELECT end_user, COUNT(*) AS total_count, SUM(spend) AS total_spend
current_date = datetime.now() FROM "LiteLLM_SpendLogs"
past_date = current_date - timedelta(days=30) WHERE "startTime" >= $1::timestamp
response = await prisma_client.db.litellm_spendlogs.group_by( # type: ignore AND "startTime" < $2::timestamp
by=["end_user"], AND (
where={ CASE
"AND": [{"startTime": {"gte": past_date}}, {"api_key": data.api_key}] # type: ignore WHEN $3::TEXT IS NULL THEN TRUE
}, ELSE api_key = $3
sum={"spend": True}, END
order={"_sum": {"spend": "desc"}}, # type: ignore )
take=100, GROUP BY end_user
count=True, ORDER BY total_spend DESC
) LIMIT 100
if response is not None and isinstance(response, list): """
new_response = [] response = await prisma_client.db.query_raw(
for r in response: sql_query, startTime, endTime, selected_api_key
new_r = r )
new_r["total_spend"] = r["_sum"]["spend"]
new_r["total_count"] = r["_count"]["_all"]
new_r.pop("_sum")
new_r.pop("_count")
new_response.append(new_r)
return response return response
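
A sketch of calling this spend report with the new optional time window. The route path and port are assumptions based on the proxy's conventions (the decorator sits outside this hunk); startTime and endTime default to the last 30 days when omitted, and api_key is optional because of the CASE WHEN $3 IS NULL guard in the query.

```
import requests  # type: ignore
from datetime import datetime, timedelta

payload = {
    # All three fields are optional (GlobalEndUsersSpend); omit them to get
    # the top end-users across all keys for the default 30-day window.
    "api_key": "sk-my-virtual-key",
    "startTime": (datetime.now() - timedelta(days=7)).isoformat(),
    "endTime": datetime.now().isoformat(),
}

resp = requests.post(
    "http://0.0.0.0:4000/global/spend/end_users",  # assumed route for global_spend_end_users
    headers={"Authorization": "Bearer sk-1234"},
    json=payload,
)
print(resp.json())  # [{"end_user": ..., "total_count": ..., "total_spend": ...}, ...]
```
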
@ -5883,7 +5952,7 @@ async def global_predict_spend_logs(request: Request):
return _forecast_daily_cost(data) return _forecast_daily_cost(data)
#### USER MANAGEMENT #### #### INTERNAL USER MANAGEMENT ####
@router.post( @router.post(
"/user/new", "/user/new",
tags=["user management"], tags=["user management"],
@ -6376,6 +6445,43 @@ async def user_get_requests():
) )
@router.get(
"/user/get_users",
tags=["user management"],
dependencies=[Depends(user_api_key_auth)],
)
async def get_users(
role: str = fastapi.Query(
default=None,
description="Either 'proxy_admin', 'proxy_viewer', 'app_owner', 'app_user'",
)
):
"""
[BETA] This could change without notice. Give feedback - https://github.com/BerriAI/litellm/issues
Get all users who have a specific `user_role`.
Used by the UI to populate the user lists.
Currently - admin-only endpoint.
"""
global prisma_client
if prisma_client is None:
raise HTTPException(
status_code=500,
detail={"error": f"No db connected. prisma client={prisma_client}"},
)
all_users = await prisma_client.get_data(
table_name="user", query_type="find_all", key_val={"user_role": role}
)
return all_users
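
A usage sketch for this admin-only listing endpoint; the proxy address and key are placeholders.

```
import requests  # type: ignore

resp = requests.get(
    "http://0.0.0.0:4000/user/get_users",
    params={"role": "proxy_admin"},  # or proxy_viewer / app_owner / app_user
    headers={"Authorization": "Bearer sk-1234"},
)
print(resp.json())  # users whose user_role matches the query
```
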
#### END-USER MANAGEMENT ####
@router.post( @router.post(
"/end_user/block", "/end_user/block",
tags=["End User Management"], tags=["End User Management"],
@ -6466,38 +6572,140 @@ async def unblock_user(data: BlockUsers):
return {"blocked_users": litellm.blocked_user_list} return {"blocked_users": litellm.blocked_user_list}
@router.get( @router.post(
"/user/get_users", "/end_user/new",
tags=["user management"], tags=["End User Management"],
dependencies=[Depends(user_api_key_auth)], dependencies=[Depends(user_api_key_auth)],
) )
async def get_users( async def new_end_user(
role: str = fastapi.Query( data: NewEndUserRequest,
default=None, user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
description="Either 'proxy_admin', 'proxy_viewer', 'app_owner', 'app_user'",
)
): ):
""" """
[BETA] This could change without notice. Give feedback - https://github.com/BerriAI/litellm/issues [TODO] Needs to be implemented.
Get all users who are a specific `user_role`. Allow creating a new end-user
Used by the UI to populate the user lists. - Allow specifying allowed regions
- Allow specifying default model
Currently - admin-only endpoint. Example curl:
```
curl --location 'http://0.0.0.0:4000/end_user/new' \
--header 'Authorization: Bearer sk-1234' \
--header 'Content-Type: application/json' \
--data '{
"end_user_id" : "ishaan-jaff-3", <- specific customer
"allowed_region": "eu" <- set region for models
+
"default_model": "azure/gpt-3.5-turbo-eu" <- all calls from this user, use this model?
}'
# return end-user object
```
""" """
global prisma_client global prisma_client, llm_router
"""
Validation:
- check if default model exists
- create budget object if not already created
- Add user to end user table
Return
- end-user object
- currently allowed models
"""
if prisma_client is None: if prisma_client is None:
raise HTTPException( raise HTTPException(
status_code=500, status_code=500,
detail={"error": f"No db connected. prisma client={prisma_client}"}, detail={"error": CommonProxyErrors.db_not_connected_error.value},
) )
all_users = await prisma_client.get_data(
table_name="user", query_type="find_all", key_val={"user_role": role} ## VALIDATION ##
if data.default_model is not None:
if llm_router is None:
raise HTTPException(
status_code=422, detail={"error": CommonProxyErrors.no_llm_router.value}
)
elif data.default_model not in llm_router.get_model_names():
raise HTTPException(
status_code=422,
detail={
"error": "Default Model not on proxy. Configure via `/model/new` or config.yaml. Default_model={}, proxy_model_names={}".format(
data.default_model, set(llm_router.get_model_names())
)
},
)
new_end_user_obj: Dict = {}
## CREATE BUDGET ## if set
if data.max_budget is not None:
budget_record = await prisma_client.db.litellm_budgettable.create(
data={
"max_budget": data.max_budget,
"created_by": user_api_key_dict.user_id or litellm_proxy_admin_name, # type: ignore
"updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
}
)
new_end_user_obj["budget_id"] = budget_record.budget_id
elif data.budget_id is not None:
new_end_user_obj["budget_id"] = data.budget_id
_user_data = data.dict(exclude_none=True)
for k, v in _user_data.items():
if k != "max_budget" and k != "budget_id":
new_end_user_obj[k] = v
## WRITE TO DB ##
end_user_record = await prisma_client.db.litellm_endusertable.create(
data=new_end_user_obj # type: ignore
) )
return all_users return end_user_record
@router.post(
"/end_user/info",
tags=["End User Management"],
dependencies=[Depends(user_api_key_auth)],
)
async def end_user_info():
"""
[TODO] Needs to be implemented.
"""
pass
@router.post(
"/end_user/update",
tags=["End User Management"],
dependencies=[Depends(user_api_key_auth)],
)
async def update_end_user():
"""
[TODO] Needs to be implemented.
"""
pass
@router.post(
"/end_user/delete",
tags=["End User Management"],
dependencies=[Depends(user_api_key_auth)],
)
async def delete_end_user():
"""
[TODO] Needs to be implemented.
"""
pass
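
Putting the pieces of this PR together, a rough end-to-end sketch: register an end-user pinned to EU models via /end_user/new, then send requests with that user id so user_api_key_auth attaches allowed_model_region and the router filters to EU deployments. The proxy URL, key, and model names are placeholders, not values from this diff.

```
import requests  # type: ignore

BASE = "http://0.0.0.0:4000"
HEADERS = {"Authorization": "Bearer sk-1234"}

# 1. create the end-user (NewEndUserRequest fields)
requests.post(
    f"{BASE}/end_user/new",
    headers=HEADERS,
    json={
        "user_id": "ishaan-jaff-3",
        "allowed_model_region": "eu",               # only 'eu' is honoured today
        "default_model": "azure/gpt-3.5-turbo-eu",  # must be a model the router knows
        "max_budget": 25.0,                         # or pass "budget_id" instead
    },
)

# 2. calls that identify this end-user are now region-filtered by the router
resp = requests.post(
    f"{BASE}/chat/completions",
    headers=HEADERS,
    json={
        "model": "gpt-3.5-turbo",
        "user": "ishaan-jaff-3",  # proxy looks up the end-user and sets allowed_model_region
        "messages": [{"role": "user", "content": "hello"}],
    },
)
print(resp.headers.get("x-litellm-model-api-base"))  # should point at an EU deployment
```
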
#### TEAM MANAGEMENT #### #### TEAM MANAGEMENT ####

View file

@ -150,6 +150,8 @@ model LiteLLM_EndUserTable {
user_id String @id user_id String @id
alias String? // admin-facing alias alias String? // admin-facing alias
spend Float @default(0.0) spend Float @default(0.0)
allowed_model_region String? // require all user requests to use models in this specific region
default_model String? // use along with 'allowed_model_region'. if no available model in region, default to this model.
budget_id String? budget_id String?
litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id]) litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
blocked Boolean @default(false) blocked Boolean @default(false)

View file

@ -526,7 +526,7 @@ class PrismaClient:
finally: finally:
os.chdir(original_dir) os.chdir(original_dir)
# Now you can import the Prisma Client # Now you can import the Prisma Client
from prisma import Prisma # type: ignore from prisma import Prisma
self.db = Prisma() # Client to connect to Prisma db self.db = Prisma() # Client to connect to Prisma db
@ -1689,12 +1689,12 @@ def get_instance_fn(value: str, config_file_path: Optional[str] = None) -> Any:
module_file_path = os.path.join(directory, *module_name.split(".")) module_file_path = os.path.join(directory, *module_name.split("."))
module_file_path += ".py" module_file_path += ".py"
spec = importlib.util.spec_from_file_location(module_name, module_file_path) spec = importlib.util.spec_from_file_location(module_name, module_file_path) # type: ignore
if spec is None: if spec is None:
raise ImportError( raise ImportError(
f"Could not find a module specification for {module_file_path}" f"Could not find a module specification for {module_file_path}"
) )
module = importlib.util.module_from_spec(spec) module = importlib.util.module_from_spec(spec) # type: ignore
spec.loader.exec_module(module) # type: ignore spec.loader.exec_module(module) # type: ignore
else: else:
# Dynamically import the module # Dynamically import the module

View file

@ -32,6 +32,7 @@ from litellm.utils import (
CustomStreamWrapper, CustomStreamWrapper,
get_utc_datetime, get_utc_datetime,
calculate_max_parallel_requests, calculate_max_parallel_requests,
_is_region_eu,
) )
import copy import copy
from litellm._logging import verbose_router_logger from litellm._logging import verbose_router_logger
@ -1999,7 +2000,11 @@ class Router:
# user can pass vars directly or they can pass os.environ/AZURE_API_KEY, in which case we will read the env # we do this here because we init clients for Azure, OpenAI and we need to set the right key
# we do this here because we init clients for Azure, OpenAI and we need to set the right key # we do this here because we init clients for Azure, OpenAI and we need to set the right key
api_key = litellm_params.get("api_key") or default_api_key api_key = litellm_params.get("api_key") or default_api_key
if api_key and api_key.startswith("os.environ/"): if (
api_key
and isinstance(api_key, str)
and api_key.startswith("os.environ/")
):
api_key_env_name = api_key.replace("os.environ/", "") api_key_env_name = api_key.replace("os.environ/", "")
api_key = litellm.get_secret(api_key_env_name) api_key = litellm.get_secret(api_key_env_name)
litellm_params["api_key"] = api_key litellm_params["api_key"] = api_key
@ -2023,6 +2028,7 @@ class Router:
if ( if (
is_azure_ai_studio_model == True is_azure_ai_studio_model == True
and api_base is not None and api_base is not None
and isinstance(api_base, str)
and not api_base.endswith("/v1/") and not api_base.endswith("/v1/")
): ):
# check if it ends with a trailing slash # check if it ends with a trailing slash
@ -2103,13 +2109,14 @@ class Router:
organization = litellm.get_secret(organization_env_name) organization = litellm.get_secret(organization_env_name)
litellm_params["organization"] = organization litellm_params["organization"] = organization
if "azure" in model_name: if "azure" in model_name and isinstance(api_key, str):
if api_base is None: if api_base is None or not isinstance(api_base, str):
raise ValueError( raise ValueError(
f"api_base is required for Azure OpenAI. Set it on your config. Model - {model}" f"api_base is required for Azure OpenAI. Set it on your config. Model - {model}"
) )
if api_version is None: if api_version is None:
api_version = "2023-07-01-preview" api_version = "2023-07-01-preview"
if "gateway.ai.cloudflare.com" in api_base: if "gateway.ai.cloudflare.com" in api_base:
if not api_base.endswith("/"): if not api_base.endswith("/"):
api_base += "/" api_base += "/"
@ -2532,7 +2539,7 @@ class Router:
self.default_deployment = deployment.to_json(exclude_none=True) self.default_deployment = deployment.to_json(exclude_none=True)
# Azure GPT-Vision Enhancements, users can pass os.environ/ # Azure GPT-Vision Enhancements, users can pass os.environ/
data_sources = deployment.litellm_params.get("dataSources", []) data_sources = deployment.litellm_params.get("dataSources", []) or []
for data_source in data_sources: for data_source in data_sources:
params = data_source.get("parameters", {}) params = data_source.get("parameters", {})
@ -2549,6 +2556,22 @@ class Router:
# init OpenAI, Azure clients # init OpenAI, Azure clients
self.set_client(model=deployment.to_json(exclude_none=True)) self.set_client(model=deployment.to_json(exclude_none=True))
# set region (if azure model)
try:
if "azure" in deployment.litellm_params.model:
region = litellm.utils.get_model_region(
litellm_params=deployment.litellm_params, mode=None
)
deployment.litellm_params.region_name = region
except Exception as e:
verbose_router_logger.error(
"Unable to get the region for azure model - {}, {}".format(
deployment.litellm_params.model, str(e)
)
)
pass # [NON-BLOCKING]
return deployment return deployment
def add_deployment(self, deployment: Deployment) -> Optional[Deployment]: def add_deployment(self, deployment: Deployment) -> Optional[Deployment]:
@ -2820,14 +2843,17 @@ class Router:
model: str, model: str,
healthy_deployments: List, healthy_deployments: List,
messages: List[Dict[str, str]], messages: List[Dict[str, str]],
allowed_model_region: Optional[Literal["eu"]] = None,
): ):
""" """
Filter out model in model group, if: Filter out model in model group, if:
- model context window < message length - model context window < message length
- filter models above rpm limits - filter models above rpm limits
- if region given, filter out models not in that region / unknown region
- [TODO] function call and model doesn't support function calling - [TODO] function call and model doesn't support function calling
""" """
verbose_router_logger.debug( verbose_router_logger.debug(
f"Starting Pre-call checks for deployments in model={model}" f"Starting Pre-call checks for deployments in model={model}"
) )
@ -2878,9 +2904,9 @@ class Router:
except Exception as e: except Exception as e:
verbose_router_logger.debug("An error occurs - {}".format(str(e))) verbose_router_logger.debug("An error occurs - {}".format(str(e)))
## RPM CHECK ##
_litellm_params = deployment.get("litellm_params", {}) _litellm_params = deployment.get("litellm_params", {})
model_id = deployment.get("model_info", {}).get("id", "") model_id = deployment.get("model_info", {}).get("id", "")
## RPM CHECK ##
### get local router cache ### ### get local router cache ###
current_request_cache_local = ( current_request_cache_local = (
self.cache.get_cache(key=model_id, local_only=True) or 0 self.cache.get_cache(key=model_id, local_only=True) or 0
@ -2908,6 +2934,28 @@ class Router:
_rate_limit_error = True _rate_limit_error = True
continue continue
## REGION CHECK ##
if allowed_model_region is not None:
if _litellm_params.get("region_name") is not None and isinstance(
_litellm_params["region_name"], str
):
# check if in allowed_model_region
if (
_is_region_eu(model_region=_litellm_params["region_name"])
== False
):
invalid_model_indices.append(idx)
continue
else:
verbose_router_logger.debug(
"Filtering out model - {}, as model_region=None, and allowed_model_region={}".format(
model_id, allowed_model_region
)
)
# filter out since region unknown, and user wants to filter for specific region
invalid_model_indices.append(idx)
continue
if len(invalid_model_indices) == len(_returned_deployments): if len(invalid_model_indices) == len(_returned_deployments):
""" """
- no healthy deployments available b/c context window checks or rate limit error - no healthy deployments available b/c context window checks or rate limit error
@ -3047,10 +3095,31 @@ class Router:
# filter pre-call checks # filter pre-call checks
if self.enable_pre_call_checks and messages is not None: if self.enable_pre_call_checks and messages is not None:
healthy_deployments = self._pre_call_checks( _allowed_model_region = (
model=model, healthy_deployments=healthy_deployments, messages=messages request_kwargs.get("allowed_model_region")
if request_kwargs is not None
else None
) )
if _allowed_model_region == "eu":
healthy_deployments = self._pre_call_checks(
model=model,
healthy_deployments=healthy_deployments,
messages=messages,
allowed_model_region=_allowed_model_region,
)
else:
verbose_router_logger.debug(
"Ignoring given 'allowed_model_region'={}. Only 'eu' is allowed".format(
_allowed_model_region
)
)
healthy_deployments = self._pre_call_checks(
model=model,
healthy_deployments=healthy_deployments,
messages=messages,
)
if len(healthy_deployments) == 0: if len(healthy_deployments) == 0:
raise ValueError( raise ValueError(
f"{RouterErrors.no_deployments_available.value}, passed model={model}" f"{RouterErrors.no_deployments_available.value}, passed model={model}"

View file

@ -6,7 +6,7 @@
# - use litellm.success + failure callbacks to log when a request completed # - use litellm.success + failure callbacks to log when a request completed
# - in get_available_deployment, for a given model group name -> pick based on traffic # - in get_available_deployment, for a given model group name -> pick based on traffic
import dotenv, os, requests, random import dotenv, os, requests, random # type: ignore
from typing import Optional from typing import Optional
dotenv.load_dotenv() # Loading env variables using dotenv dotenv.load_dotenv() # Loading env variables using dotenv

View file

@ -1,7 +1,7 @@
#### What this does #### #### What this does ####
# picks based on response time (for streaming, this is time to first token) # picks based on response time (for streaming, this is time to first token)
from pydantic import BaseModel, Extra, Field, root_validator from pydantic import BaseModel, Extra, Field, root_validator
import dotenv, os, requests, random import dotenv, os, requests, random # type: ignore
from typing import Optional, Union, List, Dict from typing import Optional, Union, List, Dict
from datetime import datetime, timedelta from datetime import datetime, timedelta
import random import random

View file

@ -1,7 +1,7 @@
#### What this does #### #### What this does ####
# picks based on response time (for streaming, this is time to first token) # picks based on response time (for streaming, this is time to first token)
from pydantic import BaseModel, Extra, Field, root_validator from pydantic import BaseModel, Extra, Field, root_validator # type: ignore
import dotenv, os, requests, random import dotenv, os, requests, random # type: ignore
from typing import Optional, Union, List, Dict from typing import Optional, Union, List, Dict
from datetime import datetime, timedelta from datetime import datetime, timedelta
import random import random

View file

@ -1,5 +1,6 @@
import pytest import pytest
from litellm import acompletion from litellm import acompletion
from litellm import completion
def test_acompletion_params(): def test_acompletion_params():
@ -7,17 +8,29 @@ def test_acompletion_params():
from litellm.types.completion import CompletionRequest from litellm.types.completion import CompletionRequest
acompletion_params_odict = inspect.signature(acompletion).parameters acompletion_params_odict = inspect.signature(acompletion).parameters
acompletion_params = {name: param.annotation for name, param in acompletion_params_odict.items()} completion_params_dict = inspect.signature(completion).parameters
completion_params = {field_name: field_type for field_name, field_type in CompletionRequest.__annotations__.items()}
# remove kwargs acompletion_params = {
acompletion_params.pop("kwargs", None) name: param.annotation for name, param in acompletion_params_odict.items()
}
completion_params = {
name: param.annotation for name, param in completion_params_dict.items()
}
keys_acompletion = set(acompletion_params.keys()) keys_acompletion = set(acompletion_params.keys())
keys_completion = set(completion_params.keys()) keys_completion = set(completion_params.keys())
print(keys_acompletion)
print("\n\n\n")
print(keys_completion)
print("diff=", keys_completion - keys_acompletion)
# Assert that the parameters are the same # Assert that the parameters are the same
if keys_acompletion != keys_completion: if keys_acompletion != keys_completion:
pytest.fail("The parameters of the acompletion function and the CompletionRequest class are not the same.") pytest.fail(
"The parameters of the litellm.acompletion function and litellm.completion are not the same."
)
# test_acompletion_params() # test_acompletion_params()

View file

@ -231,14 +231,17 @@ def test_cost_bedrock_pricing():
assert cost == predicted_cost assert cost == predicted_cost
@pytest.mark.skip(reason="AWS disabled our access")
def test_cost_bedrock_pricing_actual_calls(): def test_cost_bedrock_pricing_actual_calls():
litellm.set_verbose = True litellm.set_verbose = True
model = "anthropic.claude-instant-v1" model = "anthropic.claude-instant-v1"
messages = [{"role": "user", "content": "Hey, how's it going?"}] messages = [{"role": "user", "content": "Hey, how's it going?"}]
response = litellm.completion(model=model, messages=messages) response = litellm.completion(
assert response._hidden_params["region_name"] is not None model=model, messages=messages, mock_response="hello cool one"
)
print("response", response)
cost = litellm.completion_cost( cost = litellm.completion_cost(
model="bedrock/anthropic.claude-instant-v1",
completion_response=response, completion_response=response,
messages=[{"role": "user", "content": "Hey, how's it going?"}], messages=[{"role": "user", "content": "Hey, how's it going?"}],
) )
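
As an aside, the mock_response kwarg used in the updated test is useful for cost checks generally: litellm returns a canned ModelResponse without contacting the provider, so completion_cost can be exercised offline. A minimal sketch mirroring the test above (the model name is only an example):

```
import litellm

response = litellm.completion(
    model="anthropic.claude-instant-v1",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
    mock_response="hello cool one",  # short-circuits the provider call
)
cost = litellm.completion_cost(
    model="bedrock/anthropic.claude-instant-v1",
    completion_response=response,
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
print(response.choices[0].message.content, cost)
```
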

View file

@ -16,7 +16,7 @@ litellm.set_verbose = True
model_alias_map = {"good-model": "anyscale/meta-llama/Llama-2-7b-chat-hf"} model_alias_map = {"good-model": "anyscale/meta-llama/Llama-2-7b-chat-hf"}
def test_model_alias_map(): def test_model_alias_map(caplog):
try: try:
litellm.model_alias_map = model_alias_map litellm.model_alias_map = model_alias_map
response = completion( response = completion(
@ -27,9 +27,15 @@ def test_model_alias_map():
max_tokens=10, max_tokens=10,
) )
print(response.model) print(response.model)
captured_logs = [rec.levelname for rec in caplog.records]
for log in captured_logs:
assert "ERROR" not in log
assert "Llama-2-7b-chat-hf" in response.model assert "Llama-2-7b-chat-hf" in response.model
except Exception as e: except Exception as e:
pytest.fail(f"Error occurred: {e}") pytest.fail(f"Error occurred: {e}")
test_model_alias_map() # test_model_alias_map()
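The test now takes pytest's caplog fixture and asserts that no ERROR-level records were emitted while resolving the model alias. A standalone sketch of that logging-assertion pattern, independent of litellm:

```python
# Sketch of the caplog pattern above: run some code, then assert that
# nothing was logged at ERROR level.
import logging

import pytest


def do_work():
    logging.getLogger(__name__).info("resolving alias good-model ...")


def test_no_errors_logged(caplog):
    with caplog.at_level(logging.INFO):
        do_work()
    # same assertion style as the test above
    assert all(rec.levelname != "ERROR" for rec in caplog.records)
```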

View file

@ -1501,6 +1501,37 @@ def test_openai_chat_completion_complete_response_call():
# test_openai_chat_completion_complete_response_call() # test_openai_chat_completion_complete_response_call()
def test_openai_stream_options_call():
litellm.set_verbose = False
response = litellm.completion(
model="gpt-3.5-turbo",
messages=[{"role": "system", "content": "say GM - we're going to make it "}],
stream=True,
stream_options={"include_usage": True},
max_tokens=10,
)
usage = None
chunks = []
for chunk in response:
print("chunk: ", chunk)
chunks.append(chunk)
last_chunk = chunks[-1]
print("last chunk: ", last_chunk)
"""
Assert that:
- Last Chunk includes Usage
- All chunks prior to last chunk have usage=None
"""
assert last_chunk.usage is not None
assert last_chunk.usage.total_tokens > 0
assert last_chunk.usage.prompt_tokens > 0
assert last_chunk.usage.completion_tokens > 0
# assert all non last chunks have usage=None
assert all(chunk.usage is None for chunk in chunks[:-1])
def test_openai_text_completion_call(): def test_openai_text_completion_call():
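The new test exercises OpenAI's stream_options={"include_usage": True} contract: every chunk except the last carries usage=None, and the final chunk carries the aggregate token counts. A hedged consumer sketch following the same contract; it assumes OPENAI_API_KEY is set in the environment:

```python
import litellm

# Stream a short completion and read token usage off the final chunk.
# All chunks before the last one are expected to have chunk.usage == None.
stream = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "say hi"}],
    stream=True,
    stream_options={"include_usage": True},
    max_tokens=10,
)
chunks = list(stream)
usage = chunks[-1].usage
print("total tokens:", usage.total_tokens)
```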

View file

@ -123,6 +123,8 @@ class GenericLiteLLMParams(BaseModel):
) )
max_retries: Optional[int] = None max_retries: Optional[int] = None
organization: Optional[str] = None # for openai orgs organization: Optional[str] = None # for openai orgs
## UNIFIED PROJECT/REGION ##
region_name: Optional[str] = None
## VERTEX AI ## ## VERTEX AI ##
vertex_project: Optional[str] = None vertex_project: Optional[str] = None
vertex_location: Optional[str] = None vertex_location: Optional[str] = None
@ -150,6 +152,8 @@ class GenericLiteLLMParams(BaseModel):
None # timeout when making stream=True calls, if str, pass in as os.environ/ None # timeout when making stream=True calls, if str, pass in as os.environ/
), ),
organization: Optional[str] = None, # for openai orgs organization: Optional[str] = None, # for openai orgs
## UNIFIED PROJECT/REGION ##
region_name: Optional[str] = None,
## VERTEX AI ## ## VERTEX AI ##
vertex_project: Optional[str] = None, vertex_project: Optional[str] = None,
vertex_location: Optional[str] = None, vertex_location: Optional[str] = None,
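GenericLiteLLMParams gains a unified region_name field alongside the provider-specific Vertex/AWS ones. A hypothetical pydantic mirror of that shape, for illustration only; the real class lives in litellm and accepts many more fields than shown here:

```python
# Hypothetical mirror of the change above: a params model carrying a unified
# region_name next to provider-specific fields. Illustrative, not litellm's class.
from typing import Optional

from pydantic import BaseModel


class ParamsSketch(BaseModel):
    model: str
    api_key: Optional[str] = None
    api_base: Optional[str] = None
    region_name: Optional[str] = None  # unified project/region
    vertex_project: Optional[str] = None
    vertex_location: Optional[str] = None


params = ParamsSketch(model="azure/gpt-35-turbo", region_name="eu")
print(params.region_name)
```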

View file

@ -14,7 +14,7 @@ import subprocess, os
from os.path import abspath, join, dirname from os.path import abspath, join, dirname
import litellm, openai import litellm, openai
import itertools import itertools
import random, uuid, requests import random, uuid, requests # type: ignore
from functools import wraps from functools import wraps
import datetime, time import datetime, time
import tiktoken import tiktoken
@ -36,7 +36,7 @@ import litellm._service_logger # for storing API inputs, outputs, and metadata
try: try:
# this works in python 3.8 # this works in python 3.8
import pkg_resources import pkg_resources # type: ignore
filename = pkg_resources.resource_filename(__name__, "llms/tokenizers") filename = pkg_resources.resource_filename(__name__, "llms/tokenizers")
# try: # try:
@ -612,6 +612,7 @@ class ModelResponse(OpenAIObject):
system_fingerprint=None, system_fingerprint=None,
usage=None, usage=None,
stream=None, stream=None,
stream_options=None,
response_ms=None, response_ms=None,
hidden_params=None, hidden_params=None,
**params, **params,
@ -658,6 +659,12 @@ class ModelResponse(OpenAIObject):
usage = usage usage = usage
elif stream is None or stream == False: elif stream is None or stream == False:
usage = Usage() usage = Usage()
elif (
stream == True
and stream_options is not None
and stream_options.get("include_usage") == True
):
usage = Usage()
if hidden_params: if hidden_params:
self._hidden_params = hidden_params self._hidden_params = hidden_params
@ -4161,8 +4168,30 @@ def cost_per_token(
model_with_provider_and_region in model_cost_ref model_with_provider_and_region in model_cost_ref
): # use region based pricing, if it's available ): # use region based pricing, if it's available
model_with_provider = model_with_provider_and_region model_with_provider = model_with_provider_and_region
if model_with_provider in model_cost_ref:
model_without_prefix = model
model_parts = model.split("/")
if len(model_parts) > 1:
model_without_prefix = model_parts[1]
else:
model_without_prefix = model
"""
Code block that formats model to lookup in litellm.model_cost
Option1. model = "bedrock/ap-northeast-1/anthropic.claude-instant-v1". This is the most accurate since it is region based. Should always be option 1
Option2. model = "openai/gpt-4" - model = provider/model
Option3. model = "anthropic.claude-3" - model = model
"""
if (
model_with_provider in model_cost_ref
): # Option 2. use model with provider, model = "openai/gpt-4"
model = model_with_provider model = model_with_provider
elif model in model_cost_ref: # Option 1. use model passed, model="gpt-4"
model = model
elif (
model_without_prefix in model_cost_ref
): # Option 3. if user passed model="bedrock/anthropic.claude-3", use model="anthropic.claude-3"
model = model_without_prefix
# see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
print_verbose(f"Looking up model={model} in model_cost_map") print_verbose(f"Looking up model={model} in model_cost_map")
if model in model_cost_ref: if model in model_cost_ref:
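The cost lookup now normalizes the model name in priority order: the provider-and-region-qualified name (e.g. "bedrock/ap-northeast-1/anthropic.claude-instant-v1"), then "provider/model", then the bare model id with any prefix stripped. A standalone sketch of that lookup order against a toy cost map rather than litellm.model_cost:

```python
# Toy reproduction of the lookup priority in cost_per_token above.
cost_map = {
    "openai/gpt-4": {"input_cost_per_token": 3e-05},
    "anthropic.claude-instant-v1": {"input_cost_per_token": 8e-07},
}


def resolve_cost_key(model: str, model_with_provider: str) -> str:
    model_without_prefix = model.split("/", 1)[1] if "/" in model else model
    if model_with_provider in cost_map:   # Option 2: provider/model (or region-based)
        return model_with_provider
    if model in cost_map:                 # Option 1: model exactly as passed
        return model
    if model_without_prefix in cost_map:  # Option 3: provider prefix stripped
        return model_without_prefix
    return model


print(resolve_cost_key("gpt-4", "openai/gpt-4"))
print(resolve_cost_key("bedrock/anthropic.claude-instant-v1",
                       "bedrock/anthropic.claude-instant-v1"))
```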
@ -4817,6 +4846,7 @@ def get_optional_params(
top_p=None, top_p=None,
n=None, n=None,
stream=False, stream=False,
stream_options=None,
stop=None, stop=None,
max_tokens=None, max_tokens=None,
presence_penalty=None, presence_penalty=None,
@ -4886,6 +4916,7 @@ def get_optional_params(
"top_p": None, "top_p": None,
"n": None, "n": None,
"stream": None, "stream": None,
"stream_options": None,
"stop": None, "stop": None,
"max_tokens": None, "max_tokens": None,
"presence_penalty": None, "presence_penalty": None,
@ -5757,6 +5788,8 @@ def get_optional_params(
optional_params["n"] = n optional_params["n"] = n
if stream is not None: if stream is not None:
optional_params["stream"] = stream optional_params["stream"] = stream
if stream_options is not None:
optional_params["stream_options"] = stream_options
if stop is not None: if stop is not None:
optional_params["stop"] = stop optional_params["stop"] = stop
if max_tokens is not None: if max_tokens is not None:
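These hunks thread the new stream_options parameter through get_optional_params. A hedged check using get_supported_openai_params from the same module (its signature appears in a later hunk) to confirm a provider accepts stream_options before forwarding it; the model and provider values are illustrative:

```python
# Illustrative support check before forwarding stream_options to a provider.
# Assumes this litellm version exposes the function in litellm.utils.
from litellm.utils import get_supported_openai_params

supported = get_supported_openai_params(
    model="gpt-3.5-turbo", custom_llm_provider="openai"
)
if supported and "stream_options" in supported:
    print("stream_options will be passed through for this provider")
```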
@ -5844,6 +5877,40 @@ def calculate_max_parallel_requests(
return None return None
def _is_region_eu(model_region: str) -> bool:
EU_Regions = ["europe", "sweden", "switzerland", "france", "uk"]
for region in EU_Regions:
if "europe" in model_region.lower():
return True
return False
def get_model_region(
litellm_params: LiteLLM_Params, mode: Optional[str]
) -> Optional[str]:
"""
Pass the litellm params for an azure model, and get back the region
"""
if (
"azure" in litellm_params.model
and isinstance(litellm_params.api_key, str)
and isinstance(litellm_params.api_base, str)
):
_model = litellm_params.model.replace("azure/", "")
response: dict = litellm.AzureChatCompletion().get_headers(
model=_model,
api_key=litellm_params.api_key,
api_base=litellm_params.api_base,
api_version=litellm_params.api_version or "2023-07-01-preview",
timeout=10,
mode=mode or "chat",
)
region: Optional[str] = response.get("x-ms-region", None)
return region
return None
def get_api_base(model: str, optional_params: dict) -> Optional[str]: def get_api_base(model: str, optional_params: dict) -> Optional[str]:
""" """
Returns the api base used for calling the model. Returns the api base used for calling the model.
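The new _is_region_eu helper declares several EU region names but, as committed, only ever tests whether "europe" appears in the string, so a region like "francecentral" would not match. A sketch of a substring check that consults every listed region, offered as an assumption about the intent rather than as litellm's actual behavior:

```python
# Assumed-intent sketch: match any of the listed EU regions, not just "europe".
# This is an illustration, not the helper as merged.
EU_REGIONS = ["europe", "sweden", "switzerland", "france", "uk"]


def is_region_eu(model_region: str) -> bool:
    region_lower = model_region.lower()
    return any(eu in region_lower for eu in EU_REGIONS)


print(is_region_eu("francecentral"))  # True
print(is_region_eu("eastus"))         # False
```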
@ -5878,6 +5945,8 @@ def get_api_base(model: str, optional_params: dict) -> Optional[str]:
if _optional_params.api_base is not None: if _optional_params.api_base is not None:
return _optional_params.api_base return _optional_params.api_base
if litellm.model_alias_map and model in litellm.model_alias_map:
model = litellm.model_alias_map[model]
try: try:
model, custom_llm_provider, dynamic_api_key, dynamic_api_base = ( model, custom_llm_provider, dynamic_api_key, dynamic_api_base = (
get_llm_provider( get_llm_provider(
@ -6027,6 +6096,7 @@ def get_supported_openai_params(model: str, custom_llm_provider: str):
"top_p", "top_p",
"n", "n",
"stream", "stream",
"stream_options",
"stop", "stop",
"max_tokens", "max_tokens",
"presence_penalty", "presence_penalty",
@ -7732,11 +7802,11 @@ def _calculate_retry_after(
try: try:
retry_after = int(retry_header) retry_after = int(retry_header)
except Exception: except Exception:
retry_date_tuple = email.utils.parsedate_tz(retry_header) retry_date_tuple = email.utils.parsedate_tz(retry_header) # type: ignore
if retry_date_tuple is None: if retry_date_tuple is None:
retry_after = -1 retry_after = -1
else: else:
retry_date = email.utils.mktime_tz(retry_date_tuple) retry_date = email.utils.mktime_tz(retry_date_tuple) # type: ignore
retry_after = int(retry_date - time.time()) retry_after = int(retry_date - time.time())
else: else:
retry_after = -1 retry_after = -1
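The retry helper parses Retry-After either as an integer number of seconds or, failing that, as an HTTP date via email.utils. A standalone sketch of that fallback using only the standard library:

```python
# Sketch of the Retry-After parsing above: integer form first, HTTP-date fallback.
import email.utils
import time


def parse_retry_after(retry_header: str) -> int:
    try:
        return int(retry_header)
    except Exception:
        retry_date_tuple = email.utils.parsedate_tz(retry_header)
        if retry_date_tuple is None:
            return -1
        retry_date = email.utils.mktime_tz(retry_date_tuple)
        return int(retry_date - time.time())


print(parse_retry_after("120"))
print(parse_retry_after("Wed, 21 Oct 2026 07:28:00 GMT"))
```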
@ -9423,7 +9493,9 @@ def get_secret(
else: else:
secret = os.environ.get(secret_name) secret = os.environ.get(secret_name)
try: try:
secret_value_as_bool = ast.literal_eval(secret) if secret is not None else None secret_value_as_bool = (
ast.literal_eval(secret) if secret is not None else None
)
if isinstance(secret_value_as_bool, bool): if isinstance(secret_value_as_bool, bool):
return secret_value_as_bool return secret_value_as_bool
else: else:
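get_secret now guards ast.literal_eval against a missing value before deciding whether a secret is really a boolean. A standalone sketch of that coercion for an environment variable:

```python
# Sketch of the guarded literal_eval above: None stays None, "True"/"False"
# become booleans, anything else comes back as the raw string.
import ast
import os


def env_as_bool_or_str(name: str):
    secret = os.environ.get(name)
    try:
        value = ast.literal_eval(secret) if secret is not None else None
    except (ValueError, SyntaxError):
        return secret
    return value if isinstance(value, bool) else secret


os.environ["FEATURE_FLAG"] = "True"
print(env_as_bool_or_str("FEATURE_FLAG"))  # True
print(env_as_bool_or_str("MISSING_FLAG"))  # None
```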
@ -9442,7 +9514,12 @@ def get_secret(
# replicate/anthropic/cohere # replicate/anthropic/cohere
class CustomStreamWrapper: class CustomStreamWrapper:
def __init__( def __init__(
self, completion_stream, model, custom_llm_provider=None, logging_obj=None self,
completion_stream,
model,
custom_llm_provider=None,
logging_obj=None,
stream_options=None,
): ):
self.model = model self.model = model
self.custom_llm_provider = custom_llm_provider self.custom_llm_provider = custom_llm_provider
@ -9468,6 +9545,7 @@ class CustomStreamWrapper:
self.response_id = None self.response_id = None
self.logging_loop = None self.logging_loop = None
self.rules = Rules() self.rules = Rules()
self.stream_options = stream_options
def __iter__(self): def __iter__(self):
return self return self
@ -9908,6 +9986,7 @@ class CustomStreamWrapper:
is_finished = False is_finished = False
finish_reason = None finish_reason = None
logprobs = None logprobs = None
usage = None
original_chunk = None # this is used for function/tool calling original_chunk = None # this is used for function/tool calling
if len(str_line.choices) > 0: if len(str_line.choices) > 0:
if ( if (
@ -9942,12 +10021,15 @@ class CustomStreamWrapper:
else: else:
logprobs = None logprobs = None
usage = getattr(str_line, "usage", None)
return { return {
"text": text, "text": text,
"is_finished": is_finished, "is_finished": is_finished,
"finish_reason": finish_reason, "finish_reason": finish_reason,
"logprobs": logprobs, "logprobs": logprobs,
"original_chunk": str_line, "original_chunk": str_line,
"usage": usage,
} }
except Exception as e: except Exception as e:
traceback.print_exc() traceback.print_exc()
@ -10250,7 +10332,9 @@ class CustomStreamWrapper:
raise e raise e
def model_response_creator(self): def model_response_creator(self):
model_response = ModelResponse(stream=True, model=self.model) model_response = ModelResponse(
stream=True, model=self.model, stream_options=self.stream_options
)
if self.response_id is not None: if self.response_id is not None:
model_response.id = self.response_id model_response.id = self.response_id
else: else:
@ -10570,6 +10654,12 @@ class CustomStreamWrapper:
if response_obj["logprobs"] is not None: if response_obj["logprobs"] is not None:
model_response.choices[0].logprobs = response_obj["logprobs"] model_response.choices[0].logprobs = response_obj["logprobs"]
if (
self.stream_options is not None
and self.stream_options["include_usage"] == True
):
model_response.usage = response_obj["usage"]
model_response.model = self.model model_response.model = self.model
print_verbose( print_verbose(
f"model_response finish reason 3: {self.received_finish_reason}; response_obj={response_obj}" f"model_response finish reason 3: {self.received_finish_reason}; response_obj={response_obj}"
@ -10657,6 +10747,11 @@ class CustomStreamWrapper:
except Exception as e: except Exception as e:
model_response.choices[0].delta = Delta() model_response.choices[0].delta = Delta()
else: else:
if (
self.stream_options is not None
and self.stream_options["include_usage"] == True
):
return model_response
return return
print_verbose( print_verbose(
f"model_response.choices[0].delta: {model_response.choices[0].delta}; completion_obj: {completion_obj}" f"model_response.choices[0].delta: {model_response.choices[0].delta}; completion_obj: {completion_obj}"

View file

@ -1,4 +1,9 @@
model_list: model_list:
- model_name: gpt-3.5-turbo
litellm_params:
model: azure/gpt-35-turbo
api_base: https://my-endpoint-europe-berri-992.openai.azure.com/
api_key: os.environ/AZURE_EUROPE_API_KEY
- model_name: gpt-3.5-turbo - model_name: gpt-3.5-turbo
litellm_params: litellm_params:
model: azure/chatgpt-v-2 model: azure/chatgpt-v-2

View file

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "litellm" name = "litellm"
version = "1.36.3" version = "1.36.4"
description = "Library to easily interface with LLM API providers" description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"] authors = ["BerriAI"]
license = "MIT" license = "MIT"
@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api" build-backend = "poetry.core.masonry.api"
[tool.commitizen] [tool.commitizen]
version = "1.36.3" version = "1.36.4"
version_files = [ version_files = [
"pyproject.toml:^version" "pyproject.toml:^version"
] ]

View file

@ -150,6 +150,8 @@ model LiteLLM_EndUserTable {
user_id String @id user_id String @id
alias String? // admin-facing alias alias String? // admin-facing alias
spend Float @default(0.0) spend Float @default(0.0)
allowed_model_region String? // require all user requests to use models in this specific region
default_model String? // use along with 'allowed_model_region'. if no available model in region, default to this model.
budget_id String? budget_id String?
litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id]) litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
blocked Boolean @default(false) blocked Boolean @default(false)
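The schema change adds allowed_model_region and default_model to LiteLLM_EndUserTable; the new test file below drives them through the proxy's /end_user/new endpoint. A hedged sketch of that call with requests, assuming a proxy running locally on port 4000 with the test master key sk-1234 used in those tests:

```python
# Assumes a locally running proxy (http://0.0.0.0:4000) with master key
# "sk-1234"; the field names come from the schema change above.
import uuid

import requests

resp = requests.post(
    "http://0.0.0.0:4000/end_user/new",
    headers={"Authorization": "Bearer sk-1234", "Content-Type": "application/json"},
    json={
        "user_id": str(uuid.uuid4()),
        "allowed_model_region": "eu",
        "default_model": "gpt-3.5-turbo",
    },
)
resp.raise_for_status()
print(resp.json())
```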

173
tests/test_end_users.py Normal file
View file

@ -0,0 +1,173 @@
# What is this?
## Unit tests for the /end_users/* endpoints
import pytest
import asyncio
import aiohttp
import time
import uuid
from openai import AsyncOpenAI
from typing import Optional
"""
- `/end_user/new`
- `/end_user/info`
"""
async def chat_completion_with_headers(session, key, model="gpt-4"):
url = "http://0.0.0.0:4000/chat/completions"
headers = {
"Authorization": f"Bearer {key}",
"Content-Type": "application/json",
}
data = {
"model": model,
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Hello!"},
],
}
async with session.post(url, headers=headers, json=data) as response:
status = response.status
response_text = await response.text()
print(response_text)
print()
if status != 200:
raise Exception(f"Request did not return a 200 status code: {status}")
response_header_check(
response
) # calling the function to check response headers
raw_headers = response.raw_headers
raw_headers_json = {}
for (
item
) in (
response.raw_headers
): # ((b'date', b'Fri, 19 Apr 2024 21:17:29 GMT'), (), )
raw_headers_json[item[0].decode("utf-8")] = item[1].decode("utf-8")
return raw_headers_json
async def generate_key(
session,
i,
budget=None,
budget_duration=None,
models=["azure-models", "gpt-4", "dall-e-3"],
max_parallel_requests: Optional[int] = None,
user_id: Optional[str] = None,
team_id: Optional[str] = None,
calling_key="sk-1234",
):
url = "http://0.0.0.0:4000/key/generate"
headers = {
"Authorization": f"Bearer {calling_key}",
"Content-Type": "application/json",
}
data = {
"models": models,
"aliases": {"mistral-7b": "gpt-3.5-turbo"},
"duration": None,
"max_budget": budget,
"budget_duration": budget_duration,
"max_parallel_requests": max_parallel_requests,
"user_id": user_id,
"team_id": team_id,
}
print(f"data: {data}")
async with session.post(url, headers=headers, json=data) as response:
status = response.status
response_text = await response.text()
print(f"Response {i} (Status code: {status}):")
print(response_text)
print()
if status != 200:
raise Exception(f"Request {i} did not return a 200 status code: {status}")
return await response.json()
async def new_end_user(
session, i, user_id=str(uuid.uuid4()), model_region=None, default_model=None
):
url = "http://0.0.0.0:4000/end_user/new"
headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"}
data = {
"user_id": user_id,
"allowed_model_region": model_region,
"default_model": default_model,
}
async with session.post(url, headers=headers, json=data) as response:
status = response.status
response_text = await response.text()
print(f"Response {i} (Status code: {status}):")
print(response_text)
print()
if status != 200:
raise Exception(f"Request {i} did not return a 200 status code: {status}")
return await response.json()
@pytest.mark.asyncio
async def test_end_user_new():
"""
Make 10 parallel calls to /end_user/new. Assert all worked.
"""
async with aiohttp.ClientSession() as session:
tasks = [new_end_user(session, i, str(uuid.uuid4())) for i in range(1, 11)]
await asyncio.gather(*tasks)
@pytest.mark.asyncio
async def test_end_user_specific_region():
"""
- Specify region user can make calls in
- Make a generic call
- assert returned api base is for model in region
Repeat 3 times
"""
key: str = ""
## CREATE USER ##
async with aiohttp.ClientSession() as session:
end_user_obj = await new_end_user(
session=session,
i=0,
user_id=str(uuid.uuid4()),
model_region="eu",
)
## MAKE CALL ##
key_gen = await generate_key(session=session, i=0, models=["gpt-3.5-turbo"])
key = key_gen["key"]
for _ in range(3):
client = AsyncOpenAI(api_key=key, base_url="http://0.0.0.0:4000")
print("SENDING USER PARAM - {}".format(end_user_obj["user_id"]))
result = await client.chat.completions.with_raw_response.create(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hey!"}],
user=end_user_obj["user_id"],
)
assert (
result.headers.get("x-litellm-model-api-base")
== "https://my-endpoint-europe-berri-992.openai.azure.com/"
)

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
self.__BUILD_MANIFEST={__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/_error":["static/chunks/pages/_error-d6107f1aac0c574c.js"],sortedPages:["/_app","/_error"]},self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();

View file

@ -0,0 +1 @@
self.__SSG_MANIFEST=new Set([]);self.__SSG_MANIFEST_CB&&self.__SSG_MANIFEST_CB()

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_c23dc8', '__Inter_Fallback_c23dc8'",fontStyle:"normal"},className:"__className_c23dc8"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=87421)}),_N_E=n.O()}]);

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);

View file

@ -0,0 +1 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/a1602eb39f799143.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d85d62a2bbfac48f.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d85d62a2bbfac48f.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/7de0c97d470f519f.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[18889,[\"936\",\"static/chunks/2f6dbc85-17d29013b8ff3da5.js\",\"319\",\"static/chunks/319-4467f3d35ad11cf1.js\",\"931\",\"static/chunks/app/page-f32196ae7cd3d914.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/7de0c97d470f519f.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"OcLXYgLcgQyjMd6bH1bqU\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html> <!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-5b257e1ab47d4b4a.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-5b257e1ab47d4b4a.js" crossorigin="" 
async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/a1602eb39f799143.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[25539,[\"936\",\"static/chunks/2f6dbc85-17d29013b8ff3da5.js\",\"566\",\"static/chunks/566-ccd699ab19124658.js\",\"931\",\"static/chunks/app/page-c804e862b63be987.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/a1602eb39f799143.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"K8KXTbmuI2ArWjjdMi2iq\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin 
UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[18889,["936","static/chunks/2f6dbc85-17d29013b8ff3da5.js","319","static/chunks/319-4467f3d35ad11cf1.js","931","static/chunks/app/page-f32196ae7cd3d914.js"],""] 3:I[25539,["936","static/chunks/2f6dbc85-17d29013b8ff3da5.js","566","static/chunks/566-ccd699ab19124658.js","931","static/chunks/app/page-c804e862b63be987.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["OcLXYgLcgQyjMd6bH1bqU",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/7de0c97d470f519f.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 0:["K8KXTbmuI2ArWjjdMi2iq",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 
0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/a1602eb39f799143.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@ -189,6 +189,7 @@ const CreateKeyPage = () => {
userRole={userRole} userRole={userRole}
token={token} token={token}
accessToken={accessToken} accessToken={accessToken}
keys={keys}
/> />
)} )}
</div> </div>

View file

@ -786,7 +786,9 @@ export const adminTopKeysCall = async (accessToken: String) => {
export const adminTopEndUsersCall = async ( export const adminTopEndUsersCall = async (
accessToken: String, accessToken: String,
keyToken: String | null keyToken: String | null,
startTime: String | undefined,
endTime: String | undefined
) => { ) => {
try { try {
let url = proxyBaseUrl let url = proxyBaseUrl
@ -795,8 +797,11 @@ export const adminTopEndUsersCall = async (
let body = ""; let body = "";
if (keyToken) { if (keyToken) {
body = JSON.stringify({ api_key: keyToken }); body = JSON.stringify({ api_key: keyToken, startTime: startTime, endTime: endTime });
} else {
body = JSON.stringify({ startTime: startTime, endTime: endTime });
} }
//message.info("Making top end users request"); //message.info("Making top end users request");
// Define requestOptions with body as an optional property // Define requestOptions with body as an optional property
@ -815,9 +820,7 @@ export const adminTopEndUsersCall = async (
}, },
}; };
if (keyToken) { requestOptions.body = body;
requestOptions.body = JSON.stringify({ api_key: keyToken });
}
const response = await fetch(url, requestOptions); const response = await fetch(url, requestOptions);
if (!response.ok) { if (!response.ok) {

View file

@ -341,7 +341,7 @@ const Settings: React.FC<SettingsPageProps> = ({
return ( return (
<div className="w-full mx-4"> <div className="w-full mx-4">
<Grid numItems={1} className="gap-2 p-8 w-full mt-2"> <Grid numItems={1} className="gap-2 p-8 w-full mt-2">
<Callout title="Presidio PII / Guardrails Coming Soon" color="sky"> <Callout title="[UI] Presidio PII + Guardrails Coming Soon. https://docs.litellm.ai/docs/proxy/pii_masking" color="sky">
</Callout> </Callout>
<TabGroup> <TabGroup>
@ -353,9 +353,6 @@ const Settings: React.FC<SettingsPageProps> = ({
<TabPanel> <TabPanel>
<Card > <Card >
<Text>
Presidio + Guardrails coming soon
</Text>
<Table> <Table>
<TableHead> <TableHead>
<TableRow> <TableRow>

Some files were not shown because too many files have changed in this diff