Merge branch 'main' into ui_show_spend_end_user

Ishaan Jaff 2024-05-08 18:29:25 -07:00 committed by GitHub
commit 6d955ef457
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
101 changed files with 912 additions and 366 deletions

View file

@ -223,7 +223,7 @@ jobs:
background: true
- run:
name: Wait for app to be ready
command: dockerize -wait http://localhost:4000 -timeout 1m
command: dockerize -wait http://localhost:4000 -timeout 5m
- run:
name: Run tests
command: |
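
The `dockerize -wait` step blocks the CI job until http://localhost:4000 responds (or the timeout expires); the change above raises that ceiling from 1m to 5m so slower container start-ups no longer fail the build before the tests run. A rough Python equivalent of that readiness poll, for illustration only (the URL and timeout mirror the CI step):

```python
import time
import requests

def wait_for_app(url: str = "http://localhost:4000", timeout: float = 300.0) -> None:
    """Poll `url` until it answers, or raise after `timeout` seconds (5m, like the CI step)."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            requests.get(url, timeout=5)  # any HTTP response means the app is up
            return
        except requests.RequestException:
            time.sleep(2)  # not up yet; retry shortly
    raise TimeoutError(f"{url} did not become ready within {timeout}s")

if __name__ == "__main__":
    wait_for_app()
```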

.gitignore vendored
View file

@ -1,5 +1,4 @@
.venv
venv
.env
litellm_uuid.txt
__pycache__/

View file

@ -16,11 +16,11 @@ repos:
name: Check if files match
entry: python3 ci_cd/check_files_match.py
language: system
- repo: local
hooks:
- id: mypy
name: mypy
entry: python3 -m mypy --ignore-missing-imports
language: system
types: [python]
files: ^litellm/
# - repo: local
# hooks:
# - id: mypy
# name: mypy
# entry: python3 -m mypy --ignore-missing-imports
# language: system
# types: [python]
# files: ^litellm/

Binary file not shown.

View file

@ -0,0 +1,15 @@
{
"$schema": "https://schema.management.azure.com/schemas/0.1.2-preview/CreateUIDefinition.MultiVm.json#",
"handler": "Microsoft.Azure.CreateUIDef",
"version": "0.1.2-preview",
"parameters": {
"config": {
"isWizard": false,
"basics": { }
},
"basics": [ ],
"steps": [ ],
"outputs": { },
"resourceTypes": [ ]
}
}

View file

@ -0,0 +1,63 @@
{
"$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"parameters": {
"imageName": {
"type": "string",
"defaultValue": "ghcr.io/berriai/litellm:main-latest"
},
"containerName": {
"type": "string",
"defaultValue": "litellm-container"
},
"dnsLabelName": {
"type": "string",
"defaultValue": "litellm"
},
"portNumber": {
"type": "int",
"defaultValue": 4000
}
},
"resources": [
{
"type": "Microsoft.ContainerInstance/containerGroups",
"apiVersion": "2021-03-01",
"name": "[parameters('containerName')]",
"location": "[resourceGroup().location]",
"properties": {
"containers": [
{
"name": "[parameters('containerName')]",
"properties": {
"image": "[parameters('imageName')]",
"resources": {
"requests": {
"cpu": 1,
"memoryInGB": 2
}
},
"ports": [
{
"port": "[parameters('portNumber')]"
}
]
}
}
],
"osType": "Linux",
"restartPolicy": "Always",
"ipAddress": {
"type": "Public",
"ports": [
{
"protocol": "tcp",
"port": "[parameters('portNumber')]"
}
],
"dnsNameLabel": "[parameters('dnsLabelName')]"
}
}
}
]
}

View file

@ -0,0 +1,42 @@
param imageName string = 'ghcr.io/berriai/litellm:main-latest'
param containerName string = 'litellm-container'
param dnsLabelName string = 'litellm'
param portNumber int = 4000
resource containerGroupName 'Microsoft.ContainerInstance/containerGroups@2021-03-01' = {
name: containerName
location: resourceGroup().location
properties: {
containers: [
{
name: containerName
properties: {
image: imageName
resources: {
requests: {
cpu: 1
memoryInGB: 2
}
}
ports: [
{
port: portNumber
}
]
}
}
]
osType: 'Linux'
restartPolicy: 'Always'
ipAddress: {
type: 'Public'
ports: [
{
protocol: 'tcp'
port: portNumber
}
]
dnsNameLabel: dnsLabelName
}
}
}
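
Both the ARM template above and this Bicep version stand up a single public Linux container instance running `ghcr.io/berriai/litellm:main-latest` (1 vCPU, 2 GB) and attach a DNS label. Once deployed, the proxy should answer on `<dnsLabelName>.<region>.azurecontainer.io` at the chosen port; a minimal smoke test, with hostname, region, and key as placeholder assumptions:

```python
import requests

# Placeholders: substitute the dnsLabelName, Azure region, port, and proxy key you deployed with.
BASE_URL = "http://litellm.eastus.azurecontainer.io:4000"
API_KEY = "sk-1234"

resp = requests.post(
    f"{BASE_URL}/chat/completions",
    headers={"Authorization": f"Bearer {API_KEY}"},
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "ping"}],
    },
    timeout=30,
)
print(resp.status_code, resp.json())
```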

View file

@ -46,4 +46,13 @@ Pricing is based on usage. We can figure out a price that works for your team, o
<Image img={require('../img/litellm_hosted_ui_router.png')} />
#### [**🚨 Schedule Call**](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)
#### [**🚨 Schedule Call**](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat)
## Feature List
- Easy way to add/remove models
- 100% uptime even when models are added/removed
- custom callback webhooks
- your domain name with HTTPS
- Ability to create/delete User API keys
- Reasonable set monthly cost

View file

@ -12,8 +12,8 @@ Requirements:
You can set budgets at 3 levels:
- For the proxy
- For a user
- For a 'user' passed to `/chat/completions`, `/embeddings` etc
- For an internal user
- For an end-user
- For a key
- For a key (model specific budgets)
@ -58,7 +58,7 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
}'
```
</TabItem>
<TabItem value="per-user" label="For User">
<TabItem value="per-user" label="For Internal User">
Apply a budget across multiple keys.
@ -165,12 +165,12 @@ curl --location 'http://localhost:4000/team/new' \
}
```
</TabItem>
<TabItem value="per-user-chat" label="For 'user' passed to /chat/completions">
<TabItem value="per-user-chat" label="For End User">
Use this to budget `user` passed to `/chat/completions`, **without needing to create a key for every user**
**Step 1. Modify config.yaml**
Define `litellm.max_user_budget`
Define `litellm.max_end_user_budget`
```yaml
general_settings:
master_key: sk-1234
@ -328,7 +328,7 @@ You can set:
- max parallel requests
<Tabs>
<TabItem value="per-user" label="Per User">
<TabItem value="per-user" label="Per Internal User">
Use `/user/new`, to persist rate limits across multiple keys.
@ -408,7 +408,7 @@ curl --location 'http://localhost:4000/user/new' \
```
## Create new keys for existing user
## Create new keys for existing internal user
Just include user_id in the `/key/generate` request.
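
The budget docs above rename the levels ('user' becomes internal user vs. end-user) and the config key becomes `litellm.max_end_user_budget`. End-user budgets are keyed off the `user` field sent with each request, so no per-user key is needed; a minimal sketch of such a request through a local proxy, assuming it listens on port 4000 with the `sk-1234` master key shown in the config snippet:

```python
from openai import OpenAI

# Point the standard OpenAI client at the LiteLLM proxy.
client = OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
    user="end-user-123",  # end-user id the proxy tracks spend and budget against
)
print(response.choices[0].message.content)
```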

View file

@ -468,7 +468,7 @@ asyncio.run(router_acompletion())
```
</TabItem>
<TabItem value="lowest-cost" label="Lowest Cost Routing">
<TabItem value="lowest-cost" label="Lowest Cost Routing (Async)">
Picks a deployment based on the lowest cost
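
The tab rename only flags that the lowest-cost example in the docs is async. A minimal Router sketch for that strategy, assuming `"cost-based-routing"` is the strategy name and `OPENAI_API_KEY` is set in the environment:

```python
import asyncio
import litellm

router = litellm.Router(
    model_list=[
        # Two deployments behind one alias; the router picks the cheaper one per call.
        {"model_name": "my-gpt", "litellm_params": {"model": "gpt-4"}},
        {"model_name": "my-gpt", "litellm_params": {"model": "gpt-3.5-turbo"}},
    ],
    routing_strategy="cost-based-routing",  # assumed name for lowest-cost routing
)

async def router_acompletion():
    return await router.acompletion(
        model="my-gpt",
        messages=[{"role": "user", "content": "Hey, how's it going?"}],
    )

print(asyncio.run(router_acompletion()))
```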

View file

@ -291,7 +291,7 @@ def _create_clickhouse_aggregate_tables(client=None, table_names=[]):
def _forecast_daily_cost(data: list):
import requests
import requests # type: ignore
from datetime import datetime, timedelta
if len(data) == 0:

View file

@ -10,8 +10,8 @@
# s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation
import os
import inspect
import redis, litellm
import redis.asyncio as async_redis
import redis, litellm # type: ignore
import redis.asyncio as async_redis # type: ignore
from typing import List, Optional

View file

@ -10,7 +10,7 @@
import os, json, time
import litellm
from litellm.utils import ModelResponse
import requests, threading
import requests, threading # type: ignore
from typing import Optional, Union, Literal

View file

@ -1,7 +1,6 @@
#### What this does ####
# On success + failure, log events to aispend.io
import dotenv, os
import requests
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback

View file

@ -4,18 +4,30 @@ import datetime
class AthinaLogger:
def __init__(self):
import os
self.athina_api_key = os.getenv("ATHINA_API_KEY")
self.headers = {
"athina-api-key": self.athina_api_key,
"Content-Type": "application/json"
"Content-Type": "application/json",
}
self.athina_logging_url = "https://log.athina.ai/api/v1/log/inference"
self.additional_keys = ["environment", "prompt_slug", "customer_id", "customer_user_id", "session_id", "external_reference_id", "context", "expected_response", "user_query"]
self.additional_keys = [
"environment",
"prompt_slug",
"customer_id",
"customer_user_id",
"session_id",
"external_reference_id",
"context",
"expected_response",
"user_query",
]
def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
import requests
import requests # type: ignore
import json
import traceback
try:
response_json = response_obj.model_dump() if response_obj else {}
data = {
@ -23,32 +35,51 @@ class AthinaLogger:
"request": kwargs,
"response": response_json,
"prompt_tokens": response_json.get("usage", {}).get("prompt_tokens"),
"completion_tokens": response_json.get("usage", {}).get("completion_tokens"),
"completion_tokens": response_json.get("usage", {}).get(
"completion_tokens"
),
"total_tokens": response_json.get("usage", {}).get("total_tokens"),
}
if type(end_time) == datetime.datetime and type(start_time) == datetime.datetime:
data["response_time"] = int((end_time - start_time).total_seconds() * 1000)
if (
type(end_time) == datetime.datetime
and type(start_time) == datetime.datetime
):
data["response_time"] = int(
(end_time - start_time).total_seconds() * 1000
)
if "messages" in kwargs:
data["prompt"] = kwargs.get("messages", None)
# Directly add tools or functions if present
optional_params = kwargs.get("optional_params", {})
data.update((k, v) for k, v in optional_params.items() if k in ["tools", "functions"])
data.update(
(k, v)
for k, v in optional_params.items()
if k in ["tools", "functions"]
)
# Add additional metadata keys
metadata = kwargs.get("litellm_params", {}).get("metadata", {})
metadata = kwargs.get("litellm_params", {}).get("metadata", {})
if metadata:
for key in self.additional_keys:
if key in metadata:
data[key] = metadata[key]
response = requests.post(self.athina_logging_url, headers=self.headers, data=json.dumps(data, default=str))
response = requests.post(
self.athina_logging_url,
headers=self.headers,
data=json.dumps(data, default=str),
)
if response.status_code != 200:
print_verbose(f"Athina Logger Error - {response.text}, {response.status_code}")
print_verbose(
f"Athina Logger Error - {response.text}, {response.status_code}"
)
else:
print_verbose(f"Athina Logger Succeeded - {response.text}")
except Exception as e:
print_verbose(f"Athina Logger Error - {e}, Stack trace: {traceback.format_exc()}")
pass
print_verbose(
f"Athina Logger Error - {e}, Stack trace: {traceback.format_exc()}"
)
pass
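
The Athina hunks are mostly black-style reformatting; the logger still reads `ATHINA_API_KEY` and POSTs each call to `https://log.athina.ai/api/v1/log/inference`, forwarding metadata keys such as `environment` and `prompt_slug`. A minimal sketch of wiring it up as a success callback (the `"athina"` callback name and the key value are assumptions):

```python
import os
import litellm

os.environ["ATHINA_API_KEY"] = "your-athina-api-key"  # placeholder
litellm.success_callback = ["athina"]  # assumed registered name for this logger

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi"}],
    metadata={"environment": "staging", "prompt_slug": "greeting"},  # keys the logger forwards
)
print(response)
```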

View file

@ -1,7 +1,7 @@
#### What this does ####
# On success + failure, log events to aispend.io
import dotenv, os
import requests
import requests # type: ignore
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback

View file

@ -3,7 +3,6 @@
#### What this does ####
# On success, logs events to Promptlayer
import dotenv, os
import requests
from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching import DualCache

View file

@ -1,7 +1,6 @@
#### What this does ####
# On success, logs events to Promptlayer
import dotenv, os
import requests
from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching import DualCache

View file

@ -2,7 +2,7 @@
# On success + failure, log events to Supabase
import dotenv, os
import requests
import requests # type: ignore
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback

View file

@ -2,7 +2,7 @@
# On success + failure, log events to Supabase
import dotenv, os
import requests
import requests # type: ignore
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback

View file

@ -1,15 +1,17 @@
import requests
import requests # type: ignore
import json
import traceback
from datetime import datetime, timezone
class GreenscaleLogger:
def __init__(self):
import os
self.greenscale_api_key = os.getenv("GREENSCALE_API_KEY")
self.headers = {
"api-key": self.greenscale_api_key,
"Content-Type": "application/json"
"Content-Type": "application/json",
}
self.greenscale_logging_url = os.getenv("GREENSCALE_ENDPOINT")
@ -19,33 +21,48 @@ class GreenscaleLogger:
data = {
"modelId": kwargs.get("model"),
"inputTokenCount": response_json.get("usage", {}).get("prompt_tokens"),
"outputTokenCount": response_json.get("usage", {}).get("completion_tokens"),
"outputTokenCount": response_json.get("usage", {}).get(
"completion_tokens"
),
}
data["timestamp"] = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
if type(end_time) == datetime and type(start_time) == datetime:
data["invocationLatency"] = int((end_time - start_time).total_seconds() * 1000)
data["timestamp"] = datetime.now(timezone.utc).strftime(
"%Y-%m-%dT%H:%M:%SZ"
)
if type(end_time) == datetime and type(start_time) == datetime:
data["invocationLatency"] = int(
(end_time - start_time).total_seconds() * 1000
)
# Add additional metadata keys to tags
tags = []
metadata = kwargs.get("litellm_params", {}).get("metadata", {})
for key, value in metadata.items():
if key.startswith("greenscale"):
if key.startswith("greenscale"):
if key == "greenscale_project":
data["project"] = value
elif key == "greenscale_application":
data["application"] = value
else:
tags.append({"key": key.replace("greenscale_", ""), "value": str(value)})
tags.append(
{"key": key.replace("greenscale_", ""), "value": str(value)}
)
data["tags"] = tags
response = requests.post(self.greenscale_logging_url, headers=self.headers, data=json.dumps(data, default=str))
response = requests.post(
self.greenscale_logging_url,
headers=self.headers,
data=json.dumps(data, default=str),
)
if response.status_code != 200:
print_verbose(f"Greenscale Logger Error - {response.text}, {response.status_code}")
print_verbose(
f"Greenscale Logger Error - {response.text}, {response.status_code}"
)
else:
print_verbose(f"Greenscale Logger Succeeded - {response.text}")
except Exception as e:
print_verbose(f"Greenscale Logger Error - {e}, Stack trace: {traceback.format_exc()}")
pass
print_verbose(
f"Greenscale Logger Error - {e}, Stack trace: {traceback.format_exc()}"
)
pass

View file

@ -1,7 +1,7 @@
#### What this does ####
# On success, logs events to Helicone
import dotenv, os
import requests
import requests # type: ignore
import litellm
dotenv.load_dotenv() # Loading env variables using dotenv

View file

@ -1,15 +1,14 @@
#### What this does ####
# On success, logs events to Langsmith
import dotenv, os
import requests
import requests
import dotenv, os # type: ignore
import requests # type: ignore
from datetime import datetime
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback
import asyncio
import types
from pydantic import BaseModel
from pydantic import BaseModel # type: ignore
def is_serializable(value):
@ -79,8 +78,6 @@ class LangsmithLogger:
except:
response_obj = response_obj.dict() # type: ignore
print(f"response_obj: {response_obj}")
data = {
"name": run_name,
"run_type": "llm", # this should always be llm, since litellm always logs llm calls. Langsmith allow us to log "chain"
@ -90,7 +87,6 @@ class LangsmithLogger:
"start_time": start_time,
"end_time": end_time,
}
print(f"data: {data}")
response = requests.post(
"https://api.smith.langchain.com/runs",

View file

@ -2,7 +2,6 @@
## On Success events log cost to OpenMeter - https://github.com/BerriAI/litellm/issues/1268
import dotenv, os, json
import requests
import litellm
dotenv.load_dotenv() # Loading env variables using dotenv
@ -60,7 +59,7 @@ class OpenMeterLogger(CustomLogger):
"total_tokens": response_obj["usage"].get("total_tokens"),
}
subject = kwargs.get("user", None), # end-user passed in via 'user' param
subject = (kwargs.get("user", None),) # end-user passed in via 'user' param
if not subject:
raise Exception("OpenMeter: user is required")

View file

@ -3,7 +3,7 @@
# On success, log events to Prometheus
import dotenv, os
import requests
import requests # type: ignore
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback
@ -19,7 +19,6 @@ class PrometheusLogger:
**kwargs,
):
try:
print(f"in init prometheus metrics")
from prometheus_client import Counter
self.litellm_llm_api_failed_requests_metric = Counter(

View file

@ -4,7 +4,7 @@
import dotenv, os
import requests
import requests # type: ignore
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback
@ -183,7 +183,6 @@ class PrometheusServicesLogger:
)
async def async_service_failure_hook(self, payload: ServiceLoggerPayload):
print(f"received error payload: {payload.error}")
if self.mock_testing:
self.mock_testing_failure_calls += 1

View file

@ -1,12 +1,13 @@
#### What this does ####
# On success, logs events to Promptlayer
import dotenv, os
import requests
import requests # type: ignore
from pydantic import BaseModel
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback
class PromptLayerLogger:
# Class variables or attributes
def __init__(self):
@ -32,7 +33,11 @@ class PromptLayerLogger:
tags = kwargs["litellm_params"]["metadata"]["pl_tags"]
# Remove "pl_tags" from metadata
metadata = {k:v for k, v in kwargs["litellm_params"]["metadata"].items() if k != "pl_tags"}
metadata = {
k: v
for k, v in kwargs["litellm_params"]["metadata"].items()
if k != "pl_tags"
}
print_verbose(
f"Prompt Layer Logging - Enters logging function for model kwargs: {new_kwargs}\n, response: {response_obj}"

View file

@ -2,7 +2,6 @@
# On success + failure, log events to Supabase
import dotenv, os
import requests
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback

View file

@ -710,6 +710,7 @@ Model Info:
"db_exceptions",
"daily_reports",
"new_model_added",
"cooldown_deployment",
],
**kwargs,
):

View file

@ -2,7 +2,7 @@
# On success + failure, log events to Supabase
import dotenv, os
import requests
import requests # type: ignore
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback

View file

@ -1,8 +1,8 @@
import os, types, traceback
import json
from enum import Enum
import requests
import time, httpx
import requests # type: ignore
import time, httpx # type: ignore
from typing import Callable, Optional
from litellm.utils import ModelResponse, Choices, Message
import litellm

View file

@ -1,12 +1,12 @@
import os, types
import json
from enum import Enum
import requests
import requests # type: ignore
import time
from typing import Callable, Optional
import litellm
from litellm.utils import ModelResponse, Choices, Message, Usage
import httpx
import httpx # type: ignore
class AlephAlphaError(Exception):

View file

@ -1,7 +1,7 @@
import os, types
import json
from enum import Enum
import requests, copy
import requests, copy # type: ignore
import time
from typing import Callable, Optional, List
from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
@ -9,7 +9,7 @@ import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from .base import BaseLLM
import httpx
import httpx # type: ignore
class AnthropicConstants(Enum):

View file

@ -1,5 +1,5 @@
from typing import Optional, Union, Any
import types, requests
import types, requests # type: ignore
from .base import BaseLLM
from litellm.utils import (
ModelResponse,
@ -12,7 +12,7 @@ from litellm.utils import (
from typing import Callable, Optional, BinaryIO
from litellm import OpenAIConfig
import litellm, json
import httpx
import httpx # type: ignore
from .custom_httpx.azure_dall_e_2 import CustomHTTPTransport, AsyncCustomHTTPTransport
from openai import AzureOpenAI, AsyncAzureOpenAI
import uuid

View file

@ -1,5 +1,5 @@
from typing import Optional, Union, Any
import types, requests
import types, requests # type: ignore
from .base import BaseLLM
from litellm.utils import (
ModelResponse,

View file

@ -1,7 +1,7 @@
import os
import json
from enum import Enum
import requests
import requests # type: ignore
import time
from typing import Callable
from litellm.utils import ModelResponse, Usage

View file

@ -163,10 +163,9 @@ class AmazonAnthropicClaude3Config:
"stop",
"temperature",
"top_p",
"extra_headers"
"extra_headers",
]
def map_openai_params(self, non_default_params: dict, optional_params: dict):
for param, value in non_default_params.items():
if param == "max_tokens":
@ -534,10 +533,12 @@ class AmazonStabilityConfig:
def add_custom_header(headers):
"""Closure to capture the headers and add them."""
def callback(request, **kwargs):
"""Actual callback function that Boto3 will call."""
for header_name, header_value in headers.items():
request.headers.add_header(header_name, header_value)
return callback
@ -672,7 +673,9 @@ def init_bedrock_client(
config=config,
)
if extra_headers:
client.meta.events.register('before-sign.bedrock-runtime.*', add_custom_header(extra_headers))
client.meta.events.register(
"before-sign.bedrock-runtime.*", add_custom_header(extra_headers)
)
return client
@ -1224,7 +1227,7 @@ def _embedding_func_single(
"input_type", "search_document"
) # aws bedrock example default - https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=cohere.embed-english-v3
data = {"texts": [input], **inference_params} # type: ignore
body = json.dumps(data).encode("utf-8")
body = json.dumps(data).encode("utf-8") # type: ignore
## LOGGING
request_str = f"""
response = client.invoke_model(
@ -1416,7 +1419,7 @@ def image_generation(
## LOGGING
request_str = f"""
response = client.invoke_model(
body={body},
body={body}, # type: ignore
modelId={modelId},
accept="application/json",
contentType="application/json",
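
The bedrock changes above add `"extra_headers"` to the supported params for Claude 3 and register a boto3 `before-sign.bedrock-runtime.*` hook so those headers are attached to every request before SigV4 signing. A minimal sketch of passing custom headers through a completion call (the model id and header are illustrative; AWS credentials are assumed to be configured in the environment):

```python
import litellm

response = litellm.completion(
    model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",  # example Bedrock model id
    messages=[{"role": "user", "content": "Hello from Bedrock"}],
    extra_headers={"X-Custom-Trace-Id": "abc-123"},  # added to the request before signing
)
print(response.choices[0].message.content)
```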

View file

@ -1,11 +1,11 @@
import os, types
import json
from enum import Enum
import requests
import requests # type: ignore
import time
from typing import Callable, Optional
import litellm
import httpx
import httpx # type: ignore
from litellm.utils import ModelResponse, Usage
from .prompt_templates.factory import prompt_factory, custom_prompt

View file

@ -1,12 +1,12 @@
import os, types
import json
from enum import Enum
import requests
import requests # type: ignore
import time, traceback
from typing import Callable, Optional
from litellm.utils import ModelResponse, Choices, Message, Usage
import litellm
import httpx
import httpx # type: ignore
class CohereError(Exception):

View file

@ -1,12 +1,12 @@
import os, types
import json
from enum import Enum
import requests
import requests # type: ignore
import time, traceback
from typing import Callable, Optional
from litellm.utils import ModelResponse, Choices, Message, Usage
import litellm
import httpx
import httpx # type: ignore
from .prompt_templates.factory import cohere_message_pt

View file

@ -1,7 +1,7 @@
import os, types
import json
from enum import Enum
import requests
import requests # type: ignore
import time, traceback
from typing import Callable, Optional, List
from litellm.utils import ModelResponse, Choices, Message, Usage

View file

@ -1,7 +1,7 @@
import os, types
import json
from enum import Enum
import requests
import requests # type: ignore
import time
from typing import Callable, Optional
import litellm

View file

@ -1,10 +1,10 @@
from itertools import chain
import requests, types, time
import requests, types, time # type: ignore
import json, uuid
import traceback
from typing import Optional
import litellm
import httpx, aiohttp, asyncio
import httpx, aiohttp, asyncio # type: ignore
from .prompt_templates.factory import prompt_factory, custom_prompt
@ -220,7 +220,10 @@ def get_ollama_response(
tool_calls=[
{
"id": f"call_{str(uuid.uuid4())}",
"function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
"function": {
"name": function_call["name"],
"arguments": json.dumps(function_call["arguments"]),
},
"type": "function",
}
],
@ -232,7 +235,9 @@ def get_ollama_response(
model_response["created"] = int(time.time())
model_response["model"] = "ollama/" + model
prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt, disallowed_special=()))) # type: ignore
completion_tokens = response_json.get("eval_count", len(response_json.get("message",dict()).get("content", "")))
completion_tokens = response_json.get(
"eval_count", len(response_json.get("message", dict()).get("content", ""))
)
model_response["usage"] = litellm.Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
@ -273,7 +278,10 @@ def ollama_completion_stream(url, data, logging_obj):
tool_calls=[
{
"id": f"call_{str(uuid.uuid4())}",
"function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
"function": {
"name": function_call["name"],
"arguments": json.dumps(function_call["arguments"]),
},
"type": "function",
}
],
@ -314,9 +322,10 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob
first_chunk_content = first_chunk.choices[0].delta.content or ""
response_content = first_chunk_content + "".join(
[
chunk.choices[0].delta.content
async for chunk in streamwrapper
if chunk.choices[0].delta.content]
chunk.choices[0].delta.content
async for chunk in streamwrapper
if chunk.choices[0].delta.content
]
)
function_call = json.loads(response_content)
delta = litellm.utils.Delta(
@ -324,7 +333,10 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob
tool_calls=[
{
"id": f"call_{str(uuid.uuid4())}",
"function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
"function": {
"name": function_call["name"],
"arguments": json.dumps(function_call["arguments"]),
},
"type": "function",
}
],
@ -373,7 +385,10 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
tool_calls=[
{
"id": f"call_{str(uuid.uuid4())}",
"function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
"function": {
"name": function_call["name"],
"arguments": json.dumps(function_call["arguments"]),
},
"type": "function",
}
],
@ -387,7 +402,10 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
model_response["created"] = int(time.time())
model_response["model"] = "ollama/" + data["model"]
prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(data["prompt"], disallowed_special=()))) # type: ignore
completion_tokens = response_json.get("eval_count", len(response_json.get("message",dict()).get("content", "")))
completion_tokens = response_json.get(
"eval_count",
len(response_json.get("message", dict()).get("content", "")),
)
model_response["usage"] = litellm.Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
@ -475,6 +493,7 @@ async def ollama_aembeddings(
}
return model_response
def ollama_embeddings(
api_base: str,
model: str,
@ -492,5 +511,6 @@ def ollama_embeddings(
optional_params,
logging_obj,
model_response,
encoding)
encoding,
)
)

View file

@ -1,7 +1,7 @@
import os
import json
from enum import Enum
import requests
import requests # type: ignore
import time
from typing import Callable, Optional
from litellm.utils import ModelResponse, Usage

View file

@ -22,7 +22,6 @@ from litellm.utils import (
TextCompletionResponse,
)
from typing import Callable, Optional
import aiohttp, requests
import litellm
from .prompt_templates.factory import prompt_factory, custom_prompt
from openai import OpenAI, AsyncOpenAI

View file

@ -1,7 +1,7 @@
import os, types
import json
from enum import Enum
import requests
import requests # type: ignore
import time
from typing import Callable, Optional
import litellm

View file

@ -1,25 +1,27 @@
import json
import re
import traceback
import uuid
import xml.etree.ElementTree as ET
from enum import Enum
from typing import Any, List, Mapping, MutableMapping, Optional, Sequence
import requests
from jinja2 import BaseLoader, Template, exceptions, meta
import requests, traceback
import json, re, xml.etree.ElementTree as ET
from jinja2 import Template, exceptions, meta, BaseLoader
from jinja2.sandbox import ImmutableSandboxedEnvironment
from typing import (
Any,
List,
Mapping,
MutableMapping,
Optional,
Sequence,
)
import litellm
from litellm.types.completion import (
ChatCompletionFunctionMessageParam,
ChatCompletionMessageParam,
ChatCompletionMessageToolCallParam,
ChatCompletionSystemMessageParam,
ChatCompletionToolMessageParam,
ChatCompletionUserMessageParam,
ChatCompletionSystemMessageParam,
ChatCompletionMessageParam,
ChatCompletionFunctionMessageParam,
ChatCompletionMessageToolCallParam,
ChatCompletionToolMessageParam,
)
from litellm.types.llms.anthropic import *
import uuid
def default_pt(messages):
@ -601,9 +603,8 @@ def construct_tool_use_system_prompt(
def convert_url_to_base64(url):
import base64
import requests
import base64
for _ in range(3):
try:
@ -983,7 +984,6 @@ def anthropic_messages_pt(messages: list):
new_messages: list = []
msg_i = 0
tool_use_param = False
merge_with_previous = False
while msg_i < len(messages):
user_content = []
init_msg_i = msg_i
@ -1016,13 +1016,7 @@ def anthropic_messages_pt(messages: list):
msg_i += 1
if user_content:
if merge_with_previous:
new_messages[-1]["content"].extend(user_content)
merge_with_previous = False
else:
new_messages.append({"role": "user", "content": user_content})
elif msg_i > 0:
merge_with_previous = True
new_messages.append({"role": "user", "content": user_content})
assistant_content = []
## MERGE CONSECUTIVE ASSISTANT CONTENT ##
@ -1050,13 +1044,7 @@ def anthropic_messages_pt(messages: list):
msg_i += 1
if assistant_content:
if merge_with_previous:
new_messages[-1]["content"].extend(assistant_content)
merge_with_previous = False
else:
new_messages.append({"role": "assistant", "content": assistant_content})
elif msg_i > 0:
merge_with_previous = True
new_messages.append({"role": "assistant", "content": assistant_content})
if msg_i == init_msg_i: # prevent infinite loops
raise Exception(

View file

@ -1,11 +1,11 @@
import os, types
import json
import requests
import requests # type: ignore
import time
from typing import Callable, Optional
from litellm.utils import ModelResponse, Usage
import litellm
import httpx
import httpx # type: ignore
from .prompt_templates.factory import prompt_factory, custom_prompt

View file

@ -1,14 +1,14 @@
import os, types, traceback
from enum import Enum
import json
import requests
import requests # type: ignore
import time
from typing import Callable, Optional, Any
import litellm
from litellm.utils import ModelResponse, EmbeddingResponse, get_secret, Usage
import sys
from copy import deepcopy
import httpx
import httpx # type: ignore
from .prompt_templates.factory import prompt_factory, custom_prompt
@ -295,7 +295,7 @@ def completion(
EndpointName={model},
InferenceComponentName={model_id},
ContentType="application/json",
Body={data},
Body={data}, # type: ignore
CustomAttributes="accept_eula=true",
)
""" # type: ignore
@ -321,7 +321,7 @@ def completion(
response = client.invoke_endpoint(
EndpointName={model},
ContentType="application/json",
Body={data},
Body={data}, # type: ignore
CustomAttributes="accept_eula=true",
)
""" # type: ignore
@ -688,7 +688,7 @@ def embedding(
response = client.invoke_endpoint(
EndpointName={model},
ContentType="application/json",
Body={data},
Body={data}, # type: ignore
CustomAttributes="accept_eula=true",
)""" # type: ignore
logging_obj.pre_call(

View file

@ -6,11 +6,11 @@ Reference: https://docs.together.ai/docs/openai-api-compatibility
import os, types
import json
from enum import Enum
import requests
import requests # type: ignore
import time
from typing import Callable, Optional
import litellm
import httpx
import httpx # type: ignore
from litellm.utils import ModelResponse, Usage
from .prompt_templates.factory import prompt_factory, custom_prompt

View file

@ -1,12 +1,12 @@
import os, types
import json
from enum import Enum
import requests
import requests # type: ignore
import time
from typing import Callable, Optional, Union, List
from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
import litellm, uuid
import httpx, inspect
import httpx, inspect # type: ignore
class VertexAIError(Exception):

View file

@ -3,7 +3,7 @@
import os, types
import json
from enum import Enum
import requests, copy
import requests, copy # type: ignore
import time, uuid
from typing import Callable, Optional, List
from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
@ -17,7 +17,7 @@ from .prompt_templates.factory import (
extract_between_tags,
parse_xml_params,
)
import httpx
import httpx # type: ignore
class VertexAIError(Exception):

View file

@ -1,8 +1,8 @@
import os
import json
from enum import Enum
import requests
import time, httpx
import requests # type: ignore
import time, httpx # type: ignore
from typing import Callable, Any
from litellm.utils import ModelResponse, Usage
from .prompt_templates.factory import prompt_factory, custom_prompt

View file

@ -3,8 +3,8 @@ import json, types, time # noqa: E401
from contextlib import contextmanager
from typing import Callable, Dict, Optional, Any, Union, List
import httpx
import requests
import httpx # type: ignore
import requests # type: ignore
import litellm
from litellm.utils import ModelResponse, get_secret, Usage

View file

@ -12,7 +12,6 @@ from typing import Any, Literal, Union, BinaryIO
from functools import partial
import dotenv, traceback, random, asyncio, time, contextvars
from copy import deepcopy
import httpx
import litellm
from ._logging import verbose_logger
@ -2950,7 +2949,9 @@ def embedding(
model=model, # type: ignore
llm_provider="ollama", # type: ignore
)
ollama_embeddings_fn = ollama.ollama_aembeddings if aembedding else ollama.ollama_embeddings
ollama_embeddings_fn = (
ollama.ollama_aembeddings if aembedding else ollama.ollama_embeddings
)
response = ollama_embeddings_fn(
api_base=api_base,
model=model,
@ -3094,6 +3095,7 @@ async def atext_completion(*args, **kwargs):
or custom_llm_provider == "huggingface"
or custom_llm_provider == "ollama"
or custom_llm_provider == "vertex_ai"
or custom_llm_provider in litellm.openai_compatible_providers
): # currently implemented aiohttp calls for just azure and openai, soon all.
# Await normally
response = await loop.run_in_executor(None, func_with_context)
@ -3124,6 +3126,8 @@ async def atext_completion(*args, **kwargs):
## TRANSLATE CHAT TO TEXT FORMAT ##
if isinstance(response, TextCompletionResponse):
return response
elif asyncio.iscoroutine(response):
response = await response
text_completion_response = TextCompletionResponse()
text_completion_response["id"] = response.get("id", None)
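
The hunks above route OpenAI-compatible providers through the async text-completion path and, when the underlying call returns a coroutine, await it before translating the chat response into text-completion format. A minimal usage sketch (assumes `OPENAI_API_KEY` is set):

```python
import asyncio
import litellm

async def main():
    # Async text completion; chat-style responses are translated to text-completion format.
    response = await litellm.atext_completion(
        model="gpt-3.5-turbo-instruct",
        prompt="Say hello in one word.",
        max_tokens=5,
    )
    print(response.choices[0].text)

asyncio.run(main())
```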

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{11837:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_12bbc4', '__Inter_Fallback_12bbc4'",fontStyle:"normal"},className:"__className_12bbc4"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=11837)}),_N_E=n.O()}]);

View file

@ -1 +0,0 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_c23dc8', '__Inter_Fallback_c23dc8'",fontStyle:"normal"},className:"__className_c23dc8"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=87421)}),_N_E=n.O()}]);

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{70377:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(70377)}),_N_E=e.O()}]);

View file

@ -1 +0,0 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);

View file

@ -1 +1 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/91264d1b81b58743.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/7de0c97d470f519f.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-cfde7590f67e0020.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-cfde7590f67e0020.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/91264d1b81b58743.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[58854,[\"936\",\"static/chunks/2f6dbc85-17d29013b8ff3da5.js\",\"142\",\"static/chunks/142-11990a208bf93746.js\",\"931\",\"static/chunks/app/page-df6123fdb38c9b0d.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/91264d1b81b58743.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"u7rM4cBDmVnacAN_shjp6\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d85d62a2bbfac48f.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d85d62a2bbfac48f.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/7de0c97d470f519f.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[18889,[\"936\",\"static/chunks/2f6dbc85-17d29013b8ff3da5.js\",\"319\",\"static/chunks/319-4467f3d35ad11cf1.js\",\"931\",\"static/chunks/app/page-f32196ae7cd3d914.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/7de0c97d470f519f.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"OcLXYgLcgQyjMd6bH1bqU\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

View file

@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[58854,["936","static/chunks/2f6dbc85-17d29013b8ff3da5.js","142","static/chunks/142-11990a208bf93746.js","931","static/chunks/app/page-df6123fdb38c9b0d.js"],""]
3:I[18889,["936","static/chunks/2f6dbc85-17d29013b8ff3da5.js","319","static/chunks/319-4467f3d35ad11cf1.js","931","static/chunks/app/page-f32196ae7cd3d914.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["u7rM4cBDmVnacAN_shjp6",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/91264d1b81b58743.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["OcLXYgLcgQyjMd6bH1bqU",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/7de0c97d470f519f.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null


@ -17,6 +17,9 @@ model_list:
- model_name: gpt-4
litellm_params:
model: gpt-3.5-turbo
- litellm_params:
model: together_ai/codellama/CodeLlama-13b-Instruct-hf
model_name: CodeLlama-13b-Instruct
router_settings:
num_retries: 0
enable_pre_call_checks: true


@ -252,7 +252,7 @@ def run_server(
if model and "ollama" in model and api_base is None:
run_ollama_serve()
if test_async is True:
import requests, concurrent, time
import requests, concurrent, time # type: ignore
api_base = f"http://{host}:{port}"
@ -418,7 +418,7 @@ def run_server(
read from there and save it to os.env['DATABASE_URL']
"""
try:
import yaml, asyncio
import yaml, asyncio # type: ignore
except:
raise ImportError(
"yaml needs to be imported. Run - `pip install 'litellm[proxy]'`"


@ -30,7 +30,7 @@ sys.path.insert(
try:
import fastapi
import backoff
import yaml
import yaml # type: ignore
import orjson
import logging
from apscheduler.schedulers.asyncio import AsyncIOScheduler
@ -2516,20 +2516,24 @@ class ProxyConfig:
router = litellm.Router(**router_params) # type:ignore
return router, model_list, general_settings
def get_model_info_with_id(self, model) -> RouterModelInfo:
def get_model_info_with_id(self, model, db_model=False) -> RouterModelInfo:
"""
Common logic across add + delete router models
Parameters:
- deployment
- db_model -> flag for differentiating model stored in db vs. config -> used on UI
Return model info w/ id
"""
if model.model_info is not None and isinstance(model.model_info, dict):
if "id" not in model.model_info:
model.model_info["id"] = model.model_id
if "db_model" in model.model_info and model.model_info["db_model"] == False:
model.model_info["db_model"] = db_model
_model_info = RouterModelInfo(**model.model_info)
else:
_model_info = RouterModelInfo(id=model.model_id)
_model_info = RouterModelInfo(id=model.model_id, db_model=db_model)
return _model_info
async def _delete_deployment(self, db_models: list) -> int:
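The `db_model` flag is what the admin UI later reads to tell a db-stored deployment apart from one defined in the config file. A small sketch of the flag on the model-info object, assuming `RouterModelInfo` is the `ModelInfo` type exported from `litellm.types.router`:

```python
from litellm.types.router import ModelInfo

db_info = ModelInfo(id="gm", db_model=True)    # deployment synced from the db
config_info = ModelInfo(id="hello")            # config deployment; flag defaults to False
print(db_info.db_model, config_info.db_model)  # True False
```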
@ -2612,7 +2616,13 @@ class ProxyConfig:
for k, v in _litellm_params.items():
if isinstance(v, str):
# decode base64
decoded_b64 = base64.b64decode(v)
try:
decoded_b64 = base64.b64decode(v)
except Exception as e:
verbose_proxy_logger.error(
"Error decoding value - {}".format(v)
)
continue
# decrypt value
_value = decrypt_value(value=decoded_b64, master_key=master_key)
# sanity check if string > size 0
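Decoding is now best-effort: a stored value that is not valid base64 gets logged and skipped instead of aborting the whole db-model load. A standalone sketch of that pattern, using a hypothetical helper separate from the proxy's `decrypt_value`:

```python
import base64
import logging
from typing import Optional

logger = logging.getLogger(__name__)

def decode_or_skip(value: str) -> Optional[bytes]:
    """Return decoded bytes, or None when the value is not valid base64."""
    try:
        return base64.b64decode(value)
    except Exception:
        logger.error("Error decoding value - %s", value)
        return None

stored = {"api_key": base64.b64encode(b"sk-test").decode(), "api_base": "not base64 at all"}
decoded = {k: decode_or_skip(v) for k, v in stored.items()}  # undecodable values map to None
```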
@ -2624,9 +2634,11 @@ class ProxyConfig:
f"Invalid model added to proxy db. Invalid litellm params. litellm_params={_litellm_params}"
)
continue # skip to next model
_model_info = self.get_model_info_with_id(model=m)
_model_info = self.get_model_info_with_id(
model=m, db_model=True
) ## 👈 FLAG = True for db_models
added = llm_router.add_deployment(
added = llm_router.upsert_deployment(
deployment=Deployment(
model_name=m.model_name,
litellm_params=_litellm_params,
@ -3487,6 +3499,11 @@ def model_list(
dependencies=[Depends(user_api_key_auth)],
tags=["chat/completions"],
)
@router.post(
"/engines/{model:path}/chat/completions",
dependencies=[Depends(user_api_key_auth)],
tags=["chat/completions"],
)
@router.post(
"/openai/deployments/{model:path}/chat/completions",
dependencies=[Depends(user_api_key_auth)],
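This adds the Azure-style `/engines/{model}/chat/completions` path alongside the existing routes. A hedged client-side sketch, assuming a proxy running locally on port 4000 with a `gpt-3.5-turbo` deployment and `sk-1234` as a placeholder key:

```python
import requests

resp = requests.post(
    "http://localhost:4000/engines/gpt-3.5-turbo/chat/completions",
    headers={"Authorization": "Bearer sk-1234"},
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "hi"}],
        "max_tokens": 10,
    },
)
print(resp.status_code, resp.json())
```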
@ -3702,6 +3719,7 @@ async def chat_completion(
"x-litellm-model-id": model_id,
"x-litellm-cache-key": cache_key,
"x-litellm-model-api-base": api_base,
"x-litellm-version": version,
}
selected_data_generator = select_data_generator(
response=response,
@ -3717,6 +3735,7 @@ async def chat_completion(
fastapi_response.headers["x-litellm-model-id"] = model_id
fastapi_response.headers["x-litellm-cache-key"] = cache_key
fastapi_response.headers["x-litellm-model-api-base"] = api_base
fastapi_response.headers["x-litellm-version"] = version
### CALL HOOKS ### - modify outgoing data
response = await proxy_logging_obj.post_call_success_hook(
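`x-litellm-version` now travels back with the existing model-id, cache-key and api-base headers, for streaming and non-streaming responses alike; the same set is added to embeddings, image generation, audio transcription and moderations below. A small sketch of reading the headers on the client (placeholder host and key):

```python
import requests

resp = requests.post(
    "http://localhost:4000/chat/completions",
    headers={"Authorization": "Bearer sk-1234"},
    json={"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "hi"}]},
)
for name in ("x-litellm-model-id", "x-litellm-cache-key",
             "x-litellm-model-api-base", "x-litellm-version"):
    print(name, resp.headers.get(name))
```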
@ -3873,14 +3892,10 @@ async def completion(
},
)
if hasattr(response, "_hidden_params"):
model_id = response._hidden_params.get("model_id", None) or ""
original_response = (
response._hidden_params.get("original_response", None) or ""
)
else:
model_id = ""
original_response = ""
hidden_params = getattr(response, "_hidden_params", {}) or {}
model_id = hidden_params.get("model_id", None) or ""
cache_key = hidden_params.get("cache_key", None) or ""
api_base = hidden_params.get("api_base", None) or ""
verbose_proxy_logger.debug("final response: %s", response)
if (
@ -3888,6 +3903,9 @@ async def completion(
): # use generate_responses to stream responses
custom_headers = {
"x-litellm-model-id": model_id,
"x-litellm-cache-key": cache_key,
"x-litellm-model-api-base": api_base,
"x-litellm-version": version,
}
selected_data_generator = select_data_generator(
response=response,
@ -3902,6 +3920,10 @@ async def completion(
)
fastapi_response.headers["x-litellm-model-id"] = model_id
fastapi_response.headers["x-litellm-cache-key"] = cache_key
fastapi_response.headers["x-litellm-model-api-base"] = api_base
fastapi_response.headers["x-litellm-version"] = version
return response
except Exception as e:
data["litellm_status"] = "fail" # used for alerting
@ -3941,6 +3963,7 @@ async def completion(
) # azure compatible endpoint
async def embeddings(
request: Request,
fastapi_response: Response,
model: Optional[str] = None,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
@ -4087,6 +4110,17 @@ async def embeddings(
### ALERTING ###
data["litellm_status"] = "success" # used for alerting
### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {}
model_id = hidden_params.get("model_id", None) or ""
cache_key = hidden_params.get("cache_key", None) or ""
api_base = hidden_params.get("api_base", None) or ""
fastapi_response.headers["x-litellm-model-id"] = model_id
fastapi_response.headers["x-litellm-cache-key"] = cache_key
fastapi_response.headers["x-litellm-model-api-base"] = api_base
fastapi_response.headers["x-litellm-version"] = version
return response
except Exception as e:
data["litellm_status"] = "fail" # used for alerting
@ -4125,6 +4159,7 @@ async def embeddings(
)
async def image_generation(
request: Request,
fastapi_response: Response,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
global proxy_logging_obj
@ -4244,6 +4279,17 @@ async def image_generation(
### ALERTING ###
data["litellm_status"] = "success" # used for alerting
### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {}
model_id = hidden_params.get("model_id", None) or ""
cache_key = hidden_params.get("cache_key", None) or ""
api_base = hidden_params.get("api_base", None) or ""
fastapi_response.headers["x-litellm-model-id"] = model_id
fastapi_response.headers["x-litellm-cache-key"] = cache_key
fastapi_response.headers["x-litellm-model-api-base"] = api_base
fastapi_response.headers["x-litellm-version"] = version
return response
except Exception as e:
data["litellm_status"] = "fail" # used for alerting
@ -4280,6 +4326,7 @@ async def image_generation(
)
async def audio_transcriptions(
request: Request,
fastapi_response: Response,
file: UploadFile = File(...),
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
@ -4424,6 +4471,18 @@ async def audio_transcriptions(
### ALERTING ###
data["litellm_status"] = "success" # used for alerting
### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {}
model_id = hidden_params.get("model_id", None) or ""
cache_key = hidden_params.get("cache_key", None) or ""
api_base = hidden_params.get("api_base", None) or ""
fastapi_response.headers["x-litellm-model-id"] = model_id
fastapi_response.headers["x-litellm-cache-key"] = cache_key
fastapi_response.headers["x-litellm-model-api-base"] = api_base
fastapi_response.headers["x-litellm-version"] = version
return response
except Exception as e:
data["litellm_status"] = "fail" # used for alerting
@ -4463,6 +4522,7 @@ async def audio_transcriptions(
)
async def moderations(
request: Request,
fastapi_response: Response,
user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
):
"""
@ -4587,6 +4647,17 @@ async def moderations(
### ALERTING ###
data["litellm_status"] = "success" # used for alerting
### RESPONSE HEADERS ###
hidden_params = getattr(response, "_hidden_params", {}) or {}
model_id = hidden_params.get("model_id", None) or ""
cache_key = hidden_params.get("cache_key", None) or ""
api_base = hidden_params.get("api_base", None) or ""
fastapi_response.headers["x-litellm-model-id"] = model_id
fastapi_response.headers["x-litellm-cache-key"] = cache_key
fastapi_response.headers["x-litellm-model-api-base"] = api_base
fastapi_response.headers["x-litellm-version"] = version
return response
except Exception as e:
data["litellm_status"] = "fail" # used for alerting
@ -7452,6 +7523,16 @@ async def update_model(
)
)
if _existing_litellm_params is None:
if (
llm_router is not None
and llm_router.get_deployment(model_id=_model_id) is not None
):
raise HTTPException(
status_code=400,
detail={
"error": "Can't edit model. Model in config. Store model in db via `/model/new`. to edit."
},
)
raise Exception("model not found")
_existing_litellm_params_dict = dict(
_existing_litellm_params.litellm_params
@ -7464,18 +7545,39 @@ async def update_model(
exclude_none=True
)
for key, value in _existing_litellm_params_dict.items():
if key in _new_litellm_params_dict:
_existing_litellm_params_dict[key] = _new_litellm_params_dict[key]
### ENCRYPT PARAMS ###
for k, v in _new_litellm_params_dict.items():
if isinstance(v, str):
encrypted_value = encrypt_value(value=v, master_key=master_key) # type: ignore
model_params.litellm_params[k] = base64.b64encode(
encrypted_value
).decode("utf-8")
### MERGE WITH EXISTING DATA ###
merged_dictionary = {}
_mp = model_params.litellm_params.dict()
for key, value in _mp.items():
if value is not None:
merged_dictionary[key] = value
elif (
key in _existing_litellm_params_dict
and _existing_litellm_params_dict[key] is not None
):
merged_dictionary[key] = _existing_litellm_params_dict[key]
else:
pass
_data: dict = {
"litellm_params": json.dumps(_existing_litellm_params_dict), # type: ignore
"litellm_params": json.dumps(merged_dictionary), # type: ignore
"updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name,
}
model_response = await prisma_client.db.litellm_proxymodeltable.update(
where={"model_id": _model_id},
data=_data, # type: ignore
)
return model_response
except Exception as e:
traceback.print_exc()
if isinstance(e, HTTPException):
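The update path now merges the incoming params with what is already stored: a field explicitly provided wins, a field left as `None` keeps its stored value, and the merged result is what gets written back. A minimal sketch of just the merge rule, with plain dicts and no encryption:

```python
def merge_params(update: dict, existing: dict) -> dict:
    """Non-None values from the update win; None falls back to the stored value."""
    merged = {}
    for key in {**existing, **update}:
        if update.get(key) is not None:
            merged[key] = update[key]
        elif existing.get(key) is not None:
            merged[key] = existing[key]
    return merged

existing = {"model": "gpt-3.5-turbo", "api_base": "https://old.example", "rpm": 100}
update = {"model": None, "api_base": "https://new.example", "rpm": None}
print(merge_params(update, existing))
# {'model': 'gpt-3.5-turbo', 'api_base': 'https://new.example', 'rpm': 100}
```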


@ -1689,12 +1689,12 @@ def get_instance_fn(value: str, config_file_path: Optional[str] = None) -> Any:
module_file_path = os.path.join(directory, *module_name.split("."))
module_file_path += ".py"
spec = importlib.util.spec_from_file_location(module_name, module_file_path)
spec = importlib.util.spec_from_file_location(module_name, module_file_path) # type: ignore
if spec is None:
raise ImportError(
f"Could not find a module specification for {module_file_path}"
)
module = importlib.util.module_from_spec(spec)
module = importlib.util.module_from_spec(spec) # type: ignore
spec.loader.exec_module(module) # type: ignore
else:
# Dynamically import the module


@ -1862,6 +1862,10 @@ class Router:
self.cache.set_cache(
value=cached_value, key=cooldown_key, ttl=cooldown_time
)
self.send_deployment_cooldown_alert(
deployment_id=deployment, exception_status=exception_status
)
else:
self.failed_calls.set_cache(
key=deployment, value=updated_fails, ttl=cooldown_time
@ -2572,6 +2576,38 @@ class Router:
self.model_names.append(deployment.model_name)
return deployment
def upsert_deployment(self, deployment: Deployment) -> Deployment:
"""
Add or update deployment
Parameters:
- deployment: Deployment - the deployment to be added to the Router
Returns:
- The added/updated deployment
"""
# check if deployment already exists
if deployment.model_info.id in self.get_model_ids():
# remove the previous deployment
removal_idx: Optional[int] = None
for idx, model in enumerate(self.model_list):
if model["model_info"]["id"] == deployment.model_info.id:
removal_idx = idx
if removal_idx is not None:
self.model_list.pop(removal_idx)
# add to model list
_deployment = deployment.to_json(exclude_none=True)
self.model_list.append(_deployment)
# initialize client
self._add_deployment(deployment=deployment)
# add to model names
self.model_names.append(deployment.model_name)
return deployment
def delete_deployment(self, id: str) -> Optional[Deployment]:
"""
Parameters:
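`upsert_deployment` makes the db-to-router sync idempotent: if `model_info.id` is already known, the old entry is removed and replaced instead of a duplicate being appended. A rough usage sketch, assuming `Deployment`, `LiteLLM_Params` and `ModelInfo` are importable from `litellm.types.router`, with placeholder names and keys (nothing here hits a live provider):

```python
from litellm import Router
from litellm.types.router import Deployment, LiteLLM_Params, ModelInfo

router = Router(model_list=[])

deployment = Deployment(
    model_name="gpt-3.5-turbo",
    litellm_params=LiteLLM_Params(model="gpt-3.5-turbo", api_key="sk-placeholder"),
    model_info=ModelInfo(id="db-deployment-1"),
)

router.upsert_deployment(deployment=deployment)  # first call adds the deployment
router.upsert_deployment(deployment=deployment)  # second call replaces it in place
print(router.get_model_ids())                    # a single id, no duplicate entry
```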
@ -2595,11 +2631,21 @@ class Router:
except:
return None
def get_deployment(self, model_id: str):
def get_deployment(self, model_id: str) -> Optional[Deployment]:
"""
Returns -> Deployment or None
Raise Exception -> if model found in invalid format
"""
for model in self.model_list:
if "model_info" in model and "id" in model["model_info"]:
if model_id == model["model_info"]["id"]:
return model
if isinstance(model, dict):
return Deployment(**model)
elif isinstance(model, Deployment):
return model
else:
raise Exception("Model invalid format - {}".format(type(model)))
return None
def get_model_info(self, id: str) -> Optional[dict]:
@ -3342,6 +3388,39 @@ class Router:
)
print("\033[94m\nInitialized Alerting for litellm.Router\033[0m\n") # noqa
def send_deployment_cooldown_alert(
self, deployment_id: str, exception_status: Union[str, int]
):
try:
from litellm.proxy.proxy_server import proxy_logging_obj
# trigger slack alert saying deployment is in cooldown
if (
proxy_logging_obj is not None
and proxy_logging_obj.alerting is not None
and "slack" in proxy_logging_obj.alerting
):
_deployment = self.get_deployment(model_id=deployment_id)
if _deployment is None:
return
_litellm_params = _deployment["litellm_params"]
temp_litellm_params = copy.deepcopy(_litellm_params)
temp_litellm_params = dict(temp_litellm_params)
_model_name = _deployment.get("model_name", None)
_api_base = litellm.get_api_base(
model=_model_name, optional_params=temp_litellm_params
)
asyncio.create_task(
proxy_logging_obj.slack_alerting_instance.send_alert(
message=f"Router: Cooling down deployment: {_api_base}, for {self.cooldown_time} seconds. Got exception: {str(exception_status)}",
alert_type="cooldown_deployment",
level="Low",
)
)
except Exception as e:
pass
def flush_cache(self):
litellm.cache = None
self.cache.flush_cache()


@ -6,7 +6,7 @@
# - use litellm.success + failure callbacks to log when a request completed
# - in get_available_deployment, for a given model group name -> pick based on traffic
import dotenv, os, requests, random
import dotenv, os, requests, random # type: ignore
from typing import Optional
dotenv.load_dotenv() # Loading env variables using dotenv


@ -1,7 +1,7 @@
#### What this does ####
# picks based on response time (for streaming, this is time to first token)
from pydantic import BaseModel, Extra, Field, root_validator
import dotenv, os, requests, random
import dotenv, os, requests, random # type: ignore
from typing import Optional, Union, List, Dict
from datetime import datetime, timedelta
import random


@ -1,7 +1,7 @@
#### What this does ####
# picks based on response time (for streaming, this is time to first token)
from pydantic import BaseModel, Extra, Field, root_validator
import dotenv, os, requests, random
from pydantic import BaseModel, Extra, Field, root_validator # type: ignore
import dotenv, os, requests, random # type: ignore
from typing import Optional, Union, List, Dict
from datetime import datetime, timedelta
import random


@ -1,21 +1,17 @@
import os
import sys
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import io
import os
import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm
from litellm import (RateLimitError, Timeout, completion, completion_cost,
embedding)
from litellm import embedding, completion, completion_cost, Timeout
from litellm import RateLimitError
from litellm.llms.prompt_templates.factory import anthropic_messages_pt
# litellm.num_retries=3
@ -167,32 +163,6 @@ def test_completion_claude_3():
pytest.fail(f"Error occurred: {e}")
def test_completion_claude_3_empty_message():
litellm.set_verbose = True
messages = [{'role': 'user', 'content': 'please create a logo for a modern AI app. create in SVG format'},
{'role': 'assistant', 'content': "To create a logo for a modern AI app in SVG format, I'll use the DALL-E 3 Image Generator."},
{'role': 'user', 'content': 'output SVG'},
{'role': 'assistant', 'content': 'To generate a logo for a modern AI app in SVG format using DALL-E 3, I need to:\n1. Craft a detailed prompt describing the desired logo style and elements\n2. Specify the image size (SVG is vector-based, so size is less relevant)\n3. Call the generate_image function with the prompt and size\n4. Display the generated SVG logo using the provided syntax\nThe prompt should include keywords related to AI, modern design, and SVG format. Some key elements to incorporate could be a brain symbol, circuit lines, or a robot icon, using a minimalist style and a blue color scheme often associated with technology and intelligence.',
'tool_calls': [
{'id': 'toolu_01KEUtRVySSeMrf3g7rCA12E', 'type': 'function', 'function': {'name': 'python_tool', 'arguments': '{"code": "...python code..."}'}}
]},
{'role': 'tool', 'content': '...python output...', 'tool_call_id': 'toolu_01KEUtRVySSeMrf3g7rCA12E'},
{'role': 'assistant', 'content': ''}, # empty message appended by model after tool call response
{'role': 'user', 'content': 'write SVG source youself!'},
]
try:
response = completion(
model="anthropic/claude-3-opus-20240229",
messages=messages,
stream=True,
tools=[{'type': 'function', 'function': {'name': 'python_tool', 'description': 'Execute code', 'parameters': {'type': 'object', 'properties': {'headline': {'description': 'Must have. Title of this tool call (maximum 15 characters).', 'type': 'string'}, 'code': {'description': 'Python code to execute.', 'type': 'string'}}, 'required': ['code', 'headline']}}}]
)
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_claude_3_function_call():
litellm.set_verbose = True
tools = [


@ -231,14 +231,17 @@ def test_cost_bedrock_pricing():
assert cost == predicted_cost
@pytest.mark.skip(reason="AWS disabled our access")
def test_cost_bedrock_pricing_actual_calls():
litellm.set_verbose = True
model = "anthropic.claude-instant-v1"
messages = [{"role": "user", "content": "Hey, how's it going?"}]
response = litellm.completion(model=model, messages=messages)
assert response._hidden_params["region_name"] is not None
response = litellm.completion(
model=model, messages=messages, mock_response="hello cool one"
)
print("response", response)
cost = litellm.completion_cost(
model="bedrock/anthropic.claude-instant-v1",
completion_response=response,
messages=[{"role": "user", "content": "Hey, how's it going?"}],
)


@ -140,6 +140,8 @@ async def test_add_existing_deployment():
deployment_2.to_json(exclude_none=True),
]
)
init_len_list = len(llm_router.model_list)
print(f"llm_router: {llm_router}")
master_key = "sk-1234"
setattr(litellm.proxy.proxy_server, "llm_router", llm_router)
@ -164,7 +166,7 @@ async def test_add_existing_deployment():
db_models = [db_model]
num_added = pc._add_deployment(db_models=db_models)
assert num_added == 0
assert init_len_list == len(llm_router.model_list)
litellm_params = LiteLLM_Params(


@ -0,0 +1,143 @@
# What is this?
## Test to make sure function call response always works with json.loads() -> no extra parsing required. Relevant issue - https://github.com/BerriAI/litellm/issues/2654
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import os, io
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import pytest
import litellm
import json
import warnings
from litellm import completion
from typing import List
# Just a stub to keep the sample code simple
class Trade:
def __init__(self, order: dict):
self.order = order
@staticmethod
def buy(order: dict):
return Trade(order)
@staticmethod
def sell(order: dict):
return Trade(order)
def trade(model_name: str) -> List[Trade]:
def parse_order(order: dict) -> Trade:
action = order["action"]
if action == "buy":
return Trade.buy(order)
elif action == "sell":
return Trade.sell(order)
else:
raise ValueError(f"Invalid action {action}")
def parse_call(call) -> List[Trade]:
arguments = json.loads(call.function.arguments)
trades = [parse_order(order) for order in arguments["orders"]]
return trades
tool_spec = {
"type": "function",
"function": {
"name": "trade",
"description": "Execute orders to manage the portfolio. Orders will be executed immediately at the stated prices.",
"parameters": {
"type": "object",
"properties": {
"orders": {
"type": "array",
"items": {
"type": "object",
"properties": {
"action": {"type": "string", "enum": ["buy", "sell"]},
"asset": {"type": "string"},
"amount": {
"type": "number",
"description": "Amount of asset to buy or sell.",
},
},
"required": ["action", "asset", "amount"],
},
},
},
},
},
}
response = completion(
model_name,
[
{
"role": "system",
"content": """You are an expert asset manager, managing a portfolio.
Always use the `trade` function. Make sure that you call it correctly. For example, the following is a valid call:
```
trade({
"orders": [
{"action": "buy", "asset": "BTC", "amount": 0.1},
{"action": "sell", "asset": "ETH", "amount": 0.2}
]
})
```
If there are no trades to make, call `trade` with an empty array:
```
trade({ "orders": [] })
```
""",
},
{
"role": "user",
"content": """Manage the portfolio.
Don't jabber.
This is the current market data:
```
{market_data}
```
Your portfolio is as follows:
```
{portfolio}
```
""".replace(
"{market_data}", "BTC: 64,000 USD\nETH: 3,500 USD"
).replace(
"{portfolio}", "USD: 1000, BTC: 0.1, ETH: 0.2"
),
},
],
tools=[tool_spec],
tool_choice={
"type": "function",
"function": {"name": tool_spec["function"]["name"]}, # type: ignore
},
)
calls = response.choices[0].message.tool_calls
trades = [trade for call in calls for trade in parse_call(call)]
return trades
@pytest.mark.parametrize(
"model", ["claude-3-haiku-20240307", "anthropic.claude-3-haiku-20240307-v1:0"]
)
def test_function_call_parsing(model):
trades = trade(model)
print([trade.order for trade in trades])


@ -117,6 +117,7 @@ def test_embedding(client):
"input_cost_per_token": 0.002,
"mode": "embedding",
"id": "hello",
"db_model": False,
}
result = response.json()
print(f"Received response: {result}")
@ -191,6 +192,7 @@ def test_chat_completion(client):
"id": "gm",
"input_cost_per_token": 0.0002,
"mode": "chat",
"db_model": False,
}
assert proxy_server_request_object == {
"url": "http://testserver/chat/completions",


@ -160,7 +160,40 @@ def test_chat_completion(mock_acompletion, client_no_auth):
pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
# Run the test
@mock_patch_acompletion()
def test_engines_model_chat_completions(mock_acompletion, client_no_auth):
global headers
try:
# Your test data
test_data = {
"model": "gpt-3.5-turbo",
"messages": [
{"role": "user", "content": "hi"},
],
"max_tokens": 10,
}
print("testing proxy server with chat completions")
response = client_no_auth.post("/engines/gpt-3.5-turbo/chat/completions", json=test_data)
mock_acompletion.assert_called_once_with(
model="gpt-3.5-turbo",
messages=[
{"role": "user", "content": "hi"},
],
max_tokens=10,
litellm_call_id=mock.ANY,
litellm_logging_obj=mock.ANY,
request_timeout=mock.ANY,
specific_deployment=True,
metadata=mock.ANY,
proxy_server_request=mock.ANY,
)
print(f"response - {response.text}")
assert response.status_code == 200
result = response.json()
print(f"Received response: {result}")
except Exception as e:
pytest.fail(f"LiteLLM Proxy test failed. Exception - {str(e)}")
@mock_patch_acompletion()


@ -3990,6 +3990,26 @@ def test_async_text_completion():
asyncio.run(test_get_response())
def test_async_text_completion_together_ai():
litellm.set_verbose = True
print("test_async_text_completion")
async def test_get_response():
try:
response = await litellm.atext_completion(
model="together_ai/codellama/CodeLlama-13b-Instruct-hf",
prompt="good morning",
max_tokens=10,
)
print(f"response: {response}")
except litellm.Timeout as e:
print(e)
except Exception as e:
pytest.fail("An unexpected error occurred")
asyncio.run(test_get_response())
# test_async_text_completion()


@ -1,6 +1,6 @@
from typing import List, Optional, Union, Dict, Tuple, Literal
import httpx
from pydantic import BaseModel, validator
from pydantic import BaseModel, validator, Field
from .completion import CompletionRequest
from .embedding import EmbeddingRequest
import uuid, enum
@ -65,11 +65,17 @@ class UpdateRouterConfig(BaseModel):
fallbacks: Optional[List[dict]] = None
context_window_fallbacks: Optional[List[dict]] = None
class Config:
protected_namespaces = ()
class ModelInfo(BaseModel):
id: Optional[
str
] # Allow id to be optional on input, but it will always be present as a str in the model instance
db_model: bool = (
False # used for proxy - to separate models which are stored in the db vs. config.
)
def __init__(self, id: Optional[Union[str, int]] = None, **params):
if id is None:
@ -124,6 +130,11 @@ class GenericLiteLLMParams(BaseModel):
aws_access_key_id: Optional[str] = None
aws_secret_access_key: Optional[str] = None
aws_region_name: Optional[str] = None
## CUSTOM PRICING ##
input_cost_per_token: Optional[float] = None
output_cost_per_token: Optional[float] = None
input_cost_per_second: Optional[float] = None
output_cost_per_second: Optional[float] = None
def __init__(
self,
@ -146,6 +157,10 @@ class GenericLiteLLMParams(BaseModel):
aws_access_key_id: Optional[str] = None,
aws_secret_access_key: Optional[str] = None,
aws_region_name: Optional[str] = None,
input_cost_per_token: Optional[float] = None,
output_cost_per_token: Optional[float] = None,
input_cost_per_second: Optional[float] = None,
output_cost_per_second: Optional[float] = None,
**params
):
args = locals()
@ -239,28 +254,10 @@ class LiteLLM_Params(GenericLiteLLMParams):
setattr(self, key, value)
class updateLiteLLMParams(BaseModel):
class updateLiteLLMParams(GenericLiteLLMParams):
# This class is used to update the LiteLLM_Params
# only difference is that model is optional
model: Optional[str] = None
tpm: Optional[int] = None
rpm: Optional[int] = None
api_key: Optional[str] = None
api_base: Optional[str] = None
api_version: Optional[str] = None
timeout: Optional[Union[float, str]] = None # if str, pass in as os.environ/
stream_timeout: Optional[Union[float, str]] = (
None # timeout when making stream=True calls, if str, pass in as os.environ/
)
max_retries: int = 2 # follows openai default of 2
organization: Optional[str] = None # for openai orgs
## VERTEX AI ##
vertex_project: Optional[str] = None
vertex_location: Optional[str] = None
## AWS BEDROCK / SAGEMAKER ##
aws_access_key_id: Optional[str] = None
aws_secret_access_key: Optional[str] = None
aws_region_name: Optional[str] = None
class updateDeployment(BaseModel):
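`GenericLiteLLMParams` (and with it `updateLiteLLMParams`) now carries optional custom-pricing fields, so per-deployment costs can live next to the credentials rather than only in the global cost map. A hedged sketch of passing them through a router deployment; the alias and key are placeholders:

```python
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "my-gpt",                 # placeholder alias
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "api_key": "sk-placeholder",        # placeholder key
                "input_cost_per_token": 0.0000005,  # custom pricing rides with the params
                "output_cost_per_token": 0.0000015,
            },
        }
    ]
)
print(router.get_model_ids())
```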


@ -14,7 +14,7 @@ import subprocess, os
from os.path import abspath, join, dirname
import litellm, openai
import itertools
import random, uuid, requests
import random, uuid, requests # type: ignore
from functools import wraps
import datetime, time
import tiktoken
@ -36,7 +36,7 @@ import litellm._service_logger # for storing API inputs, outputs, and metadata
try:
# this works in python 3.8
import pkg_resources
import pkg_resources # type: ignore
filename = pkg_resources.resource_filename(__name__, "llms/tokenizers")
# try:
@ -4161,8 +4161,30 @@ def cost_per_token(
model_with_provider_and_region in model_cost_ref
): # use region based pricing, if it's available
model_with_provider = model_with_provider_and_region
if model_with_provider in model_cost_ref:
model_without_prefix = model
model_parts = model.split("/")
if len(model_parts) > 1:
model_without_prefix = model_parts[1]
else:
model_without_prefix = model
"""
Code block that formats model to lookup in litellm.model_cost
Option1. model = "bedrock/ap-northeast-1/anthropic.claude-instant-v1". This is the most accurate since it is region based. Should always be option 1
Option2. model = "openai/gpt-4" - model = provider/model
Option3. model = "anthropic.claude-3" - model = model
"""
if (
model_with_provider in model_cost_ref
): # Option 2. use model with provider, model = "openai/gpt-4"
model = model_with_provider
elif model in model_cost_ref: # Option 1. use model passed, model="gpt-4"
model = model
elif (
model_without_prefix in model_cost_ref
): # Option 3. if user passed model="bedrock/anthropic.claude-3", use model="anthropic.claude-3"
model = model_without_prefix
# see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
print_verbose(f"Looking up model={model} in model_cost_map")
if model in model_cost_ref:
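Cost lookup now tries the provider-prefixed name first, then the raw name, then the name with the provider prefix stripped. A rough sketch of that resolution order; the dictionary and helper are illustrative, not the proxy's exact code path:

```python
def resolve_cost_key(model: str, custom_llm_provider: str, model_cost: dict) -> str:
    """Illustrative lookup order for the pricing key."""
    model_with_provider = f"{custom_llm_provider}/{model}"
    model_without_prefix = model.split("/", 1)[1] if "/" in model else model

    if model_with_provider in model_cost:   # e.g. "openai/gpt-4"
        return model_with_provider
    if model in model_cost:                 # e.g. "gpt-4"
        return model
    if model_without_prefix in model_cost:  # e.g. "anthropic.claude-instant-v1" from "bedrock/..."
        return model_without_prefix
    return model

model_cost = {"anthropic.claude-instant-v1": {"input_cost_per_token": 8e-07}}
print(resolve_cost_key("bedrock/anthropic.claude-instant-v1", "bedrock", model_cost))
# -> anthropic.claude-instant-v1
```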
@ -7732,11 +7754,11 @@ def _calculate_retry_after(
try:
retry_after = int(retry_header)
except Exception:
retry_date_tuple = email.utils.parsedate_tz(retry_header)
retry_date_tuple = email.utils.parsedate_tz(retry_header) # type: ignore
if retry_date_tuple is None:
retry_after = -1
else:
retry_date = email.utils.mktime_tz(retry_date_tuple)
retry_date = email.utils.mktime_tz(retry_date_tuple) # type: ignore
retry_after = int(retry_date - time.time())
else:
retry_after = -1
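The fallback path parses an HTTP-date `Retry-After` value with `email.utils` when the header is not a plain integer. A standalone sketch of that calculation:

```python
import email.utils
import time

def retry_after_seconds(retry_header: str) -> int:
    """Seconds to wait per the Retry-After header; -1 if it cannot be parsed."""
    try:
        return int(retry_header)                           # integer form, e.g. "120"
    except Exception:
        retry_date_tuple = email.utils.parsedate_tz(retry_header)
        if retry_date_tuple is None:
            return -1
        retry_date = email.utils.mktime_tz(retry_date_tuple)
        return int(retry_date - time.time())

print(retry_after_seconds("120"))
print(retry_after_seconds("Wed, 21 Oct 2015 07:28:00 GMT"))  # past date -> negative
```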
@ -9423,7 +9445,9 @@ def get_secret(
else:
secret = os.environ.get(secret_name)
try:
secret_value_as_bool = ast.literal_eval(secret) if secret is not None else None
secret_value_as_bool = (
ast.literal_eval(secret) if secret is not None else None
)
if isinstance(secret_value_as_bool, bool):
return secret_value_as_bool
else:
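`ast.literal_eval` is what turns env-var strings like `"True"` / `"False"` into real booleans here; anything that is not a boolean literal falls through unchanged. A tiny sketch of that coercion:

```python
import ast
from typing import Optional, Union

def coerce_bool(secret: Optional[str]) -> Union[bool, Optional[str]]:
    """Return a bool for 'True'/'False' strings, otherwise the original value."""
    try:
        value = ast.literal_eval(secret) if secret is not None else None
    except (ValueError, SyntaxError):
        return secret
    return value if isinstance(value, bool) else secret

print(coerce_bool("True"), coerce_bool("False"), coerce_bool("sk-1234"))
# True False sk-1234
```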


@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.36.2"
version = "1.36.3"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
version = "1.36.2"
version = "1.36.3"
version_files = [
"pyproject.toml:^version"
]

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@ -1 +0,0 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_c23dc8', '__Inter_Fallback_c23dc8'",fontStyle:"normal"},className:"__className_c23dc8"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=87421)}),_N_E=n.O()}]);

File diff suppressed because one or more lines are too long


@ -1 +0,0 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/91264d1b81b58743.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();

File diff suppressed because one or more lines are too long


@ -1 +0,0 @@
self.__BUILD_MANIFEST={__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/_error":["static/chunks/pages/_error-d6107f1aac0c574c.js"],sortedPages:["/_app","/_error"]},self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();


@ -1 +0,0 @@
self.__SSG_MANIFEST=new Set([]);self.__SSG_MANIFEST_CB&&self.__SSG_MANIFEST_CB()


@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-cfde7590f67e0020.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-cfde7590f67e0020.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/91264d1b81b58743.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[58854,[\"936\",\"static/chunks/2f6dbc85-17d29013b8ff3da5.js\",\"142\",\"static/chunks/142-11990a208bf93746.js\",\"931\",\"static/chunks/app/page-df6123fdb38c9b0d.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/91264d1b81b58743.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"u7rM4cBDmVnacAN_shjp6\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d85d62a2bbfac48f.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d85d62a2bbfac48f.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/7de0c97d470f519f.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[18889,[\"936\",\"static/chunks/2f6dbc85-17d29013b8ff3da5.js\",\"319\",\"static/chunks/319-4467f3d35ad11cf1.js\",\"931\",\"static/chunks/app/page-f32196ae7cd3d914.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/7de0c97d470f519f.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"OcLXYgLcgQyjMd6bH1bqU\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>


@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[58854,["936","static/chunks/2f6dbc85-17d29013b8ff3da5.js","142","static/chunks/142-11990a208bf93746.js","931","static/chunks/app/page-df6123fdb38c9b0d.js"],""]
3:I[18889,["936","static/chunks/2f6dbc85-17d29013b8ff3da5.js","319","static/chunks/319-4467f3d35ad11cf1.js","931","static/chunks/app/page-f32196ae7cd3d914.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["u7rM4cBDmVnacAN_shjp6",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/91264d1b81b58743.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["OcLXYgLcgQyjMd6bH1bqU",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/7de0c97d470f519f.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null


@ -37,7 +37,7 @@ import { Badge, BadgeDelta, Button } from "@tremor/react";
import RequestAccess from "./request_model_access";
import { Typography } from "antd";
import TextArea from "antd/es/input/TextArea";
import { InformationCircleIcon, PencilAltIcon, PencilIcon, StatusOnlineIcon, TrashIcon, RefreshIcon } from "@heroicons/react/outline";
import { InformationCircleIcon, PencilAltIcon, PencilIcon, StatusOnlineIcon, TrashIcon, RefreshIcon, CheckCircleIcon, XCircleIcon } from "@heroicons/react/outline";
import DeleteModelButton from "./delete_model_button";
const { Title: Title2, Link } = Typography;
import { UploadOutlined } from '@ant-design/icons';
@ -332,7 +332,24 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
<InputNumber min={0} step={1} />
</Form.Item>
<Form.Item
label="input_cost_per_token"
name="input_cost_per_token"
tooltip="float (optional) - Input cost per token"
>
<InputNumber min={0} step={0.0001} />
</Form.Item>
<Form.Item
label="output_cost_per_token"
name="output_cost_per_token"
tooltip="float (optional) - Output cost per token"
>
<InputNumber min={0} step={0.0001} />
</Form.Item>
<Form.Item
@ -394,19 +411,16 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
console.log("handleEditSubmit payload:", payload);
let newModelValue = await modelUpdateCall(accessToken, payload);
try {
let newModelValue = await modelUpdateCall(accessToken, payload);
message.success("Model updated successfully, restart server to see updates");
// Update the teams state with the updated team data
// if (teams) {
// const updatedTeams = teams.map((team) =>
// team.team_id === teamId ? newTeamValues.data : team
// );
// setTeams(updatedTeams);
// }
message.success("Model updated successfully, restart server to see updates");
setEditModalVisible(false);
setSelectedModel(null);
} catch (error) {
console.log(`Error occurred`)
}
setEditModalVisible(false);
setSelectedModel(null);
};
@ -921,6 +935,7 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
<TableHeaderCell>Input Price per token ($)</TableHeaderCell>
<TableHeaderCell>Output Price per token ($)</TableHeaderCell>
<TableHeaderCell>Max Tokens</TableHeaderCell>
<TableHeaderCell>Status</TableHeaderCell>
</TableRow>
</TableHead>
<TableBody>
@ -929,6 +944,7 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
selectedModelGroup === "all" || model.model_name === selectedModelGroup || selectedModelGroup === null || selectedModelGroup === undefined || selectedModelGroup === ""
)
.map((model: any, index: number) => (
<TableRow key={index}>
<TableCell>
<Text>{model.model_name}</Text>
@ -954,10 +970,15 @@ const handleEditSubmit = async (formValues: Record<string, any>) => {
</Accordion>
</TableCell>
<TableCell>{model.input_cost}</TableCell>
<TableCell>{model.output_cost}</TableCell>
<TableCell>{model.input_cost || model.litellm_params.input_cost_per_token || null}</TableCell>
<TableCell>{model.output_cost || model.litellm_params.output_cost_per_token || null}</TableCell>
<TableCell>{model.max_tokens}</TableCell>
<TableCell>
{
model.model_info.db_model ? <Badge icon={CheckCircleIcon} className="text-white">DB Model</Badge> : <Badge icon={XCircleIcon} className="text-black">Config Model</Badge>
}
</TableCell>
<TableCell>
<Icon
icon={PencilAltIcon}
