Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 11:43:54 +00:00)

Commit 64ca2fde53: Merge branch 'main' into litellm_region_based_routing
85 changed files with 793 additions and 448 deletions
@@ -16,11 +16,11 @@ repos:
         name: Check if files match
         entry: python3 ci_cd/check_files_match.py
         language: system
-  - repo: local
-    hooks:
-      - id: mypy
-        name: mypy
-        entry: python3 -m mypy --ignore-missing-imports
-        language: system
-        types: [python]
-        files: ^litellm/
+  # - repo: local
+  #   hooks:
+  #     - id: mypy
+  #       name: mypy
+  #       entry: python3 -m mypy --ignore-missing-imports
+  #       language: system
+  #       types: [python]
+  #       files: ^litellm/
@@ -291,7 +291,7 @@ def _create_clickhouse_aggregate_tables(client=None, table_names=[]):


 def _forecast_daily_cost(data: list):
-    import requests
+    import requests  # type: ignore
     from datetime import datetime, timedelta

     if len(data) == 0:
@@ -10,8 +10,8 @@
 # s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation
 import os
 import inspect
-import redis, litellm
-import redis.asyncio as async_redis
+import redis, litellm  # type: ignore
+import redis.asyncio as async_redis  # type: ignore
 from typing import List, Optional


@@ -10,7 +10,7 @@
 import os, json, time
 import litellm
 from litellm.utils import ModelResponse
-import requests, threading
+import requests, threading  # type: ignore
 from typing import Optional, Union, Literal


@@ -1,7 +1,6 @@
 #### What this does ####
 # On success + failure, log events to aispend.io
 import dotenv, os
-import requests

 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -4,18 +4,30 @@ import datetime
 class AthinaLogger:
     def __init__(self):
         import os

         self.athina_api_key = os.getenv("ATHINA_API_KEY")
         self.headers = {
             "athina-api-key": self.athina_api_key,
-            "Content-Type": "application/json"
+            "Content-Type": "application/json",
         }
         self.athina_logging_url = "https://log.athina.ai/api/v1/log/inference"
-        self.additional_keys = ["environment", "prompt_slug", "customer_id", "customer_user_id", "session_id", "external_reference_id", "context", "expected_response", "user_query"]
+        self.additional_keys = [
+            "environment",
+            "prompt_slug",
+            "customer_id",
+            "customer_user_id",
+            "session_id",
+            "external_reference_id",
+            "context",
+            "expected_response",
+            "user_query",
+        ]

     def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
-        import requests
+        import requests  # type: ignore
         import json
         import traceback

         try:
             response_json = response_obj.model_dump() if response_obj else {}
             data = {
@@ -23,19 +35,30 @@ class AthinaLogger:
                 "request": kwargs,
                 "response": response_json,
                 "prompt_tokens": response_json.get("usage", {}).get("prompt_tokens"),
-                "completion_tokens": response_json.get("usage", {}).get("completion_tokens"),
+                "completion_tokens": response_json.get("usage", {}).get(
+                    "completion_tokens"
+                ),
                 "total_tokens": response_json.get("usage", {}).get("total_tokens"),
             }

-            if type(end_time) == datetime.datetime and type(start_time) == datetime.datetime:
-                data["response_time"] = int((end_time - start_time).total_seconds() * 1000)
+            if (
+                type(end_time) == datetime.datetime
+                and type(start_time) == datetime.datetime
+            ):
+                data["response_time"] = int(
+                    (end_time - start_time).total_seconds() * 1000
+                )

             if "messages" in kwargs:
                 data["prompt"] = kwargs.get("messages", None)

             # Directly add tools or functions if present
             optional_params = kwargs.get("optional_params", {})
-            data.update((k, v) for k, v in optional_params.items() if k in ["tools", "functions"])
+            data.update(
+                (k, v)
+                for k, v in optional_params.items()
+                if k in ["tools", "functions"]
+            )

             # Add additional metadata keys
             metadata = kwargs.get("litellm_params", {}).get("metadata", {})
@@ -44,11 +67,19 @@ class AthinaLogger:
                 if key in metadata:
                     data[key] = metadata[key]

-            response = requests.post(self.athina_logging_url, headers=self.headers, data=json.dumps(data, default=str))
+            response = requests.post(
+                self.athina_logging_url,
+                headers=self.headers,
+                data=json.dumps(data, default=str),
+            )
             if response.status_code != 200:
-                print_verbose(f"Athina Logger Error - {response.text}, {response.status_code}")
+                print_verbose(
+                    f"Athina Logger Error - {response.text}, {response.status_code}"
+                )
             else:
                 print_verbose(f"Athina Logger Succeeded - {response.text}")
         except Exception as e:
-            print_verbose(f"Athina Logger Error - {e}, Stack trace: {traceback.format_exc()}")
+            print_verbose(
+                f"Athina Logger Error - {e}, Stack trace: {traceback.format_exc()}"
+            )
             pass
@@ -1,7 +1,7 @@
 #### What this does ####
 # On success + failure, log events to aispend.io
 import dotenv, os
-import requests
+import requests  # type: ignore

 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -3,7 +3,6 @@
 #### What this does ####
 # On success, logs events to Promptlayer
 import dotenv, os
-import requests

 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.caching import DualCache
@@ -1,7 +1,6 @@
 #### What this does ####
 # On success, logs events to Promptlayer
 import dotenv, os
-import requests

 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.caching import DualCache
@@ -2,7 +2,7 @@
 # On success + failure, log events to Supabase

 import dotenv, os
-import requests
+import requests  # type: ignore

 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -2,7 +2,7 @@
 # On success + failure, log events to Supabase

 import dotenv, os
-import requests
+import requests  # type: ignore

 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -1,15 +1,17 @@
-import requests
+import requests  # type: ignore
 import json
 import traceback
 from datetime import datetime, timezone


 class GreenscaleLogger:
     def __init__(self):
         import os

         self.greenscale_api_key = os.getenv("GREENSCALE_API_KEY")
         self.headers = {
             "api-key": self.greenscale_api_key,
-            "Content-Type": "application/json"
+            "Content-Type": "application/json",
         }
         self.greenscale_logging_url = os.getenv("GREENSCALE_ENDPOINT")

@@ -19,13 +21,18 @@ class GreenscaleLogger:
             data = {
                 "modelId": kwargs.get("model"),
                 "inputTokenCount": response_json.get("usage", {}).get("prompt_tokens"),
-                "outputTokenCount": response_json.get("usage", {}).get("completion_tokens"),
+                "outputTokenCount": response_json.get("usage", {}).get(
+                    "completion_tokens"
+                ),
             }
-            data["timestamp"] = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
+            data["timestamp"] = datetime.now(timezone.utc).strftime(
+                "%Y-%m-%dT%H:%M:%SZ"
+            )

             if type(end_time) == datetime and type(start_time) == datetime:
-                data["invocationLatency"] = int((end_time - start_time).total_seconds() * 1000)
+                data["invocationLatency"] = int(
+                    (end_time - start_time).total_seconds() * 1000
+                )

             # Add additional metadata keys to tags
             tags = []
@@ -37,15 +44,25 @@ class GreenscaleLogger:
                 elif key == "greenscale_application":
                     data["application"] = value
                 else:
-                    tags.append({"key": key.replace("greenscale_", ""), "value": str(value)})
+                    tags.append(
+                        {"key": key.replace("greenscale_", ""), "value": str(value)}
+                    )

             data["tags"] = tags

-            response = requests.post(self.greenscale_logging_url, headers=self.headers, data=json.dumps(data, default=str))
+            response = requests.post(
+                self.greenscale_logging_url,
+                headers=self.headers,
+                data=json.dumps(data, default=str),
+            )
             if response.status_code != 200:
-                print_verbose(f"Greenscale Logger Error - {response.text}, {response.status_code}")
+                print_verbose(
+                    f"Greenscale Logger Error - {response.text}, {response.status_code}"
+                )
             else:
                 print_verbose(f"Greenscale Logger Succeeded - {response.text}")
         except Exception as e:
-            print_verbose(f"Greenscale Logger Error - {e}, Stack trace: {traceback.format_exc()}")
+            print_verbose(
+                f"Greenscale Logger Error - {e}, Stack trace: {traceback.format_exc()}"
+            )
             pass
@@ -1,7 +1,7 @@
 #### What this does ####
 # On success, logs events to Helicone
 import dotenv, os
-import requests
+import requests  # type: ignore
 import litellm

 dotenv.load_dotenv()  # Loading env variables using dotenv
@@ -1,15 +1,14 @@
 #### What this does ####
 # On success, logs events to Langsmith
-import dotenv, os
-import requests
-import requests
+import dotenv, os  # type: ignore
+import requests  # type: ignore
 from datetime import datetime

 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 import asyncio
 import types
-from pydantic import BaseModel
+from pydantic import BaseModel  # type: ignore


 def is_serializable(value):
@@ -79,8 +78,6 @@ class LangsmithLogger:
         except:
             response_obj = response_obj.dict()  # type: ignore

-        print(f"response_obj: {response_obj}")
-
         data = {
             "name": run_name,
             "run_type": "llm",  # this should always be llm, since litellm always logs llm calls. Langsmith allow us to log "chain"
@@ -90,7 +87,6 @@ class LangsmithLogger:
             "start_time": start_time,
             "end_time": end_time,
         }
-        print(f"data: {data}")

         response = requests.post(
             "https://api.smith.langchain.com/runs",
@@ -2,7 +2,6 @@
 ## On Success events log cost to OpenMeter - https://github.com/BerriAI/litellm/issues/1268

 import dotenv, os, json
-import requests
 import litellm

 dotenv.load_dotenv()  # Loading env variables using dotenv
@@ -60,7 +59,7 @@ class OpenMeterLogger(CustomLogger):
             "total_tokens": response_obj["usage"].get("total_tokens"),
         }

-        subject = kwargs.get("user", None),  # end-user passed in via 'user' param
+        subject = (kwargs.get("user", None),)  # end-user passed in via 'user' param
         if not subject:
             raise Exception("OpenMeter: user is required")

@@ -3,7 +3,7 @@
 # On success, log events to Prometheus

 import dotenv, os
-import requests
+import requests  # type: ignore

 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -19,7 +19,6 @@ class PrometheusLogger:
         **kwargs,
     ):
         try:
-            print(f"in init prometheus metrics")
             from prometheus_client import Counter

             self.litellm_llm_api_failed_requests_metric = Counter(
@@ -4,7 +4,7 @@


 import dotenv, os
-import requests
+import requests  # type: ignore

 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -183,7 +183,6 @@ class PrometheusServicesLogger:
         )

     async def async_service_failure_hook(self, payload: ServiceLoggerPayload):
-        print(f"received error payload: {payload.error}")
         if self.mock_testing:
             self.mock_testing_failure_calls += 1

@@ -1,12 +1,13 @@
 #### What this does ####
 # On success, logs events to Promptlayer
 import dotenv, os
-import requests
+import requests  # type: ignore
 from pydantic import BaseModel

 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback

+
 class PromptLayerLogger:
     # Class variables or attributes
     def __init__(self):
@@ -32,7 +33,11 @@ class PromptLayerLogger:
                     tags = kwargs["litellm_params"]["metadata"]["pl_tags"]

                 # Remove "pl_tags" from metadata
-                metadata = {k:v for k, v in kwargs["litellm_params"]["metadata"].items() if k != "pl_tags"}
+                metadata = {
+                    k: v
+                    for k, v in kwargs["litellm_params"]["metadata"].items()
+                    if k != "pl_tags"
+                }

             print_verbose(
                 f"Prompt Layer Logging - Enters logging function for model kwargs: {new_kwargs}\n, response: {response_obj}"
@@ -2,7 +2,6 @@
 # On success + failure, log events to Supabase

 import dotenv, os
-import requests

 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -2,7 +2,7 @@
 # On success + failure, log events to Supabase

 import dotenv, os
-import requests
+import requests  # type: ignore

 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -1,8 +1,8 @@
 import os, types, traceback
 import json
 from enum import Enum
-import requests
-import time, httpx
+import requests  # type: ignore
+import time, httpx  # type: ignore
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Choices, Message
 import litellm
@@ -1,12 +1,12 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 import litellm
 from litellm.utils import ModelResponse, Choices, Message, Usage
-import httpx
+import httpx  # type: ignore


 class AlephAlphaError(Exception):
@@ -1,7 +1,7 @@
 import os, types
 import json
 from enum import Enum
-import requests, copy
+import requests, copy  # type: ignore
 import time
 from typing import Callable, Optional, List
 from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
@@ -9,7 +9,7 @@ import litellm
 from .prompt_templates.factory import prompt_factory, custom_prompt
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
 from .base import BaseLLM
-import httpx
+import httpx  # type: ignore


 class AnthropicConstants(Enum):
@@ -12,7 +12,7 @@ from litellm.utils import (
 from typing import Callable, Optional, BinaryIO
 from litellm import OpenAIConfig
 import litellm, json
-import httpx
+import httpx  # type: ignore
 from .custom_httpx.azure_dall_e_2 import CustomHTTPTransport, AsyncCustomHTTPTransport
 from openai import AzureOpenAI, AsyncAzureOpenAI
 import uuid
@@ -1,5 +1,5 @@
 from typing import Optional, Union, Any
-import types, requests
+import types, requests  # type: ignore
 from .base import BaseLLM
 from litellm.utils import (
     ModelResponse,
@@ -1,7 +1,7 @@
 import os
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable
 from litellm.utils import ModelResponse, Usage
@@ -163,10 +163,9 @@ class AmazonAnthropicClaude3Config:
             "stop",
             "temperature",
             "top_p",
-            "extra_headers"
+            "extra_headers",
         ]

-
     def map_openai_params(self, non_default_params: dict, optional_params: dict):
         for param, value in non_default_params.items():
             if param == "max_tokens":
@@ -534,10 +533,12 @@ class AmazonStabilityConfig:

 def add_custom_header(headers):
     """Closure to capture the headers and add them."""
+
     def callback(request, **kwargs):
         """Actual callback function that Boto3 will call."""
         for header_name, header_value in headers.items():
             request.headers.add_header(header_name, header_value)
+
     return callback


@@ -672,7 +673,9 @@ def init_bedrock_client(
         config=config,
     )
    if extra_headers:
-        client.meta.events.register('before-sign.bedrock-runtime.*', add_custom_header(extra_headers))
+        client.meta.events.register(
+            "before-sign.bedrock-runtime.*", add_custom_header(extra_headers)
+        )

    return client

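Taken together, the two bedrock hunks above add a closure that boto3 invokes before signing each request and register it on the client's event system. A minimal standalone sketch of that pattern follows; the client name, region, and header values are illustrative assumptions, not part of this diff:

```python
import boto3


def add_custom_header(headers):
    """Closure capturing extra headers to inject into every outgoing request."""

    def callback(request, **kwargs):
        # boto3 passes the prepared request; add each header before signing.
        for header_name, header_value in headers.items():
            request.headers.add_header(header_name, header_value)

    return callback


# Hypothetical usage mirroring init_bedrock_client(); requires AWS credentials.
client = boto3.client("bedrock-runtime", region_name="us-west-2")
extra_headers = {"X-Example-Header": "example-value"}  # assumed example values
client.meta.events.register(
    "before-sign.bedrock-runtime.*", add_custom_header(extra_headers)
)
```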
@@ -1224,7 +1227,7 @@ def _embedding_func_single(
         "input_type", "search_document"
     )  # aws bedrock example default - https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=cohere.embed-english-v3
     data = {"texts": [input], **inference_params}  # type: ignore
-    body = json.dumps(data).encode("utf-8")
+    body = json.dumps(data).encode("utf-8")  # type: ignore
     ## LOGGING
     request_str = f"""
     response = client.invoke_model(
@@ -1416,7 +1419,7 @@ def image_generation(
     ## LOGGING
     request_str = f"""
     response = client.invoke_model(
-        body={body},
+        body={body},  # type: ignore
         modelId={modelId},
         accept="application/json",
         contentType="application/json",
@@ -1,11 +1,11 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 import litellm
-import httpx
+import httpx  # type: ignore
 from litellm.utils import ModelResponse, Usage
 from .prompt_templates.factory import prompt_factory, custom_prompt

@@ -1,12 +1,12 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time, traceback
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Choices, Message, Usage
 import litellm
-import httpx
+import httpx  # type: ignore


 class CohereError(Exception):
@@ -1,12 +1,12 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time, traceback
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Choices, Message, Usage
 import litellm
-import httpx
+import httpx  # type: ignore
 from .prompt_templates.factory import cohere_message_pt


@@ -1,7 +1,7 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time, traceback
 from typing import Callable, Optional, List
 from litellm.utils import ModelResponse, Choices, Message, Usage
@@ -1,7 +1,7 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 import litellm
@@ -1,10 +1,10 @@
 from itertools import chain
-import requests, types, time
+import requests, types, time  # type: ignore
 import json, uuid
 import traceback
 from typing import Optional
 import litellm
-import httpx, aiohttp, asyncio
+import httpx, aiohttp, asyncio  # type: ignore
 from .prompt_templates.factory import prompt_factory, custom_prompt


@@ -220,7 +220,10 @@ def get_ollama_response(
             tool_calls=[
                 {
                     "id": f"call_{str(uuid.uuid4())}",
-                    "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                    "function": {
+                        "name": function_call["name"],
+                        "arguments": json.dumps(function_call["arguments"]),
+                    },
                     "type": "function",
                 }
             ],
@@ -232,7 +235,9 @@ def get_ollama_response(
     model_response["created"] = int(time.time())
     model_response["model"] = "ollama/" + model
     prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt, disallowed_special=())))  # type: ignore
-    completion_tokens = response_json.get("eval_count", len(response_json.get("message",dict()).get("content", "")))
+    completion_tokens = response_json.get(
+        "eval_count", len(response_json.get("message", dict()).get("content", ""))
+    )
     model_response["usage"] = litellm.Usage(
         prompt_tokens=prompt_tokens,
         completion_tokens=completion_tokens,
@@ -273,7 +278,10 @@ def ollama_completion_stream(url, data, logging_obj):
             tool_calls=[
                 {
                     "id": f"call_{str(uuid.uuid4())}",
-                    "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                    "function": {
+                        "name": function_call["name"],
+                        "arguments": json.dumps(function_call["arguments"]),
+                    },
                     "type": "function",
                 }
             ],
@@ -316,7 +324,8 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob
         [
             chunk.choices[0].delta.content
             async for chunk in streamwrapper
-            if chunk.choices[0].delta.content]
+            if chunk.choices[0].delta.content
+        ]
     )
     function_call = json.loads(response_content)
     delta = litellm.utils.Delta(
@@ -324,7 +333,10 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob
             tool_calls=[
                 {
                     "id": f"call_{str(uuid.uuid4())}",
-                    "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                    "function": {
+                        "name": function_call["name"],
+                        "arguments": json.dumps(function_call["arguments"]),
+                    },
                     "type": "function",
                 }
             ],
@@ -373,7 +385,10 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
             tool_calls=[
                 {
                     "id": f"call_{str(uuid.uuid4())}",
-                    "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                    "function": {
+                        "name": function_call["name"],
+                        "arguments": json.dumps(function_call["arguments"]),
+                    },
                     "type": "function",
                 }
             ],
@@ -387,7 +402,10 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
         model_response["created"] = int(time.time())
         model_response["model"] = "ollama/" + data["model"]
         prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(data["prompt"], disallowed_special=())))  # type: ignore
-        completion_tokens = response_json.get("eval_count", len(response_json.get("message",dict()).get("content", "")))
+        completion_tokens = response_json.get(
+            "eval_count",
+            len(response_json.get("message", dict()).get("content", "")),
+        )
         model_response["usage"] = litellm.Usage(
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
@@ -475,6 +493,7 @@ async def ollama_aembeddings(
     }
     return model_response

+
 def ollama_embeddings(
     api_base: str,
     model: str,
@@ -492,5 +511,6 @@ def ollama_embeddings(
             optional_params,
             logging_obj,
             model_response,
-            encoding)
+            encoding,
+        )
     )
@@ -1,7 +1,7 @@
 import os
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Usage
@@ -22,7 +22,6 @@ from litellm.utils import (
     TextCompletionResponse,
 )
 from typing import Callable, Optional
-import aiohttp, requests
 import litellm
 from .prompt_templates.factory import prompt_factory, custom_prompt
 from openai import OpenAI, AsyncOpenAI
@@ -1,7 +1,7 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 import litellm
@@ -487,7 +487,7 @@ def format_prompt_togetherai(messages, prompt_format, chat_template):


 def ibm_granite_pt(messages: list):
     """
-    IBM's Granite models uses the template:
+    IBM's Granite chat models uses the template:
     <|system|> {system_message} <|user|> {user_message} <|assistant|> {assistant_message}

     See: https://www.ibm.com/docs/en/watsonx-as-a-service?topic=solutions-supported-foundation-models
@@ -503,12 +503,13 @@ def ibm_granite_pt(messages: list):
                 "pre_message": "<|user|>\n",
                 "post_message": "\n",
             },
-            "assistant": {
-                "pre_message": "<|assistant|>\n",
-                "post_message": "\n",
+            'assistant': {
+                'pre_message': '<|assistant|>\n',
+                'post_message': '\n',
             },
         },
-    ).strip()
+        final_prompt_value='<|assistant|>\n',
+    )


 ### ANTHROPIC ###
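The docstring above spells out the Granite chat template, and this change appends a trailing assistant marker via `final_prompt_value`. A rough rendering of the template for a small message list; the loop and exact whitespace below are assumptions for illustration, not litellm's `custom_prompt` implementation:

```python
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Summarize watsonx.ai in one sentence."},
]

role_markers = {"system": "<|system|>", "user": "<|user|>", "assistant": "<|assistant|>"}

prompt = ""
for message in messages:
    # Each turn becomes "<|role|>\n{content}\n" per the template in the docstring.
    prompt += f"{role_markers[message['role']]}\n{message['content']}\n"

# final_prompt_value: leave the prompt open for the model's assistant turn.
prompt += "<|assistant|>\n"
print(prompt)
```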
@@ -981,7 +982,7 @@ def anthropic_messages_pt(messages: list):
     # add role=tool support to allow function call result/error submission
     user_message_types = {"user", "tool", "function"}
     # reformat messages to ensure user/assistant are alternating, if there's either 2 consecutive 'user' messages or 2 consecutive 'assistant' message, merge them.
-    new_messages = []
+    new_messages: list = []
     msg_i = 0
     tool_use_param = False
     while msg_i < len(messages):
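The comment above describes the merge rule: consecutive messages from the same role are combined so user/assistant turns strictly alternate. A minimal illustration of that rule, not the actual `anthropic_messages_pt` code:

```python
def merge_consecutive_roles(messages: list) -> list:
    """Merge back-to-back messages that share a role so turns alternate."""
    merged: list = []
    for message in messages:
        if merged and merged[-1]["role"] == message["role"]:
            merged[-1]["content"] += "\n" + message["content"]
        else:
            merged.append({"role": message["role"], "content": message["content"]})
    return merged


print(
    merge_consecutive_roles(
        [
            {"role": "user", "content": "Hi"},
            {"role": "user", "content": "Are you still there?"},
            {"role": "assistant", "content": "Yes, I am."},
        ]
    )
)
# The two user messages are merged into a single user turn.
```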
@@ -1524,24 +1525,9 @@ def prompt_factory(
         return mistral_instruct_pt(messages=messages)
     elif "meta-llama/llama-3" in model and "instruct" in model:
         # https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/
-        return custom_prompt(
-            role_dict={
-                "system": {
-                    "pre_message": "<|start_header_id|>system<|end_header_id|>\n",
-                    "post_message": "<|eot_id|>",
-                },
-                "user": {
-                    "pre_message": "<|start_header_id|>user<|end_header_id|>\n",
-                    "post_message": "<|eot_id|>",
-                },
-                "assistant": {
-                    "pre_message": "<|start_header_id|>assistant<|end_header_id|>\n",
-                    "post_message": "<|eot_id|>",
-                },
-            },
+        return hf_chat_template(
+            model="meta-llama/Meta-Llama-3-8B-Instruct",
             messages=messages,
-            initial_prompt_value="<|begin_of_text|>",
-            final_prompt_value="<|start_header_id|>assistant<|end_header_id|>\n",
         )
     try:
         if "meta-llama/llama-2" in model and "chat" in model:
@@ -1,11 +1,11 @@
 import os, types
 import json
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Usage
 import litellm
-import httpx
+import httpx  # type: ignore
 from .prompt_templates.factory import prompt_factory, custom_prompt


@@ -1,14 +1,14 @@
 import os, types, traceback
 from enum import Enum
 import json
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional, Any
 import litellm
 from litellm.utils import ModelResponse, EmbeddingResponse, get_secret, Usage
 import sys
 from copy import deepcopy
-import httpx
+import httpx  # type: ignore
 from .prompt_templates.factory import prompt_factory, custom_prompt


@@ -295,7 +295,7 @@ def completion(
         EndpointName={model},
         InferenceComponentName={model_id},
         ContentType="application/json",
-        Body={data},
+        Body={data},  # type: ignore
         CustomAttributes="accept_eula=true",
     )
     """  # type: ignore
@@ -321,7 +321,7 @@ def completion(
     response = client.invoke_endpoint(
         EndpointName={model},
         ContentType="application/json",
-        Body={data},
+        Body={data},  # type: ignore
         CustomAttributes="accept_eula=true",
     )
     """  # type: ignore
@@ -688,7 +688,7 @@ def embedding(
     response = client.invoke_endpoint(
         EndpointName={model},
         ContentType="application/json",
-        Body={data},
+        Body={data},  # type: ignore
         CustomAttributes="accept_eula=true",
     )"""  # type: ignore
     logging_obj.pre_call(
@@ -6,11 +6,11 @@ Reference: https://docs.together.ai/docs/openai-api-compatibility
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 import litellm
-import httpx
+import httpx  # type: ignore
 from litellm.utils import ModelResponse, Usage
 from .prompt_templates.factory import prompt_factory, custom_prompt

@@ -1,12 +1,12 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional, Union, List
 from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
 import litellm, uuid
-import httpx, inspect
+import httpx, inspect  # type: ignore


 class VertexAIError(Exception):
@@ -3,7 +3,7 @@
 import os, types
 import json
 from enum import Enum
-import requests, copy
+import requests, copy  # type: ignore
 import time, uuid
 from typing import Callable, Optional, List
 from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
@@ -17,7 +17,7 @@ from .prompt_templates.factory import (
     extract_between_tags,
     parse_xml_params,
 )
-import httpx
+import httpx  # type: ignore


 class VertexAIError(Exception):
@@ -1,8 +1,8 @@
 import os
 import json
 from enum import Enum
-import requests
-import time, httpx
+import requests  # type: ignore
+import time, httpx  # type: ignore
 from typing import Callable, Any
 from litellm.utils import ModelResponse, Usage
 from .prompt_templates.factory import prompt_factory, custom_prompt
@@ -1,12 +1,13 @@
 from enum import Enum
 import json, types, time  # noqa: E401
-from contextlib import contextmanager
-from typing import Callable, Dict, Optional, Any, Union, List
+from contextlib import asynccontextmanager, contextmanager
+from typing import AsyncGenerator, Callable, Dict, Generator, Optional, Any, Union, List

-import httpx
-import requests
+import httpx  # type: ignore
+import requests  # type: ignore
 import litellm
-from litellm.utils import ModelResponse, get_secret, Usage
+from litellm.utils import Logging, ModelResponse, Usage, get_secret
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler

 from .base import BaseLLM
 from .prompt_templates import factory as ptf
@@ -192,7 +193,7 @@ class WatsonXAIEndpoint(str, Enum):

 class IBMWatsonXAI(BaseLLM):
     """
-    Class to interface with IBM Watsonx.ai API for text generation and embeddings.
+    Class to interface with IBM watsonx.ai API for text generation and embeddings.

     Reference: https://cloud.ibm.com/apidocs/watsonx-ai
     """
@@ -343,7 +344,7 @@ class IBMWatsonXAI(BaseLLM):
         )
         if token is None and api_key is not None:
             # generate the auth token
-            if print_verbose:
+            if print_verbose is not None:
                 print_verbose("Generating IAM token for Watsonx.ai")
             token = self.generate_iam_token(api_key)
         elif token is None and api_key is None:
@@ -377,8 +378,9 @@ class IBMWatsonXAI(BaseLLM):
         model_response: ModelResponse,
         print_verbose: Callable,
         encoding,
-        logging_obj,
-        optional_params: dict,
+        logging_obj: Logging,
+        optional_params: Optional[dict] = None,
+        acompletion: bool = None,
         litellm_params: Optional[dict] = None,
         logger_fn=None,
         timeout: Optional[float] = None,
@@ -402,12 +404,14 @@ class IBMWatsonXAI(BaseLLM):
             model, messages, provider, custom_prompt_dict
         )

-        def process_text_request(request_params: dict) -> ModelResponse:
-            with self._manage_response(
-                request_params, logging_obj=logging_obj, input=prompt, timeout=timeout
-            ) as resp:
-                json_resp = resp.json()
+        manage_response = self._make_response_manager(async_=(acompletion is True), logging_obj=logging_obj)

+        def process_text_gen_response(json_resp: dict) -> ModelResponse:
+            if "results" not in json_resp:
+                raise WatsonXAIError(
+                    status_code=500,
+                    message=f"Error: Invalid response from Watsonx.ai API: {json_resp}",
+                )
             generated_text = json_resp["results"][0]["generated_text"]
             prompt_tokens = json_resp["results"][0]["input_token_count"]
             completion_tokens = json_resp["results"][0]["generated_token_count"]
@@ -426,25 +430,52 @@ class IBMWatsonXAI(BaseLLM):
             )
             return model_response

-        def process_stream_request(
+        def handle_text_request(request_params: dict) -> ModelResponse:
+            with manage_response(
+                request_params, input=prompt, timeout=timeout,
+            ) as resp:
+                json_resp = resp.json()
+
+            return process_text_gen_response(json_resp)
+
+        async def handle_text_request_async(request_params: dict) -> ModelResponse:
+            async with manage_response(
+                request_params, input=prompt, timeout=timeout,
+            ) as resp:
+                json_resp = resp.json()
+            return process_text_gen_response(json_resp)
+
+        def handle_stream_request(
             request_params: dict,
         ) -> litellm.CustomStreamWrapper:
             # stream the response - generated chunks will be handled
             # by litellm.utils.CustomStreamWrapper.handle_watsonx_stream
-            with self._manage_response(
-                request_params,
-                logging_obj=logging_obj,
-                stream=True,
-                input=prompt,
-                timeout=timeout,
+            with manage_response(
+                request_params, stream=True, input=prompt, timeout=timeout,
             ) as resp:
-                response = litellm.CustomStreamWrapper(
+                streamwrapper = litellm.CustomStreamWrapper(
                     resp.iter_lines(),
                     model=model,
                     custom_llm_provider="watsonx",
                     logging_obj=logging_obj,
                 )
-                return response
+                return streamwrapper
+
+        async def handle_stream_request_async(
+            request_params: dict,
+        ) -> litellm.CustomStreamWrapper:
+            # stream the response - generated chunks will be handled
+            # by litellm.utils.CustomStreamWrapper.handle_watsonx_stream
+            async with manage_response(
+                request_params, stream=True, input=prompt, timeout=timeout,
+            ) as resp:
+                streamwrapper = litellm.CustomStreamWrapper(
+                    resp.aiter_lines(),
+                    model=model,
+                    custom_llm_provider="watsonx",
+                    logging_obj=logging_obj,
+                )
+                return streamwrapper

         try:
             ## Get the response from the model
@@ -455,10 +486,18 @@ class IBMWatsonXAI(BaseLLM):
                 optional_params=optional_params,
                 print_verbose=print_verbose,
             )
-            if stream:
-                return process_stream_request(req_params)
+            if stream and acompletion:
+                # stream and async text generation
+                return handle_stream_request_async(req_params)
+            elif stream:
+                # streaming text generation
+                return handle_stream_request(req_params)
+            elif acompletion:
+                # async text generation
+                return handle_text_request_async(req_params)
             else:
-                return process_text_request(req_params)
+                # regular text generation
+                return handle_text_request(req_params)
         except WatsonXAIError as e:
             raise e
         except Exception as e:
@@ -473,6 +512,7 @@ class IBMWatsonXAI(BaseLLM):
         model_response=None,
         optional_params=None,
         encoding=None,
+        aembedding=None,
     ):
         """
         Send a text embedding request to the IBM Watsonx.ai API.
@@ -507,9 +547,6 @@ class IBMWatsonXAI(BaseLLM):
         }
         request_params = dict(version=api_params["api_version"])
         url = api_params["url"].rstrip("/") + WatsonXAIEndpoint.EMBEDDINGS
-        # request = httpx.Request(
-        #     "POST", url, headers=headers, json=payload, params=request_params
-        # )
         req_params = {
             "method": "POST",
             "url": url,
@@ -517,11 +554,9 @@ class IBMWatsonXAI(BaseLLM):
             "json": payload,
             "params": request_params,
         }
-        with self._manage_response(
-            req_params, logging_obj=logging_obj, input=input
-        ) as resp:
-            json_resp = resp.json()
+        manage_response = self._make_response_manager(async_=(aembedding is True), logging_obj=logging_obj)

+        def process_embedding_response(json_resp: dict) -> ModelResponse:
             results = json_resp.get("results", [])
             embedding_response = []
             for idx, result in enumerate(results):
@ -537,6 +572,30 @@ class IBMWatsonXAI(BaseLLM):
|
||||||
)
|
)
|
||||||
return model_response
|
return model_response
|
||||||
|
|
||||||
|
def handle_embedding_request(request_params: dict) -> ModelResponse:
|
||||||
|
with manage_response(
|
||||||
|
request_params, input=input
|
||||||
|
) as resp:
|
||||||
|
json_resp = resp.json()
|
||||||
|
return process_embedding_response(json_resp)
|
||||||
|
|
||||||
|
async def handle_embedding_request_async(request_params: dict) -> ModelResponse:
|
||||||
|
async with manage_response(
|
||||||
|
request_params, input=input
|
||||||
|
) as resp:
|
||||||
|
json_resp = resp.json()
|
||||||
|
return process_embedding_response(json_resp)
|
||||||
|
|
||||||
|
try:
|
||||||
|
if aembedding:
|
||||||
|
return handle_embedding_request_async(req_params)
|
||||||
|
else:
|
||||||
|
return handle_embedding_request(req_params)
|
||||||
|
except WatsonXAIError as e:
|
||||||
|
raise e
|
||||||
|
except Exception as e:
|
||||||
|
raise WatsonXAIError(status_code=500, message=str(e))
|
||||||
|
|
||||||
def generate_iam_token(self, api_key=None, **params):
|
def generate_iam_token(self, api_key=None, **params):
|
||||||
headers = {}
|
headers = {}
|
||||||
headers["Content-Type"] = "application/x-www-form-urlencoded"
|
headers["Content-Type"] = "application/x-www-form-urlencoded"
|
||||||
|
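A rough usage sketch for the new `aembedding` flag, assuming watsonx credentials are configured via environment variables; the embedding model id is a placeholder:

```python
import asyncio
import litellm

MODEL = "watsonx/ibm/slate-30m-english-rtrvr"  # placeholder embedding model id

def sync_embedding():
    # aembedding is falsy here -> handle_embedding_request(...) (blocking requests call)
    return litellm.embedding(model=MODEL, input=["hello world"])

async def async_embedding():
    # litellm.aembedding(...) reaches the same provider method with
    # aembedding=True -> handle_embedding_request_async(...) (httpx-based call)
    return await litellm.aembedding(model=MODEL, input=["hello world"])

if __name__ == "__main__":
    print(sync_embedding())
    print(asyncio.run(async_embedding()))
```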
@@ -558,17 +617,33 @@ class IBMWatsonXAI(BaseLLM):
         self.token = iam_access_token
         return iam_access_token

-    @contextmanager
-    def _manage_response(
+    def _make_response_manager(
         self,
-        request_params: dict,
-        logging_obj: Any,
-        stream: bool = False,
-        input: Optional[Any] = None,
-        timeout: Optional[float] = None,
-    ):
-        request_str = (
-            f"response = {request_params['method']}(\n"
+        async_: bool,
+        logging_obj: Logging
+    ) -> Callable[..., Generator[Union[requests.Response, httpx.Response], None, None]]:
+        """
+        Returns a context manager that manages the response from the request.
+        if async_ is True, returns an async context manager, otherwise returns a regular context manager.
+
+        Usage:
+        ```python
+        manage_response = self._make_response_manager(async_=True, logging_obj=logging_obj)
+        async with manage_response(request_params) as resp:
+            ...
+        # or
+        manage_response = self._make_response_manager(async_=False, logging_obj=logging_obj)
+        with manage_response(request_params) as resp:
+            ...
+        ```
+        """
+
+        def pre_call(
+            request_params: dict,
+            input: Optional[Any] = None,
+        ):
+            request_str = (
+                f"response = {'await ' if async_ else ''}{request_params['method']}(\n"
                 f"\turl={request_params['url']},\n"
                 f"\tjson={request_params['json']},\n"
                 f")"
@@ -581,29 +656,76 @@ class IBMWatsonXAI(BaseLLM):
                     "request_str": request_str,
                 },
             )
-        if timeout:
-            request_params["timeout"] = timeout
-        try:
-            if stream:
-                resp = requests.request(
-                    **request_params,
-                    stream=True,
-                )
-                resp.raise_for_status()
-                yield resp
-            else:
-                resp = requests.request(**request_params)
-                resp.raise_for_status()
-                yield resp
-        except Exception as e:
-            raise WatsonXAIError(status_code=500, message=str(e))
-        if not stream:
+
+        def post_call(resp, request_params):
             logging_obj.post_call(
                 input=input,
                 api_key=request_params["headers"].get("Authorization"),
                 original_response=json.dumps(resp.json()),
                 additional_args={
                     "status_code": resp.status_code,
-                    "complete_input_dict": request_params["json"],
+                    "complete_input_dict": request_params.get("data", request_params.get("json")),
                 },
             )
+
+        @contextmanager
+        def _manage_response(
+            request_params: dict,
+            stream: bool = False,
+            input: Optional[Any] = None,
+            timeout: float = None,
+        ) -> Generator[requests.Response, None, None]:
+            """
+            Returns a context manager that yields the response from the request.
+            """
+            pre_call(request_params, input)
+            if timeout:
+                request_params["timeout"] = timeout
+            if stream:
+                request_params["stream"] = stream
+            try:
+                resp = requests.request(**request_params)
+                resp.raise_for_status()
+                yield resp
+            except Exception as e:
+                raise WatsonXAIError(status_code=500, message=str(e))
+            if not stream:
+                post_call(resp, request_params)
+
+        @asynccontextmanager
+        async def _manage_response_async(
+            request_params: dict,
+            stream: bool = False,
+            input: Optional[Any] = None,
+            timeout: float = None,
+        ) -> AsyncGenerator[httpx.Response, None]:
+            pre_call(request_params, input)
+            if timeout:
+                request_params["timeout"] = timeout
+            if stream:
+                request_params["stream"] = stream
+            try:
+                # async with AsyncHTTPHandler(timeout=timeout) as client:
+                self.async_handler = AsyncHTTPHandler(
+                    timeout=httpx.Timeout(timeout=request_params.pop("timeout", 600.0), connect=5.0),
+                )
+                # async_handler.client.verify = False
+                if "json" in request_params:
+                    request_params['data'] = json.dumps(request_params.pop("json", {}))
+                method = request_params.pop("method")
+                if method.upper() == "POST":
+                    resp = await self.async_handler.post(**request_params)
+                else:
+                    resp = await self.async_handler.get(**request_params)
+                yield resp
+                # await async_handler.close()
+            except Exception as e:
+                raise WatsonXAIError(status_code=500, message=str(e))
+            if not stream:
+                post_call(resp, request_params)
+
+        if async_:
+            return _manage_response_async
+        else:
+            return _manage_response

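The factory above returns either a sync or an async context manager from one call site. A self-contained sketch of that pattern, with invented names and a plain requests/httpx round trip standing in for the watsonx call:

```python
# Illustrative only: names and behavior here are a simplified stand-in for
# _make_response_manager, not litellm internals.
from contextlib import asynccontextmanager, contextmanager

import httpx
import requests

def make_response_manager(async_: bool):
    @contextmanager
    def _manage_response(request_params: dict):
        # blocking HTTP call via requests
        resp = requests.request(**request_params)
        resp.raise_for_status()
        yield resp

    @asynccontextmanager
    async def _manage_response_async(request_params: dict):
        # non-blocking HTTP call via httpx
        async with httpx.AsyncClient() as client:
            resp = await client.request(**request_params)
            resp.raise_for_status()
            yield resp

    # the caller picks the flavor once, then uses `with` or `async with`
    return _manage_response_async if async_ else _manage_response
```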
@@ -73,6 +73,7 @@ from .llms.azure_text import AzureTextCompletion
 from .llms.anthropic import AnthropicChatCompletion
 from .llms.anthropic_text import AnthropicTextCompletion
 from .llms.huggingface_restapi import Huggingface
+from .llms.watsonx import IBMWatsonXAI
 from .llms.prompt_templates.factory import (
     prompt_factory,
     custom_prompt,
@@ -109,6 +110,7 @@ anthropic_text_completions = AnthropicTextCompletion()
 azure_chat_completions = AzureChatCompletion()
 azure_text_completions = AzureTextCompletion()
 huggingface = Huggingface()
+watsonxai = IBMWatsonXAI()
 ####### COMPLETION ENDPOINTS ################


@@ -313,6 +315,7 @@ async def acompletion(
         or custom_llm_provider == "gemini"
         or custom_llm_provider == "sagemaker"
         or custom_llm_provider == "anthropic"
+        or custom_llm_provider == "watsonx"
         or custom_llm_provider in litellm.openai_compatible_providers
     ):  # currently implemented aiohttp calls for just azure, openai, hf, ollama, vertex ai soon all.
         init_response = await loop.run_in_executor(None, func_with_context)
@@ -1908,7 +1911,7 @@ def completion(
             response = response
         elif custom_llm_provider == "watsonx":
             custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict
-            response = watsonx.IBMWatsonXAI().completion(
+            response = watsonxai.completion(
                 model=model,
                 messages=messages,
                 custom_prompt_dict=custom_prompt_dict,
@@ -1919,7 +1922,8 @@ def completion(
                 logger_fn=logger_fn,
                 encoding=encoding,
                 logging_obj=logging,
-                timeout=timeout,  # type: ignore
+                acompletion=acompletion,
+                timeout=timeout,
             )
             if (
                 "stream" in optional_params
@@ -2572,6 +2576,7 @@ async def aembedding(*args, **kwargs):
         or custom_llm_provider == "fireworks_ai"
         or custom_llm_provider == "ollama"
         or custom_llm_provider == "vertex_ai"
+        or custom_llm_provider == "watsonx"
     ):  # currently implemented aiohttp calls for just azure and openai, soon all.
         # Await normally
         init_response = await loop.run_in_executor(None, func_with_context)
@@ -3029,13 +3034,14 @@ def embedding(
             aembedding=aembedding,
         )
     elif custom_llm_provider == "watsonx":
-        response = watsonx.IBMWatsonXAI().embedding(
+        response = watsonxai.embedding(
             model=model,
             input=input,
             encoding=encoding,
             logging_obj=logging,
             optional_params=optional_params,
             model_response=EmbeddingResponse(),
+            aembedding=aembedding,
         )
     else:
         args = locals()
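The acompletion/aembedding allow-list works by running the sync entrypoint in a thread executor and awaiting whatever coroutine the provider hands back when `acompletion=True`. A stripped-down sketch of that flow, with illustrative function names that are not litellm internals:

```python
import asyncio

def provider_completion(acompletion: bool = False):
    # stand-in for a provider's completion() method
    async def _async_call():
        return {"object": "chat.completion", "mode": "async"}

    if acompletion:
        # hand the coroutine back so the caller can await it on its own loop
        return _async_call()
    return {"object": "chat.completion", "mode": "sync"}

async def acompletion_like():
    loop = asyncio.get_running_loop()
    # the sync wrapper runs in a worker thread...
    init_response = await loop.run_in_executor(
        None, lambda: provider_completion(acompletion=True)
    )
    # ...and if it returned a coroutine, the async entrypoint awaits it here
    if asyncio.iscoroutine(init_response):
        init_response = await init_response
    return init_response

print(asyncio.run(acompletion_like()))
```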
File diff suppressed because one or more lines are too long

[Regenerated Next.js admin-UI build output: the minified webpack runtime (webpack-d85d62a2bbfac48f.js -> webpack-5b257e1ab47d4b4a.js), main-app chunk (main-app-096338c8e1915716.js -> main-app-9b4fb13a7db53edf.js), page chunk (page-f32196ae7cd3d914.js -> page-c804e862b63be987.js), hashed stylesheet (7de0c97d470f519f.css -> a1602eb39f799143.css), Next.js build ID (OcLXYgLcgQyjMd6bH1bqU -> K8KXTbmuI2ArWjjdMi2iq), and generated font class name (__className_12bbc4 -> __className_c23dc8) all changed. The full minified chunk, HTML, and RSC payload bodies are not reproduced here.]
@@ -515,6 +515,8 @@ class NewTeamRequest(TeamBase):

 class GlobalEndUsersSpend(LiteLLMBase):
     api_key: Optional[str] = None
+    startTime: Optional[datetime] = None
+    endTime: Optional[datetime] = None


 class TeamMemberAddRequest(LiteLLMBase):
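Assuming `LiteLLMBase` is pydantic-based, the widened request model can be exercised like this stand-in (field names match the hunk; the class name and values are illustrative):

```python
from datetime import datetime
from typing import Optional

from pydantic import BaseModel

class GlobalEndUsersSpendSketch(BaseModel):
    # stand-in for GlobalEndUsersSpend; LiteLLMBase is assumed to be a pydantic model
    api_key: Optional[str] = None
    startTime: Optional[datetime] = None
    endTime: Optional[datetime] = None

req = GlobalEndUsersSpendSketch(
    api_key="sk-1234",  # placeholder key
    startTime="2024-04-01T00:00:00",  # ISO strings are coerced to datetime by pydantic
    endTime="2024-05-01T00:00:00",
)
print(req.startTime, req.endTime)
```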
@@ -252,7 +252,7 @@ def run_server(
     if model and "ollama" in model and api_base is None:
         run_ollama_serve()
     if test_async is True:
-        import requests, concurrent, time
+        import requests, concurrent, time  # type: ignore

         api_base = f"http://{host}:{port}"

@@ -418,7 +418,7 @@ def run_server(
         read from there and save it to os.env['DATABASE_URL']
         """
         try:
-            import yaml, asyncio
+            import yaml, asyncio  # type: ignore
         except:
             raise ImportError(
                 "yaml needs to be imported. Run - `pip install 'litellm[proxy]'`"
@@ -30,7 +30,7 @@ sys.path.insert(
 try:
     import fastapi
     import backoff
-    import yaml
+    import yaml  # type: ignore
     import orjson
     import logging
     from apscheduler.schedulers.asyncio import AsyncIOScheduler
@@ -3731,6 +3731,7 @@ async def chat_completion(
                 "x-litellm-model-id": model_id,
                 "x-litellm-cache-key": cache_key,
                 "x-litellm-model-api-base": api_base,
+                "x-litellm-version": version,
             }
             selected_data_generator = select_data_generator(
                 response=response,
@@ -3746,6 +3747,7 @@ async def chat_completion(
         fastapi_response.headers["x-litellm-model-id"] = model_id
         fastapi_response.headers["x-litellm-cache-key"] = cache_key
         fastapi_response.headers["x-litellm-model-api-base"] = api_base
+        fastapi_response.headers["x-litellm-version"] = version

         ### CALL HOOKS ### - modify outgoing data
         response = await proxy_logging_obj.post_call_success_hook(
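A hedged client-side example of reading the new debug headers; the proxy URL, route, and key are placeholders:

```python
import requests

resp = requests.post(
    "http://localhost:4000/chat/completions",  # placeholder proxy base URL
    headers={"Authorization": "Bearer sk-1234"},  # placeholder proxy key
    json={"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "hi"}]},
)
# header names come from the hunks above
for name in (
    "x-litellm-model-id",
    "x-litellm-cache-key",
    "x-litellm-model-api-base",
    "x-litellm-version",
):
    print(name, "=", resp.headers.get(name, ""))
```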
@@ -3902,14 +3904,10 @@ async def completion(
             },
         )

-        if hasattr(response, "_hidden_params"):
-            model_id = response._hidden_params.get("model_id", None) or ""
-            original_response = (
-                response._hidden_params.get("original_response", None) or ""
-            )
-        else:
-            model_id = ""
-            original_response = ""
+        hidden_params = getattr(response, "_hidden_params", {}) or {}
+        model_id = hidden_params.get("model_id", None) or ""
+        cache_key = hidden_params.get("cache_key", None) or ""
+        api_base = hidden_params.get("api_base", None) or ""

         verbose_proxy_logger.debug("final response: %s", response)
         if (
@@ -3917,6 +3915,9 @@ async def completion(
         ):  # use generate_responses to stream responses
             custom_headers = {
                 "x-litellm-model-id": model_id,
+                "x-litellm-cache-key": cache_key,
+                "x-litellm-model-api-base": api_base,
+                "x-litellm-version": version,
             }
             selected_data_generator = select_data_generator(
                 response=response,
@@ -3931,6 +3932,10 @@ async def completion(
             )

         fastapi_response.headers["x-litellm-model-id"] = model_id
+        fastapi_response.headers["x-litellm-cache-key"] = cache_key
+        fastapi_response.headers["x-litellm-model-api-base"] = api_base
+        fastapi_response.headers["x-litellm-version"] = version

         return response
     except Exception as e:
         data["litellm_status"] = "fail"  # used for alerting
@@ -3970,6 +3975,7 @@ async def completion(
 )  # azure compatible endpoint
 async def embeddings(
     request: Request,
+    fastapi_response: Response,
     model: Optional[str] = None,
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):
@@ -4116,6 +4122,17 @@ async def embeddings(
         ### ALERTING ###
         data["litellm_status"] = "success"  # used for alerting

+        ### RESPONSE HEADERS ###
+        hidden_params = getattr(response, "_hidden_params", {}) or {}
+        model_id = hidden_params.get("model_id", None) or ""
+        cache_key = hidden_params.get("cache_key", None) or ""
+        api_base = hidden_params.get("api_base", None) or ""
+
+        fastapi_response.headers["x-litellm-model-id"] = model_id
+        fastapi_response.headers["x-litellm-cache-key"] = cache_key
+        fastapi_response.headers["x-litellm-model-api-base"] = api_base
+        fastapi_response.headers["x-litellm-version"] = version
+
         return response
     except Exception as e:
         data["litellm_status"] = "fail"  # used for alerting
@@ -4154,6 +4171,7 @@ async def embeddings(
 )
 async def image_generation(
     request: Request,
+    fastapi_response: Response,
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):
     global proxy_logging_obj
@@ -4273,6 +4291,17 @@ async def image_generation(
         ### ALERTING ###
         data["litellm_status"] = "success"  # used for alerting

+        ### RESPONSE HEADERS ###
+        hidden_params = getattr(response, "_hidden_params", {}) or {}
+        model_id = hidden_params.get("model_id", None) or ""
+        cache_key = hidden_params.get("cache_key", None) or ""
+        api_base = hidden_params.get("api_base", None) or ""
+
+        fastapi_response.headers["x-litellm-model-id"] = model_id
+        fastapi_response.headers["x-litellm-cache-key"] = cache_key
+        fastapi_response.headers["x-litellm-model-api-base"] = api_base
+        fastapi_response.headers["x-litellm-version"] = version
+
         return response
     except Exception as e:
         data["litellm_status"] = "fail"  # used for alerting
@@ -4309,6 +4338,7 @@ async def image_generation(
 )
 async def audio_transcriptions(
     request: Request,
+    fastapi_response: Response,
     file: UploadFile = File(...),
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):
@@ -4453,6 +4483,18 @@ async def audio_transcriptions(

         ### ALERTING ###
         data["litellm_status"] = "success"  # used for alerting

+        ### RESPONSE HEADERS ###
+        hidden_params = getattr(response, "_hidden_params", {}) or {}
+        model_id = hidden_params.get("model_id", None) or ""
+        cache_key = hidden_params.get("cache_key", None) or ""
+        api_base = hidden_params.get("api_base", None) or ""
+
+        fastapi_response.headers["x-litellm-model-id"] = model_id
+        fastapi_response.headers["x-litellm-cache-key"] = cache_key
+        fastapi_response.headers["x-litellm-model-api-base"] = api_base
+        fastapi_response.headers["x-litellm-version"] = version
+
         return response
     except Exception as e:
         data["litellm_status"] = "fail"  # used for alerting
@@ -4492,6 +4534,7 @@ async def audio_transcriptions(
 )
 async def moderations(
     request: Request,
+    fastapi_response: Response,
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):
     """
@@ -4616,6 +4659,17 @@ async def moderations(
         ### ALERTING ###
         data["litellm_status"] = "success"  # used for alerting

+        ### RESPONSE HEADERS ###
+        hidden_params = getattr(response, "_hidden_params", {}) or {}
+        model_id = hidden_params.get("model_id", None) or ""
+        cache_key = hidden_params.get("cache_key", None) or ""
+        api_base = hidden_params.get("api_base", None) or ""
+
+        fastapi_response.headers["x-litellm-model-id"] = model_id
+        fastapi_response.headers["x-litellm-cache-key"] = cache_key
+        fastapi_response.headers["x-litellm-model-api-base"] = api_base
+        fastapi_response.headers["x-litellm-version"] = version
+
         return response
     except Exception as e:
         data["litellm_status"] = "fail"  # used for alerting
@@ -5821,35 +5875,38 @@ async def global_spend_end_users(data: Optional[GlobalEndUsersSpend] = None):
     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No db connected"})

-    if data is None:
-        sql_query = f"""SELECT * FROM "Last30dTopEndUsersSpend";"""
-
-        response = await prisma_client.db.query_raw(query=sql_query)
-    else:
     """
     Gets the top 100 end-users for a given api key
     """
-        current_date = datetime.now()
-        past_date = current_date - timedelta(days=30)
-        response = await prisma_client.db.litellm_spendlogs.group_by(  # type: ignore
-            by=["end_user"],
-            where={
-                "AND": [{"startTime": {"gte": past_date}}, {"api_key": data.api_key}]  # type: ignore
-            },
-            sum={"spend": True},
-            order={"_sum": {"spend": "desc"}},  # type: ignore
-            take=100,
-            count=True,
+    startTime = None
+    endTime = None
+    selected_api_key = None
+    if data is not None:
+        startTime = data.startTime
+        endTime = data.endTime
+        selected_api_key = data.api_key
+
+    startTime = startTime or datetime.now() - timedelta(days=30)
+    endTime = endTime or datetime.now()
+
+    sql_query = """
+    SELECT end_user, COUNT(*) AS total_count, SUM(spend) AS total_spend
+    FROM "LiteLLM_SpendLogs"
+    WHERE "startTime" >= $1::timestamp
+    AND "startTime" < $2::timestamp
+    AND (
+        CASE
+        WHEN $3::TEXT IS NULL THEN TRUE
+        ELSE api_key = $3
+        END
+    )
+    GROUP BY end_user
+    ORDER BY total_spend DESC
+    LIMIT 100
+    """
+    response = await prisma_client.db.query_raw(
+        sql_query, startTime, endTime, selected_api_key
     )
-    if response is not None and isinstance(response, list):
-        new_response = []
-        for r in response:
-            new_r = r
-            new_r["total_spend"] = r["_sum"]["spend"]
-            new_r["total_count"] = r["_count"]["_all"]
-            new_r.pop("_sum")
-            new_r.pop("_count")
-            new_response.append(new_r)

     return response

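Assuming this handler is mounted at `/global/spend/end_users` (inferred from the function name, not shown in this hunk), the optional filters line up with the SQL parameters `$1`-`$3` roughly as follows; the URL and key are placeholders:

```python
import requests

resp = requests.post(
    "http://localhost:4000/global/spend/end_users",  # placeholder proxy base URL + assumed route
    headers={"Authorization": "Bearer sk-1234"},     # placeholder proxy key
    json={
        "api_key": None,                     # NULL -> the CASE branch matches every key
        "startTime": "2024-04-01T00:00:00",  # -> $1::timestamp
        "endTime": "2024-05-01T00:00:00",    # -> $2::timestamp
    },
)
# expected shape: [{"end_user": ..., "total_count": ..., "total_spend": ...}, ...]
print(resp.json())
```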
@@ -1689,12 +1689,12 @@ def get_instance_fn(value: str, config_file_path: Optional[str] = None) -> Any:
             module_file_path = os.path.join(directory, *module_name.split("."))
             module_file_path += ".py"

-            spec = importlib.util.spec_from_file_location(module_name, module_file_path)
+            spec = importlib.util.spec_from_file_location(module_name, module_file_path)  # type: ignore
             if spec is None:
                 raise ImportError(
                     f"Could not find a module specification for {module_file_path}"
                 )
-            module = importlib.util.module_from_spec(spec)
+            module = importlib.util.module_from_spec(spec)  # type: ignore
             spec.loader.exec_module(module)  # type: ignore
         else:
             # Dynamically import the module
@@ -6,7 +6,7 @@
 # - use litellm.success + failure callbacks to log when a request completed
 # - in get_available_deployment, for a given model group name -> pick based on traffic

-import dotenv, os, requests, random
+import dotenv, os, requests, random  # type: ignore
 from typing import Optional

 dotenv.load_dotenv()  # Loading env variables using dotenv

@@ -1,7 +1,7 @@
 #### What this does ####
 # picks based on response time (for streaming, this is time to first token)
 from pydantic import BaseModel, Extra, Field, root_validator
-import dotenv, os, requests, random
+import dotenv, os, requests, random  # type: ignore
 from typing import Optional, Union, List, Dict
 from datetime import datetime, timedelta
 import random

@@ -1,7 +1,7 @@
 #### What this does ####
 # picks based on response time (for streaming, this is time to first token)
-from pydantic import BaseModel, Extra, Field, root_validator
-import dotenv, os, requests, random
+from pydantic import BaseModel, Extra, Field, root_validator  # type: ignore
+import dotenv, os, requests, random  # type: ignore
 from typing import Optional, Union, List, Dict
 from datetime import datetime, timedelta
 import random
@@ -231,14 +231,17 @@ def test_cost_bedrock_pricing():
     assert cost == predicted_cost


+@pytest.mark.skip(reason="AWS disabled our access")
 def test_cost_bedrock_pricing_actual_calls():
     litellm.set_verbose = True
     model = "anthropic.claude-instant-v1"
     messages = [{"role": "user", "content": "Hey, how's it going?"}]
-    response = litellm.completion(model=model, messages=messages)
-    assert response._hidden_params["region_name"] is not None
+    response = litellm.completion(
+        model=model, messages=messages, mock_response="hello cool one"
+    )
+
+    print("response", response)
     cost = litellm.completion_cost(
+        model="bedrock/anthropic.claude-instant-v1",
         completion_response=response,
         messages=[{"role": "user", "content": "Hey, how's it going?"}],
     )
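The same pattern, collected into a standalone snippet: `mock_response` keeps the call offline while `completion_cost` is pointed at the provider-prefixed model so the pricing lookup still resolves.

```python
import litellm

# mock_response short-circuits the provider call and returns a canned ModelResponse
response = litellm.completion(
    model="anthropic.claude-instant-v1",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
    mock_response="hello cool one",
)
cost = litellm.completion_cost(
    model="bedrock/anthropic.claude-instant-v1",
    completion_response=response,
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
print(f"cost: {cost}")
```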
@@ -14,7 +14,7 @@ import subprocess, os
 from os.path import abspath, join, dirname
 import litellm, openai
 import itertools
-import random, uuid, requests
+import random, uuid, requests  # type: ignore
 from functools import wraps
 import datetime, time
 import tiktoken
@@ -36,7 +36,7 @@ import litellm._service_logger  # for storing API inputs, outputs, and metadata

 try:
     # this works in python 3.8
-    import pkg_resources
+    import pkg_resources  # type: ignore

     filename = pkg_resources.resource_filename(__name__, "llms/tokenizers")
     # try:
@@ -4161,8 +4161,30 @@ def cost_per_token(
                 model_with_provider_and_region in model_cost_ref
             ):  # use region based pricing, if it's available
                 model_with_provider = model_with_provider_and_region
-    if model_with_provider in model_cost_ref:
+
+    model_without_prefix = model
+    model_parts = model.split("/")
+    if len(model_parts) > 1:
+        model_without_prefix = model_parts[1]
+    else:
+        model_without_prefix = model
+
+    """
+    Code block that formats model to lookup in litellm.model_cost
+    Option1. model = "bedrock/ap-northeast-1/anthropic.claude-instant-v1". This is the most accurate since it is region based. Should always be option 1
+    Option2. model = "openai/gpt-4" - model = provider/model
+    Option3. model = "anthropic.claude-3" - model = model
+    """
+    if (
+        model_with_provider in model_cost_ref
+    ):  # Option 2. use model with provider, model = "openai/gpt-4"
         model = model_with_provider
+    elif model in model_cost_ref:  # Option 1. use model passed, model="gpt-4"
+        model = model
+    elif (
+        model_without_prefix in model_cost_ref
+    ):  # Option 3. if user passed model="bedrock/anthropic.claude-3", use model="anthropic.claude-3"
+        model = model_without_prefix

     # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
     print_verbose(f"Looking up model={model} in model_cost_map")
     if model in model_cost_ref:
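Editor's note (not part of the diff): the docstring added in this hunk describes a three-step lookup order for pricing keys. As a standalone illustration in Python, the cascade looks roughly like the sketch below. The PRICING table, its dummy prices, and the name resolve_pricing_key are invented for this sketch and are not litellm internals.

from typing import Dict, Optional

# Dummy pricing entries, purely illustrative.
PRICING: Dict[str, dict] = {
    "bedrock/ap-northeast-1/anthropic.claude-instant-v1": {"input_cost_per_token": 0.00000223},
    "openai/gpt-4": {"input_cost_per_token": 0.00003},
    "anthropic.claude-instant-v1": {"input_cost_per_token": 0.0000008},
}

def resolve_pricing_key(model: str, provider: str, region: Optional[str]) -> Optional[str]:
    """Try the most specific key first: provider/region/model, then provider/model, then the bare model name."""
    candidates = []
    if region is not None:
        candidates.append(f"{provider}/{region}/{model}")  # Option 1: region based, most accurate
    candidates.append(f"{provider}/{model}")               # Option 2: provider/model
    candidates.append(model.split("/")[-1])                # Option 3: model without any prefix
    for key in candidates:
        if key in PRICING:
            return key
    return None

print(resolve_pricing_key("anthropic.claude-instant-v1", "bedrock", "ap-northeast-1"))
# -> "bedrock/ap-northeast-1/anthropic.claude-instant-v1"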
@@ -7766,11 +7788,11 @@ def _calculate_retry_after(
         try:
             retry_after = int(retry_header)
         except Exception:
-            retry_date_tuple = email.utils.parsedate_tz(retry_header)
+            retry_date_tuple = email.utils.parsedate_tz(retry_header)  # type: ignore
             if retry_date_tuple is None:
                 retry_after = -1
             else:
-                retry_date = email.utils.mktime_tz(retry_date_tuple)
+                retry_date = email.utils.mktime_tz(retry_date_tuple)  # type: ignore
                 retry_after = int(retry_date - time.time())
     else:
         retry_after = -1
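Editor's note (not part of the diff): the Retry-After handling in this hunk reduces to the standalone sketch below. Only the standard library's email.utils and time modules are assumed; parse_retry_after is an illustrative name rather than the function shown above.

import email.utils
import time

def parse_retry_after(retry_header: str) -> int:
    # The header is either an integer number of seconds or an HTTP-date.
    try:
        return int(retry_header)
    except Exception:
        retry_date_tuple = email.utils.parsedate_tz(retry_header)
        if retry_date_tuple is None:
            return -1
        retry_date = email.utils.mktime_tz(retry_date_tuple)
        return int(retry_date - time.time())

print(parse_retry_after("120"))                            # -> 120
print(parse_retry_after("Wed, 21 Oct 2015 07:28:00 GMT"))  # negative, since the date is in the past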
@@ -10545,6 +10567,18 @@ class CustomStreamWrapper:
             elif self.custom_llm_provider == "watsonx":
                 response_obj = self.handle_watsonx_stream(chunk)
                 completion_obj["content"] = response_obj["text"]
+                print_verbose(f"completion obj content: {completion_obj['content']}")
+                if getattr(model_response, "usage", None) is None:
+                    model_response.usage = Usage()
+                if response_obj.get("prompt_tokens") is not None:
+                    prompt_token_count = getattr(model_response.usage, "prompt_tokens", 0)
+                    model_response.usage.prompt_tokens = (prompt_token_count+response_obj["prompt_tokens"])
+                if response_obj.get("completion_tokens") is not None:
+                    model_response.usage.completion_tokens = response_obj["completion_tokens"]
+                model_response.usage.total_tokens = (
+                    getattr(model_response.usage, "prompt_tokens", 0)
+                    + getattr(model_response.usage, "completion_tokens", 0)
+                )
                 if response_obj["is_finished"]:
                     self.received_finish_reason = response_obj["finish_reason"]
             elif self.custom_llm_provider == "text-completion-openai":
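Editor's note (not part of the diff): the usage bookkeeping added for the watsonx streaming branch can be summarised by the sketch below. The Usage dataclass here is a stand-in used only for illustration, not litellm's own Usage class.

from dataclasses import dataclass

@dataclass
class Usage:
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0

def merge_chunk_usage(usage: Usage, chunk: dict) -> Usage:
    # Prompt tokens are accumulated across chunks; the completion count is
    # taken from the chunk that reports it; the total is recomputed each time.
    if chunk.get("prompt_tokens") is not None:
        usage.prompt_tokens += chunk["prompt_tokens"]
    if chunk.get("completion_tokens") is not None:
        usage.completion_tokens = chunk["completion_tokens"]
    usage.total_tokens = usage.prompt_tokens + usage.completion_tokens
    return usage

usage = Usage()
for chunk in [{"prompt_tokens": 12}, {"completion_tokens": 7}]:
    merge_chunk_usage(usage, chunk)
print(usage)  # Usage(prompt_tokens=12, completion_tokens=7, total_tokens=19)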
@@ -10949,6 +10983,7 @@ class CustomStreamWrapper:
                 or self.custom_llm_provider == "sagemaker"
                 or self.custom_llm_provider == "gemini"
                 or self.custom_llm_provider == "cached_response"
+                or self.custom_llm_provider == "watsonx"
                 or self.custom_llm_provider in litellm.openai_compatible_endpoints
             ):
                 async for chunk in self.completion_stream:
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.36.3"
+version = "1.36.4"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"

@@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"

 [tool.commitizen]
-version = "1.36.3"
+version = "1.36.4"
 version_files = [
     "pyproject.toml:^version"
 ]
File diff suppressed because one or more lines are too long
@@ -0,0 +1 @@
self.__BUILD_MANIFEST={__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/_error":["static/chunks/pages/_error-d6107f1aac0c574c.js"],sortedPages:["/_app","/_error"]},self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();

@@ -0,0 +1 @@
self.__SSG_MANIFEST=new Set([]);self.__SSG_MANIFEST_CB&&self.__SSG_MANIFEST_CB()

File diff suppressed because one or more lines are too long
@@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_c23dc8', '__Inter_Fallback_c23dc8'",fontStyle:"normal"},className:"__className_c23dc8"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=87421)}),_N_E=n.O()}]);

File diff suppressed because one or more lines are too long
@@ -0,0 +1 @@
(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);

@@ -0,0 +1 @@
!function(){"use strict";var e,t,n,r,o,u,i,c,f,a={},l={};function d(e){var t=l[e];if(void 0!==t)return t.exports;var n=l[e]={id:e,loaded:!1,exports:{}},r=!0;try{a[e](n,n.exports,d),r=!1}finally{r&&delete l[e]}return n.loaded=!0,n.exports}d.m=a,e=[],d.O=function(t,n,r,o){if(n){o=o||0;for(var u=e.length;u>0&&e[u-1][2]>o;u--)e[u]=e[u-1];e[u]=[n,r,o];return}for(var i=1/0,u=0;u<e.length;u++){for(var n=e[u][0],r=e[u][1],o=e[u][2],c=!0,f=0;f<n.length;f++)i>=o&&Object.keys(d.O).every(function(e){return d.O[e](n[f])})?n.splice(f--,1):(c=!1,o<i&&(i=o));if(c){e.splice(u--,1);var a=r();void 0!==a&&(t=a)}}return t},d.n=function(e){var t=e&&e.__esModule?function(){return e.default}:function(){return e};return d.d(t,{a:t}),t},n=Object.getPrototypeOf?function(e){return Object.getPrototypeOf(e)}:function(e){return e.__proto__},d.t=function(e,r){if(1&r&&(e=this(e)),8&r||"object"==typeof e&&e&&(4&r&&e.__esModule||16&r&&"function"==typeof e.then))return e;var o=Object.create(null);d.r(o);var u={};t=t||[null,n({}),n([]),n(n)];for(var i=2&r&&e;"object"==typeof i&&!~t.indexOf(i);i=n(i))Object.getOwnPropertyNames(i).forEach(function(t){u[t]=function(){return e[t]}});return u.default=function(){return e},d.d(o,u),o},d.d=function(e,t){for(var n in t)d.o(t,n)&&!d.o(e,n)&&Object.defineProperty(e,n,{enumerable:!0,get:t[n]})},d.f={},d.e=function(e){return Promise.all(Object.keys(d.f).reduce(function(t,n){return d.f[n](e,t),t},[]))},d.u=function(e){},d.miniCssF=function(e){return"static/css/a1602eb39f799143.css"},d.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||Function("return this")()}catch(e){if("object"==typeof window)return window}}(),d.o=function(e,t){return Object.prototype.hasOwnProperty.call(e,t)},r={},o="_N_E:",d.l=function(e,t,n,u){if(r[e]){r[e].push(t);return}if(void 0!==n)for(var i,c,f=document.getElementsByTagName("script"),a=0;a<f.length;a++){var l=f[a];if(l.getAttribute("src")==e||l.getAttribute("data-webpack")==o+n){i=l;break}}i||(c=!0,(i=document.createElement("script")).charset="utf-8",i.timeout=120,d.nc&&i.setAttribute("nonce",d.nc),i.setAttribute("data-webpack",o+n),i.src=d.tu(e)),r[e]=[t];var s=function(t,n){i.onerror=i.onload=null,clearTimeout(p);var o=r[e];if(delete r[e],i.parentNode&&i.parentNode.removeChild(i),o&&o.forEach(function(e){return e(n)}),t)return t(n)},p=setTimeout(s.bind(null,void 0,{type:"timeout",target:i}),12e4);i.onerror=s.bind(null,i.onerror),i.onload=s.bind(null,i.onload),c&&document.head.appendChild(i)},d.r=function(e){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},d.nmd=function(e){return e.paths=[],e.children||(e.children=[]),e},d.tt=function(){return void 0===u&&(u={createScriptURL:function(e){return e}},"undefined"!=typeof trustedTypes&&trustedTypes.createPolicy&&(u=trustedTypes.createPolicy("nextjs#bundler",u))),u},d.tu=function(e){return d.tt().createScriptURL(e)},d.p="/ui/_next/",i={272:0},d.f.j=function(e,t){var n=d.o(i,e)?i[e]:void 0;if(0!==n){if(n)t.push(n[2]);else if(272!=e){var r=new Promise(function(t,r){n=i[e]=[t,r]});t.push(n[2]=r);var o=d.p+d.u(e),u=Error();d.l(o,function(t){if(d.o(i,e)&&(0!==(n=i[e])&&(i[e]=void 0),n)){var r=t&&("load"===t.type?"missing":t.type),o=t&&t.target&&t.target.src;u.message="Loading chunk "+e+" failed.\n("+r+": "+o+")",u.name="ChunkLoadError",u.type=r,u.request=o,n[1](u)}},"chunk-"+e,e)}else i[e]=0}},d.O.j=function(e){return 0===i[e]},c=function(e,t){var 
n,r,o=t[0],u=t[1],c=t[2],f=0;if(o.some(function(e){return 0!==i[e]})){for(n in u)d.o(u,n)&&(d.m[n]=u[n]);if(c)var a=c(d)}for(e&&e(t);f<o.length;f++)r=o[f],d.o(i,r)&&i[r]&&i[r][0](),i[r]=0;return d.O(a)},(f=self.webpackChunk_N_E=self.webpackChunk_N_E||[]).forEach(c.bind(null,0)),f.push=c.bind(null,f.push.bind(f))}();

File diff suppressed because one or more lines are too long
@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-d85d62a2bbfac48f.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-096338c8e1915716.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-d85d62a2bbfac48f.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/7de0c97d470f519f.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[18889,[\"936\",\"static/chunks/2f6dbc85-17d29013b8ff3da5.js\",\"319\",\"static/chunks/319-4467f3d35ad11cf1.js\",\"931\",\"static/chunks/app/page-f32196ae7cd3d914.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/7de0c97d470f519f.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"OcLXYgLcgQyjMd6bH1bqU\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-5b257e1ab47d4b4a.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-dafd44dfa2da140c.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-e49705773ae41779.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-5b257e1ab47d4b4a.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/a1602eb39f799143.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[25539,[\"936\",\"static/chunks/2f6dbc85-17d29013b8ff3da5.js\",\"566\",\"static/chunks/566-ccd699ab19124658.js\",\"931\",\"static/chunks/app/page-c804e862b63be987.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/a1602eb39f799143.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"K8KXTbmuI2ArWjjdMi2iq\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_c23dc8\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid 
rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>

@@ -1,7 +1,7 @@
 2:I[77831,[],""]
-3:I[18889,["936","static/chunks/2f6dbc85-17d29013b8ff3da5.js","319","static/chunks/319-4467f3d35ad11cf1.js","931","static/chunks/app/page-f32196ae7cd3d914.js"],""]
+3:I[25539,["936","static/chunks/2f6dbc85-17d29013b8ff3da5.js","566","static/chunks/566-ccd699ab19124658.js","931","static/chunks/app/page-c804e862b63be987.js"],""]
 4:I[5613,[],""]
 5:I[31778,[],""]
0:["OcLXYgLcgQyjMd6bH1bqU",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/7de0c97d470f519f.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["K8KXTbmuI2ArWjjdMi2iq",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/a1602eb39f799143.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null

@@ -189,6 +189,7 @@ const CreateKeyPage = () => {
             userRole={userRole}
             token={token}
             accessToken={accessToken}
+            keys={keys}
           />
         )}
       </div>
@@ -786,7 +786,9 @@ export const adminTopKeysCall = async (accessToken: String) => {

 export const adminTopEndUsersCall = async (
   accessToken: String,
-  keyToken: String | null
+  keyToken: String | null,
+  startTime: String | undefined,
+  endTime: String | undefined
 ) => {
   try {
     let url = proxyBaseUrl
@@ -795,8 +797,11 @@ export const adminTopEndUsersCall = async (

     let body = "";
     if (keyToken) {
-      body = JSON.stringify({ api_key: keyToken });
+      body = JSON.stringify({ api_key: keyToken, startTime: startTime, endTime: endTime });
+    } else {
+      body = JSON.stringify({ startTime: startTime, endTime: endTime });
     }

     //message.info("Making top end users request");

     // Define requestOptions with body as an optional property
@@ -815,9 +820,7 @@ export const adminTopEndUsersCall = async (
       },
     };

-    if (keyToken) {
-      requestOptions.body = JSON.stringify({ api_key: keyToken });
-    }
+    requestOptions.body = body;

     const response = await fetch(url, requestOptions);
     if (!response.ok) {
@@ -3,13 +3,14 @@ import { BarChart, BarList, Card, Title, Table, TableHead, TableHeaderCell, Tabl
 import React, { useState, useEffect } from "react";

 import ViewUserSpend from "./view_user_spend";
-import { Grid, Col, Text, LineChart, TabPanel, TabPanels, TabGroup, TabList, Tab, Select, SelectItem } from "@tremor/react";
+import { Grid, Col, Text, LineChart, TabPanel, TabPanels, TabGroup, TabList, Tab, Select, SelectItem, DateRangePicker, DateRangePickerValue } from "@tremor/react";
 import {
   userSpendLogsCall,
   keyInfoCall,
   adminSpendLogsCall,
   adminTopKeysCall,
   adminTopModelsCall,
+  adminTopEndUsersCall,
   teamSpendLogsCall,
   tagsSpendLogsCall,
   modelMetricsCall,
@@ -23,6 +24,7 @@ interface UsagePageProps {
   token: string | null;
   userRole: string | null;
   userID: string | null;
+  keys: any[] | null;
 }

 type CustomTooltipTypeBar = {
@@ -95,47 +97,14 @@ function getTopKeys(data: Array<{ [key: string]: unknown }>): any[] {
 }
 type DataDict = { [key: string]: unknown };
 type UserData = { user_id: string; spend: number };
-function getTopUsers(data: Array<DataDict>): UserData[] {
-  const userSpend: { [key: string]: number } = {};
-
-  data.forEach((dict) => {
-    const payload: DataDict = dict["users"] as DataDict;
-    Object.entries(payload).forEach(([user_id, value]) => {
-      if (
-        user_id === "" ||
-        user_id === undefined ||
-        user_id === null ||
-        user_id == "None"
-      ) {
-        return;
-      }
-
-      if (!userSpend[user_id]) {
-        userSpend[user_id] = 0;
-      }
-      userSpend[user_id] += value as number;
-    });
-  });
-
-  const spendUsers: UserData[] = Object.entries(userSpend).map(
-    ([user_id, spend]) => ({
-      user_id,
-      spend,
-    })
-  );
-
-  spendUsers.sort((a, b) => b.spend - a.spend);
-
-  const topKeys = spendUsers.slice(0, 5);
-  console.log(`topKeys: ${Object.values(topKeys[0])}`);
-  return topKeys;
-}
-
 const UsagePage: React.FC<UsagePageProps> = ({
   accessToken,
   token,
   userRole,
   userID,
+  keys,
 }) => {
   const currentDate = new Date();
   const [keySpendData, setKeySpendData] = useState<any[]>([]);
@@ -146,6 +115,11 @@ const UsagePage: React.FC<UsagePageProps> = ({
   const [topTagsData, setTopTagsData] = useState<any[]>([]);
   const [uniqueTeamIds, setUniqueTeamIds] = useState<any[]>([]);
   const [totalSpendPerTeam, setTotalSpendPerTeam] = useState<any[]>([]);
+  const [selectedKeyID, setSelectedKeyID] = useState<string | null>("");
+  const [dateValue, setDateValue] = useState<DateRangePickerValue>({
+    from: new Date(Date.now() - 7 * 24 * 60 * 60 * 1000),
+    to: new Date(),
+  });

   const firstDay = new Date(
     currentDate.getFullYear(),
@@ -161,6 +135,26 @@ const UsagePage: React.FC<UsagePageProps> = ({
   let startTime = formatDate(firstDay);
   let endTime = formatDate(lastDay);

+  console.log("keys in usage", keys);
+
+  const updateEndUserData = async (startTime: Date | undefined, endTime: Date | undefined, uiSelectedKey: string | null) => {
+    if (!startTime || !endTime || !accessToken) {
+      return;
+    }
+
+    console.log("uiSelectedKey", uiSelectedKey);
+
+    let newTopUserData = await adminTopEndUsersCall(
+      accessToken,
+      uiSelectedKey,
+      startTime.toISOString(),
+      endTime.toISOString()
+    )
+    console.log("End user data updated successfully", newTopUserData);
+    setTopUsers(newTopUserData);
+
+  }
+
   function formatDate(date: Date) {
     const year = date.getFullYear();
     let month = date.getMonth() + 1; // JS month index starts from 0
@@ -227,6 +221,12 @@ const UsagePage: React.FC<UsagePageProps> = ({
         const top_tags = await tagsSpendLogsCall(accessToken);
         setTopTagsData(top_tags.top_10_tags);

+        // get spend per end-user
+        let spend_user_call = await adminTopEndUsersCall(accessToken, null, undefined, undefined);
+        setTopUsers(spend_user_call);
+
+        console.log("spend/user result", spend_user_call);
+
       } else if (userRole == "App Owner") {
         await userSpendLogsCall(
           accessToken,
@@ -258,7 +258,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
             spend: k["spend"],
           }));
           setTopKeys(filtered_keys);
-          setTopUsers(getTopUsers(response));
           setKeySpendData(response);
         }
       });
@@ -286,6 +285,7 @@ const UsagePage: React.FC<UsagePageProps> = ({
         <TabList className="mt-2">
           <Tab>All Up</Tab>
           <Tab>Team Based Usage</Tab>
+          <Tab>End User Usage</Tab>
           <Tab>Tag Based Usage</Tab>
         </TabList>
         <TabPanels>
@@ -323,22 +323,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
                   />
                 </Card>
               </Col>
-              <Col numColSpan={1}>
-                <Card>
-                  <Title>Top Users</Title>
-                  <BarChart
-                    className="mt-4 h-40"
-                    data={topUsers}
-                    index="user_id"
-                    categories={["spend"]}
-                    colors={["blue"]}
-                    yAxisWidth={200}
-                    layout="vertical"
-                    showXAxis={false}
-                    showLegend={false}
-                  />
-                </Card>
-              </Col>
               <Col numColSpan={1}>
                 <Card>
                   <Title>Top Models</Title>
@@ -354,6 +338,10 @@ const UsagePage: React.FC<UsagePageProps> = ({
                    showLegend={false}
                  />
                </Card>
+
+              </Col>
+              <Col numColSpan={1}>
+
              </Col>
            </Grid>
          </TabPanel>
@@ -385,6 +373,88 @@ const UsagePage: React.FC<UsagePageProps> = ({
             <Col numColSpan={2}>
             </Col>
           </Grid>
+          </TabPanel>
+          <TabPanel>
+            <p className="mb-2 text-gray-500 italic text-[12px]">End-Users of your LLM API calls. Tracked when a `user` param is passed in your LLM calls <a className="text-blue-500" href="https://docs.litellm.ai/docs/proxy/users" target="_blank">docs here</a></p>
+            <Grid numItems={2}>
+              <Col>
+                <Text>Select Time Range</Text>
+
+                <DateRangePicker
+                  enableSelect={true}
+                  value={dateValue}
+                  onValueChange={(value) => {
+                    setDateValue(value);
+                    updateEndUserData(value.from, value.to, null); // Call updateModelMetrics with the new date range
+                  }}
+                />
+              </Col>
+              <Col>
+                <Text>Select Key</Text>
+                <Select defaultValue="all-keys">
+                  <SelectItem
+                    key="all-keys"
+                    value="all-keys"
+                    onClick={() => {
+                      updateEndUserData(dateValue.from, dateValue.to, null);
+                    }}
+                  >
+                    All Keys
+                  </SelectItem>
+                  {keys?.map((key: any, index: number) => {
+                    if (
+                      key &&
+                      key["key_alias"] !== null &&
+                      key["key_alias"].length > 0
+                    ) {
+                      return (
+
+                        <SelectItem
+                          key={index}
+                          value={String(index)}
+                          onClick={() => {
+                            updateEndUserData(dateValue.from, dateValue.to, key["token"]);
+                          }}
+                        >
+                          {key["key_alias"]}
+                        </SelectItem>
+                      );
+                    }
+                    return null; // Add this line to handle the case when the condition is not met
+                  })}
+                </Select>
+              </Col>
+
+            </Grid>
+
+
+
+            <Card className="mt-4">
+
+
+
+              <Table className="max-h-[70vh] min-h-[500px]">
+                <TableHead>
+                  <TableRow>
+                    <TableHeaderCell>End User</TableHeaderCell>
+                    <TableHeaderCell>Spend</TableHeaderCell>
+                    <TableHeaderCell>Total Events</TableHeaderCell>
+                  </TableRow>
+                </TableHead>
+
+                <TableBody>
+                  {topUsers?.map((user: any, index: number) => (
+                    <TableRow key={index}>
+                      <TableCell>{user.end_user}</TableCell>
+                      <TableCell>{user.total_spend?.toFixed(4)}</TableCell>
+                      <TableCell>{user.total_count}</TableCell>
+                    </TableRow>
+                  ))}
+                </TableBody>
+              </Table>
+
+            </Card>
+
           </TabPanel>
           <TabPanel>
             <Grid numItems={2} className="gap-2 h-[75vh] w-full mb-4">
@@ -24,7 +24,7 @@ import {
   Icon,
   TextInput,
 } from "@tremor/react";
-import { userInfoCall, adminTopEndUsersCall } from "./networking";
+import { userInfoCall } from "./networking";
 import { Badge, BadgeDelta, Button } from "@tremor/react";
 import RequestAccess from "./request_model_access";
 import CreateUser from "./create_user_button";
@@ -83,22 +83,7 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
       fetchData();
     }

-    const fetchEndUserSpend = async () => {
-      try {
-        const topEndUsers = await adminTopEndUsersCall(accessToken, null);
-        console.log("user data response:", topEndUsers);
-        setEndUsers(topEndUsers);
-      } catch (error) {
-        console.error("There was an error fetching the model data", error);
-      }
-    };
-    if (
-      userRole &&
-      (userRole == "Admin" || userRole == "Admin Viewer") &&
-      !endUsers
-    ) {
-      fetchEndUserSpend();
-    }
   }, [accessToken, token, userRole, userID, currentPage]);

   if (!userData) {
@@ -109,16 +94,6 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
     return <div>Loading...</div>;
   }

-  const onKeyClick = async (keyToken: String) => {
-    try {
-      const topEndUsers = await adminTopEndUsersCall(accessToken, keyToken);
-      console.log("user data response:", topEndUsers);
-      setEndUsers(topEndUsers);
-    } catch (error) {
-      console.error("There was an error fetching the model data", error);
-    }
-  };
-
   function renderPagination() {
     if (!userData) return null;

@@ -157,14 +132,11 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
       <CreateUser userID={userID} accessToken={accessToken} teams={teams}/>
       <Card className="w-full mx-auto flex-auto overflow-y-auto max-h-[80vh] mb-4">
         <div className="mb-4 mt-1">
-          <Text><b>Key Owners: </b> Users on LiteLLM that created API Keys. Automatically tracked by LiteLLM</Text>
-          <Text className="mt-1"><b>End Users: </b>End Users of your LLM API calls. Tracked When a `user` param is passed in your LLM calls</Text>
+          <Text>These are Users on LiteLLM that created API Keys. Automatically tracked by LiteLLM</Text>
         </div>
         <TabGroup>
-          <TabList variant="line" defaultValue="1">
-            <Tab value="1">Key Owners</Tab>
-            <Tab value="2">End-Users</Tab>
-          </TabList>
           <TabPanels>
             <TabPanel>

@@ -190,7 +162,7 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
                           ? user.models
                           : "All Models"}
                       </TableCell>
-                      <TableCell>{user.spend ? user.spend : 0}</TableCell>
+                      <TableCell>{user.spend ? user.spend?.toFixed(2) : 0}</TableCell>
                       <TableCell>
                         {user.max_budget ? user.max_budget : "Unlimited"}
                       </TableCell>
@@ -220,29 +192,10 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
               <div className="flex items-center">
                 <div className="flex-1"></div>
                 <div className="flex-1 flex justify-between items-center">
-                  <Text className="w-1/4 mr-2 text-right">Key</Text>
-                  <Select defaultValue="1" className="w-3/4">
-                    {keys?.map((key: any, index: number) => {
-                      if (
-                        key &&
-                        key["key_alias"] !== null &&
-                        key["key_alias"].length > 0
-                      ) {
-                        return (
-                          <SelectItem
-                            key={index}
-                            value={String(index)}
-                            onClick={() => onKeyClick(key["token"])}
-                          >
-                            {key["key_alias"]}
-                          </SelectItem>
-                        );
-                      }
-                    })}
-                  </Select>
                 </div>
               </div>
-              <Table className="max-h-[70vh] min-h-[500px]">
+              {/* <Table className="max-h-[70vh] min-h-[500px]">
                 <TableHead>
                   <TableRow>
                     <TableHeaderCell>End User</TableHeaderCell>
@@ -260,7 +213,7 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
                   </TableRow>
                 ))}
               </TableBody>
-              </Table>
+              </Table> */}
             </TabPanel>
           </TabPanels>
         </TabGroup>