forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_region_based_routing
This commit is contained in:
commit 8ad979cdfe
85 changed files with 793 additions and 448 deletions
@@ -16,11 +16,11 @@ repos:
       name: Check if files match
       entry: python3 ci_cd/check_files_match.py
       language: system
+- repo: local
+  hooks:
+    - id: mypy
+      name: mypy
+      entry: python3 -m mypy --ignore-missing-imports
+      language: system
+      types: [python]
+      files: ^litellm/
-# - repo: local
-#   hooks:
-#     - id: mypy
-#       name: mypy
-#       entry: python3 -m mypy --ignore-missing-imports
-#       language: system
-#       types: [python]
-#       files: ^litellm/
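For context, the newly enabled hook can be approximated outside pre-commit; a rough Python equivalent of what the hook's entry runs (assuming mypy is installed):

```python
# Approximates the hook above: run mypy over litellm/ with missing imports ignored.
import subprocess

result = subprocess.run(
    ["python3", "-m", "mypy", "--ignore-missing-imports", "litellm/"],
    check=False,  # pre-commit signals failure via the exit code
)
print("mypy exit code:", result.returncode)
```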
@@ -291,7 +291,7 @@ def _create_clickhouse_aggregate_tables(client=None, table_names=[]):


 def _forecast_daily_cost(data: list):
-    import requests
+    import requests  # type: ignore
     from datetime import datetime, timedelta

     if len(data) == 0:
@@ -10,8 +10,8 @@
 # s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation
 import os
 import inspect
-import redis, litellm
-import redis.asyncio as async_redis
+import redis, litellm  # type: ignore
+import redis.asyncio as async_redis  # type: ignore
 from typing import List, Optional
@@ -10,7 +10,7 @@
 import os, json, time
 import litellm
 from litellm.utils import ModelResponse
-import requests, threading
+import requests, threading  # type: ignore
 from typing import Optional, Union, Literal
@@ -1,7 +1,6 @@
 #### What this does ####
 # On success + failure, log events to aispend.io
 import dotenv, os
-import requests

 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -4,18 +4,30 @@ import datetime
 class AthinaLogger:
     def __init__(self):
         import os

         self.athina_api_key = os.getenv("ATHINA_API_KEY")
         self.headers = {
             "athina-api-key": self.athina_api_key,
-            "Content-Type": "application/json"
+            "Content-Type": "application/json",
         }
         self.athina_logging_url = "https://log.athina.ai/api/v1/log/inference"
-        self.additional_keys = ["environment", "prompt_slug", "customer_id", "customer_user_id", "session_id", "external_reference_id", "context", "expected_response", "user_query"]
+        self.additional_keys = [
+            "environment",
+            "prompt_slug",
+            "customer_id",
+            "customer_user_id",
+            "session_id",
+            "external_reference_id",
+            "context",
+            "expected_response",
+            "user_query",
+        ]

     def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
-        import requests
+        import requests  # type: ignore
         import json
         import traceback

         try:
             response_json = response_obj.model_dump() if response_obj else {}
             data = {
@@ -23,32 +35,51 @@ class AthinaLogger:
                 "request": kwargs,
                 "response": response_json,
                 "prompt_tokens": response_json.get("usage", {}).get("prompt_tokens"),
-                "completion_tokens": response_json.get("usage", {}).get("completion_tokens"),
+                "completion_tokens": response_json.get("usage", {}).get(
+                    "completion_tokens"
+                ),
                 "total_tokens": response_json.get("usage", {}).get("total_tokens"),
             }

-            if type(end_time) == datetime.datetime and type(start_time) == datetime.datetime:
-                data["response_time"] = int((end_time - start_time).total_seconds() * 1000)
+            if (
+                type(end_time) == datetime.datetime
+                and type(start_time) == datetime.datetime
+            ):
+                data["response_time"] = int(
+                    (end_time - start_time).total_seconds() * 1000
+                )

             if "messages" in kwargs:
                 data["prompt"] = kwargs.get("messages", None)

             # Directly add tools or functions if present
             optional_params = kwargs.get("optional_params", {})
-            data.update((k, v) for k, v in optional_params.items() if k in ["tools", "functions"])
+            data.update(
+                (k, v)
+                for k, v in optional_params.items()
+                if k in ["tools", "functions"]
+            )

             # Add additional metadata keys
             metadata = kwargs.get("litellm_params", {}).get("metadata", {})
             if metadata:
                 for key in self.additional_keys:
                     if key in metadata:
                         data[key] = metadata[key]

-            response = requests.post(self.athina_logging_url, headers=self.headers, data=json.dumps(data, default=str))
+            response = requests.post(
+                self.athina_logging_url,
+                headers=self.headers,
+                data=json.dumps(data, default=str),
+            )
             if response.status_code != 200:
-                print_verbose(f"Athina Logger Error - {response.text}, {response.status_code}")
+                print_verbose(
+                    f"Athina Logger Error - {response.text}, {response.status_code}"
+                )
             else:
                 print_verbose(f"Athina Logger Succeeded - {response.text}")
         except Exception as e:
-            print_verbose(f"Athina Logger Error - {e}, Stack trace: {traceback.format_exc()}")
-            pass
+            print_verbose(
+                f"Athina Logger Error - {e}, Stack trace: {traceback.format_exc()}"
+            )
+            pass
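The response-time arithmetic in this hunk is a common pattern worth spelling out once; a minimal self-contained check:

```python
# int((end - start).total_seconds() * 1000) yields whole milliseconds.
from datetime import datetime, timedelta

start_time = datetime(2024, 1, 1, 12, 0, 0)
end_time = start_time + timedelta(seconds=1.5)
assert int((end_time - start_time).total_seconds() * 1000) == 1500
```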
@@ -1,7 +1,7 @@
 #### What this does ####
 # On success + failure, log events to aispend.io
 import dotenv, os
-import requests
+import requests  # type: ignore

 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -3,7 +3,6 @@
 #### What this does ####
 # On success, logs events to Promptlayer
 import dotenv, os
-import requests

 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.caching import DualCache
@@ -1,7 +1,6 @@
 #### What this does ####
 # On success, logs events to Promptlayer
 import dotenv, os
-import requests

 from litellm.proxy._types import UserAPIKeyAuth
 from litellm.caching import DualCache
@@ -2,7 +2,7 @@
 # On success + failure, log events to Supabase

 import dotenv, os
-import requests
+import requests  # type: ignore

 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -2,7 +2,7 @@
 # On success + failure, log events to Supabase

 import dotenv, os
-import requests
+import requests  # type: ignore

 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -1,15 +1,17 @@
-import requests
+import requests  # type: ignore
 import json
 import traceback
 from datetime import datetime, timezone


 class GreenscaleLogger:
     def __init__(self):
         import os

         self.greenscale_api_key = os.getenv("GREENSCALE_API_KEY")
         self.headers = {
             "api-key": self.greenscale_api_key,
-            "Content-Type": "application/json"
+            "Content-Type": "application/json",
         }
         self.greenscale_logging_url = os.getenv("GREENSCALE_ENDPOINT")
@@ -19,33 +21,48 @@ class GreenscaleLogger:
             data = {
                 "modelId": kwargs.get("model"),
                 "inputTokenCount": response_json.get("usage", {}).get("prompt_tokens"),
-                "outputTokenCount": response_json.get("usage", {}).get("completion_tokens"),
+                "outputTokenCount": response_json.get("usage", {}).get(
+                    "completion_tokens"
+                ),
             }
-            data["timestamp"] = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
-
-            if type(end_time) == datetime and type(start_time) == datetime:
-                data["invocationLatency"] = int((end_time - start_time).total_seconds() * 1000)
+            data["timestamp"] = datetime.now(timezone.utc).strftime(
+                "%Y-%m-%dT%H:%M:%SZ"
+            )
+
+            if type(end_time) == datetime and type(start_time) == datetime:
+                data["invocationLatency"] = int(
+                    (end_time - start_time).total_seconds() * 1000
+                )

             # Add additional metadata keys to tags
             tags = []
             metadata = kwargs.get("litellm_params", {}).get("metadata", {})
             for key, value in metadata.items():
                 if key.startswith("greenscale"):
                     if key == "greenscale_project":
                         data["project"] = value
                     elif key == "greenscale_application":
                         data["application"] = value
                     else:
-                        tags.append({"key": key.replace("greenscale_", ""), "value": str(value)})
+                        tags.append(
+                            {"key": key.replace("greenscale_", ""), "value": str(value)}
+                        )

             data["tags"] = tags

-            response = requests.post(self.greenscale_logging_url, headers=self.headers, data=json.dumps(data, default=str))
+            response = requests.post(
+                self.greenscale_logging_url,
+                headers=self.headers,
+                data=json.dumps(data, default=str),
+            )
             if response.status_code != 200:
-                print_verbose(f"Greenscale Logger Error - {response.text}, {response.status_code}")
+                print_verbose(
+                    f"Greenscale Logger Error - {response.text}, {response.status_code}"
+                )
             else:
                 print_verbose(f"Greenscale Logger Succeeded - {response.text}")
         except Exception as e:
-            print_verbose(f"Greenscale Logger Error - {e}, Stack trace: {traceback.format_exc()}")
-            pass
+            print_verbose(
+                f"Greenscale Logger Error - {e}, Stack trace: {traceback.format_exc()}"
+            )
+            pass
@@ -1,7 +1,7 @@
 #### What this does ####
 # On success, logs events to Helicone
 import dotenv, os
-import requests
+import requests  # type: ignore
 import litellm

 dotenv.load_dotenv()  # Loading env variables using dotenv
@@ -1,15 +1,14 @@
 #### What this does ####
 # On success, logs events to Langsmith
-import dotenv, os
-import requests
-import requests
+import dotenv, os  # type: ignore
+import requests  # type: ignore
 from datetime import datetime

 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
 import asyncio
 import types
-from pydantic import BaseModel
+from pydantic import BaseModel  # type: ignore


 def is_serializable(value):
@@ -79,8 +78,6 @@ class LangsmithLogger:
         except:
             response_obj = response_obj.dict()  # type: ignore

-        print(f"response_obj: {response_obj}")
-
         data = {
             "name": run_name,
             "run_type": "llm",  # this should always be llm, since litellm always logs llm calls. Langsmith allow us to log "chain"
@@ -90,7 +87,6 @@ class LangsmithLogger:
             "start_time": start_time,
             "end_time": end_time,
         }
-        print(f"data: {data}")

         response = requests.post(
             "https://api.smith.langchain.com/runs",
@@ -2,7 +2,6 @@
 ## On Success events log cost to OpenMeter - https://github.com/BerriAI/litellm/issues/1268

 import dotenv, os, json
-import requests
 import litellm

 dotenv.load_dotenv()  # Loading env variables using dotenv
@@ -60,7 +59,7 @@ class OpenMeterLogger(CustomLogger):
             "total_tokens": response_obj["usage"].get("total_tokens"),
         }

-        subject = kwargs.get("user", None),  # end-user passed in via 'user' param
+        subject = (kwargs.get("user", None),)  # end-user passed in via 'user' param
         if not subject:
             raise Exception("OpenMeter: user is required")
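Worth noting: the trailing comma (made explicit by the added parentheses) binds `subject` to a one-element tuple, so the `if not subject` guard can never fire, even when no user was passed. A quick illustration:

```python
# A one-element tuple is truthy regardless of what it contains.
subject = (None,)  # same shape as (kwargs.get("user", None),)
assert bool(subject) is True
if not subject:
    raise AssertionError("unreachable")
```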
@@ -3,7 +3,7 @@
 # On success, log events to Prometheus

 import dotenv, os
-import requests
+import requests  # type: ignore

 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -19,7 +19,6 @@ class PrometheusLogger:
         **kwargs,
     ):
         try:
-            print(f"in init prometheus metrics")
             from prometheus_client import Counter

             self.litellm_llm_api_failed_requests_metric = Counter(
@@ -4,7 +4,7 @@


 import dotenv, os
-import requests
+import requests  # type: ignore

 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -183,7 +183,6 @@ class PrometheusServicesLogger:
         )

     async def async_service_failure_hook(self, payload: ServiceLoggerPayload):
-        print(f"received error payload: {payload.error}")
         if self.mock_testing:
             self.mock_testing_failure_calls += 1
@@ -1,12 +1,13 @@
 #### What this does ####
 # On success, logs events to Promptlayer
 import dotenv, os
-import requests
+import requests  # type: ignore
 from pydantic import BaseModel

 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback


 class PromptLayerLogger:
     # Class variables or attributes
     def __init__(self):
@@ -32,7 +33,11 @@ class PromptLayerLogger:
             tags = kwargs["litellm_params"]["metadata"]["pl_tags"]

             # Remove "pl_tags" from metadata
-            metadata = {k:v for k, v in kwargs["litellm_params"]["metadata"].items() if k != "pl_tags"}
+            metadata = {
+                k: v
+                for k, v in kwargs["litellm_params"]["metadata"].items()
+                if k != "pl_tags"
+            }

         print_verbose(
             f"Prompt Layer Logging - Enters logging function for model kwargs: {new_kwargs}\n, response: {response_obj}"
@@ -2,7 +2,6 @@
 # On success + failure, log events to Supabase

 import dotenv, os
-import requests

 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -2,7 +2,7 @@
 # On success + failure, log events to Supabase

 import dotenv, os
-import requests
+import requests  # type: ignore

 dotenv.load_dotenv()  # Loading env variables using dotenv
 import traceback
@@ -1,8 +1,8 @@
 import os, types, traceback
 import json
 from enum import Enum
-import requests
-import time, httpx
+import requests  # type: ignore
+import time, httpx  # type: ignore
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Choices, Message
 import litellm
@@ -1,12 +1,12 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 import litellm
 from litellm.utils import ModelResponse, Choices, Message, Usage
-import httpx
+import httpx  # type: ignore


 class AlephAlphaError(Exception):
@@ -1,7 +1,7 @@
 import os, types
 import json
 from enum import Enum
-import requests, copy
+import requests, copy  # type: ignore
 import time
 from typing import Callable, Optional, List
 from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
@@ -9,7 +9,7 @@ import litellm
 from .prompt_templates.factory import prompt_factory, custom_prompt
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
 from .base import BaseLLM
-import httpx
+import httpx  # type: ignore


 class AnthropicConstants(Enum):
@@ -12,7 +12,7 @@ from litellm.utils import (
 from typing import Callable, Optional, BinaryIO
 from litellm import OpenAIConfig
 import litellm, json
-import httpx
+import httpx  # type: ignore
 from .custom_httpx.azure_dall_e_2 import CustomHTTPTransport, AsyncCustomHTTPTransport
 from openai import AzureOpenAI, AsyncAzureOpenAI
 import uuid
@@ -1,5 +1,5 @@
 from typing import Optional, Union, Any
-import types, requests
+import types, requests  # type: ignore
 from .base import BaseLLM
 from litellm.utils import (
     ModelResponse,
@@ -1,7 +1,7 @@
 import os
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable
 from litellm.utils import ModelResponse, Usage
@@ -163,10 +163,9 @@ class AmazonAnthropicClaude3Config:
             "stop",
             "temperature",
             "top_p",
-            "extra_headers"
+            "extra_headers",
         ]

     def map_openai_params(self, non_default_params: dict, optional_params: dict):
         for param, value in non_default_params.items():
             if param == "max_tokens":
@@ -534,10 +533,12 @@ class AmazonStabilityConfig:

 def add_custom_header(headers):
+    """Closure to capture the headers and add them."""

     def callback(request, **kwargs):
+        """Actual callback function that Boto3 will call."""
         for header_name, header_value in headers.items():
             request.headers.add_header(header_name, header_value)

     return callback
@@ -672,7 +673,9 @@ def init_bedrock_client(
         config=config,
     )
     if extra_headers:
-        client.meta.events.register('before-sign.bedrock-runtime.*', add_custom_header(extra_headers))
+        client.meta.events.register(
+            "before-sign.bedrock-runtime.*", add_custom_header(extra_headers)
+        )

    return client
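For context, the registration above relies on boto3's event system, which invokes a callable per matching event; a standalone sketch of the closure pattern (client construction, credentials, and the header name are illustrative assumptions):

```python
import boto3

def add_custom_header(headers):
    """Closure to capture the headers and add them."""

    def callback(request, **kwargs):
        """Callback that boto3 invokes before signing each request."""
        for header_name, header_value in headers.items():
            request.headers.add_header(header_name, header_value)

    return callback

# Assumes the default AWS credential chain; the pattern matches all
# bedrock-runtime operations at the before-sign stage.
client = boto3.client("bedrock-runtime", region_name="us-east-1")
client.meta.events.register(
    "before-sign.bedrock-runtime.*", add_custom_header({"x-team": "ml-platform"})
)
```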
@@ -1224,7 +1227,7 @@ def _embedding_func_single(
         "input_type", "search_document"
     )  # aws bedrock example default - https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=cohere.embed-english-v3
     data = {"texts": [input], **inference_params}  # type: ignore
-    body = json.dumps(data).encode("utf-8")
+    body = json.dumps(data).encode("utf-8")  # type: ignore
     ## LOGGING
     request_str = f"""
     response = client.invoke_model(
@@ -1416,7 +1419,7 @@ def image_generation(
     ## LOGGING
     request_str = f"""
     response = client.invoke_model(
-        body={body},
+        body={body},  # type: ignore
         modelId={modelId},
         accept="application/json",
         contentType="application/json",
@@ -1,11 +1,11 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 import litellm
-import httpx
+import httpx  # type: ignore
 from litellm.utils import ModelResponse, Usage
 from .prompt_templates.factory import prompt_factory, custom_prompt
@@ -1,12 +1,12 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time, traceback
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Choices, Message, Usage
 import litellm
-import httpx
+import httpx  # type: ignore


 class CohereError(Exception):
@@ -1,12 +1,12 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time, traceback
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Choices, Message, Usage
 import litellm
-import httpx
+import httpx  # type: ignore
 from .prompt_templates.factory import cohere_message_pt
@@ -1,7 +1,7 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time, traceback
 from typing import Callable, Optional, List
 from litellm.utils import ModelResponse, Choices, Message, Usage
@@ -1,7 +1,7 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 import litellm
@@ -1,10 +1,10 @@
 from itertools import chain
-import requests, types, time
+import requests, types, time  # type: ignore
 import json, uuid
 import traceback
 from typing import Optional
 import litellm
-import httpx, aiohttp, asyncio
+import httpx, aiohttp, asyncio  # type: ignore
 from .prompt_templates.factory import prompt_factory, custom_prompt
@@ -220,7 +220,10 @@ def get_ollama_response(
                 tool_calls=[
                     {
                         "id": f"call_{str(uuid.uuid4())}",
-                        "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                        "function": {
+                            "name": function_call["name"],
+                            "arguments": json.dumps(function_call["arguments"]),
+                        },
                         "type": "function",
                     }
                 ],
@@ -232,7 +235,9 @@ def get_ollama_response(
     model_response["created"] = int(time.time())
     model_response["model"] = "ollama/" + model
     prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt, disallowed_special=())))  # type: ignore
-    completion_tokens = response_json.get("eval_count", len(response_json.get("message",dict()).get("content", "")))
+    completion_tokens = response_json.get(
+        "eval_count", len(response_json.get("message", dict()).get("content", ""))
+    )
     model_response["usage"] = litellm.Usage(
         prompt_tokens=prompt_tokens,
         completion_tokens=completion_tokens,
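One caveat with this fallback style: `dict.get` evaluates its default eagerly, so the `len(...)` fallback is computed even when `eval_count` is present. A small demonstration:

```python
# The default expression runs either way; only the returned value differs.
response_json = {"eval_count": 3, "message": {"content": "hi"}}
completion_tokens = response_json.get(
    "eval_count", len(response_json.get("message", dict()).get("content", ""))
)
assert completion_tokens == 3
```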
@@ -273,7 +278,10 @@ def ollama_completion_stream(url, data, logging_obj):
                 tool_calls=[
                     {
                         "id": f"call_{str(uuid.uuid4())}",
-                        "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                        "function": {
+                            "name": function_call["name"],
+                            "arguments": json.dumps(function_call["arguments"]),
+                        },
                         "type": "function",
                     }
                 ],
@@ -314,9 +322,10 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob
             first_chunk_content = first_chunk.choices[0].delta.content or ""
             response_content = first_chunk_content + "".join(
                 [
                     chunk.choices[0].delta.content
                     async for chunk in streamwrapper
-                    if chunk.choices[0].delta.content]
+                    if chunk.choices[0].delta.content
+                ]
             )
             function_call = json.loads(response_content)
             delta = litellm.utils.Delta(
@@ -324,7 +333,10 @@ async def ollama_async_streaming(url, data, model_response, encoding, logging_ob
                 tool_calls=[
                     {
                         "id": f"call_{str(uuid.uuid4())}",
-                        "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                        "function": {
+                            "name": function_call["name"],
+                            "arguments": json.dumps(function_call["arguments"]),
+                        },
                         "type": "function",
                     }
                 ],
@@ -373,7 +385,10 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
                 tool_calls=[
                     {
                         "id": f"call_{str(uuid.uuid4())}",
-                        "function": {"name": function_call["name"], "arguments": json.dumps(function_call["arguments"])},
+                        "function": {
+                            "name": function_call["name"],
+                            "arguments": json.dumps(function_call["arguments"]),
+                        },
                         "type": "function",
                     }
                 ],
@@ -387,7 +402,10 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
     model_response["created"] = int(time.time())
     model_response["model"] = "ollama/" + data["model"]
     prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(data["prompt"], disallowed_special=())))  # type: ignore
-    completion_tokens = response_json.get("eval_count", len(response_json.get("message",dict()).get("content", "")))
+    completion_tokens = response_json.get(
+        "eval_count",
+        len(response_json.get("message", dict()).get("content", "")),
+    )
     model_response["usage"] = litellm.Usage(
         prompt_tokens=prompt_tokens,
         completion_tokens=completion_tokens,
@@ -475,6 +493,7 @@ async def ollama_aembeddings(
     }
     return model_response

+
 def ollama_embeddings(
     api_base: str,
     model: str,
@@ -492,5 +511,6 @@ def ollama_embeddings(
             optional_params,
             logging_obj,
             model_response,
-            encoding)
+            encoding,
+        )
     )
@@ -1,7 +1,7 @@
 import os
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Usage
@@ -22,7 +22,6 @@ from litellm.utils import (
     TextCompletionResponse,
 )
 from typing import Callable, Optional
-import aiohttp, requests
 import litellm
 from .prompt_templates.factory import prompt_factory, custom_prompt
 from openai import OpenAI, AsyncOpenAI
@@ -1,7 +1,7 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 import litellm
@@ -487,7 +487,7 @@ def format_prompt_togetherai(messages, prompt_format, chat_template):

 def ibm_granite_pt(messages: list):
     """
-    IBM's Granite models uses the template:
+    IBM's Granite chat models uses the template:
     <|system|> {system_message} <|user|> {user_message} <|assistant|> {assistant_message}

     See: https://www.ibm.com/docs/en/watsonx-as-a-service?topic=solutions-supported-foundation-models
@@ -503,12 +503,13 @@ def ibm_granite_pt(messages: list):
             "pre_message": "<|user|>\n",
             "post_message": "\n",
         },
-        'assistant': {
-            'pre_message': '<|assistant|>\n',
-            'post_message': '\n',
+        "assistant": {
+            "pre_message": "<|assistant|>\n",
+            "post_message": "\n",
         },
     },
-).strip()
+    final_prompt_value='<|assistant|>\n',
+)


 ### ANTHROPIC ###
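A minimal sketch of how a role dict like the one above expands into the Granite template (the system entry is inferred from the docstring; this is not litellm's actual custom_prompt implementation):

```python
def render_granite(messages: list) -> str:
    role_dict = {
        "system": {"pre_message": "<|system|>\n", "post_message": "\n"},
        "user": {"pre_message": "<|user|>\n", "post_message": "\n"},
        "assistant": {"pre_message": "<|assistant|>\n", "post_message": "\n"},
    }
    prompt = ""
    for m in messages:
        fmt = role_dict[m["role"]]
        prompt += fmt["pre_message"] + m["content"] + fmt["post_message"]
    # final_prompt_value cues the model to produce the assistant turn
    return prompt + "<|assistant|>\n"

print(render_granite([{"role": "user", "content": "Hello!"}]))
```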
@@ -981,7 +982,7 @@ def anthropic_messages_pt(messages: list):
     # add role=tool support to allow function call result/error submission
     user_message_types = {"user", "tool", "function"}
     # reformat messages to ensure user/assistant are alternating, if there's either 2 consecutive 'user' messages or 2 consecutive 'assistant' message, merge them.
-    new_messages = []
+    new_messages: list = []
     msg_i = 0
     tool_use_param = False
     while msg_i < len(messages):
@@ -1524,24 +1525,9 @@ def prompt_factory(
         return mistral_instruct_pt(messages=messages)
     elif "meta-llama/llama-3" in model and "instruct" in model:
         # https://llama.meta.com/docs/model-cards-and-prompt-formats/meta-llama-3/
-        return custom_prompt(
-            role_dict={
-                "system": {
-                    "pre_message": "<|start_header_id|>system<|end_header_id|>\n",
-                    "post_message": "<|eot_id|>",
-                },
-                "user": {
-                    "pre_message": "<|start_header_id|>user<|end_header_id|>\n",
-                    "post_message": "<|eot_id|>",
-                },
-                "assistant": {
-                    "pre_message": "<|start_header_id|>assistant<|end_header_id|>\n",
-                    "post_message": "<|eot_id|>",
-                },
-            },
+        return hf_chat_template(
+            model="meta-llama/Meta-Llama-3-8B-Instruct",
             messages=messages,
-            initial_prompt_value="<|begin_of_text|>",
-            final_prompt_value="<|start_header_id|>assistant<|end_header_id|>\n",
         )
     try:
         if "meta-llama/llama-2" in model and "chat" in model:
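The swap above defers to the model's published chat template instead of hard-coding the Llama-3 control tokens. The resulting prompt can be previewed with the tokenizer itself; a sketch (Meta-Llama-3 is a gated repo, so this assumes you have access, and it is not what litellm's hf_chat_template does internally):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
prompt = tok.apply_chat_template(
    [{"role": "user", "content": "Hi!"}],
    tokenize=False,
    add_generation_prompt=True,  # appends the assistant header, as final_prompt_value did
)
print(prompt)  # starts with <|begin_of_text|> and uses <|start_header_id|>/<|eot_id|>
```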
@@ -1,11 +1,11 @@
 import os, types
 import json
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 from litellm.utils import ModelResponse, Usage
 import litellm
-import httpx
+import httpx  # type: ignore
 from .prompt_templates.factory import prompt_factory, custom_prompt
@@ -1,14 +1,14 @@
 import os, types, traceback
 from enum import Enum
 import json
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional, Any
 import litellm
 from litellm.utils import ModelResponse, EmbeddingResponse, get_secret, Usage
 import sys
 from copy import deepcopy
-import httpx
+import httpx  # type: ignore
 from .prompt_templates.factory import prompt_factory, custom_prompt
@@ -295,7 +295,7 @@ def completion(
         EndpointName={model},
         InferenceComponentName={model_id},
         ContentType="application/json",
-        Body={data},
+        Body={data},  # type: ignore
         CustomAttributes="accept_eula=true",
     )
     """  # type: ignore
@@ -321,7 +321,7 @@ def completion(
     response = client.invoke_endpoint(
         EndpointName={model},
         ContentType="application/json",
-        Body={data},
+        Body={data},  # type: ignore
         CustomAttributes="accept_eula=true",
     )
     """  # type: ignore
@@ -688,7 +688,7 @@ def embedding(
     response = client.invoke_endpoint(
         EndpointName={model},
         ContentType="application/json",
-        Body={data},
+        Body={data},  # type: ignore
         CustomAttributes="accept_eula=true",
     )"""  # type: ignore
     logging_obj.pre_call(
@@ -6,11 +6,11 @@ Reference: https://docs.together.ai/docs/openai-api-compatibility
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional
 import litellm
-import httpx
+import httpx  # type: ignore
 from litellm.utils import ModelResponse, Usage
 from .prompt_templates.factory import prompt_factory, custom_prompt
@@ -1,12 +1,12 @@
 import os, types
 import json
 from enum import Enum
-import requests
+import requests  # type: ignore
 import time
 from typing import Callable, Optional, Union, List
 from litellm.utils import ModelResponse, Usage, CustomStreamWrapper, map_finish_reason
 import litellm, uuid
-import httpx, inspect
+import httpx, inspect  # type: ignore


 class VertexAIError(Exception):
@@ -3,7 +3,7 @@
 import os, types
 import json
 from enum import Enum
-import requests, copy
+import requests, copy  # type: ignore
 import time, uuid
 from typing import Callable, Optional, List
 from litellm.utils import ModelResponse, Usage, map_finish_reason, CustomStreamWrapper
@@ -17,7 +17,7 @@ from .prompt_templates.factory import (
     extract_between_tags,
     parse_xml_params,
 )
-import httpx
+import httpx  # type: ignore


 class VertexAIError(Exception):
@@ -1,8 +1,8 @@
 import os
 import json
 from enum import Enum
-import requests
-import time, httpx
+import requests  # type: ignore
+import time, httpx  # type: ignore
 from typing import Callable, Any
 from litellm.utils import ModelResponse, Usage
 from .prompt_templates.factory import prompt_factory, custom_prompt
@@ -1,12 +1,13 @@
 from enum import Enum
 import json, types, time  # noqa: E401
-from contextlib import contextmanager
-from typing import Callable, Dict, Optional, Any, Union, List
+from contextlib import asynccontextmanager, contextmanager
+from typing import AsyncGenerator, Callable, Dict, Generator, Optional, Any, Union, List

-import httpx
-import requests
+import httpx  # type: ignore
+import requests  # type: ignore
 import litellm
-from litellm.utils import ModelResponse, get_secret, Usage
+from litellm.utils import Logging, ModelResponse, Usage, get_secret
+from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler

 from .base import BaseLLM
 from .prompt_templates import factory as ptf
@@ -192,7 +193,7 @@ class WatsonXAIEndpoint(str, Enum):

 class IBMWatsonXAI(BaseLLM):
     """
-    Class to interface with IBM Watsonx.ai API for text generation and embeddings.
+    Class to interface with IBM watsonx.ai API for text generation and embeddings.

     Reference: https://cloud.ibm.com/apidocs/watsonx-ai
     """
@@ -343,7 +344,7 @@ class IBMWatsonXAI(BaseLLM):
         )
         if token is None and api_key is not None:
             # generate the auth token
-            if print_verbose:
+            if print_verbose is not None:
                 print_verbose("Generating IAM token for Watsonx.ai")
             token = self.generate_iam_token(api_key)
         elif token is None and api_key is None:
@@ -377,8 +378,9 @@ class IBMWatsonXAI(BaseLLM):
         model_response: ModelResponse,
         print_verbose: Callable,
         encoding,
-        logging_obj,
-        optional_params: dict,
+        logging_obj: Logging,
+        optional_params: Optional[dict] = None,
+        acompletion: bool = None,
         litellm_params: Optional[dict] = None,
         logger_fn=None,
         timeout: Optional[float] = None,
@@ -401,13 +403,15 @@ class IBMWatsonXAI(BaseLLM):
         prompt = convert_messages_to_prompt(
             model, messages, provider, custom_prompt_dict
         )

+        manage_response = self._make_response_manager(async_=(acompletion is True), logging_obj=logging_obj)

-        def process_text_request(request_params: dict) -> ModelResponse:
-            with self._manage_response(
-                request_params, logging_obj=logging_obj, input=prompt, timeout=timeout
-            ) as resp:
-                json_resp = resp.json()
-
+        def process_text_gen_response(json_resp: dict) -> ModelResponse:
             if "results" not in json_resp:
                 raise WatsonXAIError(
                     status_code=500,
                     message=f"Error: Invalid response from Watsonx.ai API: {json_resp}",
                 )
             generated_text = json_resp["results"][0]["generated_text"]
             prompt_tokens = json_resp["results"][0]["input_token_count"]
             completion_tokens = json_resp["results"][0]["generated_token_count"]
@@ -426,25 +430,52 @@ class IBMWatsonXAI(BaseLLM):
             )
             return model_response

-        def process_stream_request(
+        def handle_text_request(request_params: dict) -> ModelResponse:
+            with manage_response(
+                request_params, input=prompt, timeout=timeout,
+            ) as resp:
+                json_resp = resp.json()
+
+            return process_text_gen_response(json_resp)
+
+        async def handle_text_request_async(request_params: dict) -> ModelResponse:
+            async with manage_response(
+                request_params, input=prompt, timeout=timeout,
+            ) as resp:
+                json_resp = resp.json()
+            return process_text_gen_response(json_resp)
+
+        def handle_stream_request(
             request_params: dict,
         ) -> litellm.CustomStreamWrapper:
             # stream the response - generated chunks will be handled
             # by litellm.utils.CustomStreamWrapper.handle_watsonx_stream
-            with self._manage_response(
-                request_params,
-                logging_obj=logging_obj,
-                stream=True,
-                input=prompt,
-                timeout=timeout,
+            with manage_response(
+                request_params, stream=True, input=prompt, timeout=timeout,
             ) as resp:
-                response = litellm.CustomStreamWrapper(
+                streamwrapper = litellm.CustomStreamWrapper(
                     resp.iter_lines(),
                     model=model,
                     custom_llm_provider="watsonx",
                     logging_obj=logging_obj,
                 )
-                return response
+                return streamwrapper
+
+        async def handle_stream_request_async(
+            request_params: dict,
+        ) -> litellm.CustomStreamWrapper:
+            # stream the response - generated chunks will be handled
+            # by litellm.utils.CustomStreamWrapper.handle_watsonx_stream
+            async with manage_response(
+                request_params, stream=True, input=prompt, timeout=timeout,
+            ) as resp:
+                streamwrapper = litellm.CustomStreamWrapper(
+                    resp.aiter_lines(),
+                    model=model,
+                    custom_llm_provider="watsonx",
+                    logging_obj=logging_obj,
+                )
+                return streamwrapper

         try:
             ## Get the response from the model
@@ -455,10 +486,18 @@ class IBMWatsonXAI(BaseLLM):
                 optional_params=optional_params,
                 print_verbose=print_verbose,
             )
-            if stream:
-                return process_stream_request(req_params)
+            if stream and acompletion:
+                # stream and async text generation
+                return handle_stream_request_async(req_params)
+            elif stream:
+                # streaming text generation
+                return handle_stream_request(req_params)
+            elif acompletion:
+                # async text generation
+                return handle_text_request_async(req_params)
             else:
-                return process_text_request(req_params)
+                # regular text generation
+                return handle_text_request(req_params)
         except WatsonXAIError as e:
             raise e
         except Exception as e:
@@ -473,6 +512,7 @@ class IBMWatsonXAI(BaseLLM):
         model_response=None,
         optional_params=None,
         encoding=None,
+        aembedding=None,
     ):
         """
         Send a text embedding request to the IBM Watsonx.ai API.
@@ -507,9 +547,6 @@ class IBMWatsonXAI(BaseLLM):
         }
         request_params = dict(version=api_params["api_version"])
         url = api_params["url"].rstrip("/") + WatsonXAIEndpoint.EMBEDDINGS
-        # request = httpx.Request(
-        #     "POST", url, headers=headers, json=payload, params=request_params
-        # )
         req_params = {
             "method": "POST",
             "url": url,
@@ -517,25 +554,47 @@ class IBMWatsonXAI(BaseLLM):
             "json": payload,
             "params": request_params,
         }
-        with self._manage_response(
-            req_params, logging_obj=logging_obj, input=input
-        ) as resp:
-            json_resp = resp.json()
-
-        results = json_resp.get("results", [])
-        embedding_response = []
-        for idx, result in enumerate(results):
-            embedding_response.append(
-                {"object": "embedding", "index": idx, "embedding": result["embedding"]}
-            )
-        model_response["object"] = "list"
-        model_response["data"] = embedding_response
-        model_response["model"] = model
-        input_tokens = json_resp.get("input_token_count", 0)
-        model_response.usage = Usage(
-            prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
-        )
-        return model_response
+        manage_response = self._make_response_manager(async_=(aembedding is True), logging_obj=logging_obj)
+
+        def process_embedding_response(json_resp: dict) -> ModelResponse:
+            results = json_resp.get("results", [])
+            embedding_response = []
+            for idx, result in enumerate(results):
+                embedding_response.append(
+                    {"object": "embedding", "index": idx, "embedding": result["embedding"]}
+                )
+            model_response["object"] = "list"
+            model_response["data"] = embedding_response
+            model_response["model"] = model
+            input_tokens = json_resp.get("input_token_count", 0)
+            model_response.usage = Usage(
+                prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens
+            )
+            return model_response
+
+        def handle_embedding_request(request_params: dict) -> ModelResponse:
+            with manage_response(
+                request_params, input=input
+            ) as resp:
+                json_resp = resp.json()
+            return process_embedding_response(json_resp)
+
+        async def handle_embedding_request_async(request_params: dict) -> ModelResponse:
+            async with manage_response(
+                request_params, input=input
+            ) as resp:
+                json_resp = resp.json()
+            return process_embedding_response(json_resp)
+
+        try:
+            if aembedding:
+                return handle_embedding_request_async(req_params)
+            else:
+                return handle_embedding_request(req_params)
+        except WatsonXAIError as e:
+            raise e
+        except Exception as e:
+            raise WatsonXAIError(status_code=500, message=str(e))

     def generate_iam_token(self, api_key=None, **params):
         headers = {}
@@ -557,53 +616,116 @@ class IBMWatsonXAI(BaseLLM):
         iam_access_token = json_data["access_token"]
         self.token = iam_access_token
         return iam_access_token

+    def _make_response_manager(
+        self,
+        async_: bool,
+        logging_obj: Logging
+    ) -> Callable[..., Generator[Union[requests.Response, httpx.Response], None, None]]:
+        """
+        Returns a context manager that manages the response from the request.
+        if async_ is True, returns an async context manager, otherwise returns a regular context manager.
-    @contextmanager
-    def _manage_response(
-        self,
-        request_params: dict,
-        logging_obj: Any,
-        stream: bool = False,
-        input: Optional[Any] = None,
-        timeout: Optional[float] = None,
-    ):
-        request_str = (
-            f"response = {request_params['method']}(\n"
-            f"\turl={request_params['url']},\n"
-            f"\tjson={request_params['json']},\n"
-            f")"
-        )
-        logging_obj.pre_call(
-            input=input,
-            api_key=request_params["headers"].get("Authorization"),
-            additional_args={
-                "complete_input_dict": request_params["json"],
-                "request_str": request_str,
-            },
-        )
-        if timeout:
-            request_params["timeout"] = timeout
-        try:
-            if stream:
-                resp = requests.request(
-                    **request_params,
-                    stream=True,
-                )
-                resp.raise_for_status()
-                yield resp
-            else:
-                resp = requests.request(**request_params)
-                resp.raise_for_status()
-                yield resp
-        except Exception as e:
-            raise WatsonXAIError(status_code=500, message=str(e))
-        if not stream:
+
+        Usage:
+        ```python
+        manage_response = self._make_response_manager(async_=True, logging_obj=logging_obj)
+        async with manage_response(request_params) as resp:
+            ...
+        # or
+        manage_response = self._make_response_manager(async_=False, logging_obj=logging_obj)
+        with manage_response(request_params) as resp:
+            ...
+        ```
+        """
+
+        def pre_call(
+            request_params: dict,
+            input: Optional[Any] = None,
+        ):
+            request_str = (
+                f"response = {'await ' if async_ else ''}{request_params['method']}(\n"
+                f"\turl={request_params['url']},\n"
+                f"\tjson={request_params['json']},\n"
+                f")"
+            )
+            logging_obj.pre_call(
+                input=input,
+                api_key=request_params["headers"].get("Authorization"),
+                additional_args={
+                    "complete_input_dict": request_params["json"],
+                    "request_str": request_str,
+                },
+            )

         def post_call(resp, request_params):
             logging_obj.post_call(
                 input=input,
                 api_key=request_params["headers"].get("Authorization"),
                 original_response=json.dumps(resp.json()),
                 additional_args={
                     "status_code": resp.status_code,
-                    "complete_input_dict": request_params["json"],
+                    "complete_input_dict": request_params.get("data", request_params.get("json")),
                 },
             )

+        @contextmanager
+        def _manage_response(
+            request_params: dict,
+            stream: bool = False,
+            input: Optional[Any] = None,
+            timeout: float = None,
+        ) -> Generator[requests.Response, None, None]:
+            """
+            Returns a context manager that yields the response from the request.
+            """
+            pre_call(request_params, input)
+            if timeout:
+                request_params["timeout"] = timeout
+            if stream:
+                request_params["stream"] = stream
+            try:
+                resp = requests.request(**request_params)
+                resp.raise_for_status()
+                yield resp
+            except Exception as e:
+                raise WatsonXAIError(status_code=500, message=str(e))
+            if not stream:
+                post_call(resp, request_params)
+
+        @asynccontextmanager
+        async def _manage_response_async(
+            request_params: dict,
+            stream: bool = False,
+            input: Optional[Any] = None,
+            timeout: float = None,
+        ) -> AsyncGenerator[httpx.Response, None]:
+            pre_call(request_params, input)
+            if timeout:
+                request_params["timeout"] = timeout
+            if stream:
+                request_params["stream"] = stream
+            try:
+                # async with AsyncHTTPHandler(timeout=timeout) as client:
+                self.async_handler = AsyncHTTPHandler(
+                    timeout=httpx.Timeout(timeout=request_params.pop("timeout", 600.0), connect=5.0),
+                )
+                # async_handler.client.verify = False
+                if "json" in request_params:
+                    request_params['data'] = json.dumps(request_params.pop("json", {}))
+                method = request_params.pop("method")
+                if method.upper() == "POST":
+                    resp = await self.async_handler.post(**request_params)
+                else:
+                    resp = await self.async_handler.get(**request_params)
+                yield resp
+                # await async_handler.close()
+            except Exception as e:
+                raise WatsonXAIError(status_code=500, message=str(e))
+            if not stream:
+                post_call(resp, request_params)
+
+        if async_:
+            return _manage_response_async
+        else:
+            return _manage_response
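The `_make_response_manager` factory boils down to returning either a sync or an async context manager behind one call shape, so call sites stay uniform. A stripped-down, runnable sketch of that pattern (names are illustrative, not litellm's API):

```python
import asyncio
from contextlib import asynccontextmanager, contextmanager

def make_manager(async_: bool):
    @contextmanager
    def _sync(value):
        yield value  # sync path: usable with `with`

    @asynccontextmanager
    async def _async(value):
        yield value  # async path: usable with `async with`

    return _async if async_ else _sync

manage = make_manager(async_=False)
with manage(42) as v:
    assert v == 42

async def main():
    manage = make_manager(async_=True)
    async with manage(42) as v:
        assert v == 42

asyncio.run(main())
```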
@@ -73,6 +73,7 @@ from .llms.azure_text import AzureTextCompletion
 from .llms.anthropic import AnthropicChatCompletion
 from .llms.anthropic_text import AnthropicTextCompletion
 from .llms.huggingface_restapi import Huggingface
+from .llms.watsonx import IBMWatsonXAI
 from .prompt_templates.factory import (
     prompt_factory,
     custom_prompt,
@@ -109,6 +110,7 @@ anthropic_text_completions = AnthropicTextCompletion()
 azure_chat_completions = AzureChatCompletion()
 azure_text_completions = AzureTextCompletion()
 huggingface = Huggingface()
+watsonxai = IBMWatsonXAI()
 ####### COMPLETION ENDPOINTS ################
@@ -313,6 +315,7 @@ async def acompletion(
             or custom_llm_provider == "gemini"
             or custom_llm_provider == "sagemaker"
             or custom_llm_provider == "anthropic"
+            or custom_llm_provider == "watsonx"
             or custom_llm_provider in litellm.openai_compatible_providers
         ):  # currently implemented aiohttp calls for just azure, openai, hf, ollama, vertex ai soon all.
             init_response = await loop.run_in_executor(None, func_with_context)
@@ -1908,7 +1911,7 @@ def completion(
             response = response
         elif custom_llm_provider == "watsonx":
             custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict
-            response = watsonx.IBMWatsonXAI().completion(
+            response = watsonxai.completion(
                 model=model,
                 messages=messages,
                 custom_prompt_dict=custom_prompt_dict,
@@ -1919,7 +1922,8 @@ def completion(
                 logger_fn=logger_fn,
                 encoding=encoding,
                 logging_obj=logging,
-                timeout=timeout,  # type: ignore
+                acompletion=acompletion,
+                timeout=timeout,
             )
             if (
                 "stream" in optional_params
@@ -2572,6 +2576,7 @@ async def aembedding(*args, **kwargs):
         or custom_llm_provider == "fireworks_ai"
         or custom_llm_provider == "ollama"
         or custom_llm_provider == "vertex_ai"
+        or custom_llm_provider == "watsonx"
     ):  # currently implemented aiohttp calls for just azure and openai, soon all.
         # Await normally
         init_response = await loop.run_in_executor(None, func_with_context)
@@ -3029,13 +3034,14 @@ def embedding(
             aembedding=aembedding,
         )
     elif custom_llm_provider == "watsonx":
-        response = watsonx.IBMWatsonXAI().embedding(
+        response = watsonxai.embedding(
             model=model,
             input=input,
             encoding=encoding,
             logging_obj=logging,
             optional_params=optional_params,
             model_response=EmbeddingResponse(),
+            aembedding=aembedding,
         )
     else:
         args = locals()
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1 +0,0 @@
-(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{11837:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_12bbc4', '__Inter_Fallback_12bbc4'",fontStyle:"normal"},className:"__className_12bbc4"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=11837)}),_N_E=n.O()}]);
@@ -0,0 +1 @@
+(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_c23dc8', '__Inter_Fallback_c23dc8'",fontStyle:"normal"},className:"__className_c23dc8"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=87421)}),_N_E=n.O()}]);
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
-(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{70377:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(70377)}),_N_E=e.O()}]);
+(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long
@@ -515,6 +515,8 @@ class NewTeamRequest(TeamBase):


 class GlobalEndUsersSpend(LiteLLMBase):
     api_key: Optional[str] = None
+    startTime: Optional[datetime] = None
+    endTime: Optional[datetime] = None


 class TeamMemberAddRequest(LiteLLMBase):
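The two new optional fields let callers bound the end-user spend query to a time window. A small sketch of building the request model (the import path is an assumption inferred from the surrounding hunk context, which shows the class next to the other proxy request models):

# Sketch only: the import path is inferred, not confirmed by this diff.
from datetime import datetime, timedelta

from litellm.proxy._types import GlobalEndUsersSpend

payload = GlobalEndUsersSpend(
    api_key="sk-1234",  # placeholder; all three fields stay optional
    startTime=datetime.now() - timedelta(days=7),
    endTime=datetime.now(),
)
print(payload.model_dump())  # pydantic v2 style, matching model_dump() usage elsewhere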
@@ -252,7 +252,7 @@ def run_server(
     if model and "ollama" in model and api_base is None:
         run_ollama_serve()
     if test_async is True:
-        import requests, concurrent, time
+        import requests, concurrent, time  # type: ignore

        api_base = f"http://{host}:{port}"

@@ -418,7 +418,7 @@ def run_server(
     read from there and save it to os.env['DATABASE_URL']
     """
     try:
-        import yaml, asyncio
+        import yaml, asyncio  # type: ignore
     except:
         raise ImportError(
             "yaml needs to be imported. Run - `pip install 'litellm[proxy]'`"

@@ -30,7 +30,7 @@ sys.path.insert(
 try:
     import fastapi
     import backoff
-    import yaml
+    import yaml  # type: ignore
     import orjson
     import logging
     from apscheduler.schedulers.asyncio import AsyncIOScheduler

@@ -3731,6 +3731,7 @@ async def chat_completion(
                 "x-litellm-model-id": model_id,
                 "x-litellm-cache-key": cache_key,
+                "x-litellm-model-api-base": api_base,
                 "x-litellm-version": version,
             }
             selected_data_generator = select_data_generator(
                 response=response,

@@ -3746,6 +3747,7 @@ async def chat_completion(
         fastapi_response.headers["x-litellm-model-id"] = model_id
         fastapi_response.headers["x-litellm-cache-key"] = cache_key
+        fastapi_response.headers["x-litellm-model-api-base"] = api_base
         fastapi_response.headers["x-litellm-version"] = version

         ### CALL HOOKS ### - modify outgoing data
         response = await proxy_logging_obj.post_call_success_hook(

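With these hunks every chat completion response from the proxy also reports which upstream api_base served it. A minimal client-side sketch of reading the new header (the proxy URL, port, and key below are placeholders, not values from this diff):

import requests  # client-side sketch; assumes a proxy running locally

resp = requests.post(
    "http://localhost:4000/chat/completions",
    headers={"Authorization": "Bearer sk-1234"},  # placeholder virtual key
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "hi"}],
    },
)
print(resp.headers.get("x-litellm-model-id"))
print(resp.headers.get("x-litellm-cache-key"))
print(resp.headers.get("x-litellm-model-api-base"))  # the header added here
print(resp.headers.get("x-litellm-version"))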
@@ -3902,14 +3904,10 @@ async def completion(
             },
         )

-        if hasattr(response, "_hidden_params"):
-            model_id = response._hidden_params.get("model_id", None) or ""
-            original_response = (
-                response._hidden_params.get("original_response", None) or ""
-            )
-        else:
-            model_id = ""
-            original_response = ""
+        hidden_params = getattr(response, "_hidden_params", {}) or {}
+        model_id = hidden_params.get("model_id", None) or ""
+        cache_key = hidden_params.get("cache_key", None) or ""
+        api_base = hidden_params.get("api_base", None) or ""

         verbose_proxy_logger.debug("final response: %s", response)
         if (

@@ -3917,6 +3915,9 @@ async def completion(
         ):  # use generate_responses to stream responses
             custom_headers = {
                 "x-litellm-model-id": model_id,
+                "x-litellm-cache-key": cache_key,
+                "x-litellm-model-api-base": api_base,
+                "x-litellm-version": version,
             }
             selected_data_generator = select_data_generator(
                 response=response,

@@ -3931,6 +3932,10 @@ async def completion(
             )

+        fastapi_response.headers["x-litellm-model-id"] = model_id
+        fastapi_response.headers["x-litellm-cache-key"] = cache_key
+        fastapi_response.headers["x-litellm-model-api-base"] = api_base
+        fastapi_response.headers["x-litellm-version"] = version

         return response
     except Exception as e:
         data["litellm_status"] = "fail"  # used for alerting

@@ -3970,6 +3975,7 @@ async def completion(
 )  # azure compatible endpoint
 async def embeddings(
     request: Request,
+    fastapi_response: Response,
     model: Optional[str] = None,
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):

@@ -4116,6 +4122,17 @@ async def embeddings(
         ### ALERTING ###
         data["litellm_status"] = "success"  # used for alerting

+        ### RESPONSE HEADERS ###
+        hidden_params = getattr(response, "_hidden_params", {}) or {}
+        model_id = hidden_params.get("model_id", None) or ""
+        cache_key = hidden_params.get("cache_key", None) or ""
+        api_base = hidden_params.get("api_base", None) or ""
+
+        fastapi_response.headers["x-litellm-model-id"] = model_id
+        fastapi_response.headers["x-litellm-cache-key"] = cache_key
+        fastapi_response.headers["x-litellm-model-api-base"] = api_base
+        fastapi_response.headers["x-litellm-version"] = version
+
         return response
     except Exception as e:
         data["litellm_status"] = "fail"  # used for alerting

@@ -4154,6 +4171,7 @@ async def embeddings(
 )
 async def image_generation(
     request: Request,
+    fastapi_response: Response,
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):
     global proxy_logging_obj

@@ -4273,6 +4291,17 @@ async def image_generation(
         ### ALERTING ###
         data["litellm_status"] = "success"  # used for alerting

+        ### RESPONSE HEADERS ###
+        hidden_params = getattr(response, "_hidden_params", {}) or {}
+        model_id = hidden_params.get("model_id", None) or ""
+        cache_key = hidden_params.get("cache_key", None) or ""
+        api_base = hidden_params.get("api_base", None) or ""
+
+        fastapi_response.headers["x-litellm-model-id"] = model_id
+        fastapi_response.headers["x-litellm-cache-key"] = cache_key
+        fastapi_response.headers["x-litellm-model-api-base"] = api_base
+        fastapi_response.headers["x-litellm-version"] = version
+
         return response
     except Exception as e:
         data["litellm_status"] = "fail"  # used for alerting

@@ -4309,6 +4338,7 @@ async def image_generation(
 )
 async def audio_transcriptions(
     request: Request,
+    fastapi_response: Response,
     file: UploadFile = File(...),
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):

@@ -4453,6 +4483,18 @@ async def audio_transcriptions(

         ### ALERTING ###
         data["litellm_status"] = "success"  # used for alerting
+
+        ### RESPONSE HEADERS ###
+        hidden_params = getattr(response, "_hidden_params", {}) or {}
+        model_id = hidden_params.get("model_id", None) or ""
+        cache_key = hidden_params.get("cache_key", None) or ""
+        api_base = hidden_params.get("api_base", None) or ""
+
+        fastapi_response.headers["x-litellm-model-id"] = model_id
+        fastapi_response.headers["x-litellm-cache-key"] = cache_key
+        fastapi_response.headers["x-litellm-model-api-base"] = api_base
+        fastapi_response.headers["x-litellm-version"] = version
+
         return response
     except Exception as e:
         data["litellm_status"] = "fail"  # used for alerting

@@ -4492,6 +4534,7 @@ async def audio_transcriptions(
 )
 async def moderations(
     request: Request,
+    fastapi_response: Response,
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
 ):
     """

@@ -4616,6 +4659,17 @@ async def moderations(
         ### ALERTING ###
         data["litellm_status"] = "success"  # used for alerting

+        ### RESPONSE HEADERS ###
+        hidden_params = getattr(response, "_hidden_params", {}) or {}
+        model_id = hidden_params.get("model_id", None) or ""
+        cache_key = hidden_params.get("cache_key", None) or ""
+        api_base = hidden_params.get("api_base", None) or ""
+
+        fastapi_response.headers["x-litellm-model-id"] = model_id
+        fastapi_response.headers["x-litellm-cache-key"] = cache_key
+        fastapi_response.headers["x-litellm-model-api-base"] = api_base
+        fastapi_response.headers["x-litellm-version"] = version
+
         return response
     except Exception as e:
         data["litellm_status"] = "fail"  # used for alerting

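The identical ### RESPONSE HEADERS ### block now appears in embeddings, image_generation, audio_transcriptions, and moderations. A hypothetical consolidation of the repeated pattern, not part of this diff:

# Hypothetical helper; every name here restates the pattern above rather
# than anything that exists in the codebase.
from fastapi import Response


def set_litellm_response_headers(
    fastapi_response: Response, response, version: str
) -> None:
    hidden_params = getattr(response, "_hidden_params", {}) or {}
    fastapi_response.headers["x-litellm-model-id"] = hidden_params.get("model_id") or ""
    fastapi_response.headers["x-litellm-cache-key"] = hidden_params.get("cache_key") or ""
    fastapi_response.headers["x-litellm-model-api-base"] = hidden_params.get("api_base") or ""
    fastapi_response.headers["x-litellm-version"] = version or ""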
@@ -5821,35 +5875,38 @@ async def global_spend_end_users(data: Optional[GlobalEndUsersSpend] = None):
     if prisma_client is None:
         raise HTTPException(status_code=500, detail={"error": "No db connected"})

-    if data is None:
-        sql_query = f"""SELECT * FROM "Last30dTopEndUsersSpend";"""
-
-        response = await prisma_client.db.query_raw(query=sql_query)
-    else:
-        """
-        Gets the top 100 end-users for a given api key
-        """
-        current_date = datetime.now()
-        past_date = current_date - timedelta(days=30)
-        response = await prisma_client.db.litellm_spendlogs.group_by(  # type: ignore
-            by=["end_user"],
-            where={
-                "AND": [{"startTime": {"gte": past_date}}, {"api_key": data.api_key}]  # type: ignore
-            },
-            sum={"spend": True},
-            order={"_sum": {"spend": "desc"}},  # type: ignore
-            take=100,
-            count=True,
-        )
-        if response is not None and isinstance(response, list):
-            new_response = []
-            for r in response:
-                new_r = r
-                new_r["total_spend"] = r["_sum"]["spend"]
-                new_r["total_count"] = r["_count"]["_all"]
-                new_r.pop("_sum")
-                new_r.pop("_count")
-                new_response.append(new_r)
+    """
+    Gets the top 100 end-users for a given api key
+    """
+    startTime = None
+    endTime = None
+    selected_api_key = None
+    if data is not None:
+        startTime = data.startTime
+        endTime = data.endTime
+        selected_api_key = data.api_key
+
+    startTime = startTime or datetime.now() - timedelta(days=30)
+    endTime = endTime or datetime.now()
+
+    sql_query = """
+    SELECT end_user, COUNT(*) AS total_count, SUM(spend) AS total_spend
+    FROM "LiteLLM_SpendLogs"
+    WHERE "startTime" >= $1::timestamp
+      AND "startTime" < $2::timestamp
+      AND (
+        CASE
+            WHEN $3::TEXT IS NULL THEN TRUE
+            ELSE api_key = $3
+        END
+      )
+    GROUP BY end_user
+    ORDER BY total_spend DESC
+    LIMIT 100
+    """
+    response = await prisma_client.db.query_raw(
+        sql_query, startTime, endTime, selected_api_key
+    )

     return response

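After this rewrite the endpoint filters spend logs by an optional time window and key instead of always reading the Last30dTopEndUsersSpend view. A hedged request sketch (host, port, and key are placeholders; the route name is assumed from the handler above):

import requests

resp = requests.post(
    "http://localhost:4000/global/spend/end_users",
    headers={"Authorization": "Bearer sk-1234"},  # placeholder admin key
    json={
        "api_key": None,  # or a specific key; becomes $3 in the SQL above
        "startTime": "2024-04-01T00:00:00",
        "endTime": "2024-05-01T00:00:00",
    },
)
# each row carries end_user, total_count, total_spend per the SELECT above
print(resp.json())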
@@ -1689,12 +1689,12 @@ def get_instance_fn(value: str, config_file_path: Optional[str] = None) -> Any:
         module_file_path = os.path.join(directory, *module_name.split("."))
         module_file_path += ".py"

-        spec = importlib.util.spec_from_file_location(module_name, module_file_path)
+        spec = importlib.util.spec_from_file_location(module_name, module_file_path)  # type: ignore
         if spec is None:
             raise ImportError(
                 f"Could not find a module specification for {module_file_path}"
             )
-        module = importlib.util.module_from_spec(spec)
+        module = importlib.util.module_from_spec(spec)  # type: ignore
         spec.loader.exec_module(module)  # type: ignore
     else:
         # Dynamically import the module

@@ -6,7 +6,7 @@
 # - use litellm.success + failure callbacks to log when a request completed
 # - in get_available_deployment, for a given model group name -> pick based on traffic

-import dotenv, os, requests, random
+import dotenv, os, requests, random  # type: ignore
 from typing import Optional

 dotenv.load_dotenv()  # Loading env variables using dotenv

@@ -1,7 +1,7 @@
 #### What this does ####
 # picks based on response time (for streaming, this is time to first token)
 from pydantic import BaseModel, Extra, Field, root_validator
-import dotenv, os, requests, random
+import dotenv, os, requests, random  # type: ignore
 from typing import Optional, Union, List, Dict
 from datetime import datetime, timedelta
 import random

@@ -1,7 +1,7 @@
 #### What this does ####
 # picks based on response time (for streaming, this is time to first token)
-from pydantic import BaseModel, Extra, Field, root_validator
-import dotenv, os, requests, random
+from pydantic import BaseModel, Extra, Field, root_validator  # type: ignore
+import dotenv, os, requests, random  # type: ignore
 from typing import Optional, Union, List, Dict
 from datetime import datetime, timedelta
 import random

@@ -231,14 +231,17 @@ def test_cost_bedrock_pricing():
     assert cost == predicted_cost


+@pytest.mark.skip(reason="AWS disabled our access")
 def test_cost_bedrock_pricing_actual_calls():
     litellm.set_verbose = True
     model = "anthropic.claude-instant-v1"
     messages = [{"role": "user", "content": "Hey, how's it going?"}]
-    response = litellm.completion(model=model, messages=messages)
-    assert response._hidden_params["region_name"] is not None
+    response = litellm.completion(
+        model=model, messages=messages, mock_response="hello cool one"
+    )

     print("response", response)
     cost = litellm.completion_cost(
         model="bedrock/anthropic.claude-instant-v1",
         completion_response=response,
         messages=[{"role": "user", "content": "Hey, how's it going?"}],
     )

@@ -14,7 +14,7 @@ import subprocess, os
 from os.path import abspath, join, dirname
 import litellm, openai
 import itertools
-import random, uuid, requests
+import random, uuid, requests  # type: ignore
 from functools import wraps
 import datetime, time
 import tiktoken

@@ -36,7 +36,7 @@ import litellm._service_logger  # for storing API inputs, outputs, and metadata

 try:
     # this works in python 3.8
-    import pkg_resources
+    import pkg_resources  # type: ignore

     filename = pkg_resources.resource_filename(__name__, "llms/tokenizers")
     # try:

@@ -4161,8 +4161,30 @@ def cost_per_token(
         model_with_provider_and_region in model_cost_ref
     ):  # use region based pricing, if it's available
         model_with_provider = model_with_provider_and_region
-    if model_with_provider in model_cost_ref:

+    model_without_prefix = model
+    model_parts = model.split("/")
+    if len(model_parts) > 1:
+        model_without_prefix = model_parts[1]
+    else:
+        model_without_prefix = model
+    """
+    Code block that formats model to lookup in litellm.model_cost
+    Option1. model = "bedrock/ap-northeast-1/anthropic.claude-instant-v1". This is the most accurate since it is region based. Should always be option 1
+    Option2. model = "openai/gpt-4" - model = provider/model
+    Option3. model = "anthropic.claude-3" - model = model
+    """
+    if (
+        model_with_provider in model_cost_ref
+    ):  # Option 2. use model with provider, model = "openai/gpt-4"
         model = model_with_provider
+    elif model in model_cost_ref:  # Option 1. use model passed, model="gpt-4"
+        model = model
+    elif (
+        model_without_prefix in model_cost_ref
+    ):  # Option 3. if user passed model="bedrock/anthropic.claude-3", use model="anthropic.claude-3"
+        model = model_without_prefix

     # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
     print_verbose(f"Looking up model={model} in model_cost_map")
     if model in model_cost_ref:

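The docstring describes three lookup-key shapes of decreasing specificity. A standalone sketch of how such candidate keys can be derived (illustrative helper only, no litellm imports; mirrors the Option1/2/3 ordering rather than the exact control flow above):

from typing import List, Optional

def candidate_cost_keys(model: str, provider: str, region: Optional[str] = None) -> List[str]:
    keys = []
    if region:
        keys.append(f"{provider}/{region}/{model}")  # Option 1: region based, most accurate
    keys.append(f"{provider}/{model}")               # Option 2: provider/model
    keys.append(model)                               # Option 3: bare model name
    return keys

print(candidate_cost_keys("anthropic.claude-instant-v1", "bedrock", "ap-northeast-1"))
# ['bedrock/ap-northeast-1/anthropic.claude-instant-v1',
#  'bedrock/anthropic.claude-instant-v1',
#  'anthropic.claude-instant-v1']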
@@ -7766,11 +7788,11 @@ def _calculate_retry_after(
         try:
             retry_after = int(retry_header)
         except Exception:
-            retry_date_tuple = email.utils.parsedate_tz(retry_header)
+            retry_date_tuple = email.utils.parsedate_tz(retry_header)  # type: ignore
             if retry_date_tuple is None:
                 retry_after = -1
             else:
-                retry_date = email.utils.mktime_tz(retry_date_tuple)
+                retry_date = email.utils.mktime_tz(retry_date_tuple)  # type: ignore
                 retry_after = int(retry_date - time.time())
     else:
         retry_after = -1

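The # type: ignore comments aside, this fallback handles both forms a Retry-After header can take: an integer number of seconds or an HTTP-date. A self-contained sketch of the same parsing logic:

import email.utils
import time

# Mirrors the parsing pattern above: integer seconds first, HTTP-date second.
def parse_retry_after(retry_header: str) -> int:
    try:
        return int(retry_header)
    except Exception:
        retry_date_tuple = email.utils.parsedate_tz(retry_header)
        if retry_date_tuple is None:
            return -1
        retry_date = email.utils.mktime_tz(retry_date_tuple)
        return int(retry_date - time.time())

print(parse_retry_after("120"))  # -> 120
print(parse_retry_after("Wed, 21 Oct 2015 07:28:00 GMT"))  # negative: date already passed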
@@ -10545,6 +10567,18 @@ class CustomStreamWrapper:
             elif self.custom_llm_provider == "watsonx":
                 response_obj = self.handle_watsonx_stream(chunk)
                 completion_obj["content"] = response_obj["text"]
                 print_verbose(f"completion obj content: {completion_obj['content']}")
+                if getattr(model_response, "usage", None) is None:
+                    model_response.usage = Usage()
+                if response_obj.get("prompt_tokens") is not None:
+                    prompt_token_count = getattr(model_response.usage, "prompt_tokens", 0)
+                    model_response.usage.prompt_tokens = (prompt_token_count + response_obj["prompt_tokens"])
+                if response_obj.get("completion_tokens") is not None:
+                    model_response.usage.completion_tokens = response_obj["completion_tokens"]
+                model_response.usage.total_tokens = (
+                    getattr(model_response.usage, "prompt_tokens", 0)
+                    + getattr(model_response.usage, "completion_tokens", 0)
+                )
                 if response_obj["is_finished"]:
                     self.received_finish_reason = response_obj["finish_reason"]
             elif self.custom_llm_provider == "text-completion-openai":

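For streaming providers the usage counters arrive incrementally, so the wrapper adds prompt tokens onto the running count while treating completion tokens as a running total from the chunk. A reduced sketch of that accumulation pattern (plain dicts in place of the litellm Usage object):

# Reduced sketch of the accumulation above; not litellm code.
def accumulate_usage(usage: dict, chunk: dict) -> dict:
    if chunk.get("prompt_tokens") is not None:
        usage["prompt_tokens"] = usage.get("prompt_tokens", 0) + chunk["prompt_tokens"]
    if chunk.get("completion_tokens") is not None:
        # completion_tokens arrives as a running total, so overwrite
        usage["completion_tokens"] = chunk["completion_tokens"]
    usage["total_tokens"] = usage.get("prompt_tokens", 0) + usage.get("completion_tokens", 0)
    return usage

usage: dict = {}
for chunk in [{"prompt_tokens": 12}, {"completion_tokens": 3}, {"completion_tokens": 7}]:
    accumulate_usage(usage, chunk)
print(usage)  # {'prompt_tokens': 12, 'completion_tokens': 7, 'total_tokens': 19}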
@@ -10949,6 +10983,7 @@ class CustomStreamWrapper:
                 or self.custom_llm_provider == "sagemaker"
                 or self.custom_llm_provider == "gemini"
                 or self.custom_llm_provider == "cached_response"
+                or self.custom_llm_provider == "watsonx"
                 or self.custom_llm_provider in litellm.openai_compatible_endpoints
             ):
                 async for chunk in self.completion_stream:

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.36.3"
+version = "1.36.4"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"

@@ -80,7 +80,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"

 [tool.commitizen]
-version = "1.36.3"
+version = "1.36.4"
 version_files = [
     "pyproject.toml:^version"
 ]

File diff suppressed because one or more lines are too long

@@ -0,0 +1 @@
+self.__BUILD_MANIFEST={__rewrites:{afterFiles:[],beforeFiles:[],fallback:[]},"/_error":["static/chunks/pages/_error-d6107f1aac0c574c.js"],sortedPages:["/_app","/_error"]},self.__BUILD_MANIFEST_CB&&self.__BUILD_MANIFEST_CB();

@@ -0,0 +1 @@
+self.__SSG_MANIFEST=new Set([]);self.__SSG_MANIFEST_CB&&self.__SSG_MANIFEST_CB()

File diff suppressed because one or more lines are too long

@@ -0,0 +1 @@
+(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[185],{87421:function(n,e,t){Promise.resolve().then(t.t.bind(t,99646,23)),Promise.resolve().then(t.t.bind(t,63385,23))},63385:function(){},99646:function(n){n.exports={style:{fontFamily:"'__Inter_c23dc8', '__Inter_Fallback_c23dc8'",fontStyle:"normal"},className:"__className_c23dc8"}}},function(n){n.O(0,[971,69,744],function(){return n(n.s=87421)}),_N_E=n.O()}]);

File diff suppressed because one or more lines are too long

@@ -0,0 +1 @@
+(self.webpackChunk_N_E=self.webpackChunk_N_E||[]).push([[744],{32028:function(e,n,t){Promise.resolve().then(t.t.bind(t,47690,23)),Promise.resolve().then(t.t.bind(t,48955,23)),Promise.resolve().then(t.t.bind(t,5613,23)),Promise.resolve().then(t.t.bind(t,11902,23)),Promise.resolve().then(t.t.bind(t,31778,23)),Promise.resolve().then(t.t.bind(t,77831,23))}},function(e){var n=function(n){return e(e.s=n)};e.O(0,[971,69],function(){return n(35317),n(32028)}),_N_E=e.O()}]);

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -189,6 +189,7 @@ const CreateKeyPage = () => {
             userRole={userRole}
             token={token}
             accessToken={accessToken}
+            keys={keys}
           />
         )}
       </div>

@@ -786,7 +786,9 @@ export const adminTopKeysCall = async (accessToken: String) => {

 export const adminTopEndUsersCall = async (
   accessToken: String,
-  keyToken: String | null
+  keyToken: String | null,
+  startTime: String | undefined,
+  endTime: String | undefined
 ) => {
   try {
     let url = proxyBaseUrl

@@ -795,8 +797,11 @@ export const adminTopEndUsersCall = async (

     let body = "";
     if (keyToken) {
-      body = JSON.stringify({ api_key: keyToken });
+      body = JSON.stringify({ api_key: keyToken, startTime: startTime, endTime: endTime });
+    } else {
+      body = JSON.stringify({ startTime: startTime, endTime: endTime });
     }

     //message.info("Making top end users request");

     // Define requestOptions with body as an optional property

@@ -815,9 +820,7 @@ export const adminTopEndUsersCall = async (
       },
     };

-    if (keyToken) {
-      requestOptions.body = JSON.stringify({ api_key: keyToken });
-    }
+    requestOptions.body = body;

     const response = await fetch(url, requestOptions);
     if (!response.ok) {

@@ -3,13 +3,14 @@ import { BarChart, BarList, Card, Title, Table, TableHead, TableHeaderCell, Tabl
 import React, { useState, useEffect } from "react";

 import ViewUserSpend from "./view_user_spend";
-import { Grid, Col, Text, LineChart, TabPanel, TabPanels, TabGroup, TabList, Tab, Select, SelectItem } from "@tremor/react";
+import { Grid, Col, Text, LineChart, TabPanel, TabPanels, TabGroup, TabList, Tab, Select, SelectItem, DateRangePicker, DateRangePickerValue } from "@tremor/react";
 import {
   userSpendLogsCall,
   keyInfoCall,
   adminSpendLogsCall,
   adminTopKeysCall,
   adminTopModelsCall,
   adminTopEndUsersCall,
   teamSpendLogsCall,
   tagsSpendLogsCall,
   modelMetricsCall,

|
|||
token: string | null;
|
||||
userRole: string | null;
|
||||
userID: string | null;
|
||||
keys: any[] | null;
|
||||
}
|
||||
|
||||
type CustomTooltipTypeBar = {
|
||||
|
@@ -95,47 +97,14 @@ function getTopKeys(data: Array<{ [key: string]: unknown }>): any[] {
 }
-type DataDict = { [key: string]: unknown };
-type UserData = { user_id: string; spend: number };
-function getTopUsers(data: Array<DataDict>): UserData[] {
-  const userSpend: { [key: string]: number } = {};
-
-  data.forEach((dict) => {
-    const payload: DataDict = dict["users"] as DataDict;
-    Object.entries(payload).forEach(([user_id, value]) => {
-      if (
-        user_id === "" ||
-        user_id === undefined ||
-        user_id === null ||
-        user_id == "None"
-      ) {
-        return;
-      }
-
-      if (!userSpend[user_id]) {
-        userSpend[user_id] = 0;
-      }
-      userSpend[user_id] += value as number;
-    });
-  });
-
-  const spendUsers: UserData[] = Object.entries(userSpend).map(
-    ([user_id, spend]) => ({
-      user_id,
-      spend,
-    })
-  );
-
-  spendUsers.sort((a, b) => b.spend - a.spend);
-
-  const topKeys = spendUsers.slice(0, 5);
-  console.log(`topKeys: ${Object.values(topKeys[0])}`);
-  return topKeys;
-}
-
 const UsagePage: React.FC<UsagePageProps> = ({
   accessToken,
   token,
   userRole,
   userID,
   keys,
 }) => {
   const currentDate = new Date();
   const [keySpendData, setKeySpendData] = useState<any[]>([]);

@@ -146,6 +115,11 @@ const UsagePage: React.FC<UsagePageProps> = ({
   const [topTagsData, setTopTagsData] = useState<any[]>([]);
   const [uniqueTeamIds, setUniqueTeamIds] = useState<any[]>([]);
   const [totalSpendPerTeam, setTotalSpendPerTeam] = useState<any[]>([]);
+  const [selectedKeyID, setSelectedKeyID] = useState<string | null>("");
+  const [dateValue, setDateValue] = useState<DateRangePickerValue>({
+    from: new Date(Date.now() - 7 * 24 * 60 * 60 * 1000),
+    to: new Date(),
+  });

   const firstDay = new Date(
     currentDate.getFullYear(),

@@ -161,6 +135,26 @@ const UsagePage: React.FC<UsagePageProps> = ({
   let startTime = formatDate(firstDay);
   let endTime = formatDate(lastDay);

+  console.log("keys in usage", keys);
+
+  const updateEndUserData = async (startTime: Date | undefined, endTime: Date | undefined, uiSelectedKey: string | null) => {
+    if (!startTime || !endTime || !accessToken) {
+      return;
+    }
+
+    console.log("uiSelectedKey", uiSelectedKey);
+
+    let newTopUserData = await adminTopEndUsersCall(
+      accessToken,
+      uiSelectedKey,
+      startTime.toISOString(),
+      endTime.toISOString()
+    )
+    console.log("End user data updated successfully", newTopUserData);
+    setTopUsers(newTopUserData);
+
+  }
+
   function formatDate(date: Date) {
     const year = date.getFullYear();
     let month = date.getMonth() + 1; // JS month index starts from 0
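Only the first lines of `formatDate` appear in this hunk. A plausible completion, assuming the usual zero-padded `YYYY-MM-DD` output that date filters typically expect:

```typescript
// Hedged sketch of the full helper; only the year/month lines are visible above.
function formatDate(date: Date): string {
  const year = date.getFullYear();
  let month = String(date.getMonth() + 1); // JS month index starts from 0
  let day = String(date.getDate());
  if (month.length < 2) month = "0" + month; // zero-pad to two digits
  if (day.length < 2) day = "0" + day;
  return `${year}-${month}-${day}`;
}
```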
@@ -227,6 +221,12 @@ const UsagePage: React.FC<UsagePageProps> = ({
       const top_tags = await tagsSpendLogsCall(accessToken);
       setTopTagsData(top_tags.top_10_tags);

+      // get spend per end-user
+      let spend_user_call = await adminTopEndUsersCall(accessToken, null, undefined, undefined);
+      setTopUsers(spend_user_call);
+
+      console.log("spend/user result", spend_user_call);
+
     } else if (userRole == "App Owner") {
       await userSpendLogsCall(
         accessToken,
@@ -258,7 +258,6 @@ const UsagePage: React.FC<UsagePageProps> = ({
             spend: k["spend"],
           }));
           setTopKeys(filtered_keys);
-          setTopUsers(getTopUsers(response));
           setKeySpendData(response);
         }
       });
@@ -286,6 +285,7 @@ const UsagePage: React.FC<UsagePageProps> = ({
       <TabList className="mt-2">
         <Tab>All Up</Tab>
         <Tab>Team Based Usage</Tab>
+        <Tab>End User Usage</Tab>
         <Tab>Tag Based Usage</Tab>
       </TabList>
       <TabPanels>
@@ -324,23 +324,7 @@ const UsagePage: React.FC<UsagePageProps> = ({
               </Card>
             </Col>
-            <Col numColSpan={1}>
-              <Card>
-                <Title>Top Users</Title>
-                <BarChart
-                  className="mt-4 h-40"
-                  data={topUsers}
-                  index="user_id"
-                  categories={["spend"]}
-                  colors={["blue"]}
-                  yAxisWidth={200}
-                  layout="vertical"
-                  showXAxis={false}
-                  showLegend={false}
-                />
-              </Card>
-            </Col>
             <Col numColSpan={1}>
               <Card>
                 <Title>Top Models</Title>
                 <BarChart
                   className="mt-4 h-40"
@@ -354,6 +338,10 @@ const UsagePage: React.FC<UsagePageProps> = ({
                   showLegend={false}
                 />
               </Card>
+
             </Col>
+            <Col numColSpan={1}>
+
+            </Col>
           </Grid>
         </TabPanel>
@@ -385,6 +373,88 @@ const UsagePage: React.FC<UsagePageProps> = ({
             <Col numColSpan={2}>
             </Col>
           </Grid>
         </TabPanel>
+        <TabPanel>
+          <p className="mb-2 text-gray-500 italic text-[12px]">End-Users of your LLM API calls. Tracked when a `user` param is passed in your LLM calls <a className="text-blue-500" href="https://docs.litellm.ai/docs/proxy/users" target="_blank">docs here</a></p>
+          <Grid numItems={2}>
+            <Col>
+              <Text>Select Time Range</Text>
+              <DateRangePicker
+                enableSelect={true}
+                value={dateValue}
+                onValueChange={(value) => {
+                  setDateValue(value);
+                  updateEndUserData(value.from, value.to, null); // Call updateModelMetrics with the new date range
+                }}
+              />
+            </Col>
+            <Col>
+              <Text>Select Key</Text>
+              <Select defaultValue="all-keys">
+                <SelectItem
+                  key="all-keys"
+                  value="all-keys"
+                  onClick={() => {
+                    updateEndUserData(dateValue.from, dateValue.to, null);
+                  }}
+                >
+                  All Keys
+                </SelectItem>
+                {keys?.map((key: any, index: number) => {
+                  if (
+                    key &&
+                    key["key_alias"] !== null &&
+                    key["key_alias"].length > 0
+                  ) {
+                    return (
+                      <SelectItem
+                        key={index}
+                        value={String(index)}
+                        onClick={() => {
+                          updateEndUserData(dateValue.from, dateValue.to, key["token"]);
+                        }}
+                      >
+                        {key["key_alias"]}
+                      </SelectItem>
+                    );
+                  }
+                  return null; // Add this line to handle the case when the condition is not met
+                })}
+              </Select>
+            </Col>
+          </Grid>
+
+          <Card className="mt-4">
+            <Table className="max-h-[70vh] min-h-[500px]">
+              <TableHead>
+                <TableRow>
+                  <TableHeaderCell>End User</TableHeaderCell>
+                  <TableHeaderCell>Spend</TableHeaderCell>
+                  <TableHeaderCell>Total Events</TableHeaderCell>
+                </TableRow>
+              </TableHead>
+
+              <TableBody>
+                {topUsers?.map((user: any, index: number) => (
+                  <TableRow key={index}>
+                    <TableCell>{user.end_user}</TableCell>
+                    <TableCell>{user.total_spend?.toFixed(4)}</TableCell>
+                    <TableCell>{user.total_count}</TableCell>
+                  </TableRow>
+                ))}
+              </TableBody>
+            </Table>
+          </Card>
+        </TabPanel>
         <TabPanel>
           <Grid numItems={2} className="gap-2 h-[75vh] w-full mb-4">
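Every control in the new End User Usage panel funnels into the same refresh: the date picker and both kinds of `SelectItem` call `updateEndUserData`, passing `null` for "All Keys" or the selected key's token. The shared contract, as a sketch:

```typescript
// Sketch of the refresh contract the panel's controls share; updateEndUserData
// is defined earlier in this file. A null key token means "all keys".
type RefreshEndUsers = (
  from: Date | undefined,
  to: Date | undefined,
  keyToken: string | null
) => Promise<void>;

// "All Keys" item:   refresh(dateValue.from, dateValue.to, null)
// Specific key item: refresh(dateValue.from, dateValue.to, key["token"])
```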
|
@ -24,7 +24,7 @@ import {
|
|||
Icon,
|
||||
TextInput,
|
||||
} from "@tremor/react";
|
||||
import { userInfoCall, adminTopEndUsersCall } from "./networking";
|
||||
import { userInfoCall } from "./networking";
|
||||
import { Badge, BadgeDelta, Button } from "@tremor/react";
|
||||
import RequestAccess from "./request_model_access";
|
||||
import CreateUser from "./create_user_button";
|
||||
|
@@ -83,22 +83,7 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
       fetchData();
     }

-    const fetchEndUserSpend = async () => {
-      try {
-        const topEndUsers = await adminTopEndUsersCall(accessToken, null);
-        console.log("user data response:", topEndUsers);
-        setEndUsers(topEndUsers);
-      } catch (error) {
-        console.error("There was an error fetching the model data", error);
-      }
-    };
-    if (
-      userRole &&
-      (userRole == "Admin" || userRole == "Admin Viewer") &&
-      !endUsers
-    ) {
-      fetchEndUserSpend();
-    }

   }, [accessToken, token, userRole, userID, currentPage]);

   if (!userData) {
@@ -109,16 +94,6 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
     return <div>Loading...</div>;
   }

-  const onKeyClick = async (keyToken: String) => {
-    try {
-      const topEndUsers = await adminTopEndUsersCall(accessToken, keyToken);
-      console.log("user data response:", topEndUsers);
-      setEndUsers(topEndUsers);
-    } catch (error) {
-      console.error("There was an error fetching the model data", error);
-    }
-  };
-
   function renderPagination() {
     if (!userData) return null;
@@ -157,14 +132,11 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
       <CreateUser userID={userID} accessToken={accessToken} teams={teams}/>
       <Card className="w-full mx-auto flex-auto overflow-y-auto max-h-[80vh] mb-4">
         <div className="mb-4 mt-1">
-          <Text><b>Key Owners: </b> Users on LiteLLM that created API Keys. Automatically tracked by LiteLLM</Text>
-          <Text className="mt-1"><b>End Users: </b>End Users of your LLM API calls. Tracked When a `user` param is passed in your LLM calls</Text>
+          <Text>These are Users on LiteLLM that created API Keys. Automatically tracked by LiteLLM</Text>

         </div>
         <TabGroup>
           <TabList variant="line" defaultValue="1">
             <Tab value="1">Key Owners</Tab>
             <Tab value="2">End-Users</Tab>
           </TabList>

           <TabPanels>
             <TabPanel>
@@ -190,7 +162,7 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
                         ? user.models
                         : "All Models"}
                     </TableCell>
-                    <TableCell>{user.spend ? user.spend : 0}</TableCell>
+                    <TableCell>{user.spend ? user.spend?.toFixed(2) : 0}</TableCell>
                     <TableCell>
                       {user.max_budget ? user.max_budget : "Unlimited"}
                     </TableCell>
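The spend column now rounds to two decimals. Note that `toFixed` returns a string, so the `0` fallback still renders when spend is missing:

```typescript
// Number.prototype.toFixed rounds and returns a string:
(0.127).toFixed(2); // "0.13"
(12).toFixed(2);    // "12.00"
```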
@@ -220,29 +192,10 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
               <div className="flex items-center">
                 <div className="flex-1"></div>
-                <div className="flex-1 flex justify-between items-center">
-                  <Text className="w-1/4 mr-2 text-right">Key</Text>
-                  <Select defaultValue="1" className="w-3/4">
-                    {keys?.map((key: any, index: number) => {
-                      if (
-                        key &&
-                        key["key_alias"] !== null &&
-                        key["key_alias"].length > 0
-                      ) {
-                        return (
-                          <SelectItem
-                            key={index}
-                            value={String(index)}
-                            onClick={() => onKeyClick(key["token"])}
-                          >
-                            {key["key_alias"]}
-                          </SelectItem>
-                        );
-                      }
-                    })}
-                  </Select>
-                </div>
               </div>
-              <Table className="max-h-[70vh] min-h-[500px]">
+              {/* <Table className="max-h-[70vh] min-h-[500px]">
                 <TableHead>
                   <TableRow>
                     <TableHeaderCell>End User</TableHeaderCell>
@@ -260,7 +213,7 @@ const ViewUserDashboard: React.FC<ViewUserDashboardProps> = ({
                   </TableRow>
                 ))}
               </TableBody>
-            </Table>
+            </Table> */}
             </TabPanel>
           </TabPanels>
         </TabGroup>