Merge branch 'main' into litellm_moderations_improvements

Commit: 999fab82f7
32 changed files with 683 additions and 72 deletions
@@ -5,7 +5,7 @@
 <p align="center">Call all LLM APIs using the OpenAI format [Bedrock, Huggingface, VertexAI, TogetherAI, Azure, OpenAI, etc.]
 <br>
 </p>
-<h4 align="center"><a href="https://docs.litellm.ai/docs/simple_proxy" target="_blank">OpenAI Proxy Server</a> | <a href="https://docs.litellm.ai/docs/enterprise" target="_blank">Enterprise Support</a></h4>
+<h4 align="center"><a href="https://docs.litellm.ai/docs/simple_proxy" target="_blank">OpenAI Proxy Server</a> | <a href="https://docs.litellm.ai/docs/enterprise" target="_blank">Enterprise Tier</a></h4>
 <h4 align="center">
     <a href="https://pypi.org/project/litellm/" target="_blank">
         <img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version">
@@ -28,7 +28,7 @@ LiteLLM manages:
 - Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints
 - [Consistent output](https://docs.litellm.ai/docs/completion/output): text responses will always be available at `['choices'][0]['message']['content']`
 - Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)
-- Track spend & set budgets per project [OpenAI Proxy Server](https://docs.litellm.ai/docs/simple_proxy)
+- Set budgets & rate limits per project [OpenAI Proxy Server](https://docs.litellm.ai/docs/simple_proxy)
 
 [**Jump to OpenAI Proxy Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
@@ -8,6 +8,7 @@ import TabItem from '@theme/TabItem';
 Log Proxy Input, Output, Exceptions using Custom Callbacks, Langfuse, OpenTelemetry, DynamoDB, s3 Buckets
 
 - [Async Custom Callbacks](#custom-callback-class-async)
+- [Async Custom Callback APIs](#custom-callback-apis-async)
 - [Logging to Langfuse](#logging-proxy-inputoutput---langfuse)
 - [Logging to s3 Buckets](#logging-proxy-inputoutput---s3-buckets)
 - [Logging to DynamoDB](#logging-proxy-inputoutput---dynamodb)
@@ -297,6 +298,106 @@ ModelResponse(
 ```
 
 
+## Custom Callback APIs [Async]
+
+:::info
+
+This is an Enterprise-only feature. [Get Started with Enterprise here](https://github.com/BerriAI/litellm/tree/main/enterprise)
+
+:::
+
+Use this if you:
+
+- Want to use custom callbacks written in a non-Python programming language
+- Want your callbacks to run on a different microservice
+
+#### Step 1. Create your generic logging API endpoint
+
+Set up a generic API endpoint that can receive data in JSON format. The data will be included within a "data" field.
+
+Your server should support the following request format:
+
+```shell
+curl --location https://your-domain.com/log-event \
+--request POST \
+--header "Content-Type: application/json" \
+--data '{
+    "data": {
+        "id": "chatcmpl-8sgE89cEQ4q9biRtxMvDfQU1O82PT",
+        "call_type": "acompletion",
+        "cache_hit": "None",
+        "startTime": "2024-02-15 16:18:44.336280",
+        "endTime": "2024-02-15 16:18:45.045539",
+        "model": "gpt-3.5-turbo",
+        "user": "ishaan-2",
+        "modelParameters": "{'temperature': 0.7, 'max_tokens': 10, 'user': 'ishaan-2', 'extra_body': {}}",
+        "messages": "[{'role': 'user', 'content': 'This is a test'}]",
+        "response": "ModelResponse(id='chatcmpl-8sgE89cEQ4q9biRtxMvDfQU1O82PT', choices=[Choices(finish_reason='length', index=0, message=Message(content='Great! How can I assist you with this test', role='assistant'))], created=1708042724, model='gpt-3.5-turbo-0613', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=10, prompt_tokens=11, total_tokens=21))",
+        "usage": "Usage(completion_tokens=10, prompt_tokens=11, total_tokens=21)",
+        "metadata": "{}",
+        "cost": "3.65e-05"
+    }
+}'
+```
+
+**Reference FastAPI Python Server**
+
+Here's a reference FastAPI server that is compatible with LiteLLM Proxy:
+
+```python
+# this is an example endpoint to receive data from litellm
+from fastapi import FastAPI, HTTPException, Request
+
+app = FastAPI()
+
+
+@app.post("/log-event")
+async def log_event(request: Request):
+    try:
+        print("Received /log-event request")
+        # Assuming the incoming request has JSON data
+        data = await request.json()
+        print("Received request data:")
+        print(data)
+
+        # Your additional logic can go here
+        # For now, just printing the received data
+
+        return {"message": "Request received successfully"}
+    except Exception as e:
+        print(f"Error processing request: {str(e)}")
+        import traceback
+
+        traceback.print_exc()
+        raise HTTPException(status_code=500, detail="Internal Server Error")
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    uvicorn.run(app, host="127.0.0.1", port=8000)
+```
+
+#### Step 2. Set your `GENERIC_LOGGER_ENDPOINT` to the endpoint + route we should send callback logs to
+
+```python
+os.environ["GENERIC_LOGGER_ENDPOINT"] = "http://localhost:8000/log-event"
+```
+
+#### Step 3. Create a `config.yaml` file and set `litellm_settings`: `success_callback` = ["generic"]
+
+Example LiteLLM Proxy config.yaml:
+
+```yaml
+model_list:
+ - model_name: gpt-3.5-turbo
+   litellm_params:
+     model: gpt-3.5-turbo
+litellm_settings:
+  success_callback: ["generic"]
+```
+
+Start the LiteLLM Proxy and make a test request to verify the logs reached your callback API.
 
 ## Logging Proxy Input/Output - Langfuse
 We will use the `--config` to set `litellm.success_callback = ["langfuse"]`; this will log all successful LLM calls to Langfuse.
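A quick way to run that verification, sketched under the assumption that the proxy was started with `litellm --config config.yaml` and listens on its default local port (the host, port, and `sk-1234` key below are placeholders for your setup):

```python
# minimal end-to-end check: send one chat completion through the proxy,
# then confirm your /log-event endpoint printed a "data" payload
import requests

resp = requests.post(
    "http://0.0.0.0:4000/chat/completions",  # placeholder host/port
    headers={"Authorization": "Bearer sk-1234"},  # placeholder key
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "This is a test"}],
    },
)
print(resp.status_code)
```

If the callback fired, the FastAPI server from Step 1 should print a payload shaped like the curl example above.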
@@ -1,6 +1,6 @@
 ## LiteLLM Enterprise
 
-Code in this folder is licensed under a commercial license. Please review the [LICENSE](/LICENSE.md) file within the /enterprise folder
+Code in this folder is licensed under a commercial license. Please review the [LICENSE](./LICENSE.md) file within the /enterprise folder
 
 **These features are covered under the LiteLLM Enterprise contract**
 
@@ -8,4 +8,5 @@ Code in this folder is licensed under a commercial license. Please review the [L
 
 ## Features:
 - Custom API / microservice callbacks
+- Google Text Moderation API
enterprise/callbacks/example_logging_api.py (new file, 31 lines)
@@ -0,0 +1,31 @@
+# this is an example endpoint to receive data from litellm
+from fastapi import FastAPI, HTTPException, Request
+
+app = FastAPI()
+
+
+@app.post("/log-event")
+async def log_event(request: Request):
+    try:
+        print("Received /log-event request")
+        # Assuming the incoming request has JSON data
+        data = await request.json()
+        print("Received request data:")
+        print(data)
+
+        # Your additional logic can go here
+        # For now, just printing the received data
+
+        return {"message": "Request received successfully"}
+    except Exception as e:
+        print(f"Error processing request: {str(e)}")
+        import traceback
+
+        traceback.print_exc()
+        raise HTTPException(status_code=500, detail="Internal Server Error")
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    uvicorn.run(app, host="127.0.0.1", port=8000)
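A hypothetical smoke test for this example server, assuming it is running locally on port 8000 as configured above:

```python
# posts a hand-built payload in the same {"data": {...}} shape the proxy sends
import requests

payload = {"data": {"id": "chatcmpl-test", "model": "gpt-3.5-turbo", "cost": "3.65e-05"}}
r = requests.post("http://127.0.0.1:8000/log-event", json=payload)
print(r.status_code, r.json())  # expect: 200 {'message': 'Request received successfully'}
```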
enterprise/callbacks/generic_api_callback.py (new file, 128 lines)
@@ -0,0 +1,128 @@
+#### What this does ####
+# Callback that makes a request to a generic API endpoint on success
+import dotenv, os
+import requests
+import traceback
+import datetime, subprocess, sys
+import litellm, uuid
+
+from litellm.proxy._types import UserAPIKeyAuth
+from litellm.caching import DualCache
+from litellm._logging import print_verbose, verbose_logger
+from typing import Literal, Union
+
+dotenv.load_dotenv()  # Loading env variables using dotenv
+
+
+class GenericAPILogger:
+    # Class variables or attributes
+    def __init__(self, endpoint=None, headers=None):
+        try:
+            if endpoint is None:
+                # check env for "GENERIC_LOGGER_ENDPOINT"
+                if os.getenv("GENERIC_LOGGER_ENDPOINT"):
+                    endpoint = os.getenv("GENERIC_LOGGER_ENDPOINT")
+                else:
+                    # the endpoint was not found in the environment variables
+                    raise ValueError(
+                        "endpoint not set for GenericAPILogger, GENERIC_LOGGER_ENDPOINT not found in environment variables"
+                    )
+            headers = headers or litellm.generic_logger_headers
+            self.endpoint = endpoint
+            self.headers = headers
+
+            verbose_logger.debug(
+                f"in init GenericAPILogger, endpoint {self.endpoint}, headers {self.headers}"
+            )
+        except Exception as e:
+            print_verbose(f"Got exception on init GenericAPILogger client {str(e)}")
+            raise e
+
+    # This is sync, because we run this in a separate thread. Running in a separate thread ensures it will never block an LLM API call
+    # Experience with s3, Langfuse shows that async logging events are complicated and can block LLM calls
+    def log_event(
+        self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
+    ):
+        try:
+            verbose_logger.debug(
+                f"GenericAPILogger Logging - Enters logging function for model {kwargs}"
+            )
+
+            # construct payload to send to the custom logger
+            # follows the same params as langfuse.py
+            litellm_params = kwargs.get("litellm_params", {})
+            metadata = (
+                litellm_params.get("metadata", {}) or {}
+            )  # if litellm_params['metadata'] == None
+            messages = kwargs.get("messages")
+            cost = kwargs.get("response_cost", 0.0)
+            optional_params = kwargs.get("optional_params", {})
+            call_type = kwargs.get("call_type", "litellm.completion")
+            cache_hit = kwargs.get("cache_hit", False)
+            usage = response_obj["usage"]
+            id = response_obj.get("id", str(uuid.uuid4()))
+
+            # Build the initial payload
+            payload = {
+                "id": id,
+                "call_type": call_type,
+                "cache_hit": cache_hit,
+                "startTime": start_time,
+                "endTime": end_time,
+                "model": kwargs.get("model", ""),
+                "user": kwargs.get("user", ""),
+                "modelParameters": optional_params,
+                "messages": messages,
+                "response": response_obj,
+                "usage": usage,
+                "metadata": metadata,
+                "cost": cost,
+            }
+
+            # Ensure everything in the payload is converted to str
+            for key, value in payload.items():
+                try:
+                    payload[key] = str(value)
+                except:
+                    # non-blocking if it can't cast to a str
+                    pass
+
+            import json
+
+            data = {
+                "data": payload,
+            }
+            print_verbose(f"\nGeneric Logger - Logging payload = {json.dumps(data)}")
+
+            # make request to endpoint with payload; passing the dict via json=
+            # lets requests serialize it once (avoids double JSON-encoding)
+            response = requests.post(self.endpoint, json=data, headers=self.headers)
+
+            response_status = response.status_code
+            response_text = response.text
+
+            print_verbose(
+                f"Generic Logger - final response status = {response_status}, response text = {response_text}"
+            )
+            return response
+        except Exception as e:
+            traceback.print_exc()
+            verbose_logger.debug(f"Generic - {str(e)}\n{traceback.format_exc()}")
+            pass
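For reference, a minimal sketch of exercising `GenericAPILogger` directly, outside the proxy; all values are placeholders, and the `kwargs`/`response_obj` shapes are inferred from the `log_event` body above:

```python
import datetime

logger = GenericAPILogger(endpoint="http://localhost:8000/log-event")  # placeholder URL

kwargs = {
    "model": "gpt-3.5-turbo",
    "user": "test-user",
    "messages": [{"role": "user", "content": "This is a test"}],
    "response_cost": 3.65e-05,
    "litellm_params": {"metadata": {}},
    "optional_params": {"temperature": 0.7},
    "call_type": "acompletion",
    "cache_hit": False,
}
response_obj = {
    "id": "chatcmpl-test",
    "usage": {"completion_tokens": 10, "prompt_tokens": 11, "total_tokens": 21},
}
now = datetime.datetime.now()
logger.log_event(kwargs, response_obj, now, now, user_id="test-user", print_verbose=print)
```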
enterprise/hooks/google_text_moderation.py (new file, 53 lines)
@@ -0,0 +1,53 @@
+# +-----------------------------------------------+
+#
+#            Google Text Moderation
+#   https://cloud.google.com/natural-language/docs/moderating-text
+#
+# +-----------------------------------------------+
+#  Thank you users! We ❤️ you! - Krrish & Ishaan
+
+
+from typing import Optional, Literal, Union
+import litellm, traceback, sys, uuid
+from litellm.caching import DualCache
+from litellm.proxy._types import UserAPIKeyAuth
+from litellm.integrations.custom_logger import CustomLogger
+from fastapi import HTTPException
+from litellm._logging import verbose_proxy_logger
+from litellm.utils import (
+    ModelResponse,
+    EmbeddingResponse,
+    ImageResponse,
+    StreamingChoices,
+)
+from datetime import datetime
+import aiohttp, asyncio
+
+
+class _ENTERPRISE_GoogleTextModeration(CustomLogger):
+    user_api_key_cache = None
+
+    # Class variables or attributes
+    def __init__(self, mock_testing: bool = False):
+        pass
+
+    def print_verbose(self, print_statement):
+        try:
+            verbose_proxy_logger.debug(print_statement)
+            if litellm.set_verbose:
+                print(print_statement)  # noqa
+        except:
+            pass
+
+    async def async_pre_call_hook(
+        self,
+        user_api_key_dict: UserAPIKeyAuth,
+        cache: DualCache,
+        data: dict,
+        call_type: str,
+    ):
+        """
+        - Calls Google's Text Moderation API
+        - Rejects request if it fails safety check
+        """
+        pass
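The hook body is still a stub in this commit. For orientation only, a sketch of the rejection pattern a `CustomLogger` pre-call hook typically follows once the moderation call is wired in; the class shell and `_is_flagged` helper are hypothetical stand-ins, not part of this commit:

```python
from fastapi import HTTPException

class _SketchGoogleTextModeration:  # illustrative shell, not the shipped class
    async def _is_flagged(self, text: str) -> bool:
        return False  # hypothetical stand-in for the Google Text Moderation request

    async def async_pre_call_hook(self, user_api_key_dict, cache, data, call_type):
        # concatenate user-visible text, check it, reject the request if flagged
        text = " ".join(m.get("content", "") for m in data.get("messages", []))
        if await self._is_flagged(text):
            raise HTTPException(
                status_code=400, detail={"error": "Violated content safety policy"}
            )
        return data
```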
@@ -146,6 +146,7 @@ model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/mai
 suppress_debug_info = False
 dynamodb_table_name: Optional[str] = None
 s3_callback_params: Optional[Dict] = None
+generic_logger_headers: Optional[Dict] = None
 default_key_generate_params: Optional[Dict] = None
 upperbound_key_generate_params: Optional[Dict] = None
 default_team_settings: Optional[List] = None
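The new `generic_logger_headers` setting is what `GenericAPILogger` falls back to when no headers are passed in; assumed usage, with a placeholder token:

```python
import litellm

# headers sent with every request the generic logger makes to your endpoint
litellm.generic_logger_headers = {"Authorization": "Bearer <your-logging-api-token>"}
```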
@@ -98,6 +98,9 @@ def _get_redis_client_logic(**env_overrides):
 def get_redis_client(**env_overrides):
     redis_kwargs = _get_redis_client_logic(**env_overrides)
     if "url" in redis_kwargs and redis_kwargs["url"] is not None:
+        redis_kwargs.pop(
+            "connection_pool", None
+        )  # redis.from_url doesn't support setting your own connection pool
         return redis.Redis.from_url(**redis_kwargs)
     return redis.Redis(**redis_kwargs)
 
@@ -105,6 +108,9 @@ def get_redis_client(**env_overrides):
 def get_redis_async_client(**env_overrides):
     redis_kwargs = _get_redis_client_logic(**env_overrides)
     if "url" in redis_kwargs and redis_kwargs["url"] is not None:
+        redis_kwargs.pop(
+            "connection_pool", None
+        )  # redis.from_url doesn't support setting your own connection pool
         return async_redis.Redis.from_url(**redis_kwargs)
     return async_redis.Redis(
         socket_timeout=5,
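Illustration of the branch these hunks touch (the `litellm._redis` import path is an assumption): when `REDIS_URL` is set, the client is built via `from_url`, and any caller-supplied `connection_pool` is popped first because `redis.from_url` constructs its own pool:

```python
import os
from litellm._redis import get_redis_client  # assumed import path

os.environ["REDIS_URL"] = "redis://localhost:6379/0"
client = get_redis_client()  # -> redis.Redis.from_url(...); a pool kwarg would be dropped
```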
@@ -124,7 +124,7 @@ class RedisCache(BaseCache):
             self.redis_client.set(name=key, value=str(value), ex=ttl)
         except Exception as e:
             # NON blocking - notify users Redis is throwing an exception
-            logging.debug("LiteLLM Caching: set() - Got exception from REDIS : ", e)
+            print_verbose("LiteLLM Caching: set() - Got exception from REDIS : ", e)
 
     async def async_set_cache(self, key, value, **kwargs):
         _redis_client = self.init_async_client()
@@ -134,10 +134,12 @@ class RedisCache(BaseCache):
             f"Set ASYNC Redis Cache: key: {key}\nValue {value}\nttl={ttl}"
         )
         try:
-            await redis_client.set(name=key, value=json.dumps(value), ex=ttl)
+            await redis_client.set(
+                name=key, value=json.dumps(value), ex=ttl, get=True
+            )
         except Exception as e:
             # NON blocking - notify users Redis is throwing an exception
-            logging.debug("LiteLLM Caching: set() - Got exception from REDIS : ", e)
+            print_verbose("LiteLLM Caching: set() - Got exception from REDIS : ", e)
 
     async def async_set_cache_pipeline(self, cache_list, ttl=None):
         """
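For context on the new `get=True` flag: it maps to the Redis command `SET key value EX ttl GET`, which returns the value previously stored at the key (or `None`) instead of a plain acknowledgement, and requires Redis >= 6.2. A standalone sketch against a bare `redis.asyncio` client:

```python
import asyncio
import redis.asyncio as aredis

async def demo():
    r = aredis.Redis()  # assumes a local Redis server
    old = await r.set(name="k", value="v2", ex=60, get=True)
    print(old)  # previous value at "k", or None on first write

asyncio.run(demo())
```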
@@ -259,6 +259,7 @@ class LangFuseLogger:
             if key in [
                 "user_api_key",
                 "user_api_key_user_id",
+                "user_api_key_team_id",
                 "semantic-similarity",
             ]:
                 tags.append(f"{key}:{value}")
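Simplified illustration of the tagging loop this hunk extends, showing the new key flowing into Langfuse tags (metadata values are placeholders):

```python
metadata = {
    "user_api_key": "hashed-key",
    "user_api_key_team_id": "team-123",
    "other": "ignored",
}
tags = []
for key, value in metadata.items():
    if key in [
        "user_api_key",
        "user_api_key_user_id",
        "user_api_key_team_id",
        "semantic-similarity",
    ]:
        tags.append(f"{key}:{value}")
print(tags)  # ['user_api_key:hashed-key', 'user_api_key_team_id:team-123']
```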
@@ -343,24 +343,31 @@ def completion(
             llm_model = CodeChatModel.from_pretrained(model)
             mode = "chat"
             request_str += f"llm_model = CodeChatModel.from_pretrained({model})\n"
-        else:  # assume vertex model garden
-            client = aiplatform.gapic.PredictionServiceClient(
-                client_options=client_options
+        elif model == "private":
+            mode = "private"
+            model = optional_params.pop("model_id", None)
+            # private endpoint requires a dict instead of JSON
+            instances = [optional_params.copy()]
+            instances[0]["prompt"] = prompt
+            llm_model = aiplatform.PrivateEndpoint(
+                endpoint_name=model,
+                project=vertex_project,
+                location=vertex_location,
             )
+            request_str += f"llm_model = aiplatform.PrivateEndpoint(endpoint_name={model}, project={vertex_project}, location={vertex_location})\n"
+        else:  # assume vertex model garden on public endpoint
+            mode = "custom"
 
-            instances = [optional_params]
+            instances = [optional_params.copy()]
             instances[0]["prompt"] = prompt
             instances = [
                 json_format.ParseDict(instance_dict, Value())
                 for instance_dict in instances
             ]
-            llm_model = client.endpoint_path(
-                project=vertex_project, location=vertex_location, endpoint=model
-            )
-            mode = "custom"
-            request_str += f"llm_model = client.endpoint_path(project={vertex_project}, location={vertex_location}, endpoint={model})\n"
+            # Will determine the API used based on async parameter
+            llm_model = None
 
+        # NOTE: async prediction and streaming under "private" mode isn't supported by aiplatform right now
         if acompletion == True:
             data = {
                 "llm_model": llm_model,
@@ -532,9 +539,6 @@ def completion(
             """
             Vertex AI Model Garden
             """
-            request_str += (
-                f"client.predict(endpoint={llm_model}, instances={instances})\n"
-            )
             ## LOGGING
             logging_obj.pre_call(
                 input=prompt,
@@ -544,11 +548,21 @@ def completion(
                     "request_str": request_str,
                 },
             )
-            response = client.predict(
-                endpoint=llm_model,
-                instances=instances,
+            llm_model = aiplatform.gapic.PredictionServiceClient(
+                client_options=client_options
+            )
+            request_str += f"llm_model = aiplatform.gapic.PredictionServiceClient(client_options={client_options})\n"
+            endpoint_path = llm_model.endpoint_path(
+                project=vertex_project, location=vertex_location, endpoint=model
+            )
+            request_str += (
+                f"llm_model.predict(endpoint={endpoint_path}, instances={instances})\n"
+            )
+            response = llm_model.predict(
+                endpoint=endpoint_path,
+                instances=instances
             ).predictions
 
             completion_response = response[0]
             if (
                 isinstance(completion_response, str)
@@ -558,6 +572,36 @@ def completion(
             if "stream" in optional_params and optional_params["stream"] == True:
                 response = TextStreamer(completion_response)
                 return response
+        elif mode == "private":
+            """
+            Vertex AI Model Garden deployed on private endpoint
+            """
+            ## LOGGING
+            logging_obj.pre_call(
+                input=prompt,
+                api_key=None,
+                additional_args={
+                    "complete_input_dict": optional_params,
+                    "request_str": request_str,
+                },
+            )
+            request_str += (
+                f"llm_model.predict(instances={instances})\n"
+            )
+            response = llm_model.predict(
+                instances=instances
+            ).predictions
+
+            completion_response = response[0]
+            if (
+                isinstance(completion_response, str)
+                and "\nOutput:\n" in completion_response
+            ):
+                completion_response = completion_response.split("\nOutput:\n", 1)[1]
+            if "stream" in optional_params and optional_params["stream"] == True:
+                response = TextStreamer(completion_response)
+                return response
+
         ## LOGGING
         logging_obj.post_call(
             input=prompt, api_key=None, original_response=completion_response
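Putting the new branch together, a hedged usage sketch: the model segment is the literal string `private` and the deployed endpoint is passed via `model_id`, per the hunk above; the project, location, and endpoint resource name are placeholders:

```python
import litellm

litellm.vertex_project = "my-project"      # placeholder
litellm.vertex_location = "us-central1"    # placeholder

response = litellm.completion(
    model="vertex_ai/private",
    messages=[{"role": "user", "content": "hello"}],
    model_id="projects/123/locations/us-central1/endpoints/456",  # your PrivateEndpoint resource
)
```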
@@ -722,17 +766,6 @@ async def async_completion(
         Vertex AI Model Garden
         """
         from google.cloud import aiplatform
-
-        async_client = aiplatform.gapic.PredictionServiceAsyncClient(
-            client_options=client_options
-        )
-        llm_model = async_client.endpoint_path(
-            project=vertex_project, location=vertex_location, endpoint=model
-        )
-
-        request_str += (
-            f"client.predict(endpoint={llm_model}, instances={instances})\n"
-        )
         ## LOGGING
         logging_obj.pre_call(
             input=prompt,
@@ -743,8 +776,18 @@ async def async_completion(
             },
         )
 
-        response_obj = await async_client.predict(
-            endpoint=llm_model,
+        llm_model = aiplatform.gapic.PredictionServiceAsyncClient(
+            client_options=client_options
+        )
+        request_str += f"llm_model = aiplatform.gapic.PredictionServiceAsyncClient(client_options={client_options})\n"
+        endpoint_path = llm_model.endpoint_path(
+            project=vertex_project, location=vertex_location, endpoint=model
+        )
+        request_str += (
+            f"llm_model.predict(endpoint={endpoint_path}, instances={instances})\n"
+        )
+        response_obj = await llm_model.predict(
+            endpoint=endpoint_path,
             instances=instances,
         )
         response = response_obj.predictions
@@ -754,6 +797,23 @@ async def async_completion(
             and "\nOutput:\n" in completion_response
         ):
             completion_response = completion_response.split("\nOutput:\n", 1)[1]
+
+    elif mode == "private":
+        request_str += (
+            f"llm_model.predict_async(instances={instances})\n"
+        )
+        response_obj = await llm_model.predict_async(
+            instances=instances,
+        )
+
+        response = response_obj.predictions
+        completion_response = response[0]
+        if (
+            isinstance(completion_response, str)
+            and "\nOutput:\n" in completion_response
+        ):
+            completion_response = completion_response.split("\nOutput:\n", 1)[1]
+
     ## LOGGING
     logging_obj.post_call(
         input=prompt, api_key=None, original_response=completion_response
@@ -894,15 +954,8 @@ async def async_streaming(
         response = llm_model.predict_streaming_async(prompt, **optional_params)
     elif mode == "custom":
         from google.cloud import aiplatform
+        stream = optional_params.pop("stream", None)
 
-        async_client = aiplatform.gapic.PredictionServiceAsyncClient(
-            client_options=client_options
-        )
-        llm_model = async_client.endpoint_path(
-            project=vertex_project, location=vertex_location, endpoint=model
-        )
-
-        request_str += f"client.predict(endpoint={llm_model}, instances={instances})\n"
         ## LOGGING
         logging_obj.pre_call(
             input=prompt,
@@ -912,9 +965,34 @@ async def async_streaming(
                 "request_str": request_str,
             },
         )
-        response_obj = await async_client.predict(
-            endpoint=llm_model,
+        llm_model = aiplatform.gapic.PredictionServiceAsyncClient(
+            client_options=client_options
+        )
+        request_str += f"llm_model = aiplatform.gapic.PredictionServiceAsyncClient(client_options={client_options})\n"
+        endpoint_path = llm_model.endpoint_path(
+            project=vertex_project, location=vertex_location, endpoint=model
+        )
+        request_str += f"client.predict(endpoint={endpoint_path}, instances={instances})\n"
+        response_obj = await llm_model.predict(
+            endpoint=endpoint_path,
+            instances=instances,
+        )
+
+        response = response_obj.predictions
+        completion_response = response[0]
+        if (
+            isinstance(completion_response, str)
+            and "\nOutput:\n" in completion_response
+        ):
+            completion_response = completion_response.split("\nOutput:\n", 1)[1]
+        if stream:
+            response = TextStreamer(completion_response)
+
+    elif mode == "private":
+        stream = optional_params.pop("stream", None)
+        _ = instances[0].pop("stream", None)
+        request_str += f"llm_model.predict_async(instances={instances})\n"
+        response_obj = await llm_model.predict_async(
             instances=instances,
         )
         response = response_obj.predictions
@@ -924,8 +1002,9 @@ async def async_streaming(
         and "\nOutput:\n" in completion_response
     ):
         completion_response = completion_response.split("\nOutput:\n", 1)[1]
-    if "stream" in optional_params and optional_params["stream"] == True:
+    if stream:
         response = TextStreamer(completion_response)
+
     streamwrapper = CustomStreamWrapper(
         completion_stream=response,
         model=model,
@@ -10,6 +10,7 @@
 import os, openai, sys, json, inspect, uuid, datetime, threading
 from typing import Any, Literal, Union
 from functools import partial
+
 import dotenv, traceback, random, asyncio, time, contextvars
 from copy import deepcopy
 import httpx
@@ -642,21 +642,40 @@
         "mode": "chat"
     },
     "gemini-pro": {
-        "max_tokens": 30720,
+        "max_tokens": 32760,
         "max_output_tokens": 2048,
         "input_cost_per_token": 0.00000025,
         "output_cost_per_token": 0.0000005,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat"
     },
+    "gemini-1.0-pro": {
+        "max_tokens": 32760,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.00000025,
+        "output_cost_per_token": 0.0000005,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat"
+    },
     "gemini-pro-vision": {
-        "max_tokens": 30720,
+        "max_tokens": 16384,
         "max_output_tokens": 2048,
         "input_cost_per_token": 0.00000025,
         "output_cost_per_token": 0.0000005,
         "litellm_provider": "vertex_ai-vision-models",
         "mode": "chat"
     },
+    "gemini-1.0-pro-vision": {
+        "max_tokens": 16384,
+        "max_output_tokens": 2048,
+        "max_images_per_prompt": 16,
+        "max_videos_per_prompt": 1,
+        "max_video_length": 2,
+        "input_cost_per_token": 0.00000025,
+        "output_cost_per_token": 0.0000005,
+        "litellm_provider": "vertex_ai-vision-models",
+        "mode": "chat"
+    },
     "textembedding-gecko": {
         "max_tokens": 3072,
         "max_input_tokens": 3072,
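A quick way to confirm the updated limits from Python, reading litellm's bundled cost map (values as set in this diff):

```python
import litellm

print(litellm.model_cost["gemini-pro"]["max_tokens"])             # 32760
print(litellm.model_cost["gemini-1.0-pro"]["max_output_tokens"])  # 8192
print(litellm.model_cost["gemini-pro-vision"]["max_tokens"])      # 16384
```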
File diff suppressed because one or more lines are too long

@@ -1 +1 @@
-<!DOCTYPE html>... (prebuilt LiteLLM Admin UI page, minified; references static/chunks/app/page-fcb69349f15d154b.js and buildId "lLFQRQnIrRo-GJf5spHEd")
+<!DOCTYPE html>... (prebuilt LiteLLM Admin UI page, minified; references static/chunks/app/page-7bb820bd6902dbf2.js and buildId "unBuvDqydg0yodtP5c3nQ")
@@ -1,7 +1,7 @@
 2:I[77831,[],""]
-3:I[48016,["145","static/chunks/145-9c160ad5539e000f.js","931","static/chunks/app/page-fcb69349f15d154b.js"],""]
+3:I[48016,["145","static/chunks/145-9c160ad5539e000f.js","931","static/chunks/app/page-7bb820bd6902dbf2.js"],""]
 4:I[5613,[],""]
 5:I[31778,[],""]
-0:["lLFQRQnIrRo-GJf5spHEd",... (serialized RSC payload; identical apart from the build id)
+0:["unBuvDqydg0yodtP5c3nQ",... (serialized RSC payload; identical apart from the build id)
 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
 1:null
@@ -819,6 +819,7 @@ async def _PROXY_track_cost_callback(
         user_id = user_id or kwargs["litellm_params"]["metadata"].get(
             "user_api_key_user_id", None
         )
+        team_id = kwargs["litellm_params"]["metadata"].get("user_api_key_team_id", None)
         if kwargs.get("response_cost", None) is not None:
             response_cost = kwargs["response_cost"]
             user_api_key = kwargs["litellm_params"]["metadata"].get(
@@ -842,6 +843,7 @@ async def _PROXY_track_cost_callback(
                 token=user_api_key,
                 response_cost=response_cost,
                 user_id=user_id,
+                team_id=team_id,
                 kwargs=kwargs,
                 completion_response=completion_response,
                 start_time=start_time,
@@ -879,6 +881,7 @@ async def update_database(
     token,
     response_cost,
     user_id=None,
+    team_id=None,
     kwargs=None,
     completion_response=None,
     start_time=None,
@@ -886,7 +889,7 @@ async def update_database(
 ):
     try:
         verbose_proxy_logger.info(
-            f"Enters prisma db call, response_cost: {response_cost}, token: {token}; user_id: {user_id}"
+            f"Enters prisma db call, response_cost: {response_cost}, token: {token}; user_id: {user_id}; team_id: {team_id}"
         )
 
         ### [TODO] STEP 1: GET KEY + USER SPEND ### (key, user)
@@ -1039,8 +1042,69 @@ async def update_database(
             except Exception as e:
                 verbose_proxy_logger.info(f"Update Spend Logs DB failed to execute")
 
+        ### UPDATE TEAM SPEND ###
+        async def _update_team_db():
+            try:
+                verbose_proxy_logger.debug(
+                    f"adding spend to team db. Response cost: {response_cost}. team_id: {team_id}."
+                )
+                if team_id is None:
+                    verbose_proxy_logger.debug(
+                        "track_cost_callback: team_id is None. Not tracking spend for team"
+                    )
+                    return
+                if prisma_client is not None:
+                    # Fetch the existing cost for the given team
+                    existing_spend_obj = await prisma_client.get_data(
+                        team_id=team_id, table_name="team"
+                    )
+                    verbose_proxy_logger.debug(
+                        f"_update_team_db: existing spend: {existing_spend_obj}"
+                    )
+                    if existing_spend_obj is None:
+                        existing_spend = 0
+                    else:
+                        existing_spend = existing_spend_obj.spend
+                    # Calculate the new cost by adding the existing cost and response_cost
+                    new_spend = existing_spend + response_cost
+
+                    verbose_proxy_logger.debug(f"new cost: {new_spend}")
+                    # Update the cost column for the given team
+                    await prisma_client.update_data(
+                        team_id=team_id, data={"spend": new_spend}, table_name="team"
+                    )
+
+                elif custom_db_client is not None:
+                    # Fetch the existing cost for the given token
+                    existing_spend_obj = await custom_db_client.get_data(
+                        key=token, table_name="key"
+                    )
+                    verbose_proxy_logger.debug(
+                        f"_update_key_db existing spend: {existing_spend_obj}"
+                    )
+                    if existing_spend_obj is None:
+                        existing_spend = 0
+                    else:
+                        existing_spend = existing_spend_obj.spend
+                    # Calculate the new cost by adding the existing cost and response_cost
+                    new_spend = existing_spend + response_cost
+
+                    verbose_proxy_logger.debug(f"new cost: {new_spend}")
+                    # Update the cost column for the given token
+                    await custom_db_client.update_data(
+                        key=token, value={"spend": new_spend}, table_name="key"
+                    )
+
+                    valid_token = user_api_key_cache.get_cache(key=token)
+                    if valid_token is not None:
+                        valid_token.spend = new_spend
+                        user_api_key_cache.set_cache(key=token, value=valid_token)
+            except Exception as e:
+                verbose_proxy_logger.info("Update Team DB failed to execute")
+
         asyncio.create_task(_update_user_db())
         asyncio.create_task(_update_key_db())
+        asyncio.create_task(_update_team_db())
         asyncio.create_task(_insert_spend_log_to_db())
         verbose_proxy_logger.info("Successfully updated spend in all 3 tables")
     except Exception as e:
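Condensed, the Prisma path of `_update_team_db` above is a plain read-modify-write on the team row (calls exactly as used in the hunk; the numbers are illustrative):

```python
async def _team_spend_rmw(prisma_client, team_id, response_cost):
    # read the current team spend, add this call's cost, write it back
    existing = await prisma_client.get_data(team_id=team_id, table_name="team")
    existing_spend = 0 if existing is None else existing.spend
    new_spend = existing_spend + response_cost  # e.g. 0.10 + 3.65e-05 == 0.1000365
    await prisma_client.update_data(
        team_id=team_id, data={"spend": new_spend}, table_name="team"
    )
```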
@ -2143,6 +2207,9 @@ async def completion(
|
||||||
data["metadata"]["user_api_key"] = user_api_key_dict.api_key
|
data["metadata"]["user_api_key"] = user_api_key_dict.api_key
|
||||||
data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata
|
data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata
|
||||||
data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id
|
data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id
|
||||||
|
data["metadata"]["user_api_key_team_id"] = getattr(
|
||||||
|
user_api_key_dict, "team_id", None
|
||||||
|
)
|
||||||
_headers = dict(request.headers)
|
_headers = dict(request.headers)
|
||||||
_headers.pop(
|
_headers.pop(
|
||||||
"authorization", None
|
"authorization", None
|
||||||
|
@@ -2306,6 +2373,9 @@ async def chat_completion(
         data["metadata"] = {}
         data["metadata"]["user_api_key"] = user_api_key_dict.api_key
         data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id
+        data["metadata"]["user_api_key_team_id"] = getattr(
+            user_api_key_dict, "team_id", None
+        )
         data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata
         _headers = dict(request.headers)
         _headers.pop(
@@ -2527,6 +2597,9 @@ async def embeddings(
         ) # do not store the original `sk-..` api key in the db
         data["metadata"]["headers"] = _headers
         data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id
+        data["metadata"]["user_api_key_team_id"] = getattr(
+            user_api_key_dict, "team_id", None
+        )
         data["metadata"]["endpoint"] = str(request.url)

         ### TEAM-SPECIFIC PARAMS ###
@@ -2698,6 +2771,9 @@ async def image_generation(
         ) # do not store the original `sk-..` api key in the db
         data["metadata"]["headers"] = _headers
         data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id
+        data["metadata"]["user_api_key_team_id"] = getattr(
+            user_api_key_dict, "team_id", None
+        )
         data["metadata"]["endpoint"] = str(request.url)

         ### TEAM-SPECIFIC PARAMS ###
@@ -2853,6 +2929,9 @@ async def moderations(
         ) # do not store the original `sk-..` api key in the db
         data["metadata"]["headers"] = _headers
         data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id
+        data["metadata"]["user_api_key_team_id"] = getattr(
+            user_api_key_dict, "team_id", None
+        )
         data["metadata"]["endpoint"] = str(request.url)

         ### TEAM-SPECIFIC PARAMS ###
@@ -4208,6 +4287,9 @@ async def async_queue_request(
         ) # do not store the original `sk-..` api key in the db
         data["metadata"]["headers"] = _headers
         data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id
+        data["metadata"]["user_api_key_team_id"] = getattr(
+            user_api_key_dict, "team_id", None
+        )
         data["metadata"]["endpoint"] = str(request.url)

     global user_temperature, user_request_timeout, user_max_tokens, user_api_base
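Every proxied endpoint above (`completion`, `chat_completion`, `embeddings`, `image_generation`, `moderations`, and the queued path) now stamps `user_api_key_team_id` into `data["metadata"]`, using `getattr(..., "team_id", None)` so auth objects without a team still work. Downstream logging callbacks can then attribute requests to teams. A minimal sketch of reading it (the handler name is hypothetical; the `kwargs["litellm_params"]["metadata"]` path follows litellm's callback convention):

    # sketch: reading proxy-injected metadata inside an async success callback
    async def async_log_success_event(kwargs, response_obj, start_time, end_time):
        litellm_params = kwargs.get("litellm_params") or {}
        metadata = litellm_params.get("metadata") or {}
        team_id = metadata.get("user_api_key_team_id")  # None for team-less keys
        user_id = metadata.get("user_api_key_user_id")
        print(f"request attributed to user={user_id}, team={team_id}")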
@@ -808,8 +808,9 @@ class PrismaClient:
         data: dict = {},
         data_list: Optional[List] = None,
         user_id: Optional[str] = None,
+        team_id: Optional[str] = None,
         query_type: Literal["update", "update_many"] = "update",
-        table_name: Optional[Literal["user", "key", "config", "spend"]] = None,
+        table_name: Optional[Literal["user", "key", "config", "spend", "team"]] = None,
         update_key_values: Optional[dict] = None,
     ):
         """
@@ -860,6 +861,35 @@ class PrismaClient:
                     + "\033[0m"
                 )
                 return {"user_id": user_id, "data": db_data}
+            elif (
+                team_id is not None
+                or (table_name is not None and table_name == "team")
+                and query_type == "update"
+            ):
+                """
+                If data['spend'] + data['user'], update the user table with spend info as well
+                """
+                if team_id is None:
+                    team_id = db_data["team_id"]
+                if update_key_values is None:
+                    update_key_values = db_data
+                if "team_id" not in db_data and team_id is not None:
+                    db_data["team_id"] = team_id
+                update_team_row = await self.db.litellm_teamtable.upsert(
+                    where={"team_id": team_id},  # type: ignore
+                    data={
+                        "create": {**db_data},  # type: ignore
+                        "update": {
+                            **update_key_values  # type: ignore
+                        },  # just update user-specified values, if it already exists
+                    },
+                )
+                verbose_proxy_logger.info(
+                    "\033[91m"
+                    + f"DB Team Table - update succeeded {update_team_row}"
+                    + "\033[0m"
+                )
+                return {"team_id": team_id, "data": db_data}
             elif (
                 table_name is not None
                 and table_name == "key"
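Two details worth noting in the new branch. First, in the `elif` guard, `and` binds tighter than `or`, so the condition reads as `team_id is not None or (table_name == "team" and query_type == "update")`. Second, the upsert means the first spend write for a team also creates its row: `create` receives the full `db_data`, while `update` applies only the caller-supplied `update_key_values`. A sketch of the call the spend-tracking task makes against this path:

    async def record_team_spend(prisma_client, team_id: str, new_spend: float):
        # first write creates the team row; later writes only bump "spend"
        await prisma_client.update_data(
            team_id=team_id, data={"spend": new_spend}, table_name="team"
        )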
litellm/tests/test_custom_api_logger.py (new file, 46 lines)

@@ -0,0 +1,46 @@
+import sys
+import os
+import io, asyncio
+
+# import logging
+# logging.basicConfig(level=logging.DEBUG)
+sys.path.insert(0, os.path.abspath("../.."))
+print("Modified sys.path:", sys.path)
+
+
+from litellm import completion
+import litellm
+
+litellm.num_retries = 3
+
+import time, random
+import pytest
+
+
+@pytest.mark.asyncio
+@pytest.mark.skip(reason="new beta feature, will be testing in our ci/cd soon")
+async def test_custom_api_logging():
+    try:
+        litellm.success_callback = ["generic"]
+        litellm.set_verbose = True
+        os.environ["GENERIC_LOGGER_ENDPOINT"] = "http://localhost:8000/log-event"
+
+        print("Testing generic api logging")
+
+        await litellm.acompletion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": f"This is a test"}],
+            max_tokens=10,
+            temperature=0.7,
+            user="ishaan-2",
+        )
+
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {e}")
+    finally:
+        # post, close log file and verify
+        # Reset stdout to the original value
+        print("Passed! Testing async s3 logging")
+
+
+# test_s3_logging()
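The test drives the new "generic" success callback, which, per the env var and endpoint name, is expected to POST logged events to whatever URL `GENERIC_LOGGER_ENDPOINT` points at. A minimal sketch of a local receiver for that endpoint (the FastAPI app is an assumption for illustration; the exact payload fields depend on the logger, so the handler just echoes what arrives):

    # sketch: a local receiver for GENERIC_LOGGER_ENDPOINT
    from fastapi import FastAPI, Request

    app = FastAPI()

    @app.post("/log-event")
    async def log_event(request: Request):
        payload = await request.json()  # field names depend on the logger's payload
        print("received log event:", payload)
        return {"status": "ok"}

    # run with: uvicorn log_receiver:app --port 8000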
@@ -44,9 +44,9 @@ except:
     filename = str(
         resources.files(litellm).joinpath("llms/tokenizers") # for python 3.10
     ) # for python 3.10+
-    os.environ["TIKTOKEN_CACHE_DIR"] = (
-        filename # use local copy of tiktoken b/c of - https://github.com/BerriAI/litellm/issues/1071
-    )
+    os.environ[
+        "TIKTOKEN_CACHE_DIR"
+    ] = filename # use local copy of tiktoken b/c of - https://github.com/BerriAI/litellm/issues/1071

 encoding = tiktoken.get_encoding("cl100k_base")
 import importlib.metadata
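The two versions are behaviorally identical; the reflow just matches a different formatter. What matters is the ordering: `TIKTOKEN_CACHE_DIR` must be set before the first `tiktoken.get_encoding(...)` call, since tiktoken consults that variable when locating (or downloading) the BPE file. A standalone sketch of the same offline-cache pattern (the path is a placeholder):

    import os

    # must happen before tiktoken loads an encoding
    os.environ["TIKTOKEN_CACHE_DIR"] = "/path/to/llms/tokenizers"  # placeholder path

    import tiktoken

    encoding = tiktoken.get_encoding("cl100k_base")
    print(len(encoding.encode("hello world")))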
@@ -4256,7 +4256,14 @@ def get_optional_params(
             optional_params["stop_sequences"] = stop
         if max_tokens is not None:
             optional_params["max_output_tokens"] = max_tokens
-    elif custom_llm_provider == "vertex_ai":
+    elif custom_llm_provider == "vertex_ai" and model in (
+        litellm.vertex_chat_models
+        or model in litellm.vertex_code_chat_models
+        or model in litellm.vertex_text_models
+        or model in litellm.vertex_code_text_models
+        or model in litellm.vertex_language_models
+        or model in litellm.vertex_embedding_models
+    ):
         ## check if unsupported param passed in
         supported_params = [
             "temperature",
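One caution on the widened guard: `in` binds tighter than `or`, so the parenthesized expression evaluates to `litellm.vertex_chat_models` whenever that list is non-empty, and the whole condition collapses to `model in litellm.vertex_chat_models`. If the intent is membership in any of the listed Vertex AI groups, an explicit check would look like this (a hypothetical helper, not part of this diff):

    # sketch: explicit membership test across the Vertex AI model lists named above
    import litellm

    def is_known_vertex_model(model: str) -> bool:
        vertex_model_lists = (
            litellm.vertex_chat_models,
            litellm.vertex_code_chat_models,
            litellm.vertex_text_models,
            litellm.vertex_code_text_models,
            litellm.vertex_language_models,
            litellm.vertex_embedding_models,
        )
        return any(model in model_list for model_list in vertex_model_lists)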
@@ -642,21 +642,40 @@
         "mode": "chat"
     },
     "gemini-pro": {
-        "max_tokens": 30720,
+        "max_tokens": 32760,
         "max_output_tokens": 2048,
         "input_cost_per_token": 0.00000025,
         "output_cost_per_token": 0.0000005,
         "litellm_provider": "vertex_ai-language-models",
         "mode": "chat"
     },
+    "gemini-1.0-pro": {
+        "max_tokens": 32760,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.00000025,
+        "output_cost_per_token": 0.0000005,
+        "litellm_provider": "vertex_ai-language-models",
+        "mode": "chat"
+    },
     "gemini-pro-vision": {
-        "max_tokens": 30720,
+        "max_tokens": 16384,
         "max_output_tokens": 2048,
         "input_cost_per_token": 0.00000025,
         "output_cost_per_token": 0.0000005,
         "litellm_provider": "vertex_ai-vision-models",
         "mode": "chat"
     },
+    "gemini-1.0-pro-vision": {
+        "max_tokens": 16384,
+        "max_output_tokens": 2048,
+        "max_images_per_prompt": 16,
+        "max_videos_per_prompt": 1,
+        "max_video_length": 2,
+        "input_cost_per_token": 0.00000025,
+        "output_cost_per_token": 0.0000005,
+        "litellm_provider": "vertex_ai-vision-models",
+        "mode": "chat"
+    },
     "textembedding-gecko": {
         "max_tokens": 3072,
         "max_input_tokens": 3072,
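These entries feed litellm's per-token cost accounting: spend for a call is `prompt_tokens * input_cost_per_token + completion_tokens * output_cost_per_token`. A quick worked example with the gemini-pro rates above:

    # worked example using the gemini-pro pricing from this file
    input_cost_per_token = 0.00000025
    output_cost_per_token = 0.0000005

    prompt_tokens, completion_tokens = 1000, 500
    cost = (
        prompt_tokens * input_cost_per_token
        + completion_tokens * output_cost_per_token
    )
    print(f"${cost:.6f}")  # $0.000500

The `max_tokens` corrections matter beyond pricing, since litellm consults them when checking whether a prompt fits a model's context window.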
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.24.0"
+version = "1.24.3"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"

@@ -69,7 +69,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"

 [tool.commitizen]
-version = "1.24.0"
+version = "1.24.3"
 version_files = [
     "pyproject.toml:^version"
 ]
File diff suppressed because one or more lines are too long

(Two generated admin-UI build files changed, diffs elided for length: the prerendered index.html, "@@ -1 +1 @@", and its React Server Components payload, "@@ -1,7 +1,7 @@". In both, the only differences are the Next.js buildId, "lLFQRQnIrRo-GJf5spHEd" replaced by "unBuvDqydg0yodtP5c3nQ", and the page chunk, "page-fcb69349f15d154b.js" replaced by "page-7bb820bd6902dbf2.js"; the surrounding generated markup is identical.)
@@ -21,9 +21,13 @@ interface ChatUIProps {
 }

 async function generateModelResponse(inputMessage: string, updateUI: (chunk: string) => void, selectedModel: string, accessToken: string) {
-  const client = new openai.OpenAI({
+  // base url should be the current base_url
+  const isLocal = process.env.NODE_ENV === "development";
+  console.log("isLocal:", isLocal);
+  const proxyBaseUrl = isLocal ? "http://localhost:4000" : window.location.origin;
+  const client = new openai.OpenAI({
     apiKey: accessToken, // Replace with your OpenAI API key
-    baseURL: 'http://0.0.0.0:4000', // Replace with your OpenAI API base URL
+    baseURL: proxyBaseUrl, // Replace with your OpenAI API base URL
     dangerouslyAllowBrowser: true, // using a temporary litellm proxy key
   });

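In production the admin UI is served by the proxy itself, so `window.location.origin` points the browser's OpenAI client back at the right host, while local development falls back to `http://localhost:4000`. The same idea applies to any OpenAI-compatible client of the proxy; a minimal Python sketch (the key and port are placeholders):

    # sketch: point an OpenAI-compatible client at the LiteLLM proxy
    import openai

    client = openai.OpenAI(
        api_key="sk-my-litellm-key",       # placeholder proxy key
        base_url="http://localhost:4000",  # the proxy's base URL
    )

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hello"}],
    )
    print(response.choices[0].message.content)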