forked from phoenix/litellm-mirror
Merge branch 'main' into litellm_moderations_improvements
commit 999fab82f7
32 changed files with 683 additions and 72 deletions
@ -5,7 +5,7 @@
<p align="center">Call all LLM APIs using the OpenAI format [Bedrock, Huggingface, VertexAI, TogetherAI, Azure, OpenAI, etc.]
<br>
</p>
<h4 align="center"><a href="https://docs.litellm.ai/docs/simple_proxy" target="_blank">OpenAI Proxy Server</a> | <a href="https://docs.litellm.ai/docs/enterprise" target="_blank">Enterprise Support</a></h4>
<h4 align="center"><a href="https://docs.litellm.ai/docs/simple_proxy" target="_blank">OpenAI Proxy Server</a> | <a href="https://docs.litellm.ai/docs/enterprise" target="_blank">Enterprise Tier</a></h4>
<h4 align="center">
<a href="https://pypi.org/project/litellm/" target="_blank">
<img src="https://img.shields.io/pypi/v/litellm.svg" alt="PyPI Version">
@ -28,7 +28,7 @@ LiteLLM manages:
- Translate inputs to the provider's `completion`, `embedding`, and `image_generation` endpoints
- [Consistent output](https://docs.litellm.ai/docs/completion/output) - text responses are always available at `['choices'][0]['message']['content']` (see the sketch below)
- Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)
- Track spend & set budgets per project [OpenAI Proxy Server](https://docs.litellm.ai/docs/simple_proxy)
- Set Budgets & Rate limits per project [OpenAI Proxy Server](https://docs.litellm.ai/docs/simple_proxy)

[**Jump to OpenAI Proxy Docs**](https://github.com/BerriAI/litellm?tab=readme-ov-file#openai-proxy---docs) <br>
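A minimal sketch of the consistent-output behavior noted above, assuming `litellm` is installed and `OPENAI_API_KEY` is set; the model and message are placeholders:

```python
import litellm

# assumes OPENAI_API_KEY is set in the environment
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello, world"}],
)

# the text lives at the same path regardless of provider
print(response["choices"][0]["message"]["content"])
```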
@ -8,6 +8,7 @@ import TabItem from '@theme/TabItem';
Log Proxy Input, Output, and Exceptions using Custom Callbacks, Langfuse, OpenTelemetry, DynamoDB, or s3 Buckets

- [Async Custom Callbacks](#custom-callback-class-async)
- [Async Custom Callback APIs](#custom-callback-apis-async)
- [Logging to Langfuse](#logging-proxy-inputoutput---langfuse)
- [Logging to s3 Buckets](#logging-proxy-inputoutput---s3-buckets)
- [Logging to DynamoDB](#logging-proxy-inputoutput---dynamodb)
@ -297,6 +298,106 @@ ModelResponse(
```


## Custom Callback APIs [Async]

:::info

This is an Enterprise-only feature [Get Started with Enterprise here](https://github.com/BerriAI/litellm/tree/main/enterprise)

:::

Use this if you:
- Want to use custom callbacks written in a non-Python programming language
- Want your callbacks to run on a different microservice

#### Step 1. Create your generic logging API endpoint
Set up a generic API endpoint that can receive data in JSON format. The logged payload will be nested under a top-level "data" field.

Your server should support the following request format:
```shell
curl --location https://your-domain.com/log-event \
--request POST \
--header "Content-Type: application/json" \
--data '{
    "data": {
        "id": "chatcmpl-8sgE89cEQ4q9biRtxMvDfQU1O82PT",
        "call_type": "acompletion",
        "cache_hit": "None",
        "startTime": "2024-02-15 16:18:44.336280",
        "endTime": "2024-02-15 16:18:45.045539",
        "model": "gpt-3.5-turbo",
        "user": "ishaan-2",
        "modelParameters": "{'temperature': 0.7, 'max_tokens': 10, 'user': 'ishaan-2', 'extra_body': {}}",
        "messages": "[{'role': 'user', 'content': 'This is a test'}]",
        "response": "ModelResponse(id='chatcmpl-8sgE89cEQ4q9biRtxMvDfQU1O82PT', choices=[Choices(finish_reason='length', index=0, message=Message(content='Great! How can I assist you with this test', role='assistant'))], created=1708042724, model='gpt-3.5-turbo-0613', object='chat.completion', system_fingerprint=None, usage=Usage(completion_tokens=10, prompt_tokens=11, total_tokens=21))",
        "usage": "Usage(completion_tokens=10, prompt_tokens=11, total_tokens=21)",
        "metadata": "{}",
        "cost": "3.65e-05"
    }
}'
```

Reference FastAPI Python Server

Here's a reference FastAPI server that is compatible with the LiteLLM Proxy:

```python
# this is an example endpoint to receive data from litellm
from fastapi import FastAPI, HTTPException, Request

app = FastAPI()


@app.post("/log-event")
async def log_event(request: Request):
    try:
        print("Received /log-event request")
        # Assuming the incoming request has JSON data
        data = await request.json()
        print("Received request data:")
        print(data)

        # Your additional logic can go here
        # For now, just printing the received data

        return {"message": "Request received successfully"}
    except Exception as e:
        print(f"Error processing request: {str(e)}")
        import traceback

        traceback.print_exc()
        raise HTTPException(status_code=500, detail="Internal Server Error")


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="127.0.0.1", port=8000)
```

#### Step 2. Set your `GENERIC_LOGGER_ENDPOINT` to the endpoint + route we should send callback logs to

```python
os.environ["GENERIC_LOGGER_ENDPOINT"] = "http://localhost:8000/log-event"
```

#### Step 3. Create a `config.yaml` file and set `litellm_settings`: `success_callback` = ["generic"]

Example litellm proxy config.yaml
```yaml
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
litellm_settings:
  success_callback: ["generic"]
```

Start the LiteLLM Proxy and make a test request to verify the logs reached your callback API.
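A minimal hedged test sketch — assuming the proxy is running locally and reachable at the placeholder `base_url` below (adjust the URL, port, and key to your deployment):

```python
import openai

# placeholders - point the OpenAI client at your running LiteLLM Proxy
client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:8000")

response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "This is a test"}],
    max_tokens=10,
)
print(response)
# your /log-event endpoint should now receive a POST with the logged "data" payload
```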

## Logging Proxy Input/Output - Langfuse

We will use the `--config` to set `litellm.success_callback = ["langfuse"]`. This will log all successful LLM calls to Langfuse.

@ -1,11 +1,12 @@
## LiteLLM Enterprise

Code in this folder is licensed under a commercial license. Please review the [LICENSE](/LICENSE.md) file within the /enterprise folder
Code in this folder is licensed under a commercial license. Please review the [LICENSE](./LICENSE.md) file within the /enterprise folder

**These features are covered under the LiteLLM Enterprise contract**

👉 **Using in an Enterprise / Need specific features?** Meet with us [here](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat?month=2024-02)

## Features:
- Custom API / microservice callbacks
- Google Text Moderation API

31 enterprise/callbacks/example_logging_api.py Normal file
@ -0,0 +1,31 @@
# this is an example endpoint to receive data from litellm
from fastapi import FastAPI, HTTPException, Request

app = FastAPI()


@app.post("/log-event")
async def log_event(request: Request):
    try:
        print("Received /log-event request")
        # Assuming the incoming request has JSON data
        data = await request.json()
        print("Received request data:")
        print(data)

        # Your additional logic can go here
        # For now, just printing the received data

        return {"message": "Request received successfully"}
    except Exception as e:
        print(f"Error processing request: {str(e)}")
        import traceback

        traceback.print_exc()
        raise HTTPException(status_code=500, detail="Internal Server Error")


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="127.0.0.1", port=8000)
128 enterprise/callbacks/generic_api_callback.py Normal file
@ -0,0 +1,128 @@
# callback to make a request to an API endpoint

#### What this does ####
# On success, logs events to a generic API endpoint
import dotenv, os
import requests

from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching import DualCache

from typing import Literal, Union

dotenv.load_dotenv()  # Loading env variables using dotenv
import traceback


#### What this does ####
# On success + failure, log events to Supabase

import dotenv, os
import requests

dotenv.load_dotenv()  # Loading env variables using dotenv
import traceback
import datetime, subprocess, sys
import litellm, uuid
from litellm._logging import print_verbose, verbose_logger


class GenericAPILogger:
    # Class variables or attributes
    def __init__(self, endpoint=None, headers=None):
        try:
            if endpoint == None:
                # check env for "GENERIC_LOGGER_ENDPOINT"
                if os.getenv("GENERIC_LOGGER_ENDPOINT"):
                    # Do something with the endpoint
                    endpoint = os.getenv("GENERIC_LOGGER_ENDPOINT")
                else:
                    # Handle the case when the endpoint is not found in the environment variables
                    raise ValueError(
                        f"endpoint not set for GenericAPILogger, GENERIC_LOGGER_ENDPOINT not found in environment variables"
                    )
            headers = headers or litellm.generic_logger_headers
            self.endpoint = endpoint
            self.headers = headers

            verbose_logger.debug(
                f"in init GenericAPILogger, endpoint {self.endpoint}, headers {self.headers}"
            )

            pass

        except Exception as e:
            print_verbose(f"Got exception on init GenericAPILogger client {str(e)}")
            raise e

    # This is sync, because we run this in a separate thread. Running in a separate thread ensures it will never block an LLM API call
    # Experience with s3, Langfuse shows that async logging events are complicated and can block LLM calls
    def log_event(
        self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
    ):
        try:
            verbose_logger.debug(
                f"GenericAPILogger Logging - Enters logging function for model {kwargs}"
            )

            # construct payload to send custom logger
            # follows the same params as langfuse.py
            litellm_params = kwargs.get("litellm_params", {})
            metadata = (
                litellm_params.get("metadata", {}) or {}
            )  # if litellm_params['metadata'] == None
            messages = kwargs.get("messages")
            cost = kwargs.get("response_cost", 0.0)
            optional_params = kwargs.get("optional_params", {})
            call_type = kwargs.get("call_type", "litellm.completion")
            cache_hit = kwargs.get("cache_hit", False)
            usage = response_obj["usage"]
            id = response_obj.get("id", str(uuid.uuid4()))

            # Build the initial payload
            payload = {
                "id": id,
                "call_type": call_type,
                "cache_hit": cache_hit,
                "startTime": start_time,
                "endTime": end_time,
                "model": kwargs.get("model", ""),
                "user": kwargs.get("user", ""),
                "modelParameters": optional_params,
                "messages": messages,
                "response": response_obj,
                "usage": usage,
                "metadata": metadata,
                "cost": cost,
            }

            # Ensure everything in the payload is converted to str
            for key, value in payload.items():
                try:
                    payload[key] = str(value)
                except:
                    # non blocking if it can't cast to a str
                    pass

            import json

            data = {
                "data": payload,
            }
            data = json.dumps(data)
            print_verbose(f"\nGeneric Logger - Logging payload = {data}")

            # make request to endpoint with payload
            response = requests.post(self.endpoint, json=data, headers=self.headers)

            response_status = response.status_code
            response_text = response.text

            print_verbose(
                f"Generic Logger - final response status = {response_status}, response text = {response_text}"
            )
            return response
        except Exception as e:
            traceback.print_exc()
            verbose_logger.debug(f"Generic - {str(e)}\n{traceback.format_exc()}")
            pass
53 enterprise/hooks/google_text_moderation.py Normal file
@ -0,0 +1,53 @@
# +-----------------------------------------------+
#
# Google Text Moderation
# https://cloud.google.com/natural-language/docs/moderating-text
#
# +-----------------------------------------------+
# Thank you users! We ❤️ you! - Krrish & Ishaan


from typing import Optional, Literal, Union
import litellm, traceback, sys, uuid
from litellm.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm.utils import (
    ModelResponse,
    EmbeddingResponse,
    ImageResponse,
    StreamingChoices,
)
from datetime import datetime
import aiohttp, asyncio


class _ENTERPRISE_GoogleTextModeration(CustomLogger):
    user_api_key_cache = None

    # Class variables or attributes
    def __init__(self, mock_testing: bool = False):
        pass

    def print_verbose(self, print_statement):
        try:
            verbose_proxy_logger.debug(print_statement)
            if litellm.set_verbose:
                print(print_statement)  # noqa
        except:
            pass

    async def async_pre_call_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        cache: DualCache,
        data: dict,
        call_type: str,
    ):
        """
        - Calls Google's Text Moderation API
        - Rejects request if it fails safety check
        """
        pass
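For orientation, a minimal hedged sketch of how a pre-call hook of this shape can reject a flagged request before the LLM call is made; the `is_flagged` stand-in and the error payload below are illustrative placeholders, not this file's actual Google Text Moderation implementation:

```python
from fastapi import HTTPException

# purely illustrative stand-in for a real moderation call (e.g. Google's moderateText)
BLOCKED_TERMS = {"example-banned-phrase"}


def is_flagged(text: str) -> bool:
    return any(term in text.lower() for term in BLOCKED_TERMS)


async def example_pre_call_hook(user_api_key_dict, cache, data: dict, call_type: str):
    # gather the user-visible text from the request body
    text = " ".join(
        str(m.get("content") or "") for m in data.get("messages", []) if isinstance(m, dict)
    )
    if is_flagged(text):
        # raising here stops the request before any LLM call is made
        raise HTTPException(
            status_code=400, detail={"error": "Violated content safety policy"}
        )
    return data
```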
@ -146,6 +146,7 @@ model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/mai
suppress_debug_info = False
dynamodb_table_name: Optional[str] = None
s3_callback_params: Optional[Dict] = None
generic_logger_headers: Optional[Dict] = None
default_key_generate_params: Optional[Dict] = None
upperbound_key_generate_params: Optional[Dict] = None
default_team_settings: Optional[List] = None
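For context, a short hedged sketch of how this new setting is consumed — `GenericAPILogger` falls back to `litellm.generic_logger_headers` when no headers are passed in; the header value below is a placeholder:

```python
import litellm

# placeholder - attach whatever auth your logging endpoint expects
litellm.generic_logger_headers = {"Authorization": "Bearer <your-logging-api-token>"}
```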

@ -98,6 +98,9 @@ def _get_redis_client_logic(**env_overrides):
def get_redis_client(**env_overrides):
    redis_kwargs = _get_redis_client_logic(**env_overrides)
    if "url" in redis_kwargs and redis_kwargs["url"] is not None:
        redis_kwargs.pop(
            "connection_pool", None
        )  # redis.from_url doesn't support setting your own connection pool
        return redis.Redis.from_url(**redis_kwargs)
    return redis.Redis(**redis_kwargs)

@ -105,6 +108,9 @@ def get_redis_client(**env_overrides):
def get_redis_async_client(**env_overrides):
    redis_kwargs = _get_redis_client_logic(**env_overrides)
    if "url" in redis_kwargs and redis_kwargs["url"] is not None:
        redis_kwargs.pop(
            "connection_pool", None
        )  # redis.from_url doesn't support setting your own connection pool
        return async_redis.Redis.from_url(**redis_kwargs)
    return async_redis.Redis(
        socket_timeout=5,
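A minimal sketch of the branch these hunks add, assuming a locally reachable Redis and the `redis` Python package; the URL is a placeholder:

```python
import redis

# when a full URL is supplied, the client is built via from_url
# (a caller-supplied connection_pool is dropped, since from_url does not accept one)
client = redis.Redis.from_url("redis://localhost:6379/0")
client.set("litellm-test-key", "ok", ex=60)
print(client.get("litellm-test-key"))
```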
@ -124,7 +124,7 @@ class RedisCache(BaseCache):
            self.redis_client.set(name=key, value=str(value), ex=ttl)
        except Exception as e:
            # NON blocking - notify users Redis is throwing an exception
            logging.debug("LiteLLM Caching: set() - Got exception from REDIS : ", e)
            print_verbose("LiteLLM Caching: set() - Got exception from REDIS : ", e)

    async def async_set_cache(self, key, value, **kwargs):
        _redis_client = self.init_async_client()

@ -134,10 +134,12 @@ class RedisCache(BaseCache):
                f"Set ASYNC Redis Cache: key: {key}\nValue {value}\nttl={ttl}"
            )
            try:
                await redis_client.set(name=key, value=json.dumps(value), ex=ttl)
                await redis_client.set(
                    name=key, value=json.dumps(value), ex=ttl, get=True
                )
            except Exception as e:
                # NON blocking - notify users Redis is throwing an exception
                logging.debug("LiteLLM Caching: set() - Got exception from REDIS : ", e)
                print_verbose("LiteLLM Caching: set() - Got exception from REDIS : ", e)

    async def async_set_cache_pipeline(self, cache_list, ttl=None):
        """
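For reference, a hedged sketch of enabling the Redis-backed cache that these methods serve — host, port, and password are placeholders; `litellm.Cache` is the public entry point:

```python
import litellm
from litellm.caching import Cache

# placeholders - point at your Redis instance
litellm.cache = Cache(type="redis", host="localhost", port="6379", password="")

# identical completion() calls made after this point can be answered from Redis
```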
@ -259,6 +259,7 @@ class LangFuseLogger:
            if key in [
                "user_api_key",
                "user_api_key_user_id",
                "user_api_key_team_id",
                "semantic-similarity",
            ]:
                tags.append(f"{key}:{value}")

@ -343,24 +343,31 @@ def completion(
|
|||
llm_model = CodeChatModel.from_pretrained(model)
|
||||
mode = "chat"
|
||||
request_str += f"llm_model = CodeChatModel.from_pretrained({model})\n"
|
||||
else: # assume vertex model garden
|
||||
client = aiplatform.gapic.PredictionServiceClient(
|
||||
client_options=client_options
|
||||
elif model == "private":
|
||||
mode = "private"
|
||||
model = optional_params.pop("model_id", None)
|
||||
# private endpoint requires a dict instead of JSON
|
||||
instances = [optional_params.copy()]
|
||||
instances[0]["prompt"] = prompt
|
||||
llm_model = aiplatform.PrivateEndpoint(
|
||||
endpoint_name=model,
|
||||
project=vertex_project,
|
||||
location=vertex_location,
|
||||
)
|
||||
request_str += f"llm_model = aiplatform.PrivateEndpoint(endpoint_name={model}, project={vertex_project}, location={vertex_location})\n"
|
||||
else: # assume vertex model garden on public endpoint
|
||||
mode = "custom"
|
||||
|
||||
instances = [optional_params]
|
||||
instances = [optional_params.copy()]
|
||||
instances[0]["prompt"] = prompt
|
||||
instances = [
|
||||
json_format.ParseDict(instance_dict, Value())
|
||||
for instance_dict in instances
|
||||
]
|
||||
llm_model = client.endpoint_path(
|
||||
project=vertex_project, location=vertex_location, endpoint=model
|
||||
)
|
||||
|
||||
mode = "custom"
|
||||
request_str += f"llm_model = client.endpoint_path(project={vertex_project}, location={vertex_location}, endpoint={model})\n"
|
||||
# Will determine the API used based on async parameter
|
||||
llm_model = None
|
||||
|
||||
# NOTE: async prediction and streaming under "private" mode isn't supported by aiplatform right now
|
||||
if acompletion == True:
|
||||
data = {
|
||||
"llm_model": llm_model,
|
||||
|
@ -532,9 +539,6 @@ def completion(
|
|||
"""
|
||||
Vertex AI Model Garden
|
||||
"""
|
||||
request_str += (
|
||||
f"client.predict(endpoint={llm_model}, instances={instances})\n"
|
||||
)
|
||||
## LOGGING
|
||||
logging_obj.pre_call(
|
||||
input=prompt,
|
||||
|
@ -544,11 +548,21 @@ def completion(
|
|||
"request_str": request_str,
|
||||
},
|
||||
)
|
||||
|
||||
response = client.predict(
|
||||
endpoint=llm_model,
|
||||
instances=instances,
|
||||
llm_model = aiplatform.gapic.PredictionServiceClient(
|
||||
client_options=client_options
|
||||
)
|
||||
request_str += f"llm_model = aiplatform.gapic.PredictionServiceClient(client_options={client_options})\n"
|
||||
endpoint_path = llm_model.endpoint_path(
|
||||
project=vertex_project, location=vertex_location, endpoint=model
|
||||
)
|
||||
request_str += (
|
||||
f"llm_model.predict(endpoint={endpoint_path}, instances={instances})\n"
|
||||
)
|
||||
response = llm_model.predict(
|
||||
endpoint=endpoint_path,
|
||||
instances=instances
|
||||
).predictions
|
||||
|
||||
completion_response = response[0]
|
||||
if (
|
||||
isinstance(completion_response, str)
|
||||
|
@ -558,6 +572,36 @@ def completion(
|
|||
if "stream" in optional_params and optional_params["stream"] == True:
|
||||
response = TextStreamer(completion_response)
|
||||
return response
|
||||
elif mode == "private":
|
||||
"""
|
||||
Vertex AI Model Garden deployed on private endpoint
|
||||
"""
|
||||
## LOGGING
|
||||
logging_obj.pre_call(
|
||||
input=prompt,
|
||||
api_key=None,
|
||||
additional_args={
|
||||
"complete_input_dict": optional_params,
|
||||
"request_str": request_str,
|
||||
},
|
||||
)
|
||||
request_str += (
|
||||
f"llm_model.predict(instances={instances})\n"
|
||||
)
|
||||
response = llm_model.predict(
|
||||
instances=instances
|
||||
).predictions
|
||||
|
||||
completion_response = response[0]
|
||||
if (
|
||||
isinstance(completion_response, str)
|
||||
and "\nOutput:\n" in completion_response
|
||||
):
|
||||
completion_response = completion_response.split("\nOutput:\n", 1)[1]
|
||||
if "stream" in optional_params and optional_params["stream"] == True:
|
||||
response = TextStreamer(completion_response)
|
||||
return response
|
||||
|
||||
## LOGGING
|
||||
logging_obj.post_call(
|
||||
input=prompt, api_key=None, original_response=completion_response
|
||||
|
@ -722,17 +766,6 @@ async def async_completion(
|
|||
Vertex AI Model Garden
|
||||
"""
|
||||
from google.cloud import aiplatform
|
||||
|
||||
async_client = aiplatform.gapic.PredictionServiceAsyncClient(
|
||||
client_options=client_options
|
||||
)
|
||||
llm_model = async_client.endpoint_path(
|
||||
project=vertex_project, location=vertex_location, endpoint=model
|
||||
)
|
||||
|
||||
request_str += (
|
||||
f"client.predict(endpoint={llm_model}, instances={instances})\n"
|
||||
)
|
||||
## LOGGING
|
||||
logging_obj.pre_call(
|
||||
input=prompt,
|
||||
|
@ -743,8 +776,18 @@ async def async_completion(
|
|||
},
|
||||
)
|
||||
|
||||
response_obj = await async_client.predict(
|
||||
endpoint=llm_model,
|
||||
llm_model = aiplatform.gapic.PredictionServiceAsyncClient(
|
||||
client_options=client_options
|
||||
)
|
||||
request_str += f"llm_model = aiplatform.gapic.PredictionServiceAsyncClient(client_options={client_options})\n"
|
||||
endpoint_path = llm_model.endpoint_path(
|
||||
project=vertex_project, location=vertex_location, endpoint=model
|
||||
)
|
||||
request_str += (
|
||||
f"llm_model.predict(endpoint={endpoint_path}, instances={instances})\n"
|
||||
)
|
||||
response_obj = await llm_model.predict(
|
||||
endpoint=endpoint_path,
|
||||
instances=instances,
|
||||
)
|
||||
response = response_obj.predictions
|
||||
|
@ -754,6 +797,23 @@ async def async_completion(
|
|||
and "\nOutput:\n" in completion_response
|
||||
):
|
||||
completion_response = completion_response.split("\nOutput:\n", 1)[1]
|
||||
|
||||
elif mode == "private":
|
||||
request_str += (
|
||||
f"llm_model.predict_async(instances={instances})\n"
|
||||
)
|
||||
response_obj = await llm_model.predict_async(
|
||||
instances=instances,
|
||||
)
|
||||
|
||||
response = response_obj.predictions
|
||||
completion_response = response[0]
|
||||
if (
|
||||
isinstance(completion_response, str)
|
||||
and "\nOutput:\n" in completion_response
|
||||
):
|
||||
completion_response = completion_response.split("\nOutput:\n", 1)[1]
|
||||
|
||||
## LOGGING
|
||||
logging_obj.post_call(
|
||||
input=prompt, api_key=None, original_response=completion_response
|
||||
|
@ -894,15 +954,8 @@ async def async_streaming(
|
|||
response = llm_model.predict_streaming_async(prompt, **optional_params)
|
||||
elif mode == "custom":
|
||||
from google.cloud import aiplatform
|
||||
stream = optional_params.pop("stream", None)
|
||||
|
||||
async_client = aiplatform.gapic.PredictionServiceAsyncClient(
|
||||
client_options=client_options
|
||||
)
|
||||
llm_model = async_client.endpoint_path(
|
||||
project=vertex_project, location=vertex_location, endpoint=model
|
||||
)
|
||||
|
||||
request_str += f"client.predict(endpoint={llm_model}, instances={instances})\n"
|
||||
## LOGGING
|
||||
logging_obj.pre_call(
|
||||
input=prompt,
|
||||
|
@ -912,9 +965,34 @@ async def async_streaming(
|
|||
"request_str": request_str,
|
||||
},
|
||||
)
|
||||
llm_model = aiplatform.gapic.PredictionServiceAsyncClient(
|
||||
client_options=client_options
|
||||
)
|
||||
request_str += f"llm_model = aiplatform.gapic.PredictionServiceAsyncClient(client_options={client_options})\n"
|
||||
endpoint_path = llm_model.endpoint_path(
|
||||
project=vertex_project, location=vertex_location, endpoint=model
|
||||
)
|
||||
request_str += f"client.predict(endpoint={endpoint_path}, instances={instances})\n"
|
||||
response_obj = await llm_model.predict(
|
||||
endpoint=endpoint_path,
|
||||
instances=instances,
|
||||
)
|
||||
|
||||
response_obj = await async_client.predict(
|
||||
endpoint=llm_model,
|
||||
response = response_obj.predictions
|
||||
completion_response = response[0]
|
||||
if (
|
||||
isinstance(completion_response, str)
|
||||
and "\nOutput:\n" in completion_response
|
||||
):
|
||||
completion_response = completion_response.split("\nOutput:\n", 1)[1]
|
||||
if stream:
|
||||
response = TextStreamer(completion_response)
|
||||
|
||||
elif mode == "private":
|
||||
stream = optional_params.pop("stream", None)
|
||||
_ = instances[0].pop("stream", None)
|
||||
request_str += f"llm_model.predict_async(instances={instances})\n"
|
||||
response_obj = await llm_model.predict_async(
|
||||
instances=instances,
|
||||
)
|
||||
response = response_obj.predictions
|
||||
|
@ -924,8 +1002,9 @@ async def async_streaming(
|
|||
and "\nOutput:\n" in completion_response
|
||||
):
|
||||
completion_response = completion_response.split("\nOutput:\n", 1)[1]
|
||||
if "stream" in optional_params and optional_params["stream"] == True:
|
||||
if stream:
|
||||
response = TextStreamer(completion_response)
|
||||
|
||||
streamwrapper = CustomStreamWrapper(
|
||||
completion_stream=response,
|
||||
model=model,
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
import os, openai, sys, json, inspect, uuid, datetime, threading
|
||||
from typing import Any, Literal, Union
|
||||
from functools import partial
|
||||
|
||||
import dotenv, traceback, random, asyncio, time, contextvars
|
||||
from copy import deepcopy
|
||||
import httpx
|
||||
|
|
|
@ -642,21 +642,40 @@
        "mode": "chat"
    },
    "gemini-pro": {
        "max_tokens": 30720,
        "max_tokens": 32760,
        "max_output_tokens": 2048,
        "input_cost_per_token": 0.00000025,
        "output_cost_per_token": 0.0000005,
        "litellm_provider": "vertex_ai-language-models",
        "mode": "chat"
    },
    "gemini-1.0-pro": {
        "max_tokens": 32760,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0.00000025,
        "output_cost_per_token": 0.0000005,
        "litellm_provider": "vertex_ai-language-models",
        "mode": "chat"
    },
    "gemini-pro-vision": {
        "max_tokens": 30720,
        "max_tokens": 16384,
        "max_output_tokens": 2048,
        "input_cost_per_token": 0.00000025,
        "output_cost_per_token": 0.0000005,
        "litellm_provider": "vertex_ai-vision-models",
        "mode": "chat"
    },
    "gemini-1.0-pro-vision": {
        "max_tokens": 16384,
        "max_output_tokens": 2048,
        "max_images_per_prompt": 16,
        "max_videos_per_prompt": 1,
        "max_video_length": 2,
        "input_cost_per_token": 0.00000025,
        "output_cost_per_token": 0.0000005,
        "litellm_provider": "vertex_ai-vision-models",
        "mode": "chat"
    },
    "textembedding-gecko": {
        "max_tokens": 3072,
        "max_input_tokens": 3072,
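These entries feed LiteLLM's token-limit checks and cost tracking; a small hedged sketch of reading them back once the map is loaded (`litellm.model_cost` mirrors this JSON, and the keys shown match the entries above):

```python
import litellm

# look up one of the entries updated above
entry = litellm.model_cost["gemini-1.0-pro"]
print(entry["max_tokens"], entry["input_cost_per_token"], entry["output_cost_per_token"])
```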
File diff suppressed because one or more lines are too long
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[48016,["145","static/chunks/145-9c160ad5539e000f.js","931","static/chunks/app/page-fcb69349f15d154b.js"],""]
|
||||
3:I[48016,["145","static/chunks/145-9c160ad5539e000f.js","931","static/chunks/app/page-7bb820bd6902dbf2.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["lLFQRQnIrRo-GJf5spHEd",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/c18941d97fb7245b.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["unBuvDqydg0yodtP5c3nQ",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_c23dc8","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/c18941d97fb7245b.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"🚅 LiteLLM"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
|
||||
1:null
|
||||
|
|
|
@ -819,6 +819,7 @@ async def _PROXY_track_cost_callback(
|
|||
user_id = user_id or kwargs["litellm_params"]["metadata"].get(
|
||||
"user_api_key_user_id", None
|
||||
)
|
||||
team_id = kwargs["litellm_params"]["metadata"].get("user_api_key_team_id", None)
|
||||
if kwargs.get("response_cost", None) is not None:
|
||||
response_cost = kwargs["response_cost"]
|
||||
user_api_key = kwargs["litellm_params"]["metadata"].get(
|
||||
|
@ -842,6 +843,7 @@ async def _PROXY_track_cost_callback(
|
|||
token=user_api_key,
|
||||
response_cost=response_cost,
|
||||
user_id=user_id,
|
||||
team_id=team_id,
|
||||
kwargs=kwargs,
|
||||
completion_response=completion_response,
|
||||
start_time=start_time,
|
||||
|
@ -879,6 +881,7 @@ async def update_database(
|
|||
token,
|
||||
response_cost,
|
||||
user_id=None,
|
||||
team_id=None,
|
||||
kwargs=None,
|
||||
completion_response=None,
|
||||
start_time=None,
|
||||
|
@ -886,7 +889,7 @@ async def update_database(
|
|||
):
|
||||
try:
|
||||
verbose_proxy_logger.info(
|
||||
f"Enters prisma db call, response_cost: {response_cost}, token: {token}; user_id: {user_id}"
|
||||
f"Enters prisma db call, response_cost: {response_cost}, token: {token}; user_id: {user_id}; team_id: {team_id}"
|
||||
)
|
||||
|
||||
### [TODO] STEP 1: GET KEY + USER SPEND ### (key, user)
|
||||
|
@ -1039,8 +1042,69 @@ async def update_database(
|
|||
except Exception as e:
|
||||
verbose_proxy_logger.info(f"Update Spend Logs DB failed to execute")
|
||||
|
||||
### UPDATE KEY SPEND ###
|
||||
async def _update_team_db():
|
||||
try:
|
||||
verbose_proxy_logger.debug(
|
||||
f"adding spend to team db. Response cost: {response_cost}. team_id: {team_id}."
|
||||
)
|
||||
if team_id is None:
|
||||
verbose_proxy_logger.debug(
|
||||
"track_cost_callback: team_id is None. Not tracking spend for team"
|
||||
)
|
||||
return
|
||||
if prisma_client is not None:
|
||||
# Fetch the existing cost for the given token
|
||||
existing_spend_obj = await prisma_client.get_data(
|
||||
team_id=team_id, table_name="team"
|
||||
)
|
||||
verbose_proxy_logger.debug(
|
||||
f"_update_team_db: existing spend: {existing_spend_obj}"
|
||||
)
|
||||
if existing_spend_obj is None:
|
||||
existing_spend = 0
|
||||
else:
|
||||
existing_spend = existing_spend_obj.spend
|
||||
# Calculate the new cost by adding the existing cost and response_cost
|
||||
new_spend = existing_spend + response_cost
|
||||
|
||||
verbose_proxy_logger.debug(f"new cost: {new_spend}")
|
||||
# Update the cost column for the given token
|
||||
await prisma_client.update_data(
|
||||
team_id=team_id, data={"spend": new_spend}, table_name="team"
|
||||
)
|
||||
|
||||
elif custom_db_client is not None:
|
||||
# Fetch the existing cost for the given token
|
||||
existing_spend_obj = await custom_db_client.get_data(
|
||||
key=token, table_name="key"
|
||||
)
|
||||
verbose_proxy_logger.debug(
|
||||
f"_update_key_db existing spend: {existing_spend_obj}"
|
||||
)
|
||||
if existing_spend_obj is None:
|
||||
existing_spend = 0
|
||||
else:
|
||||
existing_spend = existing_spend_obj.spend
|
||||
# Calculate the new cost by adding the existing cost and response_cost
|
||||
new_spend = existing_spend + response_cost
|
||||
|
||||
verbose_proxy_logger.debug(f"new cost: {new_spend}")
|
||||
# Update the cost column for the given token
|
||||
await custom_db_client.update_data(
|
||||
key=token, value={"spend": new_spend}, table_name="key"
|
||||
)
|
||||
|
||||
valid_token = user_api_key_cache.get_cache(key=token)
|
||||
if valid_token is not None:
|
||||
valid_token.spend = new_spend
|
||||
user_api_key_cache.set_cache(key=token, value=valid_token)
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.info(f"Update Team DB failed to execute")
|
||||
|
||||
asyncio.create_task(_update_user_db())
|
||||
asyncio.create_task(_update_key_db())
|
||||
asyncio.create_task(_update_team_db())
|
||||
asyncio.create_task(_insert_spend_log_to_db())
|
||||
verbose_proxy_logger.info("Successfully updated spend in all 3 tables")
|
||||
except Exception as e:
|
||||
|
@ -2143,6 +2207,9 @@ async def completion(
|
|||
data["metadata"]["user_api_key"] = user_api_key_dict.api_key
|
||||
data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata
|
||||
data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id
|
||||
data["metadata"]["user_api_key_team_id"] = getattr(
|
||||
user_api_key_dict, "team_id", None
|
||||
)
|
||||
_headers = dict(request.headers)
|
||||
_headers.pop(
|
||||
"authorization", None
|
||||
|
@ -2306,6 +2373,9 @@ async def chat_completion(
|
|||
data["metadata"] = {}
|
||||
data["metadata"]["user_api_key"] = user_api_key_dict.api_key
|
||||
data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id
|
||||
data["metadata"]["user_api_key_team_id"] = getattr(
|
||||
user_api_key_dict, "team_id", None
|
||||
)
|
||||
data["metadata"]["user_api_key_metadata"] = user_api_key_dict.metadata
|
||||
_headers = dict(request.headers)
|
||||
_headers.pop(
|
||||
|
@ -2527,6 +2597,9 @@ async def embeddings(
|
|||
) # do not store the original `sk-..` api key in the db
|
||||
data["metadata"]["headers"] = _headers
|
||||
data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id
|
||||
data["metadata"]["user_api_key_team_id"] = getattr(
|
||||
user_api_key_dict, "team_id", None
|
||||
)
|
||||
data["metadata"]["endpoint"] = str(request.url)
|
||||
|
||||
### TEAM-SPECIFIC PARAMS ###
|
||||
|
@ -2698,6 +2771,9 @@ async def image_generation(
|
|||
) # do not store the original `sk-..` api key in the db
|
||||
data["metadata"]["headers"] = _headers
|
||||
data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id
|
||||
data["metadata"]["user_api_key_team_id"] = getattr(
|
||||
user_api_key_dict, "team_id", None
|
||||
)
|
||||
data["metadata"]["endpoint"] = str(request.url)
|
||||
|
||||
### TEAM-SPECIFIC PARAMS ###
|
||||
|
@ -2853,6 +2929,9 @@ async def moderations(
|
|||
) # do not store the original `sk-..` api key in the db
|
||||
data["metadata"]["headers"] = _headers
|
||||
data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id
|
||||
data["metadata"]["user_api_key_team_id"] = getattr(
|
||||
user_api_key_dict, "team_id", None
|
||||
)
|
||||
data["metadata"]["endpoint"] = str(request.url)
|
||||
|
||||
### TEAM-SPECIFIC PARAMS ###
|
||||
|
@ -4208,6 +4287,9 @@ async def async_queue_request(
|
|||
) # do not store the original `sk-..` api key in the db
|
||||
data["metadata"]["headers"] = _headers
|
||||
data["metadata"]["user_api_key_user_id"] = user_api_key_dict.user_id
|
||||
data["metadata"]["user_api_key_team_id"] = getattr(
|
||||
user_api_key_dict, "team_id", None
|
||||
)
|
||||
data["metadata"]["endpoint"] = str(request.url)
|
||||
|
||||
global user_temperature, user_request_timeout, user_max_tokens, user_api_base
|
||||
|
|
|
@ -808,8 +808,9 @@ class PrismaClient:
|
|||
data: dict = {},
|
||||
data_list: Optional[List] = None,
|
||||
user_id: Optional[str] = None,
|
||||
team_id: Optional[str] = None,
|
||||
query_type: Literal["update", "update_many"] = "update",
|
||||
table_name: Optional[Literal["user", "key", "config", "spend"]] = None,
|
||||
table_name: Optional[Literal["user", "key", "config", "spend", "team"]] = None,
|
||||
update_key_values: Optional[dict] = None,
|
||||
):
|
||||
"""
|
||||
|
@ -860,6 +861,35 @@ class PrismaClient:
|
|||
+ "\033[0m"
|
||||
)
|
||||
return {"user_id": user_id, "data": db_data}
|
||||
elif (
|
||||
team_id is not None
|
||||
or (table_name is not None and table_name == "team")
|
||||
and query_type == "update"
|
||||
):
|
||||
"""
|
||||
If data['spend'] + data['user'], update the user table with spend info as well
|
||||
"""
|
||||
if team_id is None:
|
||||
team_id = db_data["team_id"]
|
||||
if update_key_values is None:
|
||||
update_key_values = db_data
|
||||
if "team_id" not in db_data and team_id is not None:
|
||||
db_data["team_id"] = team_id
|
||||
update_team_row = await self.db.litellm_teamtable.upsert(
|
||||
where={"team_id": team_id}, # type: ignore
|
||||
data={
|
||||
"create": {**db_data}, # type: ignore
|
||||
"update": {
|
||||
**update_key_values # type: ignore
|
||||
}, # just update user-specified values, if it already exists
|
||||
},
|
||||
)
|
||||
verbose_proxy_logger.info(
|
||||
"\033[91m"
|
||||
+ f"DB Team Table - update succeeded {update_team_row}"
|
||||
+ "\033[0m"
|
||||
)
|
||||
return {"team_id": team_id, "data": db_data}
|
||||
elif (
|
||||
table_name is not None
|
||||
and table_name == "key"
|
||||
|
|
46 litellm/tests/test_custom_api_logger.py Normal file
@ -0,0 +1,46 @@
import sys
import os
import io, asyncio

# import logging
# logging.basicConfig(level=logging.DEBUG)
sys.path.insert(0, os.path.abspath("../.."))
print("Modified sys.path:", sys.path)


from litellm import completion
import litellm

litellm.num_retries = 3

import time, random
import pytest


@pytest.mark.asyncio
@pytest.mark.skip(reason="new beta feature, will be testing in our ci/cd soon")
async def test_custom_api_logging():
    try:
        litellm.success_callback = ["generic"]
        litellm.set_verbose = True
        os.environ["GENERIC_LOGGER_ENDPOINT"] = "http://localhost:8000/log-event"

        print("Testing generic api logging")

        await litellm.acompletion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": f"This is a test"}],
            max_tokens=10,
            temperature=0.7,
            user="ishaan-2",
        )

    except Exception as e:
        pytest.fail(f"An exception occurred - {e}")
    finally:
        # post, close log file and verify
        # Reset stdout to the original value
        print("Passed! Testing async s3 logging")


# test_s3_logging()
@ -44,9 +44,9 @@ except:
        filename = str(
            resources.files(litellm).joinpath("llms/tokenizers")  # for python 3.10
        )  # for python 3.10+
        os.environ["TIKTOKEN_CACHE_DIR"] = (
            filename  # use local copy of tiktoken b/c of - https://github.com/BerriAI/litellm/issues/1071
        )
        os.environ[
            "TIKTOKEN_CACHE_DIR"
        ] = filename  # use local copy of tiktoken b/c of - https://github.com/BerriAI/litellm/issues/1071

encoding = tiktoken.get_encoding("cl100k_base")
import importlib.metadata
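For context, a small sketch of what this env var does — `TIKTOKEN_CACHE_DIR` tells tiktoken where to look for (and cache) its BPE files so no network fetch is needed; the path below is a placeholder:

```python
import os
import tiktoken

# placeholder path - litellm points this at its bundled llms/tokenizers directory
os.environ["TIKTOKEN_CACHE_DIR"] = "/path/to/litellm/llms/tokenizers"

encoding = tiktoken.get_encoding("cl100k_base")
print(len(encoding.encode("hello world")))
```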
@ -4256,7 +4256,14 @@ def get_optional_params(
|
|||
optional_params["stop_sequences"] = stop
|
||||
if max_tokens is not None:
|
||||
optional_params["max_output_tokens"] = max_tokens
|
||||
elif custom_llm_provider == "vertex_ai":
|
||||
elif custom_llm_provider == "vertex_ai" and model in (
|
||||
litellm.vertex_chat_models
|
||||
or model in litellm.vertex_code_chat_models
|
||||
or model in litellm.vertex_text_models
|
||||
or model in litellm.vertex_code_text_models
|
||||
or model in litellm.vertex_language_models
|
||||
or model in litellm.vertex_embedding_models
|
||||
):
|
||||
## check if unsupported param passed in
|
||||
supported_params = [
|
||||
"temperature",
|
||||
|
|
|
@ -642,21 +642,40 @@
|
|||
"mode": "chat"
|
||||
},
|
||||
"gemini-pro": {
|
||||
"max_tokens": 30720,
|
||||
"max_tokens": 32760,
|
||||
"max_output_tokens": 2048,
|
||||
"input_cost_per_token": 0.00000025,
|
||||
"output_cost_per_token": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-1.0-pro": {
|
||||
"max_tokens": 32760,
|
||||
"max_output_tokens": 8192,
|
||||
"input_cost_per_token": 0.00000025,
|
||||
"output_cost_per_token": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-pro-vision": {
|
||||
"max_tokens": 30720,
|
||||
"max_tokens": 16384,
|
||||
"max_output_tokens": 2048,
|
||||
"input_cost_per_token": 0.00000025,
|
||||
"output_cost_per_token": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-vision-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"gemini-1.0-pro-vision": {
|
||||
"max_tokens": 16384,
|
||||
"max_output_tokens": 2048,
|
||||
"max_images_per_prompt": 16,
|
||||
"max_videos_per_prompt": 1,
|
||||
"max_video_length": 2,
|
||||
"input_cost_per_token": 0.00000025,
|
||||
"output_cost_per_token": 0.0000005,
|
||||
"litellm_provider": "vertex_ai-vision-models",
|
||||
"mode": "chat"
|
||||
},
|
||||
"textembedding-gecko": {
|
||||
"max_tokens": 3072,
|
||||
"max_input_tokens": 3072,
|
||||
|
|
|
@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.24.0"
version = "1.24.3"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"

@ -69,7 +69,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "1.24.0"
version = "1.24.3"
version_files = [
    "pyproject.toml:^version"
]

File diff suppressed because one or more lines are too long
0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"🚅 LiteLLM\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
|
@@ -1,7 +1,7 @@
(Generated React Server Components flight payload for the same page: the referenced page chunk changes from static/chunks/app/page-fcb69349f15d154b.js to static/chunks/app/page-7bb820bd6902dbf2.js and the buildId from lLFQRQnIrRo-GJf5spHEd to unBuvDqydg0yodtP5c3nQ; the surrounding payload lines (2:I, 4:I, 5:I, the meta/link entries, and 1:null) are unchanged.)
@@ -21,9 +21,13 @@ interface ChatUIProps {
 }
 
 async function generateModelResponse(inputMessage: string, updateUI: (chunk: string) => void, selectedModel: string, accessToken: string) {
-  const client = new openai.OpenAI({
+  // base url should be the current base_url
+  const isLocal = process.env.NODE_ENV === "development";
+  console.log("isLocal:", isLocal);
+  const proxyBaseUrl = isLocal ? "http://localhost:4000" : window.location.origin;
+  const client = new openai.OpenAI({
     apiKey: accessToken, // Replace with your OpenAI API key
-    baseURL: 'http://0.0.0.0:4000', // Replace with your OpenAI API base URL
+    baseURL: proxyBaseUrl, // Replace with your OpenAI API base URL
     dangerouslyAllowBrowser: true, // using a temporary litellm proxy key
   });
 
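This change swaps the hard-coded `http://0.0.0.0:4000` base URL for one derived from the page itself, so the dashboard's chat tab talks to whatever host is actually serving the proxy. Below is a minimal sketch of how the updated `generateModelResponse` flow fits together, assuming the `openai` npm package (v4 streaming API); the `isLocal`/`proxyBaseUrl` logic and the client options come from the diff, while the streaming call, message shape, and `updateUI` usage are illustrative assumptions.

```typescript
import openai from "openai";

// Sketch of the chat UI request path after this change (assumptions noted above).
async function generateModelResponse(
  inputMessage: string,
  updateUI: (chunk: string) => void,
  selectedModel: string,
  accessToken: string
) {
  // In local development the proxy runs on :4000; in production the UI is
  // served by the proxy itself, so the browser origin is the proxy URL.
  const isLocal = process.env.NODE_ENV === "development";
  const proxyBaseUrl = isLocal ? "http://localhost:4000" : window.location.origin;

  const client = new openai.OpenAI({
    apiKey: accessToken,           // temporary LiteLLM proxy key, not a raw provider key
    baseURL: proxyBaseUrl,         // route requests through the LiteLLM proxy
    dangerouslyAllowBrowser: true, // acceptable because the key is proxy-scoped
  });

  // Stream the completion and forward each text delta to the UI callback.
  const stream = await client.chat.completions.create({
    model: selectedModel,
    stream: true,
    messages: [{ role: "user", content: inputMessage }],
  });

  for await (const chunk of stream) {
    updateUI(chunk.choices[0]?.delta?.content ?? "");
  }
}
```

Deriving the base URL from `window.location.origin` lets the built UI run wherever the proxy is deployed without per-environment configuration; only local development needs the explicit `localhost:4000` fallback.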