forked from phoenix/litellm-mirror

commit d54d4b6734
Merge branch 'BerriAI:main' into main

81 changed files with 2022 additions and 1231 deletions
10 .github/workflows/ghcr_deploy.yml vendored
@@ -289,7 +289,8 @@ jobs:
             repo: context.repo.repo,
             release_id: process.env.RELEASE_ID,
           });
-          return response.data.body;
+          const formattedBody = JSON.stringify(response.data.body).slice(1, -1);
+          return formattedBody;
         } catch (error) {
           core.setFailed(error.message);
         }
@@ -302,14 +303,15 @@ jobs:
           RELEASE_NOTES: ${{ steps.release-notes.outputs.result }}
         run: |
           curl -H "Content-Type: application/json" -X POST -d '{
-          "content": "New LiteLLM release ${{ env.RELEASE_TAG }}",
+          "content": "New LiteLLM release '"${RELEASE_TAG}"'",
           "username": "Release Changelog",
           "avatar_url": "https://cdn.discordapp.com/avatars/487431320314576937/bd64361e4ba6313d561d54e78c9e7171.png",
           "embeds": [
             {
-              "title": "Changelog for LiteLLM ${{ env.RELEASE_TAG }}",
-              "description": "${{ env.RELEASE_NOTES }}",
+              "title": "Changelog for LiteLLM '"${RELEASE_TAG}"'",
+              "description": "'"${RELEASE_NOTES}"'",
               "color": 2105893
             }
           ]
          }' $WEBHOOK_URL
+
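The `JSON.stringify(response.data.body).slice(1, -1)` change pairs with the webhook change above: the release notes are spliced into a hand-built JSON payload, and raw markdown bodies contain newlines and quotes that would break it. A minimal Python sketch of the same escaping trick (variable names are illustrative):

```python
import json

release_notes = 'Fixes:\n* "quoted" change\n* another line'

# json.dumps escapes \n and " and wraps the result in quotes; slicing off
# the first and last character leaves a fragment that is safe to splice
# into a larger hand-built JSON document, like the webhook body above.
escaped = json.dumps(release_notes)[1:-1]

body = '{"description": "%s"}' % escaped
json.loads(body)  # parses cleanly; splicing the raw string would fail here
print(body)
```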
@@ -25,6 +25,10 @@ repos:
         exclude: ^litellm/tests/|^litellm/proxy/tests/
         additional_dependencies: [flake8-print]
         files: litellm/.*\.py
+-   repo: https://github.com/python-poetry/poetry
+    rev: 1.8.0
+    hooks:
+    -   id: poetry-check
 -   repo: local
     hooks:
     -   id: check-files-match
@@ -151,12 +151,9 @@ Navigate to the Usage Tab on the LiteLLM UI (found on https://your-proxy-endpoin
 </Tabs>
 
 ## ✨ (Enterprise) API Endpoints to get Spend
-#### Getting Spend Reports - To Charge Other Teams, Customers
+#### Getting Spend Reports - To Charge Other Teams, Customers, Users
 
-Use the `/global/spend/report` endpoint to get daily spend report per
-- Team
-- Customer [this is `user` passed to `/chat/completions` request](#how-to-track-spend-with-litellm)
-- [LiteLLM API key](virtual_keys.md)
+Use the `/global/spend/report` endpoint to get spend reports
 
 <Tabs>
@@ -285,6 +282,16 @@ Output from script
 
+<TabItem value="per customer" label="Spend Per Customer">
+
+:::info
+
+Customer: This is the value of `user_id` passed when calling [`/key/generate`](https://litellm-api.up.railway.app/#/key%20management/generate_key_fn_key_generate_post)
+
+[this is `user` passed to `/chat/completions` request](#how-to-track-spend-with-litellm)
+- [LiteLLM API key](virtual_keys.md)
+
+:::
+
+##### Example Request
+
+👉 Key Change: Specify `group_by=customer`
+
@@ -341,14 +348,14 @@ curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end
 
 </TabItem>
 
-<TabItem value="per key" label="Spend Per API Key">
+<TabItem value="per key" label="Spend for Specific API Key">
 
 
-👉 Key Change: Specify `group_by=api_key`
+👉 Key Change: Specify `api_key=sk-1234`
 
 
 ```shell
-curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30&group_by=api_key' \
+curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30&api_key=sk-1234' \
 -H 'Authorization: Bearer sk-1234'
 ```
@@ -357,32 +364,18 @@ curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end
 
 ```shell
 [
-    {
-        "api_key": "ad64768847d05d978d62f623d872bff0f9616cc14b9c1e651c84d14fe3b9f539",
-        "total_cost": 0.0002157,
-        "total_input_tokens": 45.0,
-        "total_output_tokens": 1375.0,
-        "model_details": [
-            {
-                "model": "gpt-3.5-turbo",
-                "total_cost": 0.0001095,
-                "total_input_tokens": 9,
-                "total_output_tokens": 70
-            },
-            {
-                "model": "llama3-8b-8192",
-                "total_cost": 0.0001062,
-                "total_input_tokens": 36,
-                "total_output_tokens": 1305
-            }
-        ]
-    },
     {
         "api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
-        "total_cost": 0.00012924,
+        "total_cost": 0.3201286305151999,
        "total_input_tokens": 36.0,
        "total_output_tokens": 1593.0,
        "model_details": [
+            {
+                "model": "dall-e-3",
+                "total_cost": 0.31999939051519993,
+                "total_input_tokens": 0,
+                "total_output_tokens": 0
+            },
            {
                "model": "llama3-8b-8192",
                "total_cost": 0.00012924,
@@ -396,6 +389,87 @@ curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end
 
 </TabItem>
 
+<TabItem value="per user" label="Spend for Internal User (Key Owner)">
+
+:::info
+
+Internal User (Key Owner): This is the value of `user_id` passed when calling [`/key/generate`](https://litellm-api.up.railway.app/#/key%20management/generate_key_fn_key_generate_post)
+
+:::
+
+
+👉 Key Change: Specify `internal_user_id=ishaan`
+
+
+```shell
+curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-12-30&internal_user_id=ishaan' \
+-H 'Authorization: Bearer sk-1234'
+```
+
+##### Example Response
+
+
+```shell
+[
+    {
+        "api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
+        "total_cost": 0.00013132,
+        "total_input_tokens": 105.0,
+        "total_output_tokens": 872.0,
+        "model_details": [
+            {
+                "model": "gpt-3.5-turbo-instruct",
+                "total_cost": 5.85e-05,
+                "total_input_tokens": 15,
+                "total_output_tokens": 18
+            },
+            {
+                "model": "llama3-8b-8192",
+                "total_cost": 7.282000000000001e-05,
+                "total_input_tokens": 90,
+                "total_output_tokens": 854
+            }
+        ]
+    },
+    {
+        "api_key": "151e85e46ab8c9c7fad090793e3fe87940213f6ae665b543ca633b0b85ba6dc6",
+        "total_cost": 5.2699999999999993e-05,
+        "total_input_tokens": 26.0,
+        "total_output_tokens": 27.0,
+        "model_details": [
+            {
+                "model": "gpt-3.5-turbo",
+                "total_cost": 5.2499999999999995e-05,
+                "total_input_tokens": 24,
+                "total_output_tokens": 27
+            },
+            {
+                "model": "text-embedding-ada-002",
+                "total_cost": 2e-07,
+                "total_input_tokens": 2,
+                "total_output_tokens": 0
+            }
+        ]
+    },
+    {
+        "api_key": "60cb83a2dcbf13531bd27a25f83546ecdb25a1a6deebe62d007999dc00e1e32a",
+        "total_cost": 9.42e-06,
+        "total_input_tokens": 30.0,
+        "total_output_tokens": 99.0,
+        "model_details": [
+            {
+                "model": "llama3-8b-8192",
+                "total_cost": 9.42e-06,
+                "total_input_tokens": 30,
+                "total_output_tokens": 99
+            }
+        ]
+    }
+]
+```
+
+</TabItem>
+
 </Tabs>
 
 #### Allowing Non-Proxy Admins to access `/spend` endpoints
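The three tabs above differ only in one query parameter. A minimal Python sketch of the same calls (assuming a proxy at localhost:4000 and the master key `sk-1234` used throughout these docs):

```python
import requests

BASE_URL = "http://localhost:4000"
HEADERS = {"Authorization": "Bearer sk-1234"}

# Pick exactly one of: group_by=customer, api_key=<key>,
# internal_user_id=<user_id from /key/generate>
params = {
    "start_date": "2024-04-01",
    "end_date": "2024-06-30",
    "group_by": "customer",
}

response = requests.get(
    f"{BASE_URL}/global/spend/report", headers=HEADERS, params=params
)
response.raise_for_status()
for row in response.json():
    print(row)
```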
@@ -1120,12 +1120,14 @@ This is a beta feature, and subject to changes.
 USE_AWS_KMS="True"
 ```
 
-**Step 2.** Add `aws_kms/` to encrypted keys in env
+**Step 2.** Add `LITELLM_SECRET_AWS_KMS_` to encrypted keys in env
 
 ```env
-DATABASE_URL="aws_kms/AQICAH.."
+LITELLM_SECRET_AWS_KMS_DATABASE_URL="AQICAH.."
 ```
 
+LiteLLM will find this and use the decrypted `DATABASE_URL="postgres://.."` value in runtime.
+
 **Step 3.** Start proxy
 
 ```
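A rough sketch of the renamed convention (this illustrates the naming scheme only, not LiteLLM's actual decryption code; `kms_decrypt` is a hypothetical callable):

```python
import os

PREFIX = "LITELLM_SECRET_AWS_KMS_"

def resolve_kms_secrets(kms_decrypt) -> None:
    # For every LITELLM_SECRET_AWS_KMS_<NAME> env var, decrypt the value
    # and re-export it under the plain <NAME>, e.g.
    # LITELLM_SECRET_AWS_KMS_DATABASE_URL="AQICAH.." -> DATABASE_URL="postgres://.."
    for key, value in list(os.environ.items()):
        if key.startswith(PREFIX):
            os.environ[key[len(PREFIX):]] = kms_decrypt(value)
```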
@@ -1,7 +1,7 @@
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';
 
-# Use with Langchain, OpenAI SDK, LlamaIndex, Curl
+# Use with Langchain, OpenAI SDK, LlamaIndex, Instructor, Curl
 
 :::info
 
@@ -173,6 +173,37 @@ console.log(message);
 
 ```
 
 </TabItem>
+<TabItem value="instructor" label="Instructor">
+
+```python
+from openai import OpenAI
+import instructor
+from pydantic import BaseModel
+
+my_proxy_api_key = "" # e.g. sk-1234
+my_proxy_base_url = "" # e.g. http://0.0.0.0:4000
+
+# This enables response_model keyword
+# from client.chat.completions.create
+client = instructor.from_openai(OpenAI(api_key=my_proxy_api_key, base_url=my_proxy_base_url))
+
+class UserDetail(BaseModel):
+    name: str
+    age: int
+
+user = client.chat.completions.create(
+    model="gemini-pro-flash",
+    response_model=UserDetail,
+    messages=[
+        {"role": "user", "content": "Extract Jason is 25 years old"},
+    ]
+)
+
+assert isinstance(user, UserDetail)
+assert user.name == "Jason"
+assert user.age == 25
+```
+</TabItem>
 </Tabs>
 
@@ -205,6 +236,97 @@ console.log(message);
 
 ```
 
+### Function Calling
+
+Here are some examples of doing function calling with the proxy.
+
+You can use the proxy for function calling with **any** openai-compatible project.
+
+<Tabs>
+<TabItem value="curl" label="curl">
+
+```bash
+curl http://0.0.0.0:4000/v1/chat/completions \
+-H "Content-Type: application/json" \
+-H "Authorization: Bearer $OPTIONAL_YOUR_PROXY_KEY" \
+-d '{
+  "model": "gpt-4-turbo",
+  "messages": [
+    {
+      "role": "user",
+      "content": "What'\''s the weather like in Boston today?"
+    }
+  ],
+  "tools": [
+    {
+      "type": "function",
+      "function": {
+        "name": "get_current_weather",
+        "description": "Get the current weather in a given location",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "location": {
+              "type": "string",
+              "description": "The city and state, e.g. San Francisco, CA"
+            },
+            "unit": {
+              "type": "string",
+              "enum": ["celsius", "fahrenheit"]
+            }
+          },
+          "required": ["location"]
+        }
+      }
+    }
+  ],
+  "tool_choice": "auto"
+}'
+```
+</TabItem>
+<TabItem value="sdk" label="SDK">
+
+```python
+from openai import OpenAI
+client = OpenAI(
+    api_key="sk-1234",  # [OPTIONAL] set if you set one on proxy, else set ""
+    base_url="http://0.0.0.0:4000",
+)
+
+tools = [
+  {
+    "type": "function",
+    "function": {
+      "name": "get_current_weather",
+      "description": "Get the current weather in a given location",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "location": {
+            "type": "string",
+            "description": "The city and state, e.g. San Francisco, CA",
+          },
+          "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+        },
+        "required": ["location"],
+      },
+    }
+  }
+]
+messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
+completion = client.chat.completions.create(
+    model="gpt-4o",  # use 'model_name' from config.yaml
+    messages=messages,
+    tools=tools,
+    tool_choice="auto"
+)
+
+print(completion)
+
+```
+</TabItem>
+</Tabs>
+
 ## `/embeddings`
 
 ### Request Format
@@ -248,8 +248,14 @@ class RedisCache(BaseCache):
             # asyncio.get_running_loop().create_task(self.ping())
             result = asyncio.get_running_loop().create_task(self.ping())
         except Exception as e:
-            verbose_logger.error(
-                "Error connecting to Async Redis client", extra={"error": str(e)}
-            )
+            if "no running event loop" in str(e):
+                verbose_logger.debug(
+                    "Ignoring async redis ping. No running event loop."
+                )
+            else:
+                verbose_logger.error(
+                    "Error connecting to Async Redis client - {}".format(str(e)),
+                    extra={"error": str(e)},
+                )
 
         ### SYNC HEALTH PING ###
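The new branch exists because `asyncio.get_running_loop()` raises a `RuntimeError` containing "no running event loop" whenever the cache is constructed from synchronous code, which is routine rather than an error. A minimal reproduction:

```python
import asyncio

try:
    # Outside any running event loop this raises before create_task runs.
    asyncio.get_running_loop().create_task(asyncio.sleep(0))
except RuntimeError as e:
    assert "no running event loop" in str(e)
    print(f"expected when called synchronously: {e}")
```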
@@ -4,6 +4,8 @@ import time
 import traceback
 from typing import List, Literal, Optional, Tuple, Union
 
+from pydantic import BaseModel
+
 import litellm
 import litellm._logging
 from litellm import verbose_logger
@@ -13,6 +15,10 @@ from litellm.litellm_core_utils.llm_cost_calc.google import (
 from litellm.litellm_core_utils.llm_cost_calc.google import (
     cost_per_token as google_cost_per_token,
 )
+from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_character
+from litellm.types.llms.openai import HttpxBinaryResponseContent
+from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
+
 from litellm.utils import (
     CallTypes,
     CostPerToken,
@@ -62,6 +68,23 @@ def cost_per_token(
     ### CUSTOM PRICING ###
     custom_cost_per_token: Optional[CostPerToken] = None,
     custom_cost_per_second: Optional[float] = None,
+    ### CALL TYPE ###
+    call_type: Literal[
+        "embedding",
+        "aembedding",
+        "completion",
+        "acompletion",
+        "atext_completion",
+        "text_completion",
+        "image_generation",
+        "aimage_generation",
+        "moderation",
+        "amoderation",
+        "atranscription",
+        "transcription",
+        "aspeech",
+        "speech",
+    ] = "completion",
 ) -> Tuple[float, float]:
     """
     Calculates the cost per token for a given model, prompt tokens, and completion tokens.
@@ -76,6 +99,7 @@ def cost_per_token(
     custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list)
     custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
     custom_cost_per_second: Optional[float]: the cost per second for the llm api call.
+    call_type: Optional[str]: the call type
 
     Returns:
         tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively.
@@ -159,6 +183,27 @@ def cost_per_token(
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
         )
+    elif call_type == "speech" or call_type == "aspeech":
+        prompt_cost, completion_cost = _generic_cost_per_character(
+            model=model_without_prefix,
+            custom_llm_provider=custom_llm_provider,
+            prompt_characters=prompt_characters,
+            completion_characters=completion_characters,
+            custom_prompt_cost=None,
+            custom_completion_cost=0,
+        )
+        if prompt_cost is None or completion_cost is None:
+            raise ValueError(
+                "cost for tts call is None. prompt_cost={}, completion_cost={}, model={}, custom_llm_provider={}, prompt_characters={}, completion_characters={}".format(
+                    prompt_cost,
+                    completion_cost,
+                    model_without_prefix,
+                    custom_llm_provider,
+                    prompt_characters,
+                    completion_characters,
+                )
+            )
+        return prompt_cost, completion_cost
     elif model in model_cost_ref:
         print_verbose(f"Success: model={model} in model_cost_map")
         print_verbose(
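A hedged sketch of calling the new branch directly (keyword names come from the diff above; it assumes the target model carries `input_cost_per_character` pricing in litellm's cost map, as OpenAI's `tts-1` does):

```python
import litellm

prompt_cost, completion_cost = litellm.cost_per_token(
    model="tts-1",
    custom_llm_provider="openai",
    prompt_tokens=0,
    completion_tokens=0,
    prompt_characters=1000.0,   # characters sent to the TTS endpoint
    completion_characters=0.0,  # audio out, no text tokens
    call_type="speech",         # routes into _generic_cost_per_character
)
print(prompt_cost, completion_cost)  # e.g. (0.015, 0.0) at $15 / 1M characters
```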
@@ -289,7 +334,7 @@ def cost_per_token(
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     else:
         # if model is not in model_prices_and_context_window.json. Raise an exception-let users know
-        error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
+        error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}, custom_llm_provider={custom_llm_provider}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
         raise litellm.exceptions.NotFoundError(  # type: ignore
             message=error_str,
             model=model,
@@ -429,7 +474,10 @@ def completion_cost(
     prompt_characters = 0
     completion_tokens = 0
     completion_characters = 0
-    if completion_response is not None:
+    if completion_response is not None and (
+        isinstance(completion_response, BaseModel)
+        or isinstance(completion_response, dict)
+    ):  # tts returns a custom class
         # get input/output tokens from completion_response
         prompt_tokens = completion_response.get("usage", {}).get("prompt_tokens", 0)
         completion_tokens = completion_response.get("usage", {}).get(
@@ -535,6 +583,11 @@ def completion_cost(
             raise Exception(
                 f"Model={image_gen_model_name} not found in completion cost model map"
             )
+        elif (
+            call_type == CallTypes.speech.value or call_type == CallTypes.aspeech.value
+        ):
+            prompt_characters = litellm.utils._count_characters(text=prompt)
+
         # Calculate cost based on prompt_tokens, completion_tokens
         if (
             "togethercomputer" in model
@@ -591,6 +644,7 @@ def completion_cost(
             custom_cost_per_token=custom_cost_per_token,
             prompt_characters=prompt_characters,
             completion_characters=completion_characters,
+            call_type=call_type,
         )
         _final_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
         print_verbose(
@@ -608,6 +662,7 @@ def response_cost_calculator(
         ImageResponse,
         TranscriptionResponse,
         TextCompletionResponse,
+        HttpxBinaryResponseContent,
     ],
     model: str,
     custom_llm_provider: Optional[str],
@@ -641,6 +696,7 @@ def response_cost_calculator(
     if cache_hit is not None and cache_hit is True:
         response_cost = 0.0
     else:
+        if isinstance(response_object, BaseModel):
+            response_object._hidden_params["optional_params"] = optional_params
         if isinstance(response_object, ImageResponse):
             response_cost = completion_cost(
@@ -651,12 +707,11 @@ def response_cost_calculator(
             )
         else:
             if (
-                model in litellm.model_cost
-                and custom_pricing is not None
-                and custom_llm_provider is True
+                model in litellm.model_cost or custom_pricing is True
             ):  # override defaults if custom pricing is set
                 base_model = model
             # base_model defaults to None if not set on model_info
 
             response_cost = completion_cost(
                 completion_response=response_object,
                 call_type=call_type,
@@ -32,6 +32,12 @@ class LangFuseLogger:
         self.langfuse_host = langfuse_host or os.getenv(
             "LANGFUSE_HOST", "https://cloud.langfuse.com"
         )
+        if not (
+            self.langfuse_host.startswith("http://")
+            or self.langfuse_host.startswith("https://")
+        ):
+            # add http:// if unset, assume communicating over private network - e.g. render
+            self.langfuse_host = "http://" + self.langfuse_host
         self.langfuse_release = os.getenv("LANGFUSE_RELEASE")
         self.langfuse_debug = os.getenv("LANGFUSE_DEBUG")
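The effect of the new guard, in isolation (host value is illustrative):

```python
host = "langfuse.internal:3000"  # e.g. a service name on a private network
if not (host.startswith("http://") or host.startswith("https://")):
    # mirror of the new logic: assume plain http inside the network
    host = "http://" + host
assert host == "http://langfuse.internal:3000"
```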
@@ -29,6 +29,7 @@ else:
 LITELLM_TRACER_NAME = os.getenv("OTEL_TRACER_NAME", "litellm")
 LITELLM_RESOURCE = {
     "service.name": os.getenv("OTEL_SERVICE_NAME", "litellm"),
+    "deployment.environment": os.getenv("OTEL_ENVIRONMENT_NAME", "production"),
 }
 RAW_REQUEST_SPAN_NAME = "raw_gen_ai_request"
 LITELLM_REQUEST_SPAN_NAME = "litellm_request"
@@ -24,6 +24,8 @@ from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.redact_messages import (
     redact_message_input_output_from_logging,
 )
+from litellm.types.llms.openai import HttpxBinaryResponseContent
+from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
 from litellm.types.utils import (
     CallTypes,
     EmbeddingResponse,
@@ -517,18 +519,20 @@ class Logging:
             self.model_call_details["cache_hit"] = cache_hit
             ## if model in model cost map - log the response cost
             ## else set cost to None
             verbose_logger.debug(f"Model={self.model};")
             if (
-                result is not None
-                and (
+                result is not None and self.stream is not True
+            ):  # handle streaming separately
+                if (
                     isinstance(result, ModelResponse)
                     or isinstance(result, EmbeddingResponse)
                     or isinstance(result, ImageResponse)
                     or isinstance(result, TranscriptionResponse)
                     or isinstance(result, TextCompletionResponse)
+                    or isinstance(result, HttpxBinaryResponseContent)  # tts
                 ):
+                    custom_pricing = use_custom_pricing_for_model(
+                        litellm_params=self.litellm_params
+                    )
-                and self.stream != True
-            ):  # handle streaming separately
                 self.model_call_details["response_cost"] = (
                     litellm.response_cost_calculator(
                         response_object=result,
@@ -542,6 +546,7 @@ class Logging:
                         ),
                         call_type=self.call_type,
                         optional_params=self.optional_params,
+                        custom_pricing=custom_pricing,
                     )
                 )
             else:  # streaming chunks + image gen.
@@ -600,8 +605,7 @@ class Logging:
                 verbose_logger.error(
                     "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while building complete streaming response in success logging {}\n{}".format(
                         str(e), traceback.format_exc()
                     ),
-                    log_level="ERROR",
                 )
             complete_streaming_response = None
         else:
@@ -626,7 +630,11 @@ class Logging:
                     model_call_details=self.model_call_details
                 ),
                 call_type=self.call_type,
-                optional_params=self.optional_params,
+                optional_params=(
+                    self.optional_params
+                    if hasattr(self, "optional_params")
+                    else {}
+                ),
             )
         )
         if self.dynamic_success_callbacks is not None and isinstance(
@@ -1795,7 +1803,6 @@ def set_callbacks(callback_list, function_id=None):
 
     try:
         for callback in callback_list:
-            print_verbose(f"init callback list: {callback}")
             if callback == "sentry":
                 try:
                     import sentry_sdk
@@ -2013,3 +2020,17 @@ def get_custom_logger_compatible_class(
         if isinstance(callback, _PROXY_DynamicRateLimitHandler):
             return callback  # type: ignore
     return None
+
+
+def use_custom_pricing_for_model(litellm_params: Optional[dict]) -> bool:
+    if litellm_params is None:
+        return False
+    metadata: Optional[dict] = litellm_params.get("metadata", {})
+    if metadata is None:
+        return False
+    model_info: Optional[dict] = metadata.get("model_info", {})
+    if model_info is not None:
+        for k, v in model_info.items():
+            if k in SPECIAL_MODEL_INFO_PARAMS:
+                return True
+    return False
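For reference, a sketch of the `litellm_params` shape that flips the new helper to `True` (`SPECIAL_MODEL_INFO_PARAMS` holds the custom-pricing keys, e.g. `input_cost_per_token`; the deployment id below is illustrative):

```python
litellm_params = {
    "metadata": {
        "model_info": {
            "id": "my-azure-gpt-4o",            # illustrative deployment id
            "input_cost_per_token": 0.0000025,  # custom pricing key -> True
            "output_cost_per_token": 0.00001,
        }
    }
}
# use_custom_pricing_for_model(litellm_params)     -> True
# use_custom_pricing_for_model({"metadata": None}) -> False
```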
85 litellm/litellm_core_utils/llm_cost_calc/utils.py Normal file
@@ -0,0 +1,85 @@
+# What is this?
+## Helper utilities for cost_per_token()
+
+import traceback
+from typing import List, Literal, Optional, Tuple
+
+import litellm
+from litellm import verbose_logger
+
+
+def _generic_cost_per_character(
+    model: str,
+    custom_llm_provider: str,
+    prompt_characters: float,
+    completion_characters: float,
+    custom_prompt_cost: Optional[float],
+    custom_completion_cost: Optional[float],
+) -> Tuple[Optional[float], Optional[float]]:
+    """
+    Generic function to help calculate cost per character.
+    """
+    """
+    Calculates the cost per character for a given model, input messages, and response object.
+
+    Input:
+        - model: str, the model name without provider prefix
+        - custom_llm_provider: str, "vertex_ai-*"
+        - prompt_characters: float, the number of input characters
+        - completion_characters: float, the number of output characters
+
+    Returns:
+        Tuple[Optional[float], Optional[float]] - prompt_cost_in_usd, completion_cost_in_usd.
+        - returns None if not able to calculate cost.
+
+    Raises:
+        Exception if 'input_cost_per_character' or 'output_cost_per_character' is missing from model_info
+    """
+    args = locals()
+    ## GET MODEL INFO
+    model_info = litellm.get_model_info(
+        model=model, custom_llm_provider=custom_llm_provider
+    )
+
+    ## CALCULATE INPUT COST
+    try:
+        if custom_prompt_cost is None:
+            assert (
+                "input_cost_per_character" in model_info
+                and model_info["input_cost_per_character"] is not None
+            ), "model info for model={} does not have 'input_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            custom_prompt_cost = model_info["input_cost_per_character"]
+
+        prompt_cost = prompt_characters * custom_prompt_cost
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.utils.py::cost_per_character(): Exception occured - {}\n{}\nDefaulting to None".format(
+                str(e), traceback.format_exc()
+            )
+        )
+
+        prompt_cost = None
+
+    ## CALCULATE OUTPUT COST
+    try:
+        if custom_completion_cost is None:
+            assert (
+                "output_cost_per_character" in model_info
+                and model_info["output_cost_per_character"] is not None
+            ), "model info for model={} does not have 'output_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            custom_completion_cost = model_info["output_cost_per_character"]
+        completion_cost = completion_characters * custom_completion_cost
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.utils.py::cost_per_character(): Exception occured - {}\n{}\nDefaulting to None".format(
+                str(e), traceback.format_exc()
+            )
+        )
+
+        completion_cost = None
+
+    return prompt_cost, completion_cost
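A usage sketch for the new helper (the model and provider values are illustrative and must carry `input_cost_per_character` / `output_cost_per_character` in litellm's cost map; either returned value is `None` when its pricing key is missing):

```python
from litellm.litellm_core_utils.llm_cost_calc.utils import (
    _generic_cost_per_character,
)

prompt_cost, completion_cost = _generic_cost_per_character(
    model="gemini-1.5-pro",  # illustrative
    custom_llm_provider="vertex_ai-language-models",
    prompt_characters=400.0,
    completion_characters=150.0,
    custom_prompt_cost=None,      # None -> fall back to model_info pricing
    custom_completion_cost=None,
)
if prompt_cost is not None and completion_cost is not None:
    print(prompt_cost + completion_cost)
```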
@@ -55,7 +55,6 @@ from ..types.llms.openai import (
     Thread,
 )
 from .base import BaseLLM
-from .custom_httpx.azure_dall_e_2 import AsyncCustomHTTPTransport, CustomHTTPTransport
 
 azure_ad_cache = DualCache()
 
@@ -1718,9 +1717,7 @@ class AzureChatCompletion(BaseLLM):
         input: Optional[list] = None,
         prompt: Optional[str] = None,
     ) -> dict:
-        client_session = litellm.client_session or httpx.Client(
-            transport=CustomHTTPTransport(),  # handle dall-e-2 calls
-        )
+        client_session = litellm.client_session or httpx.Client()
         if "gateway.ai.cloudflare.com" in api_base:
             ## build base url - assume api base includes resource name
             if not api_base.endswith("/"):
@@ -1793,9 +1790,10 @@ class AzureChatCompletion(BaseLLM):
         input: Optional[list] = None,
         prompt: Optional[str] = None,
     ) -> dict:
-        client_session = litellm.aclient_session or httpx.AsyncClient(
-            transport=AsyncCustomHTTPTransport(),  # handle dall-e-2 calls
-        )
+        client_session = (
+            litellm.aclient_session or httpx.AsyncClient()
+        )  # handle dall-e-2 calls
+
         if "gateway.ai.cloudflare.com" in api_base:
             ## build base url - assume api base includes resource name
             if not api_base.endswith("/"):
@@ -1,24 +1,27 @@
-from typing import Optional, Union, Any
-import types, requests  # type: ignore
-from .base import BaseLLM
-from litellm.utils import (
-    ModelResponse,
-    Choices,
-    Message,
-    CustomStreamWrapper,
-    convert_to_model_response_object,
-    TranscriptionResponse,
-    TextCompletionResponse,
-)
-from typing import Callable, Optional, BinaryIO
-from litellm import OpenAIConfig
-import litellm, json
-import httpx
-from .custom_httpx.azure_dall_e_2 import CustomHTTPTransport, AsyncCustomHTTPTransport
-from openai import AzureOpenAI, AsyncAzureOpenAI
-from ..llms.openai import OpenAITextCompletion, OpenAITextCompletionConfig
-import json
-import types  # type: ignore
-import uuid
-from .prompt_templates.factory import prompt_factory, custom_prompt
+from typing import Any, BinaryIO, Callable, Optional, Union
+
+import httpx
+import requests
+from openai import AsyncAzureOpenAI, AzureOpenAI
+
+import litellm
+from litellm import OpenAIConfig
+from litellm.utils import (
+    Choices,
+    CustomStreamWrapper,
+    Message,
+    ModelResponse,
+    TextCompletionResponse,
+    TranscriptionResponse,
+    convert_to_model_response_object,
+)
+
+from ..llms.openai import OpenAITextCompletion, OpenAITextCompletionConfig
+from .base import BaseLLM
+from .prompt_templates.factory import custom_prompt, prompt_factory
 
 openai_text_completion_config = OpenAITextCompletionConfig()
 
@@ -1,143 +0,0 @@
-import asyncio
-import json
-import time
-
-import httpx
-
-
-class AsyncCustomHTTPTransport(httpx.AsyncHTTPTransport):
-    """
-    Async implementation of custom http transport
-    """
-
-    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
-        _api_version = request.url.params.get("api-version", "")
-        if (
-            "images/generations" in request.url.path
-            and _api_version
-            in [  # dall-e-3 starts from `2023-12-01-preview` so we should be able to avoid conflict
-                "2023-06-01-preview",
-                "2023-07-01-preview",
-                "2023-08-01-preview",
-                "2023-09-01-preview",
-                "2023-10-01-preview",
-            ]
-        ):
-            request.url = request.url.copy_with(
-                path="/openai/images/generations:submit"
-            )
-            response = await super().handle_async_request(request)
-            operation_location_url = response.headers["operation-location"]
-            request.url = httpx.URL(operation_location_url)
-            request.method = "GET"
-            response = await super().handle_async_request(request)
-            await response.aread()
-
-            timeout_secs: int = 120
-            start_time = time.time()
-            while response.json()["status"] not in ["succeeded", "failed"]:
-                if time.time() - start_time > timeout_secs:
-                    timeout = {
-                        "error": {
-                            "code": "Timeout",
-                            "message": "Operation polling timed out.",
-                        }
-                    }
-                    return httpx.Response(
-                        status_code=400,
-                        headers=response.headers,
-                        content=json.dumps(timeout).encode("utf-8"),
-                        request=request,
-                    )
-
-                await asyncio.sleep(int(response.headers.get("retry-after") or 10))
-                response = await super().handle_async_request(request)
-                await response.aread()
-
-            if response.json()["status"] == "failed":
-                error_data = response.json()
-                return httpx.Response(
-                    status_code=400,
-                    headers=response.headers,
-                    content=json.dumps(error_data).encode("utf-8"),
-                    request=request,
-                )
-
-            result = response.json()["result"]
-            return httpx.Response(
-                status_code=200,
-                headers=response.headers,
-                content=json.dumps(result).encode("utf-8"),
-                request=request,
-            )
-        return await super().handle_async_request(request)
-
-
-class CustomHTTPTransport(httpx.HTTPTransport):
-    """
-    This class was written as a workaround to support dall-e-2 on openai > v1.x
-
-    Refer to this issue for more: https://github.com/openai/openai-python/issues/692
-    """
-
-    def handle_request(
-        self,
-        request: httpx.Request,
-    ) -> httpx.Response:
-        _api_version = request.url.params.get("api-version", "")
-        if (
-            "images/generations" in request.url.path
-            and _api_version
-            in [  # dall-e-3 starts from `2023-12-01-preview` so we should be able to avoid conflict
-                "2023-06-01-preview",
-                "2023-07-01-preview",
-                "2023-08-01-preview",
-                "2023-09-01-preview",
-                "2023-10-01-preview",
-            ]
-        ):
-            request.url = request.url.copy_with(
-                path="/openai/images/generations:submit"
-            )
-            response = super().handle_request(request)
-            operation_location_url = response.headers["operation-location"]
-            request.url = httpx.URL(operation_location_url)
-            request.method = "GET"
-            response = super().handle_request(request)
-            response.read()
-            timeout_secs: int = 120
-            start_time = time.time()
-            while response.json()["status"] not in ["succeeded", "failed"]:
-                if time.time() - start_time > timeout_secs:
-                    timeout = {
-                        "error": {
-                            "code": "Timeout",
-                            "message": "Operation polling timed out.",
-                        }
-                    }
-                    return httpx.Response(
-                        status_code=400,
-                        headers=response.headers,
-                        content=json.dumps(timeout).encode("utf-8"),
-                        request=request,
-                    )
-                time.sleep(int(response.headers.get("retry-after", None) or 10))
-                response = super().handle_request(request)
-                response.read()
-            if response.json()["status"] == "failed":
-                error_data = response.json()
-                return httpx.Response(
-                    status_code=400,
-                    headers=response.headers,
-                    content=json.dumps(error_data).encode("utf-8"),
-                    request=request,
-                )
-
-            result = response.json()["result"]
-            return httpx.Response(
-                status_code=200,
-                headers=response.headers,
-                content=json.dumps(result).encode("utf-8"),
-                request=request,
-            )
-        return super().handle_request(request)
@@ -26,30 +26,12 @@ class AsyncHTTPHandler:
         self, timeout: Optional[Union[float, httpx.Timeout]], concurrent_limit: int
     ) -> httpx.AsyncClient:
 
-        async_proxy_mounts = None
-        # Check if the HTTP_PROXY and HTTPS_PROXY environment variables are set and use them accordingly.
-        http_proxy = os.getenv("HTTP_PROXY", None)
-        https_proxy = os.getenv("HTTPS_PROXY", None)
-        no_proxy = os.getenv("NO_PROXY", None)
         ssl_verify = bool(os.getenv("SSL_VERIFY", litellm.ssl_verify))
         cert = os.getenv(
             "SSL_CERTIFICATE", litellm.ssl_certificate
         )  # /path/to/client.pem
 
-        if http_proxy is not None and https_proxy is not None:
-            async_proxy_mounts = {
-                "http://": httpx.AsyncHTTPTransport(proxy=httpx.Proxy(url=http_proxy)),
-                "https://": httpx.AsyncHTTPTransport(
-                    proxy=httpx.Proxy(url=https_proxy)
-                ),
-            }
-            # assume no_proxy is a list of comma separated urls
-            if no_proxy is not None and isinstance(no_proxy, str):
-                no_proxy_urls = no_proxy.split(",")
-
-                for url in no_proxy_urls:  # set no-proxy support for specific urls
-                    async_proxy_mounts[url] = None  # type: ignore
-
         if timeout is None:
             timeout = _DEFAULT_TIMEOUT
         # Create a client with a connection pool
@@ -61,7 +43,6 @@ class AsyncHTTPHandler:
                 max_keepalive_connections=concurrent_limit,
             ),
             verify=ssl_verify,
-            mounts=async_proxy_mounts,
             cert=cert,
         )
 
@@ -163,27 +144,11 @@ class HTTPHandler:
             timeout = _DEFAULT_TIMEOUT
 
         # Check if the HTTP_PROXY and HTTPS_PROXY environment variables are set and use them accordingly.
-        http_proxy = os.getenv("HTTP_PROXY", None)
-        https_proxy = os.getenv("HTTPS_PROXY", None)
-        no_proxy = os.getenv("NO_PROXY", None)
         ssl_verify = bool(os.getenv("SSL_VERIFY", litellm.ssl_verify))
         cert = os.getenv(
             "SSL_CERTIFICATE", litellm.ssl_certificate
         )  # /path/to/client.pem
 
-        sync_proxy_mounts = None
-        if http_proxy is not None and https_proxy is not None:
-            sync_proxy_mounts = {
-                "http://": httpx.HTTPTransport(proxy=httpx.Proxy(url=http_proxy)),
-                "https://": httpx.HTTPTransport(proxy=httpx.Proxy(url=https_proxy)),
-            }
-            # assume no_proxy is a list of comma separated urls
-            if no_proxy is not None and isinstance(no_proxy, str):
-                no_proxy_urls = no_proxy.split(",")
-
-                for url in no_proxy_urls:  # set no-proxy support for specific urls
-                    sync_proxy_mounts[url] = None  # type: ignore
-
         if client is None:
             # Create a client with a connection pool
             self.client = httpx.Client(
@@ -193,7 +158,6 @@ class HTTPHandler:
                 max_keepalive_connections=concurrent_limit,
             ),
             verify=ssl_verify,
-            mounts=sync_proxy_mounts,
             cert=cert,
         )
         else:
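Context for why the hand-rolled mounts could be dropped (an inference from httpx's documented defaults, not stated in the diff): httpx already honors `HTTP_PROXY` / `HTTPS_PROXY` / `NO_PROXY` on its own when `trust_env` is left enabled.

```python
import httpx

# trust_env=True is the default; httpx reads the proxy environment
# variables itself, so no explicit transport mounts are required.
client = httpx.Client(trust_env=True)
```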
@@ -1330,17 +1330,30 @@ class ModelResponseIterator:
 
             gemini_chunk = processed_chunk["candidates"][0]
 
-            if (
-                "content" in gemini_chunk
-                and "text" in gemini_chunk["content"]["parts"][0]
-            ):
-                text = gemini_chunk["content"]["parts"][0]["text"]
+            if "content" in gemini_chunk:
+                if "text" in gemini_chunk["content"]["parts"][0]:
+                    text = gemini_chunk["content"]["parts"][0]["text"]
+                elif "functionCall" in gemini_chunk["content"]["parts"][0]:
+                    function_call = ChatCompletionToolCallFunctionChunk(
+                        name=gemini_chunk["content"]["parts"][0]["functionCall"][
+                            "name"
+                        ],
+                        arguments=json.dumps(
+                            gemini_chunk["content"]["parts"][0]["functionCall"]["args"]
+                        ),
+                    )
+                    tool_use = ChatCompletionToolCallChunk(
+                        id=str(uuid.uuid4()),
+                        type="function",
+                        function=function_call,
+                        index=0,
+                    )
 
             if "finishReason" in gemini_chunk:
                 finish_reason = map_finish_reason(
                     finish_reason=gemini_chunk["finishReason"]
                 )
                 ## DO NOT SET 'finish_reason' = True
                 ## DO NOT SET 'is_finished' = True
                 ## GEMINI SETS FINISHREASON ON EVERY CHUNK!
 
             if "usageMetadata" in processed_chunk:
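The shape of the streamed chunk the new `elif` handles, reconstructed from the field accesses above (abridged; not a captured payload):

```python
import json

gemini_chunk = {
    "content": {
        "parts": [
            {
                "functionCall": {
                    "name": "get_current_weather",
                    "args": {"location": "Boston, MA"},
                }
            }
        ]
    }
}

part = gemini_chunk["content"]["parts"][0]
if "functionCall" in part:
    # args arrive as a dict and are serialized into the OpenAI-style
    # `arguments` JSON string, as the new code does with json.dumps.
    print(part["functionCall"]["name"], json.dumps(part["functionCall"]["args"]))
```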
@@ -896,7 +896,7 @@ def completion(
             if (
                 supports_system_message is not None
                 and isinstance(supports_system_message, bool)
-                and supports_system_message == False
+                and supports_system_message is False
             ):
                 messages = map_system_message_pt(messages=messages)
             model_api_key = get_api_key(
@@ -5028,10 +5028,9 @@ def stream_chunk_builder(
     for chunk in chunks:
         if "usage" in chunk:
             if "prompt_tokens" in chunk["usage"]:
-                prompt_tokens += chunk["usage"].get("prompt_tokens", 0) or 0
+                prompt_tokens = chunk["usage"].get("prompt_tokens", 0) or 0
             if "completion_tokens" in chunk["usage"]:
-                completion_tokens += chunk["usage"].get("completion_tokens", 0) or 0
-
+                completion_tokens = chunk["usage"].get("completion_tokens", 0) or 0
     try:
         response["usage"]["prompt_tokens"] = prompt_tokens or token_counter(
             model=model, messages=messages
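Why `=` replaced `+=`: when an OpenAI-style stream reports usage, the usage object is a running total for the whole request (typically sent on the final chunk), not a per-chunk delta, so accumulating it can double-count. A toy illustration:

```python
chunks = [
    {"id": "chunk-1"},  # content chunks usually carry no usage
    {"id": "chunk-2", "usage": {"prompt_tokens": 9, "completion_tokens": 70}},
]

prompt_tokens = 0
for chunk in chunks:
    if "usage" in chunk and "prompt_tokens" in chunk["usage"]:
        # assignment, not accumulation: the reported value is already cumulative
        prompt_tokens = chunk["usage"].get("prompt_tokens", 0) or 0

assert prompt_tokens == 9
```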
@@ -2022,10 +2022,10 @@
     "max_tokens": 8192,
     "max_input_tokens": 2097152,
     "max_output_tokens": 8192,
-    "input_cost_per_token": 0.00000035,
-    "input_cost_per_token_above_128k_tokens": 0.0000007,
-    "output_cost_per_token": 0.00000105,
-    "output_cost_per_token_above_128k_tokens": 0.0000021,
+    "input_cost_per_token": 0.0000035,
+    "input_cost_per_token_above_128k_tokens": 0.000007,
+    "output_cost_per_token": 0.0000105,
+    "output_cost_per_token_above_128k_tokens": 0.000021,
     "litellm_provider": "gemini",
     "mode": "chat",
     "supports_system_messages": true,
@@ -2033,16 +2033,16 @@
     "supports_vision": true,
     "supports_tool_choice": true,
     "supports_response_schema": true,
-    "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    "source": "https://ai.google.dev/pricing"
   },
   "gemini/gemini-1.5-pro-latest": {
     "max_tokens": 8192,
     "max_input_tokens": 1048576,
     "max_output_tokens": 8192,
-    "input_cost_per_token": 0.00000035,
-    "input_cost_per_token_above_128k_tokens": 0.0000007,
+    "input_cost_per_token": 0.0000035,
+    "input_cost_per_token_above_128k_tokens": 0.000007,
     "output_cost_per_token": 0.00000105,
-    "output_cost_per_token_above_128k_tokens": 0.0000021,
+    "output_cost_per_token_above_128k_tokens": 0.000021,
     "litellm_provider": "gemini",
     "mode": "chat",
     "supports_system_messages": true,
@@ -2050,7 +2050,7 @@
     "supports_vision": true,
     "supports_tool_choice": true,
     "supports_response_schema": true,
-    "source": "https://ai.google.dev/models/gemini"
+    "source": "https://ai.google.dev/pricing"
   },
   "gemini/gemini-pro-vision": {
     "max_tokens": 2048,
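A quick sanity check on the corrected decimal places:

```python
# New gemini-1.5-pro rate per input token, per the updated JSON above.
input_cost_per_token = 0.0000035

# 1M input tokens now price at $3.50; the old value (0.00000035) was a
# factor of 10 too low, pricing the same traffic at $0.35.
print(f"${input_cost_per_token * 1_000_000:.2f}")  # -> $3.50
```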
1 litellm/proxy/_experimental/out/404.html Normal file
File diff suppressed because one or more lines are too long
@@ -1 +1 @@
-<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/0f6908625573deae.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48951,[\"665\",\"static/chunks/3014691f-589a5f4865c3822f.js\",\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-f76791513e294b30.js\",\"931\",\"static/chunks/app/page-da7d95729f2529b5.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/0f6908625573deae.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"0gt3_bF2KkdKeE61mic4M\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
+<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/0f6908625573deae.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48951,[\"665\",\"static/chunks/3014691f-589a5f4865c3822f.js\",\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-19b05e5ce40fa85d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-d7572f2a46f911d5.js\",\"777\",\"static/chunks/777-906d7dd6a5bf7be4.js\",\"931\",\"static/chunks/app/page-567f85145e7f0f35.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/0f6908625573deae.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"RDLpeUaSstfmeQiKITNBo\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
|
@ -1,7 +1,7 @@
|
|||
2:I[77831,[],""]
|
||||
3:I[48951,["665","static/chunks/3014691f-589a5f4865c3822f.js","936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-f76791513e294b30.js","931","static/chunks/app/page-da7d95729f2529b5.js"],""]
|
||||
3:I[48951,["665","static/chunks/3014691f-589a5f4865c3822f.js","936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-19b05e5ce40fa85d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-d7572f2a46f911d5.js","777","static/chunks/777-906d7dd6a5bf7be4.js","931","static/chunks/app/page-567f85145e7f0f35.js"],""]
|
||||
4:I[5613,[],""]
|
||||
5:I[31778,[],""]
|
||||
0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
|
||||
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
1
litellm/proxy/_experimental/out/model_hub.html
Normal file
File diff suppressed because one or more lines are too long
@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-f76791513e294b30.js","418","static/chunks/app/model_hub/page-ba7819b59161aa64.js"],""]
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-19b05e5ce40fa85d.js","777","static/chunks/777-906d7dd6a5bf7be4.js","418","static/chunks/app/model_hub/page-ba7819b59161aa64.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
1
litellm/proxy/_experimental/out/onboarding.html
Normal file
File diff suppressed because one or more lines are too long
@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-f76791513e294b30.js","461","static/chunks/app/onboarding/page-1ed08595d570934e.js"],""]
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-906d7dd6a5bf7be4.js","461","static/chunks/app/onboarding/page-1ed08595d570934e.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
@ -1,12 +1,10 @@
model_list:
  - model_name: "*"
  - model_name: tts
    litellm_params:
      model: "openai/*"
      mock_response: "Hello world!"

litellm_settings:
  success_callback: ["langfuse"]
  failure_callback: ["langfuse"]
  - model_name: gemini-1.5-flash
    litellm_params:
      model: gemini/gemini-1.5-flash

general_settings:
  alerting: ["slack"]
167
litellm/proxy/common_utils/admin_ui_utils.py
Normal file
@ -0,0 +1,167 @@
import os


def show_missing_vars_in_env():
    from fastapi.responses import HTMLResponse

    from litellm.proxy.proxy_server import master_key, prisma_client

    if prisma_client is None and master_key is None:
        return HTMLResponse(
            content=missing_keys_form(
                missing_key_names="DATABASE_URL, LITELLM_MASTER_KEY"
            ),
            status_code=200,
        )
    if prisma_client is None:
        return HTMLResponse(
            content=missing_keys_form(missing_key_names="DATABASE_URL"), status_code=200
        )

    if master_key is None:
        return HTMLResponse(
            content=missing_keys_form(missing_key_names="LITELLM_MASTER_KEY"),
            status_code=200,
        )
    return None


# LiteLLM Admin UI - Non SSO Login
url_to_redirect_to = os.getenv("PROXY_BASE_URL", "")
url_to_redirect_to += "/login"
html_form = f"""
<!DOCTYPE html>
<html>
<head>
    <title>LiteLLM Login</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            background-color: #f4f4f4;
            margin: 0;
            padding: 0;
            display: flex;
            justify-content: center;
            align-items: center;
            height: 100vh;
        }}

        form {{
            background-color: #fff;
            padding: 20px;
            border-radius: 8px;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
        }}

        label {{
            display: block;
            margin-bottom: 8px;
        }}

        input {{
            width: 100%;
            padding: 8px;
            margin-bottom: 16px;
            box-sizing: border-box;
            border: 1px solid #ccc;
            border-radius: 4px;
        }}

        input[type="submit"] {{
            background-color: #4caf50;
            color: #fff;
            cursor: pointer;
        }}

        input[type="submit"]:hover {{
            background-color: #45a049;
        }}
    </style>
</head>
<body>
    <form action="{url_to_redirect_to}" method="post">
        <h2>LiteLLM Login</h2>

        <p>By default, the Username is "admin" and the Password is your LiteLLM Proxy `MASTER_KEY`</p>
        <p>If you need to set UI credentials / SSO, see the docs here: <a href="https://docs.litellm.ai/docs/proxy/ui" target="_blank">https://docs.litellm.ai/docs/proxy/ui</a></p>
        <br>
        <label for="username">Username:</label>
        <input type="text" id="username" name="username" required>
        <label for="password">Password:</label>
        <input type="password" id="password" name="password" required>
        <input type="submit" value="Submit">
    </form>
"""


def missing_keys_form(missing_key_names: str):
    missing_keys_html_form = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <style>
            body {{
                font-family: Arial, sans-serif;
                background-color: #f4f4f9;
                color: #333;
                margin: 20px;
                line-height: 1.6;
            }}
            .container {{
                max-width: 800px;
                margin: auto;
                padding: 20px;
                background: #fff;
                border: 1px solid #ddd;
                border-radius: 5px;
                box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
            }}
            h1 {{
                font-size: 24px;
                margin-bottom: 20px;
            }}
            pre {{
                background: #f8f8f8;
                padding: 1px;
                border: 1px solid #ccc;
                border-radius: 4px;
                overflow-x: auto;
                font-size: 14px;
            }}
            .env-var {{
                font-weight: normal;
            }}
            .comment {{
                font-weight: normal;
                color: #777;
            }}
        </style>
        <title>Environment Setup Instructions</title>
    </head>
    <body>
        <div class="container">
            <h1>Environment Setup Instructions</h1>
            <p>Please add the following variables to your environment variables:</p>
            <pre>
    <span class="env-var">LITELLM_MASTER_KEY="sk-1234"</span> <span class="comment"># Your master key for the proxy server. Can use this to send /chat/completion requests etc</span>
    <span class="env-var">LITELLM_SALT_KEY="sk-XXXXXXXX"</span> <span class="comment"># Can NOT CHANGE THIS ONCE SET - It is used to encrypt/decrypt credentials stored in DB. If value of 'LITELLM_SALT_KEY' changes your models cannot be retrieved from DB</span>
    <span class="env-var">DATABASE_URL="postgres://..."</span> <span class="comment"># Need a postgres database? (Check out Supabase, Neon, etc)</span>
    <span class="comment">## OPTIONAL ##</span>
    <span class="env-var">PORT=4000</span> <span class="comment"># DO THIS FOR RENDER/RAILWAY</span>
    <span class="env-var">STORE_MODEL_IN_DB="True"</span> <span class="comment"># Allow storing models in db</span>
            </pre>
            <h1>Missing Environment Variables</h1>
            <p>{missing_keys}</p>
        </div>

        <div class="container">
            <h1>Need Help? Support</h1>
            <p>Discord: <a href="https://discord.com/invite/wuPM9dRgDw" target="_blank">https://discord.com/invite/wuPM9dRgDw</a></p>
            <p>Docs: <a href="https://docs.litellm.ai/docs/" target="_blank">https://docs.litellm.ai/docs/</a></p>
        </div>
    </body>
    </html>
    """
    return missing_keys_html_form.format(missing_keys=missing_key_names)
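A minimal sketch of how `show_missing_vars_in_env` can be wired into a route (the FastAPI app object and path here are illustrative, not part of this commit):

```python
# Illustrative only: returning the missing-keys page from a FastAPI route.
from fastapi import FastAPI

from litellm.proxy.common_utils.admin_ui_utils import show_missing_vars_in_env

app = FastAPI()  # hypothetical app object for this sketch


@app.get("/setup-check")
async def setup_check():
    missing = show_missing_vars_in_env()  # HTMLResponse when env vars are missing
    if missing is not None:
        return missing
    return {"status": "DATABASE_URL and LITELLM_MASTER_KEY are set"}
```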
89
litellm/proxy/common_utils/encrypt_decrypt_utils.py
Normal file
@ -0,0 +1,89 @@
import base64
import os

from litellm._logging import verbose_proxy_logger

LITELLM_SALT_KEY = os.getenv("LITELLM_SALT_KEY", None)
if LITELLM_SALT_KEY is None:
    verbose_proxy_logger.debug(
        "LITELLM_SALT_KEY is None, using master_key to encrypt/decrypt secrets stored in DB"
    )


def encrypt_value_helper(value: str):
    from litellm.proxy.proxy_server import master_key

    signing_key = LITELLM_SALT_KEY
    if LITELLM_SALT_KEY is None:
        signing_key = master_key

    try:
        if isinstance(value, str):
            encrypted_value = encrypt_value(value=value, signing_key=signing_key)  # type: ignore
            encrypted_value = base64.b64encode(encrypted_value).decode("utf-8")

            return encrypted_value

        raise ValueError(
            f"Invalid value type passed to encrypt_value: {type(value)} for Value: {value}\n Value must be a string"
        )
    except Exception as e:
        raise e


def decrypt_value_helper(value: str):
    from litellm.proxy.proxy_server import master_key

    signing_key = LITELLM_SALT_KEY
    if LITELLM_SALT_KEY is None:
        signing_key = master_key

    try:
        if isinstance(value, str):
            decoded_b64 = base64.b64decode(value)
            value = decrypt_value(value=decoded_b64, signing_key=signing_key)  # type: ignore
        return value
    except Exception as e:
        verbose_proxy_logger.error(f"Error decrypting value: {value}\nError: {str(e)}")
        # [Non-Blocking Exception - this should not block decrypting other values]
        pass


def encrypt_value(value: str, signing_key: str):
    import hashlib

    import nacl.secret
    import nacl.utils

    # get 32 byte key for SecretBox by hashing the signing key #
    hash_object = hashlib.sha256(signing_key.encode())
    hash_bytes = hash_object.digest()

    # initialize secret box #
    box = nacl.secret.SecretBox(hash_bytes)

    # encode message #
    value_bytes = value.encode("utf-8")

    encrypted = box.encrypt(value_bytes)

    return encrypted


def decrypt_value(value: bytes, signing_key: str) -> str:
    import hashlib

    import nacl.secret
    import nacl.utils

    # get 32 byte key for SecretBox by hashing the signing key #
    hash_object = hashlib.sha256(signing_key.encode())
    hash_bytes = hash_object.digest()

    # initialize secret box #
    box = nacl.secret.SecretBox(hash_bytes)

    # decrypt, then convert the bytes object back to a string
    plaintext = box.decrypt(value)

    plaintext = plaintext.decode("utf-8")  # type: ignore
    return plaintext  # type: ignore
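A minimal round-trip sketch of the SecretBox scheme used above, assuming `pynacl` is installed (the key and secret values are placeholders):

```python
# Round-trip sketch for the NaCl SecretBox helpers above.
import base64
import hashlib

import nacl.secret

signing_key = "sk-1234"  # hypothetical LITELLM_SALT_KEY / master_key for illustration
box = nacl.secret.SecretBox(hashlib.sha256(signing_key.encode()).digest())

ciphertext = box.encrypt("my-azure-api-key".encode("utf-8"))
stored = base64.b64encode(ciphertext).decode("utf-8")  # what gets written to the DB

recovered = box.decrypt(base64.b64decode(stored)).decode("utf-8")
assert recovered == "my-azure-api-key"
```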
@ -31,10 +31,12 @@ def initialize_callbacks_on_proxy(
            imported_list.append(callback)
        elif isinstance(callback, str) and callback == "otel":
            from litellm.integrations.opentelemetry import OpenTelemetry
            from litellm.proxy import proxy_server

            open_telemetry_logger = OpenTelemetry()

            imported_list.append(open_telemetry_logger)
            setattr(proxy_server, "open_telemetry_logger", open_telemetry_logger)
        elif isinstance(callback, str) and callback == "presidio":
            from litellm.proxy.hooks.presidio_pii_masking import (
                _OPTIONAL_PresidioPIIMasking,
@ -8,21 +8,26 @@
# Tell us how we can improve! - Krrish & Ishaan


import asyncio
import json
import traceback
import uuid
from typing import Optional, Union
import litellm, traceback, uuid, json  # noqa: E401
from litellm.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger

import aiohttp
from fastapi import HTTPException

import litellm  # noqa: E401
from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth
from litellm.utils import (
    ModelResponse,
    EmbeddingResponse,
    ImageResponse,
    ModelResponse,
    StreamingChoices,
)
import aiohttp
import asyncio


class _OPTIONAL_PresidioPIIMasking(CustomLogger):
@ -57,22 +62,41 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
                f"An error occurred: {str(e)}, file_path={ad_hoc_recognizers}"
            )

        self.presidio_analyzer_api_base = litellm.get_secret(
        self.validate_environment()

    def validate_environment(self):
        self.presidio_analyzer_api_base: Optional[str] = litellm.get_secret(
            "PRESIDIO_ANALYZER_API_BASE", None
        )
        self.presidio_anonymizer_api_base = litellm.get_secret(
        )  # type: ignore
        self.presidio_anonymizer_api_base: Optional[str] = litellm.get_secret(
            "PRESIDIO_ANONYMIZER_API_BASE", None
        )
        )  # type: ignore

        if self.presidio_analyzer_api_base is None:
            raise Exception("Missing `PRESIDIO_ANALYZER_API_BASE` from environment")
        elif not self.presidio_analyzer_api_base.endswith("/"):
        if not self.presidio_analyzer_api_base.endswith("/"):
            self.presidio_analyzer_api_base += "/"
        if not (
            self.presidio_analyzer_api_base.startswith("http://")
            or self.presidio_analyzer_api_base.startswith("https://")
        ):
            # add http:// if unset, assume communicating over private network - e.g. render
            self.presidio_analyzer_api_base = (
                "http://" + self.presidio_analyzer_api_base
            )

        if self.presidio_anonymizer_api_base is None:
            raise Exception("Missing `PRESIDIO_ANONYMIZER_API_BASE` from environment")
        elif not self.presidio_anonymizer_api_base.endswith("/"):
        if not self.presidio_anonymizer_api_base.endswith("/"):
            self.presidio_anonymizer_api_base += "/"
        if not (
            self.presidio_anonymizer_api_base.startswith("http://")
            or self.presidio_anonymizer_api_base.startswith("https://")
        ):
            # add http:// if unset, assume communicating over private network - e.g. render
            self.presidio_anonymizer_api_base = (
                "http://" + self.presidio_anonymizer_api_base
            )

    def print_verbose(self, print_statement):
        try:
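The API-base checks above amount to a small normalization step; a hedged sketch (the function name and example host are illustrative, not from this commit):

```python
def normalize_api_base(api_base: str) -> str:
    """Ensure a trailing slash and an http:// scheme, mirroring the checks above."""
    if not api_base.endswith("/"):
        api_base += "/"
    if not (api_base.startswith("http://") or api_base.startswith("https://")):
        # assume a private-network host, e.g. render
        api_base = "http://" + api_base
    return api_base


assert normalize_api_base("presidio-analyzer:3000") == "http://presidio-analyzer:3000/"
```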
@ -176,6 +176,7 @@ async def add_litellm_data_to_request(

def _add_otel_traceparent_to_data(data: dict, request: Request):
    from litellm.proxy.proxy_server import open_telemetry_logger

    if data is None:
        return
    if open_telemetry_logger is None:
@ -35,6 +35,7 @@ general_settings:
  LANGFUSE_SECRET_KEY: "os.environ/LANGFUSE_DEV_SK_KEY"

litellm_settings:
  callbacks: ["otel"]
  guardrails:
    - prompt_injection:
        callbacks: [lakera_prompt_injection, hide_secrets]
@ -140,7 +140,15 @@ from litellm.proxy.auth.user_api_key_auth import user_api_key_auth

## Import All Misc routes here ##
from litellm.proxy.caching_routes import router as caching_router
from litellm.proxy.common_utils.admin_ui_utils import (
    html_form,
    show_missing_vars_in_env,
)
from litellm.proxy.common_utils.debug_utils import router as debugging_endpoints_router
from litellm.proxy.common_utils.encrypt_decrypt_utils import (
    decrypt_value_helper,
    encrypt_value_helper,
)
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from litellm.proxy.common_utils.init_callbacks import initialize_callbacks_on_proxy
from litellm.proxy.common_utils.openai_endpoint_utils import (
@ -186,13 +194,9 @@ from litellm.proxy.utils import (
    _get_projected_spend_over_limit,
    _is_projected_spend_over_limit,
    _is_valid_team_configs,
    decrypt_value,
    encrypt_value,
    get_error_message_str,
    get_instance_fn,
    hash_token,
    html_form,
    missing_keys_html_form,
    reset_budget,
    send_email,
    update_spend,
@ -207,6 +211,7 @@ from litellm.router import ModelInfo as RouterModelInfo
from litellm.router import updateDeployment
from litellm.scheduler import DefaultPriorities, FlowItem, Scheduler
from litellm.types.llms.openai import HttpxBinaryResponseContent
from litellm.types.router import RouterGeneralSettings

try:
    from litellm._version import version
@ -1242,6 +1247,7 @@ class ProxyConfig:
        ## DB
        if prisma_client is not None and (
            general_settings.get("store_model_in_db", False) == True
            or store_model_in_db is True
        ):
            _tasks = []
            keys = [
@ -1765,7 +1771,11 @@ class ProxyConfig:
            if k in available_args:
                router_params[k] = v
        router = litellm.Router(
            **router_params, assistants_config=assistants_config
            **router_params,
            assistants_config=assistants_config,
            router_general_settings=RouterGeneralSettings(
                async_only_mode=True  # only init async clients
            ),
        )  # type:ignore
        return router, router.get_model_list(), general_settings

@ -1880,16 +1890,8 @@ class ProxyConfig:
            # decrypt values
            for k, v in _litellm_params.items():
                if isinstance(v, str):
                    # decode base64
                    try:
                        decoded_b64 = base64.b64decode(v)
                    except Exception as e:
                        verbose_proxy_logger.error(
                            "Error decoding value - {}".format(v)
                        )
                        continue
                    # decrypt value
                    _value = decrypt_value(value=decoded_b64, master_key=master_key)
                    _value = decrypt_value_helper(value=v)
                    # sanity check if string > size 0
                    if len(_value) > 0:
                        _litellm_params[k] = _value
@ -1933,13 +1935,8 @@ class ProxyConfig:
                if isinstance(_litellm_params, dict):
                    # decrypt values
                    for k, v in _litellm_params.items():
                        if isinstance(v, str):
                            # decode base64
                            decoded_b64 = base64.b64decode(v)
                            # decrypt value
                            _litellm_params[k] = decrypt_value(
                                value=decoded_b64, master_key=master_key  # type: ignore
                            )
                            decrypted_value = decrypt_value_helper(value=v)
                            _litellm_params[k] = decrypted_value
                    _litellm_params = LiteLLM_Params(**_litellm_params)
                else:
                    verbose_proxy_logger.error(
@ -1957,7 +1954,12 @@ class ProxyConfig:
            )
            if len(_model_list) > 0:
                verbose_proxy_logger.debug(f"_model_list: {_model_list}")
                llm_router = litellm.Router(model_list=_model_list)
                llm_router = litellm.Router(
                    model_list=_model_list,
                    router_general_settings=RouterGeneralSettings(
                        async_only_mode=True  # only init async clients
                    ),
                )
                verbose_proxy_logger.debug(f"updated llm_router: {llm_router}")
            else:
                verbose_proxy_logger.debug(f"len new_models: {len(new_models)}")
@ -1995,10 +1997,8 @@ class ProxyConfig:
        environment_variables = config_data.get("environment_variables", {})
        for k, v in environment_variables.items():
            try:
                if v is not None:
                    decoded_b64 = base64.b64decode(v)
                    value = decrypt_value(value=decoded_b64, master_key=master_key)  # type: ignore
                    os.environ[k] = value
                    decrypted_value = decrypt_value_helper(value=v)
                    os.environ[k] = decrypted_value
            except Exception as e:
                verbose_proxy_logger.error(
                    "Error setting env variable: %s - %s", k, str(e)
@ -2720,6 +2720,10 @@ async def chat_completion(
    except:
        data = json.loads(body_str)

    verbose_proxy_logger.debug(
        "Request received by LiteLLM:\n{}".format(json.dumps(data, indent=4)),
    )

    data = await add_litellm_data_to_request(
        data=data,
        request=request,
@ -3372,8 +3376,9 @@ async def embeddings(
        )
        verbose_proxy_logger.debug(traceback.format_exc())
        if isinstance(e, HTTPException):
            message = get_error_message_str(e)
            raise ProxyException(
                message=getattr(e, "message", str(e)),
                message=message,
                type=getattr(e, "type", "None"),
                param=getattr(e, "param", "None"),
                code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
@ -5930,11 +5935,8 @@ async def add_new_model(
        _litellm_params_dict = model_params.litellm_params.dict(exclude_none=True)
        _orignal_litellm_model_name = model_params.litellm_params.model
        for k, v in _litellm_params_dict.items():
            if isinstance(v, str):
                encrypted_value = encrypt_value(value=v, master_key=master_key)  # type: ignore
                model_params.litellm_params[k] = base64.b64encode(
                    encrypted_value
                ).decode("utf-8")
                encrypted_value = encrypt_value_helper(value=v)
                model_params.litellm_params[k] = encrypted_value
        _data: dict = {
            "model_id": model_params.model_info.id,
            "model_name": model_params.model_name,
@ -6065,11 +6067,8 @@ async def update_model(

        ### ENCRYPT PARAMS ###
        for k, v in _new_litellm_params_dict.items():
            if isinstance(v, str):
                encrypted_value = encrypt_value(value=v, master_key=master_key)  # type: ignore
                model_params.litellm_params[k] = base64.b64encode(
                    encrypted_value
                ).decode("utf-8")
                encrypted_value = encrypt_value_helper(value=v)
                model_params.litellm_params[k] = encrypted_value

        ### MERGE WITH EXISTING DATA ###
        merged_dictionary = {}
@ -7187,10 +7186,9 @@ async def google_login(request: Request):
    )

    ####### Detect DB + MASTER KEY in .env #######
    if prisma_client is None or master_key is None:
        from fastapi.responses import HTMLResponse

        return HTMLResponse(content=missing_keys_html_form, status_code=200)
    missing_env_vars = show_missing_vars_in_env()
    if missing_env_vars is not None:
        return missing_env_vars

    # get url from request
    redirect_url = os.getenv("PROXY_BASE_URL", str(request.base_url))
@ -8393,11 +8391,8 @@ async def update_config(config_info: ConfigYAML):

            # encrypt updated_environment_variables #
            for k, v in _updated_environment_variables.items():
                if isinstance(v, str):
                    encrypted_value = encrypt_value(value=v, master_key=master_key)  # type: ignore
                    _updated_environment_variables[k] = base64.b64encode(
                        encrypted_value
                    ).decode("utf-8")
                    encrypted_value = encrypt_value_helper(value=v)
                    _updated_environment_variables[k] = encrypted_value

            _existing_env_variables = config["environment_variables"]

@ -8814,11 +8809,8 @@ async def get_config():
                        env_vars_dict[_var] = None
                    else:
                        # decode + decrypt the value
                        decoded_b64 = base64.b64decode(env_variable)
                        _decrypted_value = decrypt_value(
                            value=decoded_b64, master_key=master_key
                        )
                        env_vars_dict[_var] = _decrypted_value
                        decrypted_value = decrypt_value_helper(value=env_variable)
                        env_vars_dict[_var] = decrypted_value

                _data_to_return.append({"name": _callback, "variables": env_vars_dict})
            elif _callback == "langfuse":
@ -8834,11 +8826,8 @@ async def get_config():
                        _langfuse_env_vars[_var] = None
                    else:
                        # decode + decrypt the value
                        decoded_b64 = base64.b64decode(env_variable)
                        _decrypted_value = decrypt_value(
                            value=decoded_b64, master_key=master_key
                        )
                        _langfuse_env_vars[_var] = _decrypted_value
                        decrypted_value = decrypt_value_helper(value=env_variable)
                        _langfuse_env_vars[_var] = decrypted_value

                _data_to_return.append(
                    {"name": _callback, "variables": _langfuse_env_vars}
@ -8859,10 +8848,7 @@ async def get_config():
                        _slack_env_vars[_var] = _value
                    else:
                        # decode + decrypt the value
                        decoded_b64 = base64.b64decode(env_variable)
                        _decrypted_value = decrypt_value(
                            value=decoded_b64, master_key=master_key
                        )
                        _decrypted_value = decrypt_value_helper(value=env_variable)
                        _slack_env_vars[_var] = _decrypted_value

                _alerting_types = proxy_logging_obj.slack_alerting_instance.alert_types
@ -8898,10 +8884,7 @@ async def get_config():
                        _email_env_vars[_var] = None
                    else:
                        # decode + decrypt the value
                        decoded_b64 = base64.b64decode(env_variable)
                        _decrypted_value = decrypt_value(
                            value=decoded_b64, master_key=master_key
                        )
                        _decrypted_value = decrypt_value_helper(value=env_variable)
                        _email_env_vars[_var] = _decrypted_value

                alerting_data.append(
@ -821,6 +821,14 @@ async def get_global_spend_report(
        default="team",
        description="Group spend by internal team or customer or api_key",
    ),
    api_key: Optional[str] = fastapi.Query(
        default=None,
        description="View spend for a specific api_key. Example api_key='sk-1234'",
    ),
    internal_user_id: Optional[str] = fastapi.Query(
        default=None,
        description="View spend for a specific internal_user_id. Example internal_user_id='1234'",
    ),
):
    """
    Get Daily Spend per Team, based on specific startTime and endTime. Per team, view usage by each key, model
@ -873,6 +881,96 @@ async def get_global_spend_report(
    raise ValueError(
        "/spend/report endpoint " + CommonProxyErrors.not_premium_user.value
    )
    if api_key is not None:
        verbose_proxy_logger.debug("Getting /spend for api_key: %s", api_key)
        if api_key.startswith("sk-"):
            api_key = hash_token(token=api_key)
        sql_query = """
            WITH SpendByModelApiKey AS (
                SELECT
                    sl.api_key,
                    sl.model,
                    SUM(sl.spend) AS model_cost,
                    SUM(sl.prompt_tokens) AS model_input_tokens,
                    SUM(sl.completion_tokens) AS model_output_tokens
                FROM
                    "LiteLLM_SpendLogs" sl
                WHERE
                    sl."startTime" BETWEEN $1::date AND $2::date AND sl.api_key = $3
                GROUP BY
                    sl.api_key,
                    sl.model
            )
            SELECT
                api_key,
                SUM(model_cost) AS total_cost,
                SUM(model_input_tokens) AS total_input_tokens,
                SUM(model_output_tokens) AS total_output_tokens,
                jsonb_agg(jsonb_build_object(
                    'model', model,
                    'total_cost', model_cost,
                    'total_input_tokens', model_input_tokens,
                    'total_output_tokens', model_output_tokens
                )) AS model_details
            FROM
                SpendByModelApiKey
            GROUP BY
                api_key
            ORDER BY
                total_cost DESC;
        """
        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj, api_key
        )
        if db_response is None:
            return []

        return db_response
    elif internal_user_id is not None:
        verbose_proxy_logger.debug(
            "Getting /spend for internal_user_id: %s", internal_user_id
        )
        sql_query = """
            WITH SpendByModelApiKey AS (
                SELECT
                    sl.api_key,
                    sl.model,
                    SUM(sl.spend) AS model_cost,
                    SUM(sl.prompt_tokens) AS model_input_tokens,
                    SUM(sl.completion_tokens) AS model_output_tokens
                FROM
                    "LiteLLM_SpendLogs" sl
                WHERE
                    sl."startTime" BETWEEN $1::date AND $2::date AND sl.user = $3
                GROUP BY
                    sl.api_key,
                    sl.model
            )
            SELECT
                api_key,
                SUM(model_cost) AS total_cost,
                SUM(model_input_tokens) AS total_input_tokens,
                SUM(model_output_tokens) AS total_output_tokens,
                jsonb_agg(jsonb_build_object(
                    'model', model,
                    'total_cost', model_cost,
                    'total_input_tokens', model_input_tokens,
                    'total_output_tokens', model_output_tokens
                )) AS model_details
            FROM
                SpendByModelApiKey
            GROUP BY
                api_key
            ORDER BY
                total_cost DESC;
        """
        db_response = await prisma_client.db.query_raw(
            sql_query, start_date_obj, end_date_obj, internal_user_id
        )
        if db_response is None:
            return []

        return db_response

    if group_by == "team":
        # first get data from spend logs -> SpendByModelApiKey
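A hedged sketch of exercising the new `internal_user_id` filter on `/global/spend/report` (the base URL, admin key, and id are placeholders):

```python
# Illustrative call to the new internal_user_id filter on /global/spend/report.
import requests

resp = requests.get(
    "http://localhost:4000/global/spend/report",
    params={
        "start_date": "2024-04-01",
        "end_date": "2024-06-30",
        "internal_user_id": "1234",  # hypothetical internal user id
    },
    headers={"Authorization": "Bearer sk-1234"},  # placeholder admin key
)
print(resp.json())  # per-api_key spend + per-model breakdown for that user
```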
@ -353,7 +353,7 @@ class ProxyLogging:
                raise HTTPException(
                    status_code=400, detail={"error": response}
                )
            print_verbose(f"final data being sent to {call_type} call: {data}")

            return data
        except Exception as e:
            raise e
@ -2705,178 +2705,6 @@ def _is_valid_team_configs(team_id=None, team_config=None, request_data=None):
        return


def encrypt_value(value: str, master_key: str):
    import hashlib

    import nacl.secret
    import nacl.utils

    # get 32 byte master key #
    hash_object = hashlib.sha256(master_key.encode())
    hash_bytes = hash_object.digest()

    # initialize secret box #
    box = nacl.secret.SecretBox(hash_bytes)

    # encode message #
    value_bytes = value.encode("utf-8")

    encrypted = box.encrypt(value_bytes)

    return encrypted


def decrypt_value(value: bytes, master_key: str) -> str:
    import hashlib

    import nacl.secret
    import nacl.utils

    # get 32 byte master key #
    hash_object = hashlib.sha256(master_key.encode())
    hash_bytes = hash_object.digest()

    # initialize secret box #
    box = nacl.secret.SecretBox(hash_bytes)

    # Convert the bytes object to a string
    plaintext = box.decrypt(value)

    plaintext = plaintext.decode("utf-8")  # type: ignore
    return plaintext  # type: ignore


# LiteLLM Admin UI - Non SSO Login
url_to_redirect_to = os.getenv("PROXY_BASE_URL", "")
url_to_redirect_to += "/login"
html_form = f"""
<!DOCTYPE html>
<html>
<head>
    <title>LiteLLM Login</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            background-color: #f4f4f4;
            margin: 0;
            padding: 0;
            display: flex;
            justify-content: center;
            align-items: center;
            height: 100vh;
        }}

        form {{
            background-color: #fff;
            padding: 20px;
            border-radius: 8px;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
        }}

        label {{
            display: block;
            margin-bottom: 8px;
        }}

        input {{
            width: 100%;
            padding: 8px;
            margin-bottom: 16px;
            box-sizing: border-box;
            border: 1px solid #ccc;
            border-radius: 4px;
        }}

        input[type="submit"] {{
            background-color: #4caf50;
            color: #fff;
            cursor: pointer;
        }}

        input[type="submit"]:hover {{
            background-color: #45a049;
        }}
    </style>
</head>
<body>
    <form action="{url_to_redirect_to}" method="post">
        <h2>LiteLLM Login</h2>

        <p>By default Username is "admin" and Password is your set LiteLLM Proxy `MASTER_KEY`</p>
        <p>If you need to set UI credentials / SSO docs here: <a href="https://docs.litellm.ai/docs/proxy/ui" target="_blank">https://docs.litellm.ai/docs/proxy/ui</a></p>
        <br>
        <label for="username">Username:</label>
        <input type="text" id="username" name="username" required>
        <label for="password">Password:</label>
        <input type="password" id="password" name="password" required>
        <input type="submit" value="Submit">
    </form>
"""


missing_keys_html_form = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <style>
            body {
                font-family: Arial, sans-serif;
                background-color: #f4f4f9;
                color: #333;
                margin: 20px;
                line-height: 1.6;
            }
            .container {
                max-width: 600px;
                margin: auto;
                padding: 20px;
                background: #fff;
                border: 1px solid #ddd;
                border-radius: 5px;
                box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
            }
            h1 {
                font-size: 24px;
                margin-bottom: 20px;
            }
            pre {
                background: #f8f8f8;
                padding: 10px;
                border: 1px solid #ccc;
                border-radius: 4px;
                overflow-x: auto;
                font-size: 14px;
            }
            .env-var {
                font-weight: normal;
            }
            .comment {
                font-weight: normal;
                color: #777;
            }
        </style>
        <title>Environment Setup Instructions</title>
    </head>
    <body>
        <div class="container">
            <h1>Environment Setup Instructions</h1>
            <p>Please add the following configurations to your environment variables:</p>
            <pre>
    <span class="env-var">LITELLM_MASTER_KEY="sk-1234"</span> <span class="comment"># make this unique. must start with `sk-`.</span>
    <span class="env-var">DATABASE_URL="postgres://..."</span> <span class="comment"># Need a postgres database? (Check out Supabase, Neon, etc)</span>

    <span class="comment">## OPTIONAL ##</span>
    <span class="env-var">PORT=4000</span> <span class="comment"># DO THIS FOR RENDER/RAILWAY</span>
    <span class="env-var">STORE_MODEL_IN_DB="True"</span> <span class="comment"># Allow storing models in db</span>
            </pre>
        </div>
    </body>
    </html>
"""


def _to_ns(dt):
    return int(dt.timestamp() * 1e9)

@ -2888,6 +2716,11 @@ def get_error_message_str(e: Exception) -> str:
        error_message = e.detail
    elif isinstance(e.detail, dict):
        error_message = json.dumps(e.detail)
    elif hasattr(e, "message"):
        if isinstance(e.message, str):
            error_message = e.message
        elif isinstance(e.message, dict):
            error_message = json.dumps(e.message)
        else:
            error_message = str(e)
    else:
@ -46,15 +46,15 @@ from litellm._logging import verbose_router_logger
from litellm.caching import DualCache, InMemoryCache, RedisCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.azure import get_azure_ad_token_from_oidc
from litellm.llms.custom_httpx.azure_dall_e_2 import (
    AsyncCustomHTTPTransport,
    CustomHTTPTransport,
)
from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
from litellm.router_strategy.lowest_tpm_rpm import LowestTPMLoggingHandler
from litellm.router_strategy.lowest_tpm_rpm_v2 import LowestTPMLoggingHandler_v2
from litellm.router_utils.client_initalization_utils import (
    set_client,
    should_initialize_sync_client,
)
from litellm.router_utils.handle_error import send_llm_exception_alert
from litellm.scheduler import FlowItem, Scheduler
from litellm.types.llms.openai import (
@ -79,6 +79,7 @@ from litellm.types.router import (
    ModelInfo,
    RetryPolicy,
    RouterErrors,
    RouterGeneralSettings,
    updateDeployment,
    updateLiteLLMParams,
)
@ -88,6 +89,7 @@ from litellm.utils import (
    ModelResponse,
    _is_region_eu,
    calculate_max_parallel_requests,
    create_proxy_transport_and_mounts,
    get_utc_datetime,
)

@ -169,6 +171,7 @@ class Router:
        routing_strategy_args: dict = {},  # just for latency-based routing
        semaphore: Optional[asyncio.Semaphore] = None,
        alerting_config: Optional[AlertingConfig] = None,
        router_general_settings: Optional[RouterGeneralSettings] = None,
    ) -> None:
        """
        Initialize the Router class with the given parameters for caching, reliability, and routing strategy.
@ -246,6 +249,9 @@ class Router:
            verbose_router_logger.setLevel(logging.INFO)
        elif debug_level == "DEBUG":
            verbose_router_logger.setLevel(logging.DEBUG)
        self.router_general_settings: Optional[RouterGeneralSettings] = (
            router_general_settings
        )

        self.assistants_config = assistants_config
        self.deployment_names: List = (
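A minimal sketch of constructing a `Router` with the new `router_general_settings` parameter, mirroring how the proxy wires it up above (the deployment entry is illustrative):

```python
# Illustrative Router construction with the new RouterGeneralSettings parameter.
import litellm
from litellm.types.router import RouterGeneralSettings

router = litellm.Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",  # hypothetical deployment for this sketch
            "litellm_params": {"model": "openai/gpt-3.5-turbo"},
        }
    ],
    router_general_settings=RouterGeneralSettings(
        async_only_mode=True  # only init async clients, as the proxy does above
    ),
)
```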
@ -3247,520 +3253,6 @@ class Router:
        except Exception as e:
            raise e

    def set_client(self, model: dict):
        """
        - Initializes Azure/OpenAI clients. Stores them in cache, b/c of this - https://github.com/BerriAI/litellm/issues/1278
        - Initializes Semaphore for client w/ rpm. Stores them in cache. b/c of this - https://github.com/BerriAI/litellm/issues/2994
        """
        client_ttl = self.client_ttl
        litellm_params = model.get("litellm_params", {})
        model_name = litellm_params.get("model")
        model_id = model["model_info"]["id"]
        # ### IF RPM SET - initialize a semaphore ###
        rpm = litellm_params.get("rpm", None)
        tpm = litellm_params.get("tpm", None)
        max_parallel_requests = litellm_params.get("max_parallel_requests", None)
        calculated_max_parallel_requests = calculate_max_parallel_requests(
            rpm=rpm,
            max_parallel_requests=max_parallel_requests,
            tpm=tpm,
            default_max_parallel_requests=self.default_max_parallel_requests,
        )
        if calculated_max_parallel_requests:
            semaphore = asyncio.Semaphore(calculated_max_parallel_requests)
            cache_key = f"{model_id}_max_parallel_requests_client"
            self.cache.set_cache(
                key=cache_key,
                value=semaphore,
                local_only=True,
            )

        #### for OpenAI / Azure we need to initialize the Client for High Traffic ########
        custom_llm_provider = litellm_params.get("custom_llm_provider")
        custom_llm_provider = custom_llm_provider or model_name.split("/", 1)[0] or ""
        default_api_base = None
        default_api_key = None
        if custom_llm_provider in litellm.openai_compatible_providers:
            _, custom_llm_provider, api_key, api_base = litellm.get_llm_provider(
                model=model_name
            )
            default_api_base = api_base
            default_api_key = api_key

        if (
            model_name in litellm.open_ai_chat_completion_models
            or custom_llm_provider in litellm.openai_compatible_providers
            or custom_llm_provider == "azure"
            or custom_llm_provider == "azure_text"
            or custom_llm_provider == "custom_openai"
            or custom_llm_provider == "openai"
            or custom_llm_provider == "text-completion-openai"
            or "ft:gpt-3.5-turbo" in model_name
            or model_name in litellm.open_ai_embedding_models
        ):
            is_azure_ai_studio_model: bool = False
            if custom_llm_provider == "azure":
                if litellm.utils._is_non_openai_azure_model(model_name):
                    is_azure_ai_studio_model = True
                    custom_llm_provider = "openai"
                    # remove azure prefix from model_name
                    model_name = model_name.replace("azure/", "")
            # glorified / complicated reading of configs
            # user can pass vars directly or they can pass os.environ/AZURE_API_KEY, in which case we will read the env
            # we do this here because we init clients for Azure, OpenAI and we need to set the right key
            api_key = litellm_params.get("api_key") or default_api_key
            if (
                api_key
                and isinstance(api_key, str)
                and api_key.startswith("os.environ/")
            ):
                api_key_env_name = api_key.replace("os.environ/", "")
                api_key = litellm.get_secret(api_key_env_name)
                litellm_params["api_key"] = api_key

            api_base = litellm_params.get("api_base")
            base_url = litellm_params.get("base_url")
            api_base = (
                api_base or base_url or default_api_base
            )  # allow users to pass in `api_base` or `base_url` for azure
            if api_base and api_base.startswith("os.environ/"):
                api_base_env_name = api_base.replace("os.environ/", "")
                api_base = litellm.get_secret(api_base_env_name)
                litellm_params["api_base"] = api_base

            ## AZURE AI STUDIO MISTRAL CHECK ##
            """
            Make sure api base ends in /v1/

            if not, add it - https://github.com/BerriAI/litellm/issues/2279
            """
            if (
                is_azure_ai_studio_model is True
                and api_base is not None
                and isinstance(api_base, str)
                and not api_base.endswith("/v1/")
            ):
                # check if it ends with a trailing slash
                if api_base.endswith("/"):
                    api_base += "v1/"
                elif api_base.endswith("/v1"):
                    api_base += "/"
                else:
                    api_base += "/v1/"

            api_version = litellm_params.get("api_version")
            if api_version and api_version.startswith("os.environ/"):
                api_version_env_name = api_version.replace("os.environ/", "")
                api_version = litellm.get_secret(api_version_env_name)
                litellm_params["api_version"] = api_version

            timeout = litellm_params.pop("timeout", None) or litellm.request_timeout
            if isinstance(timeout, str) and timeout.startswith("os.environ/"):
                timeout_env_name = timeout.replace("os.environ/", "")
                timeout = litellm.get_secret(timeout_env_name)
                litellm_params["timeout"] = timeout

            stream_timeout = litellm_params.pop(
                "stream_timeout", timeout
            )  # if no stream_timeout is set, default to timeout
            if isinstance(stream_timeout, str) and stream_timeout.startswith(
                "os.environ/"
            ):
                stream_timeout_env_name = stream_timeout.replace("os.environ/", "")
                stream_timeout = litellm.get_secret(stream_timeout_env_name)
                litellm_params["stream_timeout"] = stream_timeout

            max_retries = litellm_params.pop(
                "max_retries", 0
            )  # router handles retry logic
            if isinstance(max_retries, str) and max_retries.startswith("os.environ/"):
                max_retries_env_name = max_retries.replace("os.environ/", "")
                max_retries = litellm.get_secret(max_retries_env_name)
                litellm_params["max_retries"] = max_retries

            # proxy support
            import os

            import httpx

            # Check if the HTTP_PROXY and HTTPS_PROXY environment variables are set and use them accordingly.
            http_proxy = os.getenv("HTTP_PROXY", None)
            https_proxy = os.getenv("HTTPS_PROXY", None)
            no_proxy = os.getenv("NO_PROXY", None)

            # Create the proxies dictionary only if the environment variables are set.
            sync_proxy_mounts = None
            async_proxy_mounts = None
            if http_proxy is not None and https_proxy is not None:
                sync_proxy_mounts = {
                    "http://": httpx.HTTPTransport(proxy=httpx.Proxy(url=http_proxy)),
                    "https://": httpx.HTTPTransport(proxy=httpx.Proxy(url=https_proxy)),
                }
                async_proxy_mounts = {
                    "http://": httpx.AsyncHTTPTransport(
                        proxy=httpx.Proxy(url=http_proxy)
                    ),
                    "https://": httpx.AsyncHTTPTransport(
                        proxy=httpx.Proxy(url=https_proxy)
                    ),
                }

                # assume no_proxy is a list of comma separated urls
                if no_proxy is not None and isinstance(no_proxy, str):
                    no_proxy_urls = no_proxy.split(",")

                    for url in no_proxy_urls:  # set no-proxy support for specific urls
                        sync_proxy_mounts[url] = None  # type: ignore
                        async_proxy_mounts[url] = None  # type: ignore

            organization = litellm_params.get("organization", None)
            if isinstance(organization, str) and organization.startswith("os.environ/"):
                organization_env_name = organization.replace("os.environ/", "")
                organization = litellm.get_secret(organization_env_name)
                litellm_params["organization"] = organization

            if custom_llm_provider == "azure" or custom_llm_provider == "azure_text":
                if api_base is None or not isinstance(api_base, str):
                    filtered_litellm_params = {
                        k: v
                        for k, v in model["litellm_params"].items()
                        if k != "api_key"
                    }
                    _filtered_model = {
                        "model_name": model["model_name"],
                        "litellm_params": filtered_litellm_params,
                    }
                    raise ValueError(
                        f"api_base is required for Azure OpenAI. Set it on your config. Model - {_filtered_model}"
                    )
                azure_ad_token = litellm_params.get("azure_ad_token")
                if azure_ad_token is not None:
                    if azure_ad_token.startswith("oidc/"):
                        azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
                if api_version is None:
                    api_version = litellm.AZURE_DEFAULT_API_VERSION

                if "gateway.ai.cloudflare.com" in api_base:
                    if not api_base.endswith("/"):
                        api_base += "/"
                    azure_model = model_name.replace("azure/", "")
                    api_base += f"{azure_model}"
                    cache_key = f"{model_id}_async_client"
                    _client = openai.AsyncAzureOpenAI(
                        api_key=api_key,
                        azure_ad_token=azure_ad_token,
                        base_url=api_base,
                        api_version=api_version,
                        timeout=timeout,
                        max_retries=max_retries,
                        http_client=httpx.AsyncClient(
                            transport=AsyncCustomHTTPTransport(
                                limits=httpx.Limits(
                                    max_connections=1000, max_keepalive_connections=100
                                ),
                                verify=litellm.ssl_verify,
                            ),
                            mounts=async_proxy_mounts,
                        ),  # type: ignore
                    )
                    self.cache.set_cache(
                        key=cache_key,
                        value=_client,
                        ttl=client_ttl,
                        local_only=True,
                    )  # cache for 1 hr

                    cache_key = f"{model_id}_client"
                    _client = openai.AzureOpenAI(  # type: ignore
                        api_key=api_key,
                        azure_ad_token=azure_ad_token,
                        base_url=api_base,
                        api_version=api_version,
                        timeout=timeout,
                        max_retries=max_retries,
                        http_client=httpx.Client(
                            transport=CustomHTTPTransport(
                                limits=httpx.Limits(
                                    max_connections=1000, max_keepalive_connections=100
                                ),
                                verify=litellm.ssl_verify,
                            ),
                            mounts=sync_proxy_mounts,
                        ),  # type: ignore
                    )
                    self.cache.set_cache(
                        key=cache_key,
                        value=_client,
                        ttl=client_ttl,
                        local_only=True,
                    )  # cache for 1 hr
                    # streaming clients can have diff timeouts
                    cache_key = f"{model_id}_stream_async_client"
                    _client = openai.AsyncAzureOpenAI(  # type: ignore
                        api_key=api_key,
                        azure_ad_token=azure_ad_token,
                        base_url=api_base,
                        api_version=api_version,
                        timeout=stream_timeout,
                        max_retries=max_retries,
                        http_client=httpx.AsyncClient(
                            transport=AsyncCustomHTTPTransport(
                                limits=httpx.Limits(
                                    max_connections=1000, max_keepalive_connections=100
                                ),
                                verify=litellm.ssl_verify,
                            ),
                            mounts=async_proxy_mounts,
                        ),  # type: ignore
                    )
                    self.cache.set_cache(
                        key=cache_key,
                        value=_client,
                        ttl=client_ttl,
                        local_only=True,
                    )  # cache for 1 hr

                    cache_key = f"{model_id}_stream_client"
                    _client = openai.AzureOpenAI(  # type: ignore
                        api_key=api_key,
                        azure_ad_token=azure_ad_token,
                        base_url=api_base,
                        api_version=api_version,
                        timeout=stream_timeout,
                        max_retries=max_retries,
                        http_client=httpx.Client(
                            transport=CustomHTTPTransport(
                                limits=httpx.Limits(
                                    max_connections=1000, max_keepalive_connections=100
                                ),
                                verify=litellm.ssl_verify,
                            ),
                            mounts=sync_proxy_mounts,
                        ),  # type: ignore
                    )
                    self.cache.set_cache(
                        key=cache_key,
                        value=_client,
                        ttl=client_ttl,
                        local_only=True,
                    )  # cache for 1 hr
                else:
                    _api_key = api_key
                    if _api_key is not None and isinstance(_api_key, str):
                        # only show first 8 chars of api_key
                        _api_key = _api_key[:8] + "*" * 15
                    verbose_router_logger.debug(
                        f"Initializing Azure OpenAI Client for {model_name}, Api Base: {str(api_base)}, Api Key:{_api_key}"
                    )
                    azure_client_params = {
                        "api_key": api_key,
                        "azure_endpoint": api_base,
                        "api_version": api_version,
                        "azure_ad_token": azure_ad_token,
                    }
                    from litellm.llms.azure import select_azure_base_url_or_endpoint

                    # this decides if we should set azure_endpoint or base_url on Azure OpenAI Client
                    # required to support GPT-4 vision enhancements, since base_url needs to be set on Azure OpenAI Client
                    azure_client_params = select_azure_base_url_or_endpoint(
                        azure_client_params
                    )

                    cache_key = f"{model_id}_async_client"
                    _client = openai.AsyncAzureOpenAI(  # type: ignore
                        **azure_client_params,
                        timeout=timeout,
                        max_retries=max_retries,
                        http_client=httpx.AsyncClient(
                            transport=AsyncCustomHTTPTransport(
                                limits=httpx.Limits(
                                    max_connections=1000, max_keepalive_connections=100
                                ),
                                verify=litellm.ssl_verify,
                            ),
                            mounts=async_proxy_mounts,
                        ),  # type: ignore
                    )
                    self.cache.set_cache(
                        key=cache_key,
                        value=_client,
                        ttl=client_ttl,
                        local_only=True,
                    )  # cache for 1 hr

                    cache_key = f"{model_id}_client"
                    _client = openai.AzureOpenAI(  # type: ignore
                        **azure_client_params,
                        timeout=timeout,
|
||||
max_retries=max_retries,
|
||||
http_client=httpx.Client(
|
||||
transport=CustomHTTPTransport(
|
||||
verify=litellm.ssl_verify,
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
),
|
||||
mounts=sync_proxy_mounts,
|
||||
), # type: ignore
|
||||
)
|
||||
self.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
# streaming clients should have diff timeouts
|
||||
cache_key = f"{model_id}_stream_async_client"
|
||||
_client = openai.AsyncAzureOpenAI( # type: ignore
|
||||
**azure_client_params,
|
||||
timeout=stream_timeout,
|
||||
max_retries=max_retries,
|
||||
http_client=httpx.AsyncClient(
|
||||
transport=AsyncCustomHTTPTransport(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
),
|
||||
mounts=async_proxy_mounts,
|
||||
),
|
||||
)
|
||||
self.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
cache_key = f"{model_id}_stream_client"
|
||||
_client = openai.AzureOpenAI( # type: ignore
|
||||
**azure_client_params,
|
||||
timeout=stream_timeout,
|
||||
max_retries=max_retries,
|
||||
http_client=httpx.Client(
|
||||
transport=CustomHTTPTransport(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
),
|
||||
mounts=sync_proxy_mounts,
|
||||
),
|
||||
)
|
||||
self.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
else:
|
||||
_api_key = api_key # type: ignore
|
||||
if _api_key is not None and isinstance(_api_key, str):
|
||||
# only show first 5 chars of api_key
|
||||
_api_key = _api_key[:8] + "*" * 15
|
||||
verbose_router_logger.debug(
|
||||
f"Initializing OpenAI Client for {model_name}, Api Base:{str(api_base)}, Api Key:{_api_key}"
|
||||
)
|
||||
cache_key = f"{model_id}_async_client"
|
||||
_client = openai.AsyncOpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
base_url=api_base,
|
||||
timeout=timeout,
|
||||
max_retries=max_retries,
|
||||
organization=organization,
|
||||
http_client=httpx.AsyncClient(
|
||||
transport=AsyncCustomHTTPTransport(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
),
|
||||
mounts=async_proxy_mounts,
|
||||
), # type: ignore
|
||||
)
|
||||
self.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
cache_key = f"{model_id}_client"
|
||||
_client = openai.OpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
base_url=api_base,
|
||||
timeout=timeout,
|
||||
max_retries=max_retries,
|
||||
organization=organization,
|
||||
http_client=httpx.Client(
|
||||
transport=CustomHTTPTransport(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
),
|
||||
mounts=sync_proxy_mounts,
|
||||
), # type: ignore
|
||||
)
|
||||
self.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
# streaming clients should have diff timeouts
|
||||
cache_key = f"{model_id}_stream_async_client"
|
||||
_client = openai.AsyncOpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
base_url=api_base,
|
||||
timeout=stream_timeout,
|
||||
max_retries=max_retries,
|
||||
organization=organization,
|
||||
http_client=httpx.AsyncClient(
|
||||
transport=AsyncCustomHTTPTransport(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
),
|
||||
mounts=async_proxy_mounts,
|
||||
), # type: ignore
|
||||
)
|
||||
self.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
# streaming clients should have diff timeouts
|
||||
cache_key = f"{model_id}_stream_client"
|
||||
_client = openai.OpenAI( # type: ignore
|
||||
api_key=api_key,
|
||||
base_url=api_base,
|
||||
timeout=stream_timeout,
|
||||
max_retries=max_retries,
|
||||
organization=organization,
|
||||
http_client=httpx.Client(
|
||||
transport=CustomHTTPTransport(
|
||||
limits=httpx.Limits(
|
||||
max_connections=1000, max_keepalive_connections=100
|
||||
),
|
||||
verify=litellm.ssl_verify,
|
||||
),
|
||||
mounts=sync_proxy_mounts,
|
||||
), # type: ignore
|
||||
)
|
||||
self.cache.set_cache(
|
||||
key=cache_key,
|
||||
value=_client,
|
||||
ttl=client_ttl,
|
||||
local_only=True,
|
||||
) # cache for 1 hr
|
||||
|
||||
def _generate_model_id(self, model_group: str, litellm_params: dict):
|
||||
"""
|
||||
Helper function to consistently generate the same id for a deployment
|
||||
|
@@ -3904,7 +3396,9 @@ class Router:
            raise Exception(f"Unsupported provider - {custom_llm_provider}")

        # init OpenAI, Azure clients
        self.set_client(model=deployment.to_json(exclude_none=True))
        set_client(
            litellm_router_instance=self, model=deployment.to_json(exclude_none=True)
        )

        # set region (if azure model) ## PREVIEW FEATURE ##
        if litellm.enable_preview_features == True:

@@ -4432,7 +3926,7 @@ class Router:
                    """
                    Re-initialize the client
                    """
                    self.set_client(model=deployment)
                    set_client(litellm_router_instance=self, model=deployment)
                    client = self.cache.get_cache(key=cache_key, local_only=True)
                return client
            else:

@@ -4442,7 +3936,7 @@ class Router:
                    """
                    Re-initialize the client
                    """
                    self.set_client(model=deployment)
                    set_client(litellm_router_instance=self, model=deployment)
                    client = self.cache.get_cache(key=cache_key, local_only=True)
                return client
            else:

@@ -4453,7 +3947,7 @@ class Router:
                    """
                    Re-initialize the client
                    """
                    self.set_client(model=deployment)
                    set_client(litellm_router_instance=self, model=deployment)
                    client = self.cache.get_cache(key=cache_key)
                return client
            else:

@@ -4463,7 +3957,7 @@ class Router:
                    """
                    Re-initialize the client
                    """
                    self.set_client(model=deployment)
                    set_client(litellm_router_instance=self, model=deployment)
                    client = self.cache.get_cache(key=cache_key)
                return client

495 litellm/router_utils/client_initalization_utils.py Normal file

@@ -0,0 +1,495 @@
import asyncio
import os
import traceback
from typing import TYPE_CHECKING, Any

import httpx
import openai

import litellm
from litellm._logging import verbose_router_logger
from litellm.llms.azure import get_azure_ad_token_from_oidc
from litellm.utils import calculate_max_parallel_requests

if TYPE_CHECKING:
    from litellm.router import Router as _Router

    LitellmRouter = _Router
else:
    LitellmRouter = Any


def should_initialize_sync_client(
    litellm_router_instance: LitellmRouter,
) -> bool:
    """
    Returns if Sync OpenAI, Azure Clients should be initialized.

    Do not init sync clients when router.router_general_settings.async_only_mode is True

    """
    if litellm_router_instance is None:
        return False

    if litellm_router_instance.router_general_settings is not None:
        if (
            hasattr(litellm_router_instance, "router_general_settings")
            and hasattr(
                litellm_router_instance.router_general_settings, "async_only_mode"
            )
            and litellm_router_instance.router_general_settings.async_only_mode is True
        ):
            return False

    return True


def set_client(litellm_router_instance: LitellmRouter, model: dict):
    """
    - Initializes Azure/OpenAI clients. Stores them in cache, b/c of this - https://github.com/BerriAI/litellm/issues/1278
    - Initializes Semaphore for client w/ rpm. Stores them in cache. b/c of this - https://github.com/BerriAI/litellm/issues/2994
    """
    client_ttl = litellm_router_instance.client_ttl
    litellm_params = model.get("litellm_params", {})
    model_name = litellm_params.get("model")
    model_id = model["model_info"]["id"]
    # ### IF RPM SET - initialize a semaphore ###
    rpm = litellm_params.get("rpm", None)
    tpm = litellm_params.get("tpm", None)
    max_parallel_requests = litellm_params.get("max_parallel_requests", None)
    calculated_max_parallel_requests = calculate_max_parallel_requests(
        rpm=rpm,
        max_parallel_requests=max_parallel_requests,
        tpm=tpm,
        default_max_parallel_requests=litellm_router_instance.default_max_parallel_requests,
    )
    if calculated_max_parallel_requests:
        semaphore = asyncio.Semaphore(calculated_max_parallel_requests)
        cache_key = f"{model_id}_max_parallel_requests_client"
        litellm_router_instance.cache.set_cache(
            key=cache_key,
            value=semaphore,
            local_only=True,
        )

    #### for OpenAI / Azure we need to initialize the Client for High Traffic ########
    custom_llm_provider = litellm_params.get("custom_llm_provider")
    custom_llm_provider = custom_llm_provider or model_name.split("/", 1)[0] or ""
    default_api_base = None
    default_api_key = None
    if custom_llm_provider in litellm.openai_compatible_providers:
        _, custom_llm_provider, api_key, api_base = litellm.get_llm_provider(
            model=model_name
        )
        default_api_base = api_base
        default_api_key = api_key

    if (
        model_name in litellm.open_ai_chat_completion_models
        or custom_llm_provider in litellm.openai_compatible_providers
        or custom_llm_provider == "azure"
        or custom_llm_provider == "azure_text"
        or custom_llm_provider == "custom_openai"
        or custom_llm_provider == "openai"
        or custom_llm_provider == "text-completion-openai"
        or "ft:gpt-3.5-turbo" in model_name
        or model_name in litellm.open_ai_embedding_models
    ):
        is_azure_ai_studio_model: bool = False
        if custom_llm_provider == "azure":
            if litellm.utils._is_non_openai_azure_model(model_name):
                is_azure_ai_studio_model = True
                custom_llm_provider = "openai"
                # remove azure prefix from model_name
                model_name = model_name.replace("azure/", "")
        # glorified / complicated reading of configs
        # user can pass vars directly or they can pass os.environ/AZURE_API_KEY, in which case we will read the env
        # we do this here because we init clients for Azure, OpenAI and we need to set the right key
        api_key = litellm_params.get("api_key") or default_api_key
        if api_key and isinstance(api_key, str) and api_key.startswith("os.environ/"):
            api_key_env_name = api_key.replace("os.environ/", "")
            api_key = litellm.get_secret(api_key_env_name)
            litellm_params["api_key"] = api_key

        api_base = litellm_params.get("api_base")
        base_url = litellm_params.get("base_url")
        api_base = (
            api_base or base_url or default_api_base
        )  # allow users to pass in `api_base` or `base_url` for azure
        if api_base and api_base.startswith("os.environ/"):
            api_base_env_name = api_base.replace("os.environ/", "")
            api_base = litellm.get_secret(api_base_env_name)
            litellm_params["api_base"] = api_base

        ## AZURE AI STUDIO MISTRAL CHECK ##
        """
        Make sure api base ends in /v1/

        if not, add it - https://github.com/BerriAI/litellm/issues/2279
        """
        if (
            is_azure_ai_studio_model is True
            and api_base is not None
            and isinstance(api_base, str)
            and not api_base.endswith("/v1/")
        ):
            # check if it ends with a trailing slash
            if api_base.endswith("/"):
                api_base += "v1/"
            elif api_base.endswith("/v1"):
                api_base += "/"
            else:
                api_base += "/v1/"

        api_version = litellm_params.get("api_version")
        if api_version and api_version.startswith("os.environ/"):
            api_version_env_name = api_version.replace("os.environ/", "")
            api_version = litellm.get_secret(api_version_env_name)
            litellm_params["api_version"] = api_version

        timeout = litellm_params.pop("timeout", None) or litellm.request_timeout
        if isinstance(timeout, str) and timeout.startswith("os.environ/"):
            timeout_env_name = timeout.replace("os.environ/", "")
            timeout = litellm.get_secret(timeout_env_name)
            litellm_params["timeout"] = timeout

        stream_timeout = litellm_params.pop(
            "stream_timeout", timeout
        )  # if no stream_timeout is set, default to timeout
        if isinstance(stream_timeout, str) and stream_timeout.startswith("os.environ/"):
            stream_timeout_env_name = stream_timeout.replace("os.environ/", "")
            stream_timeout = litellm.get_secret(stream_timeout_env_name)
            litellm_params["stream_timeout"] = stream_timeout

        max_retries = litellm_params.pop("max_retries", 0)  # router handles retry logic
        if isinstance(max_retries, str) and max_retries.startswith("os.environ/"):
            max_retries_env_name = max_retries.replace("os.environ/", "")
            max_retries = litellm.get_secret(max_retries_env_name)
            litellm_params["max_retries"] = max_retries

        organization = litellm_params.get("organization", None)
        if isinstance(organization, str) and organization.startswith("os.environ/"):
            organization_env_name = organization.replace("os.environ/", "")
            organization = litellm.get_secret(organization_env_name)
            litellm_params["organization"] = organization

        if custom_llm_provider == "azure" or custom_llm_provider == "azure_text":
            if api_base is None or not isinstance(api_base, str):
                filtered_litellm_params = {
                    k: v for k, v in model["litellm_params"].items() if k != "api_key"
                }
                _filtered_model = {
                    "model_name": model["model_name"],
                    "litellm_params": filtered_litellm_params,
                }
                raise ValueError(
                    f"api_base is required for Azure OpenAI. Set it on your config. Model - {_filtered_model}"
                )
            azure_ad_token = litellm_params.get("azure_ad_token")
            if azure_ad_token is not None:
                if azure_ad_token.startswith("oidc/"):
                    azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
            if api_version is None:
                api_version = litellm.AZURE_DEFAULT_API_VERSION

            if "gateway.ai.cloudflare.com" in api_base:
                if not api_base.endswith("/"):
                    api_base += "/"
                azure_model = model_name.replace("azure/", "")
                api_base += f"{azure_model}"
                cache_key = f"{model_id}_async_client"
                _client = openai.AsyncAzureOpenAI(
                    api_key=api_key,
                    azure_ad_token=azure_ad_token,
                    base_url=api_base,
                    api_version=api_version,
                    timeout=timeout,
                    max_retries=max_retries,
                    http_client=httpx.AsyncClient(
                        limits=httpx.Limits(
                            max_connections=1000, max_keepalive_connections=100
                        ),
                        verify=litellm.ssl_verify,
                    ),  # type: ignore
                )
                litellm_router_instance.cache.set_cache(
                    key=cache_key,
                    value=_client,
                    ttl=client_ttl,
                    local_only=True,
                )  # cache for 1 hr

                if should_initialize_sync_client(
                    litellm_router_instance=litellm_router_instance
                ):
                    cache_key = f"{model_id}_client"
                    _client = openai.AzureOpenAI(  # type: ignore
                        api_key=api_key,
                        azure_ad_token=azure_ad_token,
                        base_url=api_base,
                        api_version=api_version,
                        timeout=timeout,
                        max_retries=max_retries,
                        http_client=httpx.Client(
                            limits=httpx.Limits(
                                max_connections=1000, max_keepalive_connections=100
                            ),
                            verify=litellm.ssl_verify,
                        ),  # type: ignore
                    )
                    litellm_router_instance.cache.set_cache(
                        key=cache_key,
                        value=_client,
                        ttl=client_ttl,
                        local_only=True,
                    )  # cache for 1 hr
                # streaming clients can have diff timeouts
                cache_key = f"{model_id}_stream_async_client"
                _client = openai.AsyncAzureOpenAI(  # type: ignore
                    api_key=api_key,
                    azure_ad_token=azure_ad_token,
                    base_url=api_base,
                    api_version=api_version,
                    timeout=stream_timeout,
                    max_retries=max_retries,
                    http_client=httpx.AsyncClient(
                        limits=httpx.Limits(
                            max_connections=1000, max_keepalive_connections=100
                        ),
                        verify=litellm.ssl_verify,
                    ),  # type: ignore
                )
                litellm_router_instance.cache.set_cache(
                    key=cache_key,
                    value=_client,
                    ttl=client_ttl,
                    local_only=True,
                )  # cache for 1 hr

                if should_initialize_sync_client(
                    litellm_router_instance=litellm_router_instance
                ):
                    cache_key = f"{model_id}_stream_client"
                    _client = openai.AzureOpenAI(  # type: ignore
                        api_key=api_key,
                        azure_ad_token=azure_ad_token,
                        base_url=api_base,
                        api_version=api_version,
                        timeout=stream_timeout,
                        max_retries=max_retries,
                        http_client=httpx.Client(
                            limits=httpx.Limits(
                                max_connections=1000, max_keepalive_connections=100
                            ),
                            verify=litellm.ssl_verify,
                        ),  # type: ignore
                    )
                    litellm_router_instance.cache.set_cache(
                        key=cache_key,
                        value=_client,
                        ttl=client_ttl,
                        local_only=True,
                    )  # cache for 1 hr
            else:
                _api_key = api_key
                if _api_key is not None and isinstance(_api_key, str):
                    # only show first 5 chars of api_key
                    _api_key = _api_key[:8] + "*" * 15
                verbose_router_logger.debug(
                    f"Initializing Azure OpenAI Client for {model_name}, Api Base: {str(api_base)}, Api Key:{_api_key}"
                )
                azure_client_params = {
                    "api_key": api_key,
                    "azure_endpoint": api_base,
                    "api_version": api_version,
                    "azure_ad_token": azure_ad_token,
                }
                from litellm.llms.azure import select_azure_base_url_or_endpoint

                # this decides if we should set azure_endpoint or base_url on Azure OpenAI Client
                # required to support GPT-4 vision enhancements, since base_url needs to be set on Azure OpenAI Client
                azure_client_params = select_azure_base_url_or_endpoint(
                    azure_client_params
                )

                cache_key = f"{model_id}_async_client"
                _client = openai.AsyncAzureOpenAI(  # type: ignore
                    **azure_client_params,
                    timeout=timeout,
                    max_retries=max_retries,
                    http_client=httpx.AsyncClient(
                        limits=httpx.Limits(
                            max_connections=1000, max_keepalive_connections=100
                        ),
                        verify=litellm.ssl_verify,
                    ),  # type: ignore
                )
                litellm_router_instance.cache.set_cache(
                    key=cache_key,
                    value=_client,
                    ttl=client_ttl,
                    local_only=True,
                )  # cache for 1 hr
                if should_initialize_sync_client(
                    litellm_router_instance=litellm_router_instance
                ):
                    cache_key = f"{model_id}_client"
                    _client = openai.AzureOpenAI(  # type: ignore
                        **azure_client_params,
                        timeout=timeout,
                        max_retries=max_retries,
                        http_client=httpx.Client(
                            limits=httpx.Limits(
                                max_connections=1000, max_keepalive_connections=100
                            ),
                            verify=litellm.ssl_verify,
                        ),  # type: ignore
                    )
                    litellm_router_instance.cache.set_cache(
                        key=cache_key,
                        value=_client,
                        ttl=client_ttl,
                        local_only=True,
                    )  # cache for 1 hr

                # streaming clients should have diff timeouts
                cache_key = f"{model_id}_stream_async_client"
                _client = openai.AsyncAzureOpenAI(  # type: ignore
                    **azure_client_params,
                    timeout=stream_timeout,
                    max_retries=max_retries,
                    http_client=httpx.AsyncClient(
                        limits=httpx.Limits(
                            max_connections=1000, max_keepalive_connections=100
                        ),
                        verify=litellm.ssl_verify,
                    ),
                )
                litellm_router_instance.cache.set_cache(
                    key=cache_key,
                    value=_client,
                    ttl=client_ttl,
                    local_only=True,
                )  # cache for 1 hr

                if should_initialize_sync_client(
                    litellm_router_instance=litellm_router_instance
                ):
                    cache_key = f"{model_id}_stream_client"
                    _client = openai.AzureOpenAI(  # type: ignore
                        **azure_client_params,
                        timeout=stream_timeout,
                        max_retries=max_retries,
                        http_client=httpx.Client(
                            limits=httpx.Limits(
                                max_connections=1000, max_keepalive_connections=100
                            ),
                            verify=litellm.ssl_verify,
                        ),
                    )
                    litellm_router_instance.cache.set_cache(
                        key=cache_key,
                        value=_client,
                        ttl=client_ttl,
                        local_only=True,
                    )  # cache for 1 hr

        else:
            _api_key = api_key  # type: ignore
            if _api_key is not None and isinstance(_api_key, str):
                # only show first 5 chars of api_key
                _api_key = _api_key[:8] + "*" * 15
            verbose_router_logger.debug(
                f"Initializing OpenAI Client for {model_name}, Api Base:{str(api_base)}, Api Key:{_api_key}"
            )
            cache_key = f"{model_id}_async_client"
            _client = openai.AsyncOpenAI(  # type: ignore
                api_key=api_key,
                base_url=api_base,
                timeout=timeout,
                max_retries=max_retries,
                organization=organization,
                http_client=httpx.AsyncClient(
                    limits=httpx.Limits(
                        max_connections=1000, max_keepalive_connections=100
                    ),
                    verify=litellm.ssl_verify,
                ),  # type: ignore
            )
            litellm_router_instance.cache.set_cache(
                key=cache_key,
                value=_client,
                ttl=client_ttl,
                local_only=True,
            )  # cache for 1 hr

            if should_initialize_sync_client(
                litellm_router_instance=litellm_router_instance
            ):
                cache_key = f"{model_id}_client"
                _client = openai.OpenAI(  # type: ignore
                    api_key=api_key,
                    base_url=api_base,
                    timeout=timeout,
                    max_retries=max_retries,
                    organization=organization,
                    http_client=httpx.Client(
                        limits=httpx.Limits(
                            max_connections=1000, max_keepalive_connections=100
                        ),
                        verify=litellm.ssl_verify,
                    ),  # type: ignore
                )
                litellm_router_instance.cache.set_cache(
                    key=cache_key,
                    value=_client,
                    ttl=client_ttl,
                    local_only=True,
                )  # cache for 1 hr

            # streaming clients should have diff timeouts
            cache_key = f"{model_id}_stream_async_client"
            _client = openai.AsyncOpenAI(  # type: ignore
                api_key=api_key,
                base_url=api_base,
                timeout=stream_timeout,
                max_retries=max_retries,
                organization=organization,
                http_client=httpx.AsyncClient(
                    limits=httpx.Limits(
                        max_connections=1000, max_keepalive_connections=100
                    ),
                    verify=litellm.ssl_verify,
                ),  # type: ignore
            )
            litellm_router_instance.cache.set_cache(
                key=cache_key,
                value=_client,
                ttl=client_ttl,
                local_only=True,
            )  # cache for 1 hr

            if should_initialize_sync_client(
                litellm_router_instance=litellm_router_instance
            ):
                # streaming clients should have diff timeouts
                cache_key = f"{model_id}_stream_client"
                _client = openai.OpenAI(  # type: ignore
                    api_key=api_key,
                    base_url=api_base,
                    timeout=stream_timeout,
                    max_retries=max_retries,
                    organization=organization,
                    http_client=httpx.Client(
                        limits=httpx.Limits(
                            max_connections=1000, max_keepalive_connections=100
                        ),
                        verify=litellm.ssl_verify,
                    ),  # type: ignore
                )
                litellm_router_instance.cache.set_cache(
                    key=cache_key,
                    value=_client,
                    ttl=client_ttl,
                    local_only=True,
                )  # cache for 1 hr
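A brief usage sketch, not part of the diff (the `router` instance and model id "1" are assumed): once set_client() has run for a deployment, the initialized clients can be fetched back from the router's in-memory cache using the same cache-key strings shown above.

# hypothetical illustration - keys mirror the cache_key strings used in set_client()
model_id = "1"
async_client = router.cache.get_cache(f"{model_id}_async_client")  # openai.AsyncOpenAI / AsyncAzureOpenAI
stream_async_client = router.cache.get_cache(f"{model_id}_stream_async_client")  # separate client, built with stream_timeout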
File diff suppressed because one or more lines are too long

@@ -1607,7 +1607,17 @@ def test_caching_redis_simple(caplog):
    print(m)
    print(time.time() - s2)

    redis_async_caching_error = False
    redis_service_logging_error = False
    captured_logs = [rec.message for rec in caplog.records]

    assert "LiteLLM Redis Caching: async set" not in captured_logs
    assert "ServiceLogging.async_service_success_hook" not in captured_logs
    print(f"captured_logs: {captured_logs}")
    for item in captured_logs:
        if "Error connecting to Async Redis client" in item:
            redis_async_caching_error = True

        if "ServiceLogging.async_service_success_hook" in item:
            redis_service_logging_error = True

    assert redis_async_caching_error is False
    assert redis_service_logging_error is False

@@ -712,7 +712,6 @@ def test_vertex_ai_claude_completion_cost():
    assert cost == predicted_cost


@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_completion_cost_hidden_params(sync_mode):

@@ -732,6 +731,7 @@ async def test_completion_cost_hidden_params(sync_mode):
    assert "response_cost" in response._hidden_params
    assert isinstance(response._hidden_params["response_cost"], float)


def test_vertex_ai_gemini_predict_cost():
    model = "gemini-1.5-flash"
    messages = [{"role": "user", "content": "Hey, hows it going???"}]

@@ -739,3 +739,16 @@ def test_vertex_ai_gemini_predict_cost():

    assert predictive_cost > 0


@pytest.mark.parametrize("model", ["openai/tts-1", "azure/tts-1"])
def test_completion_cost_tts(model):
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    cost = completion_cost(
        model=model,
        prompt="the quick brown fox jumped over the lazy dogs",
        call_type="speech",
    )

    assert cost > 0

@@ -2,23 +2,30 @@
## Unit tests for ProxyConfig class


import sys, os
import os
import sys
import traceback

from dotenv import load_dotenv

load_dotenv()
import os, io
import io
import os

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest, litellm
from pydantic import BaseModel, ConfigDict
from litellm.proxy.proxy_server import ProxyConfig
from litellm.proxy.utils import encrypt_value, ProxyLogging, DualCache
from litellm.types.router import Deployment, LiteLLM_Params, ModelInfo
from typing import Literal

import pytest
from pydantic import BaseModel, ConfigDict

import litellm
from litellm.proxy.common_utils.encrypt_decrypt_utils import encrypt_value
from litellm.proxy.proxy_server import ProxyConfig
from litellm.proxy.utils import DualCache, ProxyLogging
from litellm.types.router import Deployment, LiteLLM_Params, ModelInfo


class DBModel(BaseModel):
    model_id: str

@@ -28,6 +35,7 @@ class DBModel(BaseModel):

    model_config = ConfigDict(protected_namespaces=())


@pytest.mark.asyncio
async def test_delete_deployment():
    """

@@ -1,8 +1,13 @@
# What is this?
## Unit test for presidio pii masking
import sys, os, asyncio, time, random
from datetime import datetime
import asyncio
import os
import random
import sys
import time
import traceback
from datetime import datetime

from dotenv import load_dotenv

load_dotenv()

@@ -12,12 +17,40 @@ sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest

import litellm
from litellm.proxy.hooks.presidio_pii_masking import _OPTIONAL_PresidioPIIMasking
from litellm import Router, mock_completion
from litellm.proxy.utils import ProxyLogging
from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.hooks.presidio_pii_masking import _OPTIONAL_PresidioPIIMasking
from litellm.proxy.utils import ProxyLogging


@pytest.mark.parametrize(
    "base_url",
    [
        "presidio-analyzer-s3pa:10000",
        "https://presidio-analyzer-s3pa:10000",
        "http://presidio-analyzer-s3pa:10000",
    ],
)
def test_validate_environment_missing_http(base_url):
    pii_masking = _OPTIONAL_PresidioPIIMasking(mock_testing=True)

    os.environ["PRESIDIO_ANALYZER_API_BASE"] = f"{base_url}/analyze"
    os.environ["PRESIDIO_ANONYMIZER_API_BASE"] = f"{base_url}/anonymize"
    pii_masking.validate_environment()

    expected_url = base_url
    if not (base_url.startswith("https://") or base_url.startswith("http://")):
        expected_url = "http://" + base_url

    assert (
        pii_masking.presidio_anonymizer_api_base == f"{expected_url}/anonymize/"
    ), "Got={}, Expected={}".format(
        pii_masking.presidio_anonymizer_api_base, f"{expected_url}/anonymize/"
    )
    assert pii_masking.presidio_analyzer_api_base == f"{expected_url}/analyze/"


@pytest.mark.asyncio

@@ -1894,6 +1894,49 @@ async def test_router_model_usage(mock_response):
        raise e


@pytest.mark.skip(reason="Check if this is causing ci/cd issues.")
@pytest.mark.asyncio
async def test_is_proxy_set():
    """
    Assert if proxy is set
    """
    from httpx import AsyncHTTPTransport

    os.environ["HTTPS_PROXY"] = "https://proxy.example.com:8080"
    from openai import AsyncAzureOpenAI

    # Function to check if a proxy is set on the client
    def check_proxy(client: httpx.AsyncClient) -> bool:
        print(f"client._mounts: {client._mounts}")
        assert len(client._mounts) == 1
        for k, v in client._mounts.items():
            assert isinstance(v, AsyncHTTPTransport)
        return True

    llm_router = Router(
        model_list=[
            {
                "model_name": "gpt-4",
                "litellm_params": {
                    "model": "azure/gpt-3.5-turbo",
                    "api_key": "my-key",
                    "api_base": "my-base",
                    "mock_response": "hello world",
                },
                "model_info": {"id": "1"},
            }
        ]
    )

    _deployment = llm_router.get_deployment(model_id="1")
    model_client: AsyncAzureOpenAI = llm_router._get_client(
        deployment=_deployment, kwargs={}, client_type="async"
    )  # type: ignore

    assert check_proxy(client=model_client._client)


@pytest.mark.parametrize(
    "model, base_model, llm_provider",
    [

@@ -1,16 +1,22 @@
# this tests if the router is initialized correctly
import sys, os, time
import traceback, asyncio
import asyncio
import os
import sys
import time
import traceback

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor

from dotenv import load_dotenv

import litellm
from litellm import Router
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from dotenv import load_dotenv

load_dotenv()

@@ -24,6 +30,7 @@ load_dotenv()
def test_init_clients():
    litellm.set_verbose = True
    import logging

    from litellm._logging import verbose_router_logger

    verbose_router_logger.setLevel(logging.DEBUG)

@@ -489,6 +496,7 @@ def test_init_clients_azure_command_r_plus():
    # For azure/command-r-plus we need to use openai.OpenAI because of how the Azure provider requires requests being sent
    litellm.set_verbose = True
    import logging

    from litellm._logging import verbose_router_logger

    verbose_router_logger.setLevel(logging.DEBUG)

@@ -585,3 +593,46 @@ async def test_text_completion_with_organization():

    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


def test_init_clients_async_mode():
    litellm.set_verbose = True
    import logging

    from litellm._logging import verbose_router_logger
    from litellm.types.router import RouterGeneralSettings

    verbose_router_logger.setLevel(logging.DEBUG)
    try:
        print("testing init 4 clients with diff timeouts")
        model_list = [
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                    "timeout": 0.01,
                    "stream_timeout": 0.000_001,
                    "max_retries": 7,
                },
            },
        ]
        router = Router(
            model_list=model_list,
            set_verbose=True,
            router_general_settings=RouterGeneralSettings(async_only_mode=True),
        )
        for elem in router.model_list:
            model_id = elem["model_info"]["id"]

            # sync clients not initialized in async_only_mode=True
            assert router.cache.get_cache(f"{model_id}_client") is None
            assert router.cache.get_cache(f"{model_id}_stream_client") is None

            # only async clients initialized in async_only_mode=True
            assert router.cache.get_cache(f"{model_id}_async_client") is not None
            assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

@@ -1,15 +1,22 @@
import sys, os, time
import traceback, asyncio
import asyncio
import os
import sys
import time
import traceback

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
from litellm import completion, stream_chunk_builder
import litellm
import os, dotenv
from openai import OpenAI
import os

import dotenv
import pytest
from openai import OpenAI

import litellm
from litellm import completion, stream_chunk_builder

dotenv.load_dotenv()

@@ -147,3 +154,45 @@ def test_stream_chunk_builder_litellm_tool_call_regular_message():

# test_stream_chunk_builder_litellm_tool_call_regular_message()


def test_stream_chunk_builder_litellm_usage_chunks():
    """
    Checks if stream_chunk_builder is able to correctly rebuild with given metadata from streaming chunks
    """
    messages = [
        {"role": "user", "content": "Tell me the funniest joke you know."},
        {
            "role": "assistant",
            "content": "Why did the chicken cross the road?\nYou will not guess this one I bet\n",
        },
        {"role": "user", "content": "I do not know, why?"},
        {"role": "assistant", "content": "uhhhh\n\n\nhmmmm.....\nthinking....\n"},
        {"role": "user", "content": "\nI am waiting...\n\n...\n"},
    ]
    # make a regular gemini call
    response = completion(
        model="gemini/gemini-1.5-flash",
        messages=messages,
    )

    usage: litellm.Usage = response.usage

    gemini_pt = usage.prompt_tokens

    # make a streaming gemini call
    response = completion(
        model="gemini/gemini-1.5-flash",
        messages=messages,
        stream=True,
        complete_response=True,
        stream_options={"include_usage": True},
    )

    usage: litellm.Usage = response.usage

    stream_rebuilt_pt = usage.prompt_tokens

    # assert prompt tokens are the same

    assert gemini_pt == stream_rebuilt_pt

@@ -12,6 +12,9 @@ from typing import Tuple
import pytest
from pydantic import BaseModel

import litellm.litellm_core_utils
import litellm.litellm_core_utils.litellm_logging

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

@@ -1078,7 +1081,6 @@ def test_vertex_ai_stream(provider):
        print(f"completion_response: {complete_response}")
        assert is_finished == True

        assert False
    except litellm.RateLimitError as e:
        pass
    except Exception as e:

@@ -3034,8 +3036,11 @@ def test_completion_claude_3_function_call_with_streaming():
        pytest.fail(f"Error occurred: {e}")


@pytest.mark.parametrize(
    "model", ["gemini/gemini-1.5-flash"]
)  # "claude-3-opus-20240229",
@pytest.mark.asyncio
async def test_acompletion_claude_3_function_call_with_streaming():
async def test_acompletion_claude_3_function_call_with_streaming(model):
    litellm.set_verbose = True
    tools = [
        {

@@ -3066,7 +3071,7 @@ async def test_acompletion_claude_3_function_call_with_streaming():
    try:
        # test without max tokens
        response = await acompletion(
            model="claude-3-opus-20240229",
            model=model,
            messages=messages,
            tools=tools,
            tool_choice="required",

@@ -3453,3 +3458,55 @@ def test_aamazing_unit_test_custom_stream_wrapper_n():
    assert (
        chunk_dict == chunks[idx]
    ), f"idx={idx} translated chunk = {chunk_dict} != openai chunk = {chunks[idx]}"


def test_unit_test_custom_stream_wrapper_function_call():
    """
    Test if model returns a tool call, the finish reason is correctly set to 'tool_calls'
    """
    from litellm.types.llms.openai import ChatCompletionDeltaChunk

    litellm.set_verbose = False
    delta: ChatCompletionDeltaChunk = {
        "content": None,
        "role": "assistant",
        "tool_calls": [
            {
                "function": {"arguments": '"}'},
                "type": "function",
                "index": 0,
            }
        ],
    }
    chunk = {
        "id": "chatcmpl-123",
        "object": "chat.completion.chunk",
        "created": 1694268190,
        "model": "gpt-3.5-turbo-0125",
        "system_fingerprint": "fp_44709d6fcb",
        "choices": [{"index": 0, "delta": delta, "finish_reason": "stop"}],
    }
    chunk = litellm.ModelResponse(**chunk, stream=True)

    completion_stream = ModelResponseIterator(model_response=chunk)

    response = litellm.CustomStreamWrapper(
        completion_stream=completion_stream,
        model="gpt-3.5-turbo",
        custom_llm_provider="cached_response",
        logging_obj=litellm.litellm_core_utils.litellm_logging.Logging(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hey"}],
            stream=True,
            call_type="completion",
            start_time=time.time(),
            litellm_call_id="12345",
            function_id="1245",
        ),
    )

    finish_reason: Optional[str] = None
    for chunk in response:
        if chunk.choices[0].finish_reason is not None:
            finish_reason = chunk.choices[0].finish_reason
    assert finish_reason == "tool_calls"

@@ -300,7 +300,7 @@ class ListBatchRequest(TypedDict, total=False):
    timeout: Optional[float]


class ChatCompletionToolCallFunctionChunk(TypedDict):
class ChatCompletionToolCallFunctionChunk(TypedDict, total=False):
    name: Optional[str]
    arguments: str

@@ -312,7 +312,7 @@ class ChatCompletionToolCallChunk(TypedDict):
    index: int


class ChatCompletionDeltaToolCallChunk(TypedDict):
class ChatCompletionDeltaToolCallChunk(TypedDict, total=False):
    id: str
    type: Literal["function"]
    function: ChatCompletionToolCallFunctionChunk

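A short sketch of what the total=False change above permits (the partial value is invented): streamed tool-call deltas often arrive with only some keys populated, and the relaxed TypedDicts let such partial chunks type-check.

# hypothetical example - a mid-stream delta carrying only "arguments", no "name"
partial: ChatCompletionToolCallFunctionChunk = {"arguments": '"}'}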
@@ -324,7 +324,12 @@ class DeploymentTypedDict(TypedDict):
    litellm_params: LiteLLMParamsTypedDict


SPECIAL_MODEL_INFO_PARAMS = ["input_cost_per_token", "output_cost_per_token"]
SPECIAL_MODEL_INFO_PARAMS = [
    "input_cost_per_token",
    "output_cost_per_token",
    "input_cost_per_character",
    "output_cost_per_character",
]


class Deployment(BaseModel):
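Hedged illustration (where these keys are consumed is assumed, and the values are invented): the two per-character entries added above let per-character cost overrides be treated as special model-info params alongside the existing per-token ones.

# hypothetical override dict - keys taken from SPECIAL_MODEL_INFO_PARAMS above
cost_overrides = {
    "input_cost_per_character": 0.0000002,
    "output_cost_per_character": 0.0000006,
}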
@@ -517,3 +522,9 @@ class CustomRoutingStrategyBase:

    """
    pass


class RouterGeneralSettings(BaseModel):
    async_only_mode: bool = Field(
        default=False
    )  # this will only initialize async clients. Good for memory utils

@@ -42,6 +42,8 @@ import httpx
import openai
import requests
import tiktoken
from httpx import Proxy
from httpx._utils import get_environment_proxies
from pydantic import BaseModel
from tokenizers import Tokenizer

@@ -2555,6 +2557,24 @@ def get_optional_params(
            message=f"Function calling is not supported by {custom_llm_provider}.",
        )

    if "tools" in non_default_params:
        tools = non_default_params["tools"]
        for (
            tool
        ) in (
            tools
        ):  # clean out 'additionalProperties = False'. Causes vertexai/gemini OpenAI API Schema errors - https://github.com/langchain-ai/langchainjs/issues/5240
            tool_function = tool.get("function", {})
            parameters = tool_function.get("parameters", None)
            if parameters is not None:
                new_parameters = copy.deepcopy(parameters)
                if (
                    "additionalProperties" in new_parameters
                    and new_parameters["additionalProperties"] is False
                ):
                    new_parameters.pop("additionalProperties", None)
                tool_function["parameters"] = new_parameters

    def _check_valid_arg(supported_params):
        verbose_logger.debug(
            f"\nLiteLLM completion() model= {model}; provider = {custom_llm_provider}"

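An illustration of the cleanup above (the tool definition is invented): a tool schema carrying "additionalProperties": False is deep-copied and stripped before the request reaches vertexai/gemini.

# hypothetical input tool
tool = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "additionalProperties": False,  # popped by the loop above
        },
    },
}
# after the loop runs, tool["function"]["parameters"] no longer contains "additionalProperties"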
@@ -4707,7 +4727,9 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
        )
    except Exception:
        raise Exception(
            "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
            "This model isn't mapped yet. model={}, custom_llm_provider={}. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json".format(
                model, custom_llm_provider
            )
        )

@@ -4893,6 +4915,34 @@ def get_provider_fields(custom_llm_provider: str) -> List[ProviderField]:
    return []


def create_proxy_transport_and_mounts():
    proxies = {
        key: None if url is None else Proxy(url=url)
        for key, url in get_environment_proxies().items()
    }

    sync_proxy_mounts = {}
    async_proxy_mounts = {}

    # Retrieve NO_PROXY environment variable
    no_proxy = os.getenv("NO_PROXY", None)
    no_proxy_urls = no_proxy.split(",") if no_proxy else []

    for key, proxy in proxies.items():
        if proxy is None:
            sync_proxy_mounts[key] = httpx.HTTPTransport()
            async_proxy_mounts[key] = httpx.AsyncHTTPTransport()
        else:
            sync_proxy_mounts[key] = httpx.HTTPTransport(proxy=proxy)
            async_proxy_mounts[key] = httpx.AsyncHTTPTransport(proxy=proxy)

    for url in no_proxy_urls:
        sync_proxy_mounts[url] = httpx.HTTPTransport()
        async_proxy_mounts[url] = httpx.AsyncHTTPTransport()

    return sync_proxy_mounts, async_proxy_mounts
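A minimal usage sketch (environment values assumed; the function lives in litellm's utils module per this diff): the returned mount dicts plug straight into httpx clients, so the proxy env vars apply to sync and async traffic alike.

# hypothetical setup - assumes HTTPS_PROXY / NO_PROXY are exported in the environment
sync_mounts, async_mounts = create_proxy_transport_and_mounts()
client = httpx.Client(mounts=sync_mounts)
aclient = httpx.AsyncClient(mounts=async_mounts)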

def validate_environment(model: Optional[str] = None) -> dict:
    """
    Checks if the environment variables are valid for the given model.

@@ -7519,7 +7569,7 @@ def exception_type(
        if original_exception.status_code == 400:
            exception_mapping_worked = True
            raise BadRequestError(
                message=f"{exception_provider} - {message}",
                message=f"{exception_provider} - {error_str}",
                llm_provider=custom_llm_provider,
                model=model,
                response=original_exception.response,

@@ -7528,7 +7578,7 @@ def exception_type(
        elif original_exception.status_code == 401:
            exception_mapping_worked = True
            raise AuthenticationError(
                message=f"AuthenticationError: {exception_provider} - {message}",
                message=f"AuthenticationError: {exception_provider} - {error_str}",
                llm_provider=custom_llm_provider,
                model=model,
                response=original_exception.response,

@@ -7537,7 +7587,7 @@ def exception_type(
        elif original_exception.status_code == 404:
            exception_mapping_worked = True
            raise NotFoundError(
                message=f"NotFoundError: {exception_provider} - {message}",
                message=f"NotFoundError: {exception_provider} - {error_str}",
                model=model,
                llm_provider=custom_llm_provider,
                response=original_exception.response,

@@ -7546,7 +7596,7 @@ def exception_type(
        elif original_exception.status_code == 408:
            exception_mapping_worked = True
            raise Timeout(
                message=f"Timeout Error: {exception_provider} - {message}",
                message=f"Timeout Error: {exception_provider} - {error_str}",
                model=model,
                llm_provider=custom_llm_provider,
                litellm_debug_info=extra_information,

@@ -7554,7 +7604,7 @@ def exception_type(
        elif original_exception.status_code == 422:
            exception_mapping_worked = True
            raise BadRequestError(
                message=f"BadRequestError: {exception_provider} - {message}",
                message=f"BadRequestError: {exception_provider} - {error_str}",
                model=model,
                llm_provider=custom_llm_provider,
                response=original_exception.response,

@@ -7563,7 +7613,7 @@ def exception_type(
        elif original_exception.status_code == 429:
            exception_mapping_worked = True
            raise RateLimitError(
                message=f"RateLimitError: {exception_provider} - {message}",
                message=f"RateLimitError: {exception_provider} - {error_str}",
                model=model,
                llm_provider=custom_llm_provider,
                response=original_exception.response,

@@ -7572,7 +7622,7 @@ def exception_type(
        elif original_exception.status_code == 503:
            exception_mapping_worked = True
            raise ServiceUnavailableError(
                message=f"ServiceUnavailableError: {exception_provider} - {message}",
                message=f"ServiceUnavailableError: {exception_provider} - {error_str}",
                model=model,
                llm_provider=custom_llm_provider,
                response=original_exception.response,

@@ -7581,7 +7631,7 @@ def exception_type(
        elif original_exception.status_code == 504:  # gateway timeout error
            exception_mapping_worked = True
            raise Timeout(
                message=f"Timeout Error: {exception_provider} - {message}",
                message=f"Timeout Error: {exception_provider} - {error_str}",
                model=model,
                llm_provider=custom_llm_provider,
                litellm_debug_info=extra_information,

@@ -7590,7 +7640,7 @@ def exception_type(
            exception_mapping_worked = True
            raise APIError(
                status_code=original_exception.status_code,
                message=f"APIError: {exception_provider} - {message}",
                message=f"APIError: {exception_provider} - {error_str}",
                llm_provider=custom_llm_provider,
                model=model,
                request=original_exception.request,

@@ -7599,7 +7649,7 @@ def exception_type(
        else:
            # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors
            raise APIConnectionError(
                message=f"APIConnectionError: {exception_provider} - {message}",
                message=f"APIConnectionError: {exception_provider} - {error_str}",
                llm_provider=custom_llm_provider,
                model=model,
                litellm_debug_info=extra_information,

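Caller-side sketch (model name and handling invented): because exception_type() re-raises provider errors as typed litellm exceptions, callers can branch on status-code semantics instead of parsing message strings.

# hypothetical consumer of the mapping above
try:
    litellm.completion(model="azure/my-deployment", messages=[{"role": "user", "content": "hi"}])
except litellm.RateLimitError:  # provider returned 429
    pass  # back off and retry
except litellm.Timeout:  # provider returned 408/504
    pass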
@@ -7950,6 +8000,7 @@ class CustomStreamWrapper:
            )
        self.messages = getattr(logging_obj, "messages", None)
        self.sent_stream_usage = False
        self.tool_call = False
        self.chunks: List = (
            []
        )  # keep track of the returned chunks - used for calculating the input/output tokens for stream options

@@ -9192,9 +9243,16 @@ class CustomStreamWrapper:
                "is_finished": True,
                "finish_reason": chunk.choices[0].finish_reason,
                "original_chunk": chunk,
                "tool_calls": (
                    chunk.choices[0].delta.tool_calls
                    if hasattr(chunk.choices[0].delta, "tool_calls")
                    else None
                ),
            }

            completion_obj["content"] = response_obj["text"]
            if response_obj["tool_calls"] is not None:
                completion_obj["tool_calls"] = response_obj["tool_calls"]
            print_verbose(f"completion obj content: {completion_obj['content']}")
            if hasattr(chunk, "id"):
                model_response.id = chunk.id

@@ -9352,6 +9410,10 @@ class CustomStreamWrapper:
            )
            print_verbose(f"self.sent_first_chunk: {self.sent_first_chunk}")

            ## CHECK FOR TOOL USE
            if "tool_calls" in completion_obj and len(completion_obj["tool_calls"]) > 0:
                self.tool_call = True

            ## RETURN ARG
            if (
                "content" in completion_obj

@@ -9530,6 +9592,12 @@ class CustomStreamWrapper:
            )
        else:
            model_response.choices[0].finish_reason = "stop"

        ## if tool use
        if (
            model_response.choices[0].finish_reason == "stop" and self.tool_call
        ):  # don't overwrite for other - potential error finish reasons
            model_response.choices[0].finish_reason = "tool_calls"
        return model_response

    def __next__(self):

@@ -9583,7 +9651,7 @@ class CustomStreamWrapper:
            return response

        except StopIteration:
            if self.sent_last_chunk == True:
            if self.sent_last_chunk is True:
                if (
                    self.sent_stream_usage == False
                    and self.stream_options is not None

@@ -2022,10 +2022,10 @@
        "max_tokens": 8192,
        "max_input_tokens": 2097152,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0.00000035,
        "input_cost_per_token_above_128k_tokens": 0.0000007,
        "output_cost_per_token": 0.00000105,
        "output_cost_per_token_above_128k_tokens": 0.0000021,
        "input_cost_per_token": 0.0000035,
        "input_cost_per_token_above_128k_tokens": 0.000007,
        "output_cost_per_token": 0.0000105,
        "output_cost_per_token_above_128k_tokens": 0.000021,
        "litellm_provider": "gemini",
        "mode": "chat",
        "supports_system_messages": true,

@@ -2033,16 +2033,16 @@
        "supports_vision": true,
        "supports_tool_choice": true,
        "supports_response_schema": true,
        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
        "source": "https://ai.google.dev/pricing"
    },
    "gemini/gemini-1.5-pro-latest": {
        "max_tokens": 8192,
        "max_input_tokens": 1048576,
        "max_output_tokens": 8192,
        "input_cost_per_token": 0.00000035,
        "input_cost_per_token_above_128k_tokens": 0.0000007,
        "input_cost_per_token": 0.0000035,
        "input_cost_per_token_above_128k_tokens": 0.000007,
        "output_cost_per_token": 0.00000105,
        "output_cost_per_token_above_128k_tokens": 0.0000021,
        "output_cost_per_token_above_128k_tokens": 0.000021,
        "litellm_provider": "gemini",
        "mode": "chat",
        "supports_system_messages": true,

@@ -2050,7 +2050,7 @@
        "supports_vision": true,
        "supports_tool_choice": true,
        "supports_response_schema": true,
        "source": "https://ai.google.dev/models/gemini"
        "source": "https://ai.google.dev/pricing"
    },
    "gemini/gemini-pro-vision": {
        "max_tokens": 2048,

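Quick sanity arithmetic on the corrected rates above (the token count is invented): the old values were off by a factor of ten; at the fixed rate, Gemini 1.5 Pro input under the 128k threshold works out to $3.50 per million tokens.

# hypothetical cost check using the corrected per-token price from this diff
input_tokens = 1_000_000
print(input_tokens * 0.0000035)  # -> 3.5 (USD)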
32 poetry.lock generated
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
 
 [[package]]
 name = "aiohttp"
@@ -2115,6 +2115,32 @@ dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pyte
 docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"]
 tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"]
 
+[[package]]
+name = "pynacl"
+version = "1.5.0"
+description = "Python binding to the Networking and Cryptography (NaCl) library"
+optional = true
+python-versions = ">=3.6"
+files = [
+    {file = "PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1"},
+    {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92"},
+    {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394"},
+    {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d"},
+    {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858"},
+    {file = "PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b"},
+    {file = "PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff"},
+    {file = "PyNaCl-1.5.0-cp36-abi3-win32.whl", hash = "sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543"},
+    {file = "PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93"},
+    {file = "PyNaCl-1.5.0.tar.gz", hash = "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba"},
+]
+
+[package.dependencies]
+cffi = ">=1.4.1"
+
+[package.extras]
+docs = ["sphinx (>=1.6.5)", "sphinx-rtd-theme"]
+tests = ["hypothesis (>=3.27.0)", "pytest (>=3.2.1,!=3.3.0)"]
+
 [[package]]
 name = "pytest"
 version = "7.4.4"
@@ -3381,10 +3407,10 @@ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.link
 testing = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
 
 [extras]
-extra-proxy = ["azure-identity", "azure-keyvault-secrets", "google-cloud-kms", "prisma", "resend"]
+extra-proxy = ["azure-identity", "azure-keyvault-secrets", "google-cloud-kms", "prisma", "pynacl", "resend"]
 proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi-sso", "gunicorn", "orjson", "python-multipart", "pyyaml", "rq", "uvicorn"]
 
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<4.0, !=3.9.7"
-content-hash = "925b604bed171282827c8b046191ad858ce37fa3b011a393345382f8ff86e68c"
+content-hash = "6025cae7749c94755d17362f77adf76f834863dba2126501cd3111d53a9c5779"
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.41.8"
+version = "1.41.11"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -46,6 +46,7 @@ azure-identity = {version = "^1.15.0", optional = true}
 azure-keyvault-secrets = {version = "^4.8.0", optional = true}
 google-cloud-kms = {version = "^2.21.3", optional = true}
 resend = {version = "^0.8.0", optional = true}
+pynacl = {version = "^1.5.0", optional = true}
 
 [tool.poetry.extras]
 proxy = [
@@ -90,7 +91,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.41.8"
+version = "1.41.11"
 version_files = [
     "pyproject.toml:^version"
 ]
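`pynacl` is added as an optional dependency and folded into the `extra-proxy` extra in both the lockfile and `pyproject.toml`. A hedged sketch of the primitive PyNaCl provides (authenticated symmetric encryption via `SecretBox`); how the proxy actually uses it is not shown in this diff:

```python
# PyNaCl ships authenticated symmetric encryption via SecretBox.
# This only demonstrates the library added to the extra-proxy extra;
# the proxy's actual usage is outside this diff.
import nacl.secret
import nacl.utils

key = nacl.utils.random(nacl.secret.SecretBox.KEY_SIZE)  # 32 random bytes
box = nacl.secret.SecretBox(key)

ciphertext = box.encrypt(b"sensitive payload")  # nonce is prepended automatically
assert box.decrypt(ciphertext) == b"sensitive payload"
```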
@@ -42,7 +42,7 @@ tokenizers==0.14.0 # for calculating usage
 click==8.1.7 # for proxy cli
 jinja2==3.1.4 # for prompt templates
 certifi==2024.7.4 # [TODO] clean up
-aiohttp==3.9.0 # for network calls
+aiohttp==3.9.4 # for network calls
 aioboto3==12.3.0 # for async sagemaker calls
 tenacity==8.2.3 # for retrying requests, when litellm.num_retries set
 pydantic==2.7.1 # proxy + openai req.
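The `aiohttp` pin moves from 3.9.0 to 3.9.4, a patch-level bump within the 3.9.x line. One quick way to confirm the installed version matches the pin (assuming the package is installed in the current environment):

```python
# Confirm the installed aiohttp matches the new pin.
from importlib.metadata import version

installed = version("aiohttp")
assert installed == "3.9.4", f"expected aiohttp==3.9.4, found {installed}"
```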
File diff suppressed because one or more lines are too long (8 files)
@@ -1 +1 @@
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/0f6908625573deae.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48951,[\"665\",\"static/chunks/3014691f-589a5f4865c3822f.js\",\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-6a03368053f9d26d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-83a8bdddfe32b5d9.js\",\"777\",\"static/chunks/777-f76791513e294b30.js\",\"931\",\"static/chunks/app/page-da7d95729f2529b5.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/0f6908625573deae.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"0gt3_bF2KkdKeE61mic4M\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
<!DOCTYPE html><html id="__next_error__"><head><meta charSet="utf-8"/><meta name="viewport" content="width=device-width, initial-scale=1"/><link rel="preload" as="script" fetchPriority="low" href="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin=""/><script src="/ui/_next/static/chunks/fd9d1056-f960ab1e6d32b002.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/69-04708d7d4a17c1ee.js" async="" crossorigin=""></script><script src="/ui/_next/static/chunks/main-app-9b4fb13a7db53edf.js" async="" crossorigin=""></script><title>LiteLLM Dashboard</title><meta name="description" content="LiteLLM Proxy Admin UI"/><link rel="icon" href="/ui/favicon.ico" type="image/x-icon" sizes="16x16"/><meta name="next-size-adjust"/><script src="/ui/_next/static/chunks/polyfills-c67a75d1b6f99dc8.js" crossorigin="" noModule=""></script></head><body><script src="/ui/_next/static/chunks/webpack-a8fd417ac0c6c8a5.js" crossorigin="" async=""></script><script>(self.__next_f=self.__next_f||[]).push([0]);self.__next_f.push([2,null])</script><script>self.__next_f.push([1,"1:HL[\"/ui/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2\",\"font\",{\"crossOrigin\":\"\",\"type\":\"font/woff2\"}]\n2:HL[\"/ui/_next/static/css/0f6908625573deae.css\",\"style\",{\"crossOrigin\":\"\"}]\n0:\"$L3\"\n"])</script><script>self.__next_f.push([1,"4:I[47690,[],\"\"]\n6:I[77831,[],\"\"]\n7:I[48951,[\"665\",\"static/chunks/3014691f-589a5f4865c3822f.js\",\"936\",\"static/chunks/2f6dbc85-052c4579f80d66ae.js\",\"294\",\"static/chunks/294-0e35509d5ca95267.js\",\"131\",\"static/chunks/131-19b05e5ce40fa85d.js\",\"684\",\"static/chunks/684-bb2d2f93d92acb0b.js\",\"759\",\"static/chunks/759-d7572f2a46f911d5.js\",\"777\",\"static/chunks/777-906d7dd6a5bf7be4.js\",\"931\",\"static/chunks/app/page-567f85145e7f0f35.js\"],\"\"]\n8:I[5613,[],\"\"]\n9:I[31778,[],\"\"]\nb:I[48955,[],\"\"]\nc:[]\n"])</script><script>self.__next_f.push([1,"3:[[[\"$\",\"link\",\"0\",{\"rel\":\"stylesheet\",\"href\":\"/ui/_next/static/css/0f6908625573deae.css\",\"precedence\":\"next\",\"crossOrigin\":\"\"}]],[\"$\",\"$L4\",null,{\"buildId\":\"RDLpeUaSstfmeQiKITNBo\",\"assetPrefix\":\"/ui\",\"initialCanonicalUrl\":\"/\",\"initialTree\":[\"\",{\"children\":[\"__PAGE__\",{}]},\"$undefined\",\"$undefined\",true],\"initialSeedData\":[\"\",{\"children\":[\"__PAGE__\",{},[\"$L5\",[\"$\",\"$L6\",null,{\"propsForComponent\":{\"params\":{}},\"Component\":\"$7\",\"isStaticGeneration\":true}],null]]},[null,[\"$\",\"html\",null,{\"lang\":\"en\",\"children\":[\"$\",\"body\",null,{\"className\":\"__className_12bbc4\",\"children\":[\"$\",\"$L8\",null,{\"parallelRouterKey\":\"children\",\"segmentPath\":[\"children\"],\"loading\":\"$undefined\",\"loadingStyles\":\"$undefined\",\"loadingScripts\":\"$undefined\",\"hasLoading\":false,\"error\":\"$undefined\",\"errorStyles\":\"$undefined\",\"errorScripts\":\"$undefined\",\"template\":[\"$\",\"$L9\",null,{}],\"templateStyles\":\"$undefined\",\"templateScripts\":\"$undefined\",\"notFound\":[[\"$\",\"title\",null,{\"children\":\"404: This page could not be found.\"}],[\"$\",\"div\",null,{\"style\":{\"fontFamily\":\"system-ui,\\\"Segoe UI\\\",Roboto,Helvetica,Arial,sans-serif,\\\"Apple Color Emoji\\\",\\\"Segoe UI 
Emoji\\\"\",\"height\":\"100vh\",\"textAlign\":\"center\",\"display\":\"flex\",\"flexDirection\":\"column\",\"alignItems\":\"center\",\"justifyContent\":\"center\"},\"children\":[\"$\",\"div\",null,{\"children\":[[\"$\",\"style\",null,{\"dangerouslySetInnerHTML\":{\"__html\":\"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}\"}}],[\"$\",\"h1\",null,{\"className\":\"next-error-h1\",\"style\":{\"display\":\"inline-block\",\"margin\":\"0 20px 0 0\",\"padding\":\"0 23px 0 0\",\"fontSize\":24,\"fontWeight\":500,\"verticalAlign\":\"top\",\"lineHeight\":\"49px\"},\"children\":\"404\"}],[\"$\",\"div\",null,{\"style\":{\"display\":\"inline-block\"},\"children\":[\"$\",\"h2\",null,{\"style\":{\"fontSize\":14,\"fontWeight\":400,\"lineHeight\":\"49px\",\"margin\":0},\"children\":\"This page could not be found.\"}]}]]}]}]],\"notFoundStyles\":[],\"styles\":null}]}]}],null]],\"initialHead\":[false,\"$La\"],\"globalErrorComponent\":\"$b\",\"missingSlots\":\"$Wc\"}]]\n"])</script><script>self.__next_f.push([1,"a:[[\"$\",\"meta\",\"0\",{\"name\":\"viewport\",\"content\":\"width=device-width, initial-scale=1\"}],[\"$\",\"meta\",\"1\",{\"charSet\":\"utf-8\"}],[\"$\",\"title\",\"2\",{\"children\":\"LiteLLM Dashboard\"}],[\"$\",\"meta\",\"3\",{\"name\":\"description\",\"content\":\"LiteLLM Proxy Admin UI\"}],[\"$\",\"link\",\"4\",{\"rel\":\"icon\",\"href\":\"/ui/favicon.ico\",\"type\":\"image/x-icon\",\"sizes\":\"16x16\"}],[\"$\",\"meta\",\"5\",{\"name\":\"next-size-adjust\"}]]\n5:null\n"])</script><script>self.__next_f.push([1,""])</script></body></html>
@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[48951,["665","static/chunks/3014691f-589a5f4865c3822f.js","936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-f76791513e294b30.js","931","static/chunks/app/page-da7d95729f2529b5.js"],""]
3:I[48951,["665","static/chunks/3014691f-589a5f4865c3822f.js","936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-19b05e5ce40fa85d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-d7572f2a46f911d5.js","777","static/chunks/777-906d7dd6a5bf7be4.js","931","static/chunks/app/page-567f85145e7f0f35.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
File diff suppressed because one or more lines are too long
@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-f76791513e294b30.js","418","static/chunks/app/model_hub/page-ba7819b59161aa64.js"],""]
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-19b05e5ce40fa85d.js","777","static/chunks/777-906d7dd6a5bf7be4.js","418","static/chunks/app/model_hub/page-ba7819b59161aa64.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
File diff suppressed because one or more lines are too long
@@ -1,7 +1,7 @@
2:I[77831,[],""]
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-f76791513e294b30.js","461","static/chunks/app/onboarding/page-1ed08595d570934e.js"],""]
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-906d7dd6a5bf7be4.js","461","static/chunks/app/onboarding/page-1ed08595d570934e.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
@@ -743,7 +743,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
   }
 
   const fetchModelMap = async () => {
-    const data = await modelCostMap();
+    const data = await modelCostMap(accessToken);
     console.log(`received model cost map data: ${Object.keys(data)}`);
     setModelMap(data);
   };
@@ -12,11 +12,19 @@ export interface Model {
   model_info: Object | null;
 }
 
-export const modelCostMap = async () => {
+export const modelCostMap = async (
+  accessToken: string,
+) => {
   try {
     const url = proxyBaseUrl ? `${proxyBaseUrl}/get/litellm_model_cost_map` : `/get/litellm_model_cost_map`;
     const response = await fetch(
-      url
+      url, {
+        method: "GET",
+        headers: {
+          Authorization: `Bearer ${accessToken}`,
+          "Content-Type": "application/json",
+        },
+      }
     );
     const jsonData = await response.json();
     console.log(`received litellm model cost data: ${jsonData}`);
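`modelCostMap` now forwards the dashboard's bearer token, since `/get/litellm_model_cost_map` is served by the proxy and may sit behind auth. An equivalent request from Python, with the proxy URL and key as placeholders:

```python
# Equivalent of the updated modelCostMap() call, from Python.
# PROXY_BASE_URL and the sk-1234 key are placeholders.
import requests

PROXY_BASE_URL = "http://localhost:4000"
resp = requests.get(
    f"{PROXY_BASE_URL}/get/litellm_model_cost_map",
    headers={
        "Authorization": "Bearer sk-1234",
        "Content-Type": "application/json",
    },
)
resp.raise_for_status()
cost_map = resp.json()
print(f"{len(cost_map)} models in cost map")
```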
@@ -693,6 +701,9 @@ export const claimOnboardingToken = async (
     throw error;
   }
 };
+let ModelListerrorShown = false;
+let errorTimer: NodeJS.Timeout | null = null;
 
 export const modelInfoCall = async (
   accessToken: String,
   userID: String,
@@ -714,8 +725,21 @@ export const modelInfoCall = async (
   });
 
   if (!response.ok) {
-    const errorData = await response.text();
-    message.error(errorData, 10);
+    let errorData = await response.text();
+    errorData += `error shown=${ModelListerrorShown}`
+    if (!ModelListerrorShown) {
+      if (errorData.includes("No model list passed")) {
+        errorData = "No Models Exist. Click Add Model to get started.";
+      }
+      message.info(errorData, 10);
+      ModelListerrorShown = true;
+
+      if (errorTimer) clearTimeout(errorTimer);
+      errorTimer = setTimeout(() => {
+        ModelListerrorShown = false;
+      }, 10000);
+    }
+
     throw new Error("Network response was not ok");
   }
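The hunk above debounces the model-list error toast: `message.info` fires at most once, then a 10-second timer re-arms `ModelListerrorShown`. The same pattern, sketched in Python for clarity (the 10-second window comes from the diff; the threading details are illustrative):

```python
# Time-based dedup of a repeated notification, mirroring the
# ModelListerrorShown / errorTimer pattern above.
import threading

_error_shown = False

def _rearm() -> None:
    global _error_shown
    _error_shown = False

def notify_once(text: str, window_seconds: float = 10.0) -> None:
    """Show `text` at most once per `window_seconds`."""
    global _error_shown
    if _error_shown:
        return
    _error_shown = True
    print(text)  # stand-in for the UI toast (message.info)
    timer = threading.Timer(window_seconds, _rearm)
    timer.daemon = True
    timer.start()
```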
@@ -750,7 +774,6 @@ export const modelHubCall = async (accessToken: String) => {
 
   if (!response.ok) {
     const errorData = await response.text();
-    message.error(errorData, 10);
     throw new Error("Network response was not ok");
   }
 
@@ -32,7 +32,6 @@ import {
   allTagNamesCall,
   modelMetricsCall,
   modelAvailableCall,
   modelInfoCall,
   adminspendByProvider,
   adminGlobalActivity,
   adminGlobalActivityPerModel,