Merge branch 'BerriAI:main' into main

Commit d54d4b6734 by Simon S. Viloria, 2024-07-07 18:00:11 +02:00, committed by GitHub
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)
81 changed files with 2022 additions and 1231 deletions

View file

@@ -289,7 +289,8 @@ jobs:
               repo: context.repo.repo,
               release_id: process.env.RELEASE_ID,
             });
-            return response.data.body;
+            const formattedBody = JSON.stringify(response.data.body).slice(1, -1);
+            return formattedBody;
           } catch (error) {
             core.setFailed(error.message);
           }
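
The escaping added above matters because the step that follows splices the release-notes body into a hand-written JSON payload: raw newlines and quotes in GitHub release notes would break it. A minimal Python sketch of the same trick (the sample string is illustrative):

```python
import json

release_notes = 'Fixes:\n- bug A\n- "quoted" note'

# json.dumps escapes newlines/quotes and adds surrounding quotes;
# [1:-1] strips those quotes, mirroring JSON.stringify(...).slice(1, -1)
escaped = json.dumps(release_notes)[1:-1]

payload = '{"description": "%s"}' % escaped
print(json.loads(payload))  # parses cleanly; the raw string would not
```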
@@ -302,14 +303,15 @@ jobs:
           RELEASE_NOTES: ${{ steps.release-notes.outputs.result }}
         run: |
           curl -H "Content-Type: application/json" -X POST -d '{
-            "content": "New LiteLLM release ${{ env.RELEASE_TAG }}",
+            "content": "New LiteLLM release '"${RELEASE_TAG}"'",
             "username": "Release Changelog",
             "avatar_url": "https://cdn.discordapp.com/avatars/487431320314576937/bd64361e4ba6313d561d54e78c9e7171.png",
             "embeds": [
               {
-                "title": "Changelog for LiteLLM ${{ env.RELEASE_TAG }}",
-                "description": "${{ env.RELEASE_NOTES }}",
+                "title": "Changelog for LiteLLM '"${RELEASE_TAG}"'",
+                "description": "'"${RELEASE_NOTES}"'",
                 "color": 2105893
               }
             ]
           }' $WEBHOOK_URL

View file

@@ -25,6 +25,10 @@ repos:
         exclude: ^litellm/tests/|^litellm/proxy/tests/
         additional_dependencies: [flake8-print]
         files: litellm/.*\.py
+- repo: https://github.com/python-poetry/poetry
+  rev: 1.8.0
+  hooks:
+  - id: poetry-check
 - repo: local
   hooks:
   - id: check-files-match

View file

@@ -151,12 +151,9 @@ Navigate to the Usage Tab on the LiteLLM UI (found on https://your-proxy-endpoin
 </Tabs>

 ## ✨ (Enterprise) API Endpoints to get Spend

-#### Getting Spend Reports - To Charge Other Teams, Customers
-Use the `/global/spend/report` endpoint to get daily spend report per
-- Team
-- Customer [this is `user` passed to `/chat/completions` request](#how-to-track-spend-with-litellm)
-- [LiteLLM API key](virtual_keys.md)
+#### Getting Spend Reports - To Charge Other Teams, Customers, Users
+Use the `/global/spend/report` endpoint to get spend reports

 <Tabs>
@@ -285,6 +282,16 @@ Output from script
 <TabItem value="per customer" label="Spend Per Customer">

+:::info
+
+Customer This is the value of `user_id` passed when calling [`/key/generate`](https://litellm-api.up.railway.app/#/key%20management/generate_key_fn_key_generate_post)
+[this is `user` passed to `/chat/completions` request](#how-to-track-spend-with-litellm)
+- [LiteLLM API key](virtual_keys.md)
+
+:::

 ##### Example Request

 👉 Key Change: Specify `group_by=customer`
@@ -341,14 +348,14 @@ curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end
 </TabItem>

-<TabItem value="per key" label="Spend Per API Key">
+<TabItem value="per key" label="Spend for Specific API Key">

-👉 Key Change: Specify `group_by=api_key`
+👉 Key Change: Specify `api_key=sk-1234`

 ```shell
-curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30&group_by=api_key' \
+curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-06-30&api_key=sk-1234' \
   -H 'Authorization: Bearer sk-1234'
 ```
@@ -357,32 +364,18 @@ curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end
 ```shell
 [
-    {
-        "api_key": "ad64768847d05d978d62f623d872bff0f9616cc14b9c1e651c84d14fe3b9f539",
-        "total_cost": 0.0002157,
-        "total_input_tokens": 45.0,
-        "total_output_tokens": 1375.0,
-        "model_details": [
-            {
-                "model": "gpt-3.5-turbo",
-                "total_cost": 0.0001095,
-                "total_input_tokens": 9,
-                "total_output_tokens": 70
-            },
-            {
-                "model": "llama3-8b-8192",
-                "total_cost": 0.0001062,
-                "total_input_tokens": 36,
-                "total_output_tokens": 1305
-            }
-        ]
-    },
     {
         "api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
-        "total_cost": 0.00012924,
+        "total_cost": 0.3201286305151999,
         "total_input_tokens": 36.0,
         "total_output_tokens": 1593.0,
         "model_details": [
+            {
+                "model": "dall-e-3",
+                "total_cost": 0.31999939051519993,
+                "total_input_tokens": 0,
+                "total_output_tokens": 0
+            },
             {
                 "model": "llama3-8b-8192",
                 "total_cost": 0.00012924,
@@ -396,6 +389,87 @@ curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end
 </TabItem>

+<TabItem value="per user" label="Spend for Internal User (Key Owner)">
+
+:::info
+
+Internal User (Key Owner): This is the value of `user_id` passed when calling [`/key/generate`](https://litellm-api.up.railway.app/#/key%20management/generate_key_fn_key_generate_post)
+
+:::
+
+👉 Key Change: Specify `internal_user_id=ishaan`
+
+```shell
+curl -X GET 'http://localhost:4000/global/spend/report?start_date=2024-04-01&end_date=2024-12-30&internal_user_id=ishaan' \
+  -H 'Authorization: Bearer sk-1234'
+```
+
+##### Example Response
+
+```shell
+[
+    {
+        "api_key": "88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",
+        "total_cost": 0.00013132,
+        "total_input_tokens": 105.0,
+        "total_output_tokens": 872.0,
+        "model_details": [
+            {
+                "model": "gpt-3.5-turbo-instruct",
+                "total_cost": 5.85e-05,
+                "total_input_tokens": 15,
+                "total_output_tokens": 18
+            },
+            {
+                "model": "llama3-8b-8192",
+                "total_cost": 7.282000000000001e-05,
+                "total_input_tokens": 90,
+                "total_output_tokens": 854
+            }
+        ]
+    },
+    {
+        "api_key": "151e85e46ab8c9c7fad090793e3fe87940213f6ae665b543ca633b0b85ba6dc6",
+        "total_cost": 5.2699999999999993e-05,
+        "total_input_tokens": 26.0,
+        "total_output_tokens": 27.0,
+        "model_details": [
+            {
+                "model": "gpt-3.5-turbo",
+                "total_cost": 5.2499999999999995e-05,
+                "total_input_tokens": 24,
+                "total_output_tokens": 27
+            },
+            {
+                "model": "text-embedding-ada-002",
+                "total_cost": 2e-07,
+                "total_input_tokens": 2,
+                "total_output_tokens": 0
+            }
+        ]
+    },
+    {
+        "api_key": "60cb83a2dcbf13531bd27a25f83546ecdb25a1a6deebe62d007999dc00e1e32a",
+        "total_cost": 9.42e-06,
+        "total_input_tokens": 30.0,
+        "total_output_tokens": 99.0,
+        "model_details": [
+            {
+                "model": "llama3-8b-8192",
+                "total_cost": 9.42e-06,
+                "total_input_tokens": 30,
+                "total_output_tokens": 99
+            }
+        ]
+    }
+]
+```
+
+</TabItem>
 </Tabs>
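
The report endpoints above are plain REST calls, so any HTTP client works. A minimal Python sketch using `requests` (proxy URL and master key taken from the curl examples; not an official client):

```python
import requests

# Query the proxy's spend report, grouped per customer
# (swap the params for api_key=... or internal_user_id=... as shown above).
response = requests.get(
    "http://localhost:4000/global/spend/report",
    params={
        "start_date": "2024-04-01",
        "end_date": "2024-06-30",
        "group_by": "customer",
    },
    headers={"Authorization": "Bearer sk-1234"},
)
response.raise_for_status()

for row in response.json():
    print(row)
```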
 #### Allowing Non-Proxy Admins to access `/spend` endpoints

View file

@@ -1120,12 +1120,14 @@ This is a beta feature, and subject to changes.
 USE_AWS_KMS="True"
 ```

-**Step 2.** Add `aws_kms/` to encrypted keys in env
+**Step 2.** Add `LITELLM_SECRET_AWS_KMS_` to encrypted keys in env

 ```env
-DATABASE_URL="aws_kms/AQICAH.."
+LITELLM_SECRET_AWS_KMS_DATABASE_URL="AQICAH.."
 ```

+LiteLLM will find this and use the decrypted `DATABASE_URL="postgres://.."` value in runtime.
+
 **Step 3.** Start proxy

 ```
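
For intuition about Step 2, a hedged sketch of the decryption flow the docs describe: decrypt each `LITELLM_SECRET_AWS_KMS_*` value with AWS KMS and re-export it under the un-prefixed name. This is illustrative, not LiteLLM's actual implementation; the region lookup is an assumption:

```python
import base64
import os

import boto3

kms = boto3.client("kms", region_name=os.getenv("AWS_REGION_NAME"))  # region env var: assumed

PREFIX = "LITELLM_SECRET_AWS_KMS_"
for name, value in dict(os.environ).items():
    if not name.startswith(PREFIX):
        continue
    # decrypt the base64-encoded ciphertext and re-export it, e.g.
    # LITELLM_SECRET_AWS_KMS_DATABASE_URL -> DATABASE_URL
    plaintext = kms.decrypt(CiphertextBlob=base64.b64decode(value))["Plaintext"]
    os.environ[name[len(PREFIX):]] = plaintext.decode("utf-8")
```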

View file

@@ -1,7 +1,7 @@
 import Tabs from '@theme/Tabs';
 import TabItem from '@theme/TabItem';

-# Use with Langchain, OpenAI SDK, LlamaIndex, Curl
+# Use with Langchain, OpenAI SDK, LlamaIndex, Instructor, Curl

 :::info
@@ -173,6 +173,37 @@ console.log(message);
 ```

+</TabItem>
+<TabItem value="instructor" label="Instructor">
+
+```python
+from openai import OpenAI
+import instructor
+from pydantic import BaseModel
+
+my_proxy_api_key = "" # e.g. sk-1234
+my_proxy_base_url = "" # e.g. http://0.0.0.0:4000
+
+# This enables response_model keyword
+# from client.chat.completions.create
+client = instructor.from_openai(OpenAI(api_key=my_proxy_api_key, base_url=my_proxy_base_url))
+
+class UserDetail(BaseModel):
+    name: str
+    age: int
+
+user = client.chat.completions.create(
+    model="gemini-pro-flash",
+    response_model=UserDetail,
+    messages=[
+        {"role": "user", "content": "Extract Jason is 25 years old"},
+    ]
+)
+
+assert isinstance(user, UserDetail)
+assert user.name == "Jason"
+assert user.age == 25
+```
+
 </TabItem>
 </Tabs>
@@ -205,6 +236,97 @@ console.log(message);
 ```

+### Function Calling
+
+Here's some examples of doing function calling with the proxy.
+
+You can use the proxy for function calling with **any** openai-compatible project.
+
+<Tabs>
+<TabItem value="curl" label="curl">
+
+```bash
+curl http://0.0.0.0:4000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer $OPTIONAL_YOUR_PROXY_KEY" \
+  -d '{
+    "model": "gpt-4-turbo",
+    "messages": [
+      {
+        "role": "user",
+        "content": "What'\''s the weather like in Boston today?"
+      }
+    ],
+    "tools": [
+      {
+        "type": "function",
+        "function": {
+          "name": "get_current_weather",
+          "description": "Get the current weather in a given location",
+          "parameters": {
+            "type": "object",
+            "properties": {
+              "location": {
+                "type": "string",
+                "description": "The city and state, e.g. San Francisco, CA"
+              },
+              "unit": {
+                "type": "string",
+                "enum": ["celsius", "fahrenheit"]
+              }
+            },
+            "required": ["location"]
+          }
+        }
+      }
+    ],
+    "tool_choice": "auto"
+  }'
+```
+
+</TabItem>
+<TabItem value="sdk" label="SDK">
+
+```python
+from openai import OpenAI
+client = OpenAI(
+    api_key="sk-1234", # [OPTIONAL] set if you set one on proxy, else set ""
+    base_url="http://0.0.0.0:4000",
+)
+
+tools = [
+  {
+    "type": "function",
+    "function": {
+      "name": "get_current_weather",
+      "description": "Get the current weather in a given location",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "location": {
+            "type": "string",
+            "description": "The city and state, e.g. San Francisco, CA",
+          },
+          "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+        },
+        "required": ["location"],
+      },
+    }
+  }
+]
+messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
+completion = client.chat.completions.create(
+    model="gpt-4o", # use 'model_name' from config.yaml
+    messages=messages,
+    tools=tools,
+    tool_choice="auto"
+)
+
+print(completion)
+```
+
+</TabItem>
+</Tabs>
+
 ## `/embeddings`

 ### Request Format

View file

@@ -248,9 +248,15 @@ class RedisCache(BaseCache):
             # asyncio.get_running_loop().create_task(self.ping())
             result = asyncio.get_running_loop().create_task(self.ping())
         except Exception as e:
-            verbose_logger.error(
-                "Error connecting to Async Redis client", extra={"error": str(e)}
-            )
+            if "no running event loop" in str(e):
+                verbose_logger.debug(
+                    "Ignoring async redis ping. No running event loop."
+                )
+            else:
+                verbose_logger.error(
+                    "Error connecting to Async Redis client - {}".format(str(e)),
+                    extra={"error": str(e)},
+                )

         ### SYNC HEALTH PING ###
         try:
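
Background for the new branch: `asyncio.get_running_loop()` raises `RuntimeError("no running event loop")` when `RedisCache` is constructed outside async code, so that case is expected noise rather than a real connection failure, hence debug instead of error. A quick standalone demonstration:

```python
import asyncio

try:
    asyncio.get_running_loop()
except RuntimeError as e:
    # outside a running event loop this raises "no running event loop"
    print("no running event loop" in str(e))  # True
```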

View file

@@ -4,6 +4,8 @@ import time
 import traceback
 from typing import List, Literal, Optional, Tuple, Union

+from pydantic import BaseModel
+
 import litellm
 import litellm._logging
 from litellm import verbose_logger
@@ -13,6 +15,10 @@ from litellm.litellm_core_utils.llm_cost_calc.google import (
 from litellm.litellm_core_utils.llm_cost_calc.google import (
     cost_per_token as google_cost_per_token,
 )
+from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_character
+from litellm.types.llms.openai import HttpxBinaryResponseContent
+from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
 from litellm.utils import (
     CallTypes,
     CostPerToken,
@@ -62,6 +68,23 @@ def cost_per_token(
     ### CUSTOM PRICING ###
     custom_cost_per_token: Optional[CostPerToken] = None,
     custom_cost_per_second: Optional[float] = None,
+    ### CALL TYPE ###
+    call_type: Literal[
+        "embedding",
+        "aembedding",
+        "completion",
+        "acompletion",
+        "atext_completion",
+        "text_completion",
+        "image_generation",
+        "aimage_generation",
+        "moderation",
+        "amoderation",
+        "atranscription",
+        "transcription",
+        "aspeech",
+        "speech",
+    ] = "completion",
 ) -> Tuple[float, float]:
     """
     Calculates the cost per token for a given model, prompt tokens, and completion tokens.
@@ -76,6 +99,7 @@ def cost_per_token(
         custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list)
         custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
         custom_cost_per_second: Optional[float]: the cost per second for the llm api call.
+        call_type: Optional[str]: the call type

     Returns:
         tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively.
@@ -159,6 +183,27 @@ def cost_per_token(
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
         )
+    elif call_type == "speech" or call_type == "aspeech":
+        prompt_cost, completion_cost = _generic_cost_per_character(
+            model=model_without_prefix,
+            custom_llm_provider=custom_llm_provider,
+            prompt_characters=prompt_characters,
+            completion_characters=completion_characters,
+            custom_prompt_cost=None,
+            custom_completion_cost=0,
+        )
+        if prompt_cost is None or completion_cost is None:
+            raise ValueError(
+                "cost for tts call is None. prompt_cost={}, completion_cost={}, model={}, custom_llm_provider={}, prompt_characters={}, completion_characters={}".format(
+                    prompt_cost,
+                    completion_cost,
+                    model_without_prefix,
+                    custom_llm_provider,
+                    prompt_characters,
+                    completion_characters,
+                )
+            )
+        return prompt_cost, completion_cost
     elif model in model_cost_ref:
         print_verbose(f"Success: model={model} in model_cost_map")
         print_verbose(
@@ -289,7 +334,7 @@ def cost_per_token(
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     else:
         # if model is not in model_prices_and_context_window.json. Raise an exception-let users know
-        error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
+        error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}, custom_llm_provider={custom_llm_provider}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
         raise litellm.exceptions.NotFoundError(  # type: ignore
             message=error_str,
             model=model,
@@ -429,7 +474,10 @@ def completion_cost(
     prompt_characters = 0
     completion_tokens = 0
     completion_characters = 0
-    if completion_response is not None:
+    if completion_response is not None and (
+        isinstance(completion_response, BaseModel)
+        or isinstance(completion_response, dict)
+    ):  # tts returns a custom class
         # get input/output tokens from completion_response
         prompt_tokens = completion_response.get("usage", {}).get("prompt_tokens", 0)
         completion_tokens = completion_response.get("usage", {}).get(
@@ -535,6 +583,11 @@ def completion_cost(
                 raise Exception(
                     f"Model={image_gen_model_name} not found in completion cost model map"
                 )
+        elif (
+            call_type == CallTypes.speech.value or call_type == CallTypes.aspeech.value
+        ):
+            prompt_characters = litellm.utils._count_characters(text=prompt)
+
         # Calculate cost based on prompt_tokens, completion_tokens
         if (
             "togethercomputer" in model
@@ -591,6 +644,7 @@ def completion_cost(
             custom_cost_per_token=custom_cost_per_token,
             prompt_characters=prompt_characters,
             completion_characters=completion_characters,
+            call_type=call_type,
         )
         _final_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
         print_verbose(
@@ -608,6 +662,7 @@ def response_cost_calculator(
         ImageResponse,
         TranscriptionResponse,
         TextCompletionResponse,
+        HttpxBinaryResponseContent,
     ],
     model: str,
     custom_llm_provider: Optional[str],
@@ -641,7 +696,8 @@ def response_cost_calculator(
     if cache_hit is not None and cache_hit is True:
         response_cost = 0.0
     else:
-        response_object._hidden_params["optional_params"] = optional_params
+        if isinstance(response_object, BaseModel):
+            response_object._hidden_params["optional_params"] = optional_params
         if isinstance(response_object, ImageResponse):
             response_cost = completion_cost(
                 completion_response=response_object,
@@ -651,12 +707,11 @@ def response_cost_calculator(
             )
         else:
             if (
-                model in litellm.model_cost
-                and custom_pricing is not None
-                and custom_llm_provider is True
+                model in litellm.model_cost or custom_pricing is True
             ):  # override defaults if custom pricing is set
                 base_model = model
             # base_model defaults to None if not set on model_info
             response_cost = completion_cost(
                 completion_response=response_object,
                 call_type=call_type,

View file

@@ -32,6 +32,12 @@ class LangFuseLogger:
         self.langfuse_host = langfuse_host or os.getenv(
             "LANGFUSE_HOST", "https://cloud.langfuse.com"
         )
+        if not (
+            self.langfuse_host.startswith("http://")
+            or self.langfuse_host.startswith("https://")
+        ):
+            # add http:// if unset, assume communicating over private network - e.g. render
+            self.langfuse_host = "http://" + self.langfuse_host
         self.langfuse_release = os.getenv("LANGFUSE_RELEASE")
         self.langfuse_debug = os.getenv("LANGFUSE_DEBUG")

View file

@@ -29,6 +29,7 @@ else:
 LITELLM_TRACER_NAME = os.getenv("OTEL_TRACER_NAME", "litellm")
 LITELLM_RESOURCE = {
     "service.name": os.getenv("OTEL_SERVICE_NAME", "litellm"),
+    "deployment.environment": os.getenv("OTEL_ENVIRONMENT_NAME", "production"),
 }
 RAW_REQUEST_SPAN_NAME = "raw_gen_ai_request"
 LITELLM_REQUEST_SPAN_NAME = "litellm_request"

View file

@@ -24,6 +24,8 @@ from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.redact_messages import (
     redact_message_input_output_from_logging,
 )
+from litellm.types.llms.openai import HttpxBinaryResponseContent
+from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
 from litellm.types.utils import (
     CallTypes,
     EmbeddingResponse,
@@ -517,33 +519,36 @@ class Logging:
             self.model_call_details["cache_hit"] = cache_hit
             ## if model in model cost map - log the response cost
             ## else set cost to None
-            verbose_logger.debug(f"Model={self.model};")
             if (
-                result is not None
-                and (
+                result is not None and self.stream is not True
+            ):  # handle streaming separately
+                if (
                     isinstance(result, ModelResponse)
                     or isinstance(result, EmbeddingResponse)
                     or isinstance(result, ImageResponse)
                     or isinstance(result, TranscriptionResponse)
                     or isinstance(result, TextCompletionResponse)
-                )
-                and self.stream != True
-            ):  # handle streaming separately
-                self.model_call_details["response_cost"] = (
-                    litellm.response_cost_calculator(
-                        response_object=result,
-                        model=self.model,
-                        cache_hit=self.model_call_details.get("cache_hit", False),
-                        custom_llm_provider=self.model_call_details.get(
-                            "custom_llm_provider", None
-                        ),
-                        base_model=_get_base_model_from_metadata(
-                            model_call_details=self.model_call_details
-                        ),
-                        call_type=self.call_type,
-                        optional_params=self.optional_params,
+                    or isinstance(result, HttpxBinaryResponseContent)  # tts
+                ):
+                    custom_pricing = use_custom_pricing_for_model(
+                        litellm_params=self.litellm_params
+                    )
+                    self.model_call_details["response_cost"] = (
+                        litellm.response_cost_calculator(
+                            response_object=result,
+                            model=self.model,
+                            cache_hit=self.model_call_details.get("cache_hit", False),
+                            custom_llm_provider=self.model_call_details.get(
+                                "custom_llm_provider", None
+                            ),
+                            base_model=_get_base_model_from_metadata(
+                                model_call_details=self.model_call_details
+                            ),
+                            call_type=self.call_type,
+                            optional_params=self.optional_params,
+                            custom_pricing=custom_pricing,
+                        )
                     )
-                )
             else:  # streaming chunks + image gen.
                 self.model_call_details["response_cost"] = None
@@ -600,8 +605,7 @@ class Logging:
                 verbose_logger.error(
                     "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while building complete streaming response in success logging {}\n{}".format(
                         str(e), traceback.format_exc()
-                    ),
-                    log_level="ERROR",
+                    )
                 )
                 complete_streaming_response = None
             else:
@@ -626,7 +630,11 @@ class Logging:
                         model_call_details=self.model_call_details
                     ),
                     call_type=self.call_type,
-                    optional_params=self.optional_params,
+                    optional_params=(
+                        self.optional_params
+                        if hasattr(self, "optional_params")
+                        else {}
+                    ),
                 )
             )
         if self.dynamic_success_callbacks is not None and isinstance(
@@ -1795,7 +1803,6 @@ def set_callbacks(callback_list, function_id=None):
     try:
         for callback in callback_list:
-            print_verbose(f"init callback list: {callback}")
             if callback == "sentry":
                 try:
                     import sentry_sdk
@@ -2013,3 +2020,17 @@ def get_custom_logger_compatible_class(
         if isinstance(callback, _PROXY_DynamicRateLimitHandler):
             return callback  # type: ignore
     return None
+
+
+def use_custom_pricing_for_model(litellm_params: Optional[dict]) -> bool:
+    if litellm_params is None:
+        return False
+    metadata: Optional[dict] = litellm_params.get("metadata", {})
+    if metadata is None:
+        return False
+    model_info: Optional[dict] = metadata.get("model_info", {})
+    if model_info is not None:
+        for k, v in model_info.items():
+            if k in SPECIAL_MODEL_INFO_PARAMS:
+                return True
+    return False
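
A hedged sketch of the input this helper inspects: custom pricing set on a deployment surfaces under `litellm_params["metadata"]["model_info"]`. The exact keys in `SPECIAL_MODEL_INFO_PARAMS` are not shown in this diff; `input_cost_per_token` is an assumption based on the cost-calculator changes above:

```python
# Hypothetical input mirroring the helper's traversal:
litellm_params = {
    "metadata": {
        "model_info": {
            "id": "my-deployment-1",          # ignored: not a special param
            "input_cost_per_token": 0.00003,  # assumed SPECIAL_MODEL_INFO_PARAMS key
        }
    }
}
# use_custom_pricing_for_model(litellm_params) -> True (custom pricing present);
# with only {"id": ...} in model_info it would return False.
```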

View file

@@ -0,0 +1,85 @@
+# What is this?
+## Helper utilities for cost_per_token()
+import traceback
+from typing import List, Literal, Optional, Tuple
+
+import litellm
+from litellm import verbose_logger
+
+
+def _generic_cost_per_character(
+    model: str,
+    custom_llm_provider: str,
+    prompt_characters: float,
+    completion_characters: float,
+    custom_prompt_cost: Optional[float],
+    custom_completion_cost: Optional[float],
+) -> Tuple[Optional[float], Optional[float]]:
+    """
+    Generic function to help calculate cost per character.
+    """
+    """
+    Calculates the cost per character for a given model, input messages, and response object.
+
+    Input:
+        - model: str, the model name without provider prefix
+        - custom_llm_provider: str, "vertex_ai-*"
+        - prompt_characters: float, the number of input characters
+        - completion_characters: float, the number of output characters
+
+    Returns:
+        Tuple[Optional[float], Optional[float]] - prompt_cost_in_usd, completion_cost_in_usd.
+        - returns None if not able to calculate cost.
+
+    Raises:
+        Exception if 'input_cost_per_character' or 'output_cost_per_character' is missing from model_info
+    """
+    args = locals()
+    ## GET MODEL INFO
+    model_info = litellm.get_model_info(
+        model=model, custom_llm_provider=custom_llm_provider
+    )
+
+    ## CALCULATE INPUT COST
+    try:
+        if custom_prompt_cost is None:
+            assert (
+                "input_cost_per_character" in model_info
+                and model_info["input_cost_per_character"] is not None
+            ), "model info for model={} does not have 'input_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            custom_prompt_cost = model_info["input_cost_per_character"]
+
+        prompt_cost = prompt_characters * custom_prompt_cost
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.utils.py::cost_per_character(): Exception occured - {}\n{}\nDefaulting to None".format(
+                str(e), traceback.format_exc()
+            )
+        )
+        prompt_cost = None
+
+    ## CALCULATE OUTPUT COST
+    try:
+        if custom_completion_cost is None:
+            assert (
+                "output_cost_per_character" in model_info
+                and model_info["output_cost_per_character"] is not None
+            ), "model info for model={} does not have 'output_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            custom_completion_cost = model_info["output_cost_per_character"]
+        completion_cost = completion_characters * custom_completion_cost
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.utils.py::cost_per_character(): Exception occured - {}\n{}\nDefaulting to None".format(
+                str(e), traceback.format_exc()
+            )
+        )
+        completion_cost = None
+
+    return prompt_cost, completion_cost
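
A usage sketch for the helper above, with explicit per-character prices supplied so the `model_info` lookup is bypassed (the model name and prices are illustrative; `litellm.get_model_info` must still recognize the model):

```python
prompt_cost, completion_cost = _generic_cost_per_character(
    model="tts-1",                 # illustrative
    custom_llm_provider="openai",
    prompt_characters=1000.0,
    completion_characters=0.0,
    custom_prompt_cost=0.000015,   # assumed $15 per 1M input characters
    custom_completion_cost=0.0,
)
# prompt_cost == 1000.0 * 0.000015 == 0.015 USD; completion_cost == 0.0
```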

View file

@@ -55,7 +55,6 @@ from ..types.llms.openai import (
     Thread,
 )
 from .base import BaseLLM
-from .custom_httpx.azure_dall_e_2 import AsyncCustomHTTPTransport, CustomHTTPTransport

 azure_ad_cache = DualCache()
@@ -1718,9 +1717,7 @@ class AzureChatCompletion(BaseLLM):
         input: Optional[list] = None,
         prompt: Optional[str] = None,
     ) -> dict:
-        client_session = litellm.client_session or httpx.Client(
-            transport=CustomHTTPTransport(),  # handle dall-e-2 calls
-        )
+        client_session = litellm.client_session or httpx.Client()
         if "gateway.ai.cloudflare.com" in api_base:
             ## build base url - assume api base includes resource name
             if not api_base.endswith("/"):
@@ -1793,9 +1790,10 @@ class AzureChatCompletion(BaseLLM):
         input: Optional[list] = None,
         prompt: Optional[str] = None,
     ) -> dict:
-        client_session = litellm.aclient_session or httpx.AsyncClient(
-            transport=AsyncCustomHTTPTransport(),  # handle dall-e-2 calls
-        )
+        client_session = (
+            litellm.aclient_session or httpx.AsyncClient()
+        )  # handle dall-e-2 calls
         if "gateway.ai.cloudflare.com" in api_base:
             ## build base url - assume api base includes resource name
             if not api_base.endswith("/"):

View file

@@ -1,24 +1,27 @@
-from typing import Optional, Union, Any
-import types, requests  # type: ignore
-from .base import BaseLLM
-from litellm.utils import (
-    ModelResponse,
-    Choices,
-    Message,
-    CustomStreamWrapper,
-    convert_to_model_response_object,
-    TranscriptionResponse,
-    TextCompletionResponse,
-)
-from typing import Callable, Optional, BinaryIO
-from litellm import OpenAIConfig
-import litellm, json
-import httpx
-from .custom_httpx.azure_dall_e_2 import CustomHTTPTransport, AsyncCustomHTTPTransport
-from openai import AzureOpenAI, AsyncAzureOpenAI
-from ..llms.openai import OpenAITextCompletion, OpenAITextCompletionConfig
+import json
+import types  # type: ignore
 import uuid
-from .prompt_templates.factory import prompt_factory, custom_prompt
+from typing import Any, BinaryIO, Callable, Optional, Union
+
+import httpx
+import requests
+from openai import AsyncAzureOpenAI, AzureOpenAI
+
+import litellm
+from litellm import OpenAIConfig
+from litellm.utils import (
+    Choices,
+    CustomStreamWrapper,
+    Message,
+    ModelResponse,
+    TextCompletionResponse,
+    TranscriptionResponse,
+    convert_to_model_response_object,
+)
+
+from ..llms.openai import OpenAITextCompletion, OpenAITextCompletionConfig
+from .base import BaseLLM
+from .prompt_templates.factory import custom_prompt, prompt_factory

 openai_text_completion_config = OpenAITextCompletionConfig()

View file

@@ -1,143 +0,0 @@
-import asyncio
-import json
-import time
-
-import httpx
-
-
-class AsyncCustomHTTPTransport(httpx.AsyncHTTPTransport):
-    """
-    Async implementation of custom http transport
-    """
-
-    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
-        _api_version = request.url.params.get("api-version", "")
-        if (
-            "images/generations" in request.url.path
-            and _api_version
-            in [  # dall-e-3 starts from `2023-12-01-preview` so we should be able to avoid conflict
-                "2023-06-01-preview",
-                "2023-07-01-preview",
-                "2023-08-01-preview",
-                "2023-09-01-preview",
-                "2023-10-01-preview",
-            ]
-        ):
-            request.url = request.url.copy_with(
-                path="/openai/images/generations:submit"
-            )
-            response = await super().handle_async_request(request)
-            operation_location_url = response.headers["operation-location"]
-            request.url = httpx.URL(operation_location_url)
-            request.method = "GET"
-            response = await super().handle_async_request(request)
-            await response.aread()
-
-            timeout_secs: int = 120
-            start_time = time.time()
-            while response.json()["status"] not in ["succeeded", "failed"]:
-                if time.time() - start_time > timeout_secs:
-                    timeout = {
-                        "error": {
-                            "code": "Timeout",
-                            "message": "Operation polling timed out.",
-                        }
-                    }
-                    return httpx.Response(
-                        status_code=400,
-                        headers=response.headers,
-                        content=json.dumps(timeout).encode("utf-8"),
-                        request=request,
-                    )
-
-                await asyncio.sleep(int(response.headers.get("retry-after") or 10))
-                response = await super().handle_async_request(request)
-                await response.aread()
-
-            if response.json()["status"] == "failed":
-                error_data = response.json()
-                return httpx.Response(
-                    status_code=400,
-                    headers=response.headers,
-                    content=json.dumps(error_data).encode("utf-8"),
-                    request=request,
-                )
-
-            result = response.json()["result"]
-            return httpx.Response(
-                status_code=200,
-                headers=response.headers,
-                content=json.dumps(result).encode("utf-8"),
-                request=request,
-            )
-        return await super().handle_async_request(request)
-
-
-class CustomHTTPTransport(httpx.HTTPTransport):
-    """
-    This class was written as a workaround to support dall-e-2 on openai > v1.x
-    Refer to this issue for more: https://github.com/openai/openai-python/issues/692
-    """
-
-    def handle_request(
-        self,
-        request: httpx.Request,
-    ) -> httpx.Response:
-        _api_version = request.url.params.get("api-version", "")
-        if (
-            "images/generations" in request.url.path
-            and _api_version
-            in [  # dall-e-3 starts from `2023-12-01-preview` so we should be able to avoid conflict
-                "2023-06-01-preview",
-                "2023-07-01-preview",
-                "2023-08-01-preview",
-                "2023-09-01-preview",
-                "2023-10-01-preview",
-            ]
-        ):
-            request.url = request.url.copy_with(
-                path="/openai/images/generations:submit"
-            )
-            response = super().handle_request(request)
-            operation_location_url = response.headers["operation-location"]
-            request.url = httpx.URL(operation_location_url)
-            request.method = "GET"
-            response = super().handle_request(request)
-            response.read()
-
-            timeout_secs: int = 120
-            start_time = time.time()
-            while response.json()["status"] not in ["succeeded", "failed"]:
-                if time.time() - start_time > timeout_secs:
-                    timeout = {
-                        "error": {
-                            "code": "Timeout",
-                            "message": "Operation polling timed out.",
-                        }
-                    }
-                    return httpx.Response(
-                        status_code=400,
-                        headers=response.headers,
-                        content=json.dumps(timeout).encode("utf-8"),
-                        request=request,
-                    )
-                time.sleep(int(response.headers.get("retry-after", None) or 10))
-                response = super().handle_request(request)
-                response.read()
-
-            if response.json()["status"] == "failed":
-                error_data = response.json()
-                return httpx.Response(
-                    status_code=400,
-                    headers=response.headers,
-                    content=json.dumps(error_data).encode("utf-8"),
-                    request=request,
-                )
-
-            result = response.json()["result"]
-            return httpx.Response(
-                status_code=200,
-                headers=response.headers,
-                content=json.dumps(result).encode("utf-8"),
-                request=request,
-            )
-        return super().handle_request(request)

View file

@@ -26,30 +26,12 @@ class AsyncHTTPHandler:
         self, timeout: Optional[Union[float, httpx.Timeout]], concurrent_limit: int
     ) -> httpx.AsyncClient:
-        async_proxy_mounts = None
         # Check if the HTTP_PROXY and HTTPS_PROXY environment variables are set and use them accordingly.
-        http_proxy = os.getenv("HTTP_PROXY", None)
-        https_proxy = os.getenv("HTTPS_PROXY", None)
-        no_proxy = os.getenv("NO_PROXY", None)
         ssl_verify = bool(os.getenv("SSL_VERIFY", litellm.ssl_verify))
         cert = os.getenv(
             "SSL_CERTIFICATE", litellm.ssl_certificate
         )  # /path/to/client.pem

-        if http_proxy is not None and https_proxy is not None:
-            async_proxy_mounts = {
-                "http://": httpx.AsyncHTTPTransport(proxy=httpx.Proxy(url=http_proxy)),
-                "https://": httpx.AsyncHTTPTransport(
-                    proxy=httpx.Proxy(url=https_proxy)
-                ),
-            }
-            # assume no_proxy is a list of comma separated urls
-            if no_proxy is not None and isinstance(no_proxy, str):
-                no_proxy_urls = no_proxy.split(",")
-                for url in no_proxy_urls:  # set no-proxy support for specific urls
-                    async_proxy_mounts[url] = None  # type: ignore

         if timeout is None:
             timeout = _DEFAULT_TIMEOUT

         # Create a client with a connection pool
@@ -61,7 +43,6 @@ class AsyncHTTPHandler:
                 max_keepalive_connections=concurrent_limit,
             ),
             verify=ssl_verify,
-            mounts=async_proxy_mounts,
             cert=cert,
         )
@@ -163,27 +144,11 @@ class HTTPHandler:
             timeout = _DEFAULT_TIMEOUT

         # Check if the HTTP_PROXY and HTTPS_PROXY environment variables are set and use them accordingly.
-        http_proxy = os.getenv("HTTP_PROXY", None)
-        https_proxy = os.getenv("HTTPS_PROXY", None)
-        no_proxy = os.getenv("NO_PROXY", None)
         ssl_verify = bool(os.getenv("SSL_VERIFY", litellm.ssl_verify))
         cert = os.getenv(
             "SSL_CERTIFICATE", litellm.ssl_certificate
         )  # /path/to/client.pem

-        sync_proxy_mounts = None
-        if http_proxy is not None and https_proxy is not None:
-            sync_proxy_mounts = {
-                "http://": httpx.HTTPTransport(proxy=httpx.Proxy(url=http_proxy)),
-                "https://": httpx.HTTPTransport(proxy=httpx.Proxy(url=https_proxy)),
-            }
-            # assume no_proxy is a list of comma separated urls
-            if no_proxy is not None and isinstance(no_proxy, str):
-                no_proxy_urls = no_proxy.split(",")
-                for url in no_proxy_urls:  # set no-proxy support for specific urls
-                    sync_proxy_mounts[url] = None  # type: ignore

         if client is None:
             # Create a client with a connection pool
             self.client = httpx.Client(
@@ -193,7 +158,6 @@ class HTTPHandler:
                 max_keepalive_connections=concurrent_limit,
             ),
             verify=ssl_verify,
-            mounts=sync_proxy_mounts,
             cert=cert,
         )
     else:

View file

@@ -1330,17 +1330,30 @@ class ModelResponseIterator:
         gemini_chunk = processed_chunk["candidates"][0]

-        if (
-            "content" in gemini_chunk
-            and "text" in gemini_chunk["content"]["parts"][0]
-        ):
-            text = gemini_chunk["content"]["parts"][0]["text"]
+        if "content" in gemini_chunk:
+            if "text" in gemini_chunk["content"]["parts"][0]:
+                text = gemini_chunk["content"]["parts"][0]["text"]
+            elif "functionCall" in gemini_chunk["content"]["parts"][0]:
+                function_call = ChatCompletionToolCallFunctionChunk(
+                    name=gemini_chunk["content"]["parts"][0]["functionCall"][
+                        "name"
+                    ],
+                    arguments=json.dumps(
+                        gemini_chunk["content"]["parts"][0]["functionCall"]["args"]
+                    ),
+                )
+                tool_use = ChatCompletionToolCallChunk(
+                    id=str(uuid.uuid4()),
+                    type="function",
+                    function=function_call,
+                    index=0,
+                )

         if "finishReason" in gemini_chunk:
             finish_reason = map_finish_reason(
                 finish_reason=gemini_chunk["finishReason"]
             )
-            ## DO NOT SET 'finish_reason' = True
+            ## DO NOT SET 'is_finished' = True
             ## GEMINI SETS FINISHREASON ON EVERY CHUNK!

         if "usageMetadata" in processed_chunk:

View file

@ -896,7 +896,7 @@ def completion(
if ( if (
supports_system_message is not None supports_system_message is not None
and isinstance(supports_system_message, bool) and isinstance(supports_system_message, bool)
and supports_system_message == False and supports_system_message is False
): ):
messages = map_system_message_pt(messages=messages) messages = map_system_message_pt(messages=messages)
model_api_key = get_api_key( model_api_key = get_api_key(
@@ -5028,10 +5028,9 @@ def stream_chunk_builder(
     for chunk in chunks:
         if "usage" in chunk:
             if "prompt_tokens" in chunk["usage"]:
-                prompt_tokens += chunk["usage"].get("prompt_tokens", 0) or 0
+                prompt_tokens = chunk["usage"].get("prompt_tokens", 0) or 0
             if "completion_tokens" in chunk["usage"]:
-                completion_tokens += chunk["usage"].get("completion_tokens", 0) or 0
+                completion_tokens = chunk["usage"].get("completion_tokens", 0) or 0

     try:
         response["usage"]["prompt_tokens"] = prompt_tokens or token_counter(
             model=model, messages=messages
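
The `+=` → `=` change matters when a provider emits a `usage` block on more than one chunk: each block presumably already carries running totals for the request, so accumulating them double-counts. A toy illustration (chunk contents are hypothetical):

```python
chunks = [
    {"usage": {"prompt_tokens": 9, "completion_tokens": 40}},  # interim totals
    {"usage": {"prompt_tokens": 9, "completion_tokens": 70}},  # final totals
]

prompt_tokens = completion_tokens = 0
for chunk in chunks:
    if "usage" in chunk:
        if "prompt_tokens" in chunk["usage"]:
            prompt_tokens = chunk["usage"].get("prompt_tokens", 0) or 0  # was +=
        if "completion_tokens" in chunk["usage"]:
            completion_tokens = chunk["usage"].get("completion_tokens", 0) or 0

print(prompt_tokens, completion_tokens)  # 9 70 -- with += this would be 18 110
```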

View file

@@ -2022,10 +2022,10 @@
     "max_tokens": 8192,
     "max_input_tokens": 2097152,
     "max_output_tokens": 8192,
-    "input_cost_per_token": 0.00000035,
-    "input_cost_per_token_above_128k_tokens": 0.0000007,
-    "output_cost_per_token": 0.00000105,
-    "output_cost_per_token_above_128k_tokens": 0.0000021,
+    "input_cost_per_token": 0.0000035,
+    "input_cost_per_token_above_128k_tokens": 0.000007,
+    "output_cost_per_token": 0.0000105,
+    "output_cost_per_token_above_128k_tokens": 0.000021,
     "litellm_provider": "gemini",
     "mode": "chat",
     "supports_system_messages": true,
@@ -2033,16 +2033,16 @@
     "supports_vision": true,
     "supports_tool_choice": true,
     "supports_response_schema": true,
-    "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+    "source": "https://ai.google.dev/pricing"
 },
 "gemini/gemini-1.5-pro-latest": {
     "max_tokens": 8192,
     "max_input_tokens": 1048576,
     "max_output_tokens": 8192,
-    "input_cost_per_token": 0.00000035,
-    "input_cost_per_token_above_128k_tokens": 0.0000007,
+    "input_cost_per_token": 0.0000035,
+    "input_cost_per_token_above_128k_tokens": 0.000007,
     "output_cost_per_token": 0.00000105,
-    "output_cost_per_token_above_128k_tokens": 0.0000021,
+    "output_cost_per_token_above_128k_tokens": 0.000021,
     "litellm_provider": "gemini",
     "mode": "chat",
     "supports_system_messages": true,
@@ -2050,7 +2050,7 @@
     "supports_vision": true,
     "supports_tool_choice": true,
     "supports_response_schema": true,
-    "source": "https://ai.google.dev/models/gemini"
+    "source": "https://ai.google.dev/pricing"
 },
 "gemini/gemini-pro-vision": {
     "max_tokens": 2048,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@@ -1 +1 @@
Diff of the minified `index.html` abridged because the lines are too long: the two versions are identical except for hashed asset names and the build id — chunks `131-6a03368053f9d26d.js` → `131-19b05e5ce40fa85d.js`, `759-83a8bdddfe32b5d9.js` → `759-d7572f2a46f911d5.js`, `777-f76791513e294b30.js` → `777-906d7dd6a5bf7be4.js`, `app/page-da7d95729f2529b5.js` → `app/page-567f85145e7f0f35.js`, and buildId `0gt3_bF2KkdKeE61mic4M` → `RDLpeUaSstfmeQiKITNBo`.

View file

@@ -1,7 +1,7 @@
 2:I[77831,[],""]
-3:I[48951,["665","static/chunks/3014691f-589a5f4865c3822f.js","936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-f76791513e294b30.js","931","static/chunks/app/page-da7d95729f2529b5.js"],""]
+3:I[48951,["665","static/chunks/3014691f-589a5f4865c3822f.js","936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-19b05e5ce40fa85d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-d7572f2a46f911d5.js","777","static/chunks/777-906d7dd6a5bf7be4.js","931","static/chunks/app/page-567f85145e7f0f35.js"],""]
 4:I[5613,[],""]
 5:I[31778,[],""]
-0:["0gt3_bF2KkdKeE61mic4M", … (remainder of the RSC payload line abridged; identical in both versions)]
+0:["RDLpeUaSstfmeQiKITNBo", … (remainder of the RSC payload line abridged; identical in both versions)]
 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
 1:null

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-f76791513e294b30.js","418","static/chunks/app/model_hub/page-ba7819b59161aa64.js"],""] 3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-19b05e5ce40fa85d.js","777","static/chunks/777-906d7dd6a5bf7be4.js","418","static/chunks/app/model_hub/page-ba7819b59161aa64.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 
0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,7 @@
2:I[77831,[],""] 2:I[77831,[],""]
3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-f76791513e294b30.js","461","static/chunks/app/onboarding/page-1ed08595d570934e.js"],""] 3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-906d7dd6a5bf7be4.js","461","static/chunks/app/onboarding/page-1ed08595d570934e.js"],""]
4:I[5613,[],""] 4:I[5613,[],""]
5:I[31778,[],""] 5:I[31778,[],""]
0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]] 
0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]] 6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null 1:null

View file

@ -1,12 +1,10 @@
model_list: model_list:
- model_name: "*" - model_name: tts
litellm_params: litellm_params:
model: "openai/*" model: "openai/*"
mock_response: "Hello world!" - model_name: gemini-1.5-flash
litellm_params:
litellm_settings: model: gemini/gemini-1.5-flash
success_callback: ["langfuse"]
failure_callback: ["langfuse"]
general_settings: general_settings:
alerting: ["slack"] alerting: ["slack"]
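For reference, a quick sketch of exercising the two models configured above through the proxy with an OpenAI-style client; the host and key below are placeholders, not part of this diff.

import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://localhost:4000")
resp = client.chat.completions.create(
    model="gemini-1.5-flash",  # routed per litellm_params to gemini/gemini-1.5-flash
    messages=[{"role": "user", "content": "hi"}],
)
print(resp.choices[0].message.content)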

View file

@ -0,0 +1,167 @@
import os
def show_missing_vars_in_env():
from fastapi.responses import HTMLResponse
from litellm.proxy.proxy_server import master_key, prisma_client
if prisma_client is None and master_key is None:
return HTMLResponse(
content=missing_keys_form(
missing_key_names="DATABASE_URL, LITELLM_MASTER_KEY"
),
status_code=200,
)
if prisma_client is None:
return HTMLResponse(
content=missing_keys_form(missing_key_names="DATABASE_URL"), status_code=200
)
if master_key is None:
return HTMLResponse(
content=missing_keys_form(missing_key_names="LITELLM_MASTER_KEY"),
status_code=200,
)
return None
# LiteLLM Admin UI - Non SSO Login
url_to_redirect_to = os.getenv("PROXY_BASE_URL", "")
url_to_redirect_to += "/login"
html_form = f"""
<!DOCTYPE html>
<html>
<head>
<title>LiteLLM Login</title>
<style>
body {{
font-family: Arial, sans-serif;
background-color: #f4f4f4;
margin: 0;
padding: 0;
display: flex;
justify-content: center;
align-items: center;
height: 100vh;
}}
form {{
background-color: #fff;
padding: 20px;
border-radius: 8px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
}}
label {{
display: block;
margin-bottom: 8px;
}}
input {{
width: 100%;
padding: 8px;
margin-bottom: 16px;
box-sizing: border-box;
border: 1px solid #ccc;
border-radius: 4px;
}}
input[type="submit"] {{
background-color: #4caf50;
color: #fff;
cursor: pointer;
}}
input[type="submit"]:hover {{
background-color: #45a049;
}}
</style>
</head>
<body>
<form action="{url_to_redirect_to}" method="post">
<h2>LiteLLM Login</h2>
<p>By default, the Username is "admin" and the Password is the LiteLLM Proxy `MASTER_KEY` you set</p>
<p>To set UI credentials / use SSO, see the docs: <a href="https://docs.litellm.ai/docs/proxy/ui" target="_blank">https://docs.litellm.ai/docs/proxy/ui</a></p>
<br>
<label for="username">Username:</label>
<input type="text" id="username" name="username" required>
<label for="password">Password:</label>
<input type="password" id="password" name="password" required>
<input type="submit" value="Submit">
</form>
"""
def missing_keys_form(missing_key_names: str):
missing_keys_html_form = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<style>
body {{
font-family: Arial, sans-serif;
background-color: #f4f4f9;
color: #333;
margin: 20px;
line-height: 1.6;
}}
.container {{
max-width: 800px;
margin: auto;
padding: 20px;
background: #fff;
border: 1px solid #ddd;
border-radius: 5px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
}}
h1 {{
font-size: 24px;
margin-bottom: 20px;
}}
pre {{
background: #f8f8f8;
padding: 10px;
border: 1px solid #ccc;
border-radius: 4px;
overflow-x: auto;
font-size: 14px;
}}
.env-var {{
font-weight: normal;
}}
.comment {{
font-weight: normal;
color: #777;
}}
</style>
<title>Environment Setup Instructions</title>
</head>
<body>
<div class="container">
<h1>Environment Setup Instructions</h1>
<p>Please add the following variables to your environment variables:</p>
<pre>
<span class="env-var">LITELLM_MASTER_KEY="sk-1234"</span> <span class="comment"># Your master key for the proxy server. Can use this to send /chat/completion requests etc</span>
<span class="env-var">LITELLM_SALT_KEY="sk-XXXXXXXX"</span> <span class="comment"># Can NOT CHANGE THIS ONCE SET - It is used to encrypt/decrypt credentials stored in DB. If value of 'LITELLM_SALT_KEY' changes your models cannot be retrieved from DB</span>
<span class="env-var">DATABASE_URL="postgres://..."</span> <span class="comment"># Need a postgres database? (Check out Supabase, Neon, etc)</span>
<span class="comment">## OPTIONAL ##</span>
<span class="env-var">PORT=4000</span> <span class="comment"># DO THIS FOR RENDER/RAILWAY</span>
<span class="env-var">STORE_MODEL_IN_DB="True"</span> <span class="comment"># Allow storing models in db</span>
</pre>
<h1>Missing Environment Variables</h1>
<p>{missing_keys}</p>
</div>
<div class="container">
<h1>Need Help? Support</h1>
<p>Discord: <a href="https://discord.com/invite/wuPM9dRgDw" target="_blank">https://discord.com/invite/wuPM9dRgDw</a></p>
<p>Docs: <a href="https://docs.litellm.ai/docs/" target="_blank">https://docs.litellm.ai/docs/</a></p>
</div>
</body>
</html>
"""
return missing_keys_html_form.format(missing_keys=missing_key_names)
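A minimal sketch of how the new helper renders, assuming the module is importable at the path used later in the proxy_server.py hunks:

from litellm.proxy.common_utils.admin_ui_utils import missing_keys_form

html = missing_keys_form(missing_key_names="DATABASE_URL, LITELLM_MASTER_KEY")
assert "Missing Environment Variables" in html
assert "DATABASE_URL, LITELLM_MASTER_KEY" in html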

View file

@ -0,0 +1,89 @@
import base64
import os
from litellm._logging import verbose_proxy_logger
LITELLM_SALT_KEY = os.getenv("LITELLM_SALT_KEY", None)
if LITELLM_SALT_KEY is None:
    verbose_proxy_logger.debug(
        "LITELLM_SALT_KEY is None, using master_key to encrypt/decrypt secrets stored in DB"
    )
def encrypt_value_helper(value: str):
from litellm.proxy.proxy_server import master_key
signing_key = LITELLM_SALT_KEY
if LITELLM_SALT_KEY is None:
signing_key = master_key
try:
if isinstance(value, str):
encrypted_value = encrypt_value(value=value, signing_key=signing_key) # type: ignore
encrypted_value = base64.b64encode(encrypted_value).decode("utf-8")
return encrypted_value
raise ValueError(
f"Invalid value type passed to encrypt_value: {type(value)} for Value: {value}\n Value must be a string"
)
except Exception as e:
raise e
def decrypt_value_helper(value: str):
from litellm.proxy.proxy_server import master_key
signing_key = LITELLM_SALT_KEY
if LITELLM_SALT_KEY is None:
signing_key = master_key
try:
if isinstance(value, str):
decoded_b64 = base64.b64decode(value)
value = decrypt_value(value=decoded_b64, signing_key=signing_key) # type: ignore
return value
except Exception as e:
verbose_proxy_logger.error(f"Error decrypting value: {value}\nError: {str(e)}")
# Non-blocking: failing to decrypt one value should not block decrypting the others
pass
def encrypt_value(value: str, signing_key: str):
import hashlib
import nacl.secret
import nacl.utils
# get 32 byte master key #
hash_object = hashlib.sha256(signing_key.encode())
hash_bytes = hash_object.digest()
# initialize secret box #
box = nacl.secret.SecretBox(hash_bytes)
# encode message #
value_bytes = value.encode("utf-8")
encrypted = box.encrypt(value_bytes)
return encrypted
def decrypt_value(value: bytes, signing_key: str) -> str:
import hashlib
import nacl.secret
import nacl.utils
# get 32 byte master key #
hash_object = hashlib.sha256(signing_key.encode())
hash_bytes = hash_object.digest()
# initialize secret box #
box = nacl.secret.SecretBox(hash_bytes)
# Convert the bytes object to a string
plaintext = box.decrypt(value)
plaintext = plaintext.decode("utf-8") # type: ignore
return plaintext # type: ignore
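For reference, a round-trip sketch of the scheme these helpers implement (PyNaCl SecretBox over a SHA-256-derived key, base64 for DB storage); the key and value below are placeholders:

import base64
import hashlib

import nacl.secret

signing_key = "sk-1234"  # placeholder; in the proxy this is LITELLM_SALT_KEY or master_key
box = nacl.secret.SecretBox(hashlib.sha256(signing_key.encode()).digest())

# what encrypt_value_helper stores: encrypt, then base64-encode
stored = base64.b64encode(box.encrypt("my-provider-api-key".encode("utf-8"))).decode("utf-8")

# what decrypt_value_helper recovers: base64-decode, then decrypt
assert box.decrypt(base64.b64decode(stored)).decode("utf-8") == "my-provider-api-key"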

View file

@ -31,10 +31,12 @@ def initialize_callbacks_on_proxy(
imported_list.append(callback) imported_list.append(callback)
elif isinstance(callback, str) and callback == "otel": elif isinstance(callback, str) and callback == "otel":
from litellm.integrations.opentelemetry import OpenTelemetry from litellm.integrations.opentelemetry import OpenTelemetry
from litellm.proxy import proxy_server
open_telemetry_logger = OpenTelemetry() open_telemetry_logger = OpenTelemetry()
imported_list.append(open_telemetry_logger) imported_list.append(open_telemetry_logger)
setattr(proxy_server, "open_telemetry_logger", open_telemetry_logger)
elif isinstance(callback, str) and callback == "presidio": elif isinstance(callback, str) and callback == "presidio":
from litellm.proxy.hooks.presidio_pii_masking import ( from litellm.proxy.hooks.presidio_pii_masking import (
_OPTIONAL_PresidioPIIMasking, _OPTIONAL_PresidioPIIMasking,

View file

@ -8,21 +8,26 @@
# Tell us how we can improve! - Krrish & Ishaan # Tell us how we can improve! - Krrish & Ishaan
import asyncio
import json
import traceback
import uuid
from typing import Optional, Union from typing import Optional, Union
import litellm, traceback, uuid, json # noqa: E401
from litellm.caching import DualCache import aiohttp
from litellm.proxy._types import UserAPIKeyAuth
from litellm.integrations.custom_logger import CustomLogger
from fastapi import HTTPException from fastapi import HTTPException
import litellm # noqa: E401
from litellm._logging import verbose_proxy_logger from litellm._logging import verbose_proxy_logger
from litellm.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth
from litellm.utils import ( from litellm.utils import (
ModelResponse,
EmbeddingResponse, EmbeddingResponse,
ImageResponse, ImageResponse,
ModelResponse,
StreamingChoices, StreamingChoices,
) )
import aiohttp
import asyncio
class _OPTIONAL_PresidioPIIMasking(CustomLogger): class _OPTIONAL_PresidioPIIMasking(CustomLogger):
@ -57,22 +62,41 @@ class _OPTIONAL_PresidioPIIMasking(CustomLogger):
f"An error occurred: {str(e)}, file_path={ad_hoc_recognizers}" f"An error occurred: {str(e)}, file_path={ad_hoc_recognizers}"
) )
self.presidio_analyzer_api_base = litellm.get_secret( self.validate_environment()
def validate_environment(self):
self.presidio_analyzer_api_base: Optional[str] = litellm.get_secret(
"PRESIDIO_ANALYZER_API_BASE", None "PRESIDIO_ANALYZER_API_BASE", None
) ) # type: ignore
self.presidio_anonymizer_api_base = litellm.get_secret( self.presidio_anonymizer_api_base: Optional[str] = litellm.get_secret(
"PRESIDIO_ANONYMIZER_API_BASE", None "PRESIDIO_ANONYMIZER_API_BASE", None
) ) # type: ignore
if self.presidio_analyzer_api_base is None: if self.presidio_analyzer_api_base is None:
raise Exception("Missing `PRESIDIO_ANALYZER_API_BASE` from environment") raise Exception("Missing `PRESIDIO_ANALYZER_API_BASE` from environment")
elif not self.presidio_analyzer_api_base.endswith("/"): if not self.presidio_analyzer_api_base.endswith("/"):
self.presidio_analyzer_api_base += "/" self.presidio_analyzer_api_base += "/"
if not (
self.presidio_analyzer_api_base.startswith("http://")
or self.presidio_analyzer_api_base.startswith("https://")
):
# add http:// if unset, assume communicating over private network - e.g. render
self.presidio_analyzer_api_base = (
"http://" + self.presidio_analyzer_api_base
)
if self.presidio_anonymizer_api_base is None: if self.presidio_anonymizer_api_base is None:
raise Exception("Missing `PRESIDIO_ANONYMIZER_API_BASE` from environment") raise Exception("Missing `PRESIDIO_ANONYMIZER_API_BASE` from environment")
elif not self.presidio_anonymizer_api_base.endswith("/"): if not self.presidio_anonymizer_api_base.endswith("/"):
self.presidio_anonymizer_api_base += "/" self.presidio_anonymizer_api_base += "/"
if not (
self.presidio_anonymizer_api_base.startswith("http://")
or self.presidio_anonymizer_api_base.startswith("https://")
):
# add http:// if unset, assume communicating over private network - e.g. render
self.presidio_anonymizer_api_base = (
"http://" + self.presidio_anonymizer_api_base
)
def print_verbose(self, print_statement): def print_verbose(self, print_statement):
try: try:
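The URL normalization added in validate_environment, restated standalone for a quick sanity check (the function name here is illustrative, not part of the diff):

def normalize_api_base(api_base: str) -> str:
    if not api_base.endswith("/"):
        api_base += "/"
    if not (api_base.startswith("http://") or api_base.startswith("https://")):
        # no scheme set: assume a private-network service (e.g. Render) and default to http
        api_base = "http://" + api_base
    return api_base

assert normalize_api_base("presidio-analyzer:3000") == "http://presidio-analyzer:3000/"
assert normalize_api_base("https://example.com/analyze") == "https://example.com/analyze/"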

View file

@ -176,6 +176,7 @@ async def add_litellm_data_to_request(
def _add_otel_traceparent_to_data(data: dict, request: Request): def _add_otel_traceparent_to_data(data: dict, request: Request):
from litellm.proxy.proxy_server import open_telemetry_logger from litellm.proxy.proxy_server import open_telemetry_logger
if data is None: if data is None:
return return
if open_telemetry_logger is None: if open_telemetry_logger is None:

View file

@ -35,6 +35,7 @@ general_settings:
LANGFUSE_SECRET_KEY: "os.environ/LANGFUSE_DEV_SK_KEY" LANGFUSE_SECRET_KEY: "os.environ/LANGFUSE_DEV_SK_KEY"
litellm_settings: litellm_settings:
callbacks: ["otel"]
guardrails: guardrails:
- prompt_injection: - prompt_injection:
callbacks: [lakera_prompt_injection, hide_secrets] callbacks: [lakera_prompt_injection, hide_secrets]

View file

@ -140,7 +140,15 @@ from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
## Import All Misc routes here ## ## Import All Misc routes here ##
from litellm.proxy.caching_routes import router as caching_router from litellm.proxy.caching_routes import router as caching_router
from litellm.proxy.common_utils.admin_ui_utils import (
html_form,
show_missing_vars_in_env,
)
from litellm.proxy.common_utils.debug_utils import router as debugging_endpoints_router from litellm.proxy.common_utils.debug_utils import router as debugging_endpoints_router
from litellm.proxy.common_utils.encrypt_decrypt_utils import (
decrypt_value_helper,
encrypt_value_helper,
)
from litellm.proxy.common_utils.http_parsing_utils import _read_request_body from litellm.proxy.common_utils.http_parsing_utils import _read_request_body
from litellm.proxy.common_utils.init_callbacks import initialize_callbacks_on_proxy from litellm.proxy.common_utils.init_callbacks import initialize_callbacks_on_proxy
from litellm.proxy.common_utils.openai_endpoint_utils import ( from litellm.proxy.common_utils.openai_endpoint_utils import (
@ -186,13 +194,9 @@ from litellm.proxy.utils import (
_get_projected_spend_over_limit, _get_projected_spend_over_limit,
_is_projected_spend_over_limit, _is_projected_spend_over_limit,
_is_valid_team_configs, _is_valid_team_configs,
decrypt_value,
encrypt_value,
get_error_message_str, get_error_message_str,
get_instance_fn, get_instance_fn,
hash_token, hash_token,
html_form,
missing_keys_html_form,
reset_budget, reset_budget,
send_email, send_email,
update_spend, update_spend,
@ -207,6 +211,7 @@ from litellm.router import ModelInfo as RouterModelInfo
from litellm.router import updateDeployment from litellm.router import updateDeployment
from litellm.scheduler import DefaultPriorities, FlowItem, Scheduler from litellm.scheduler import DefaultPriorities, FlowItem, Scheduler
from litellm.types.llms.openai import HttpxBinaryResponseContent from litellm.types.llms.openai import HttpxBinaryResponseContent
from litellm.types.router import RouterGeneralSettings
try: try:
from litellm._version import version from litellm._version import version
@ -1242,6 +1247,7 @@ class ProxyConfig:
## DB ## DB
if prisma_client is not None and ( if prisma_client is not None and (
general_settings.get("store_model_in_db", False) == True general_settings.get("store_model_in_db", False) == True
or store_model_in_db is True
): ):
_tasks = [] _tasks = []
keys = [ keys = [
@ -1765,7 +1771,11 @@ class ProxyConfig:
if k in available_args: if k in available_args:
router_params[k] = v router_params[k] = v
router = litellm.Router( router = litellm.Router(
**router_params, assistants_config=assistants_config **router_params,
assistants_config=assistants_config,
router_general_settings=RouterGeneralSettings(
async_only_mode=True # only init async clients
),
) # type:ignore ) # type:ignore
return router, router.get_model_list(), general_settings return router, router.get_model_list(), general_settings
@ -1880,16 +1890,8 @@ class ProxyConfig:
# decrypt values # decrypt values
for k, v in _litellm_params.items(): for k, v in _litellm_params.items():
if isinstance(v, str): if isinstance(v, str):
# decode base64
try:
decoded_b64 = base64.b64decode(v)
except Exception as e:
verbose_proxy_logger.error(
"Error decoding value - {}".format(v)
)
continue
# decrypt value # decrypt value
_value = decrypt_value(value=decoded_b64, master_key=master_key) _value = decrypt_value_helper(value=v)
# sanity check if string > size 0 # sanity check if string > size 0
if len(_value) > 0: if len(_value) > 0:
_litellm_params[k] = _value _litellm_params[k] = _value
@ -1933,13 +1935,8 @@ class ProxyConfig:
if isinstance(_litellm_params, dict): if isinstance(_litellm_params, dict):
# decrypt values # decrypt values
for k, v in _litellm_params.items(): for k, v in _litellm_params.items():
if isinstance(v, str): decrypted_value = decrypt_value_helper(value=v)
# decode base64 _litellm_params[k] = decrypted_value
decoded_b64 = base64.b64decode(v)
# decrypt value
_litellm_params[k] = decrypt_value(
value=decoded_b64, master_key=master_key # type: ignore
)
_litellm_params = LiteLLM_Params(**_litellm_params) _litellm_params = LiteLLM_Params(**_litellm_params)
else: else:
verbose_proxy_logger.error( verbose_proxy_logger.error(
@ -1957,7 +1954,12 @@ class ProxyConfig:
) )
if len(_model_list) > 0: if len(_model_list) > 0:
verbose_proxy_logger.debug(f"_model_list: {_model_list}") verbose_proxy_logger.debug(f"_model_list: {_model_list}")
llm_router = litellm.Router(model_list=_model_list) llm_router = litellm.Router(
model_list=_model_list,
router_general_settings=RouterGeneralSettings(
async_only_mode=True # only init async clients
),
)
verbose_proxy_logger.debug(f"updated llm_router: {llm_router}") verbose_proxy_logger.debug(f"updated llm_router: {llm_router}")
else: else:
verbose_proxy_logger.debug(f"len new_models: {len(new_models)}") verbose_proxy_logger.debug(f"len new_models: {len(new_models)}")
@ -1995,10 +1997,8 @@ class ProxyConfig:
environment_variables = config_data.get("environment_variables", {}) environment_variables = config_data.get("environment_variables", {})
for k, v in environment_variables.items(): for k, v in environment_variables.items():
try: try:
if v is not None: decrypted_value = decrypt_value_helper(value=v)
decoded_b64 = base64.b64decode(v) os.environ[k] = decrypted_value
value = decrypt_value(value=decoded_b64, master_key=master_key) # type: ignore
os.environ[k] = value
except Exception as e: except Exception as e:
verbose_proxy_logger.error( verbose_proxy_logger.error(
"Error setting env variable: %s - %s", k, str(e) "Error setting env variable: %s - %s", k, str(e)
@ -2720,6 +2720,10 @@ async def chat_completion(
except: except:
data = json.loads(body_str) data = json.loads(body_str)
verbose_proxy_logger.debug(
"Request received by LiteLLM:\n{}".format(json.dumps(data, indent=4)),
)
data = await add_litellm_data_to_request( data = await add_litellm_data_to_request(
data=data, data=data,
request=request, request=request,
@ -3372,8 +3376,9 @@ async def embeddings(
) )
verbose_proxy_logger.debug(traceback.format_exc()) verbose_proxy_logger.debug(traceback.format_exc())
if isinstance(e, HTTPException): if isinstance(e, HTTPException):
message = get_error_message_str(e)
raise ProxyException( raise ProxyException(
message=getattr(e, "message", str(e)), message=message,
type=getattr(e, "type", "None"), type=getattr(e, "type", "None"),
param=getattr(e, "param", "None"), param=getattr(e, "param", "None"),
code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST), code=getattr(e, "status_code", status.HTTP_400_BAD_REQUEST),
@ -5930,11 +5935,8 @@ async def add_new_model(
_litellm_params_dict = model_params.litellm_params.dict(exclude_none=True) _litellm_params_dict = model_params.litellm_params.dict(exclude_none=True)
_orignal_litellm_model_name = model_params.litellm_params.model _orignal_litellm_model_name = model_params.litellm_params.model
for k, v in _litellm_params_dict.items(): for k, v in _litellm_params_dict.items():
if isinstance(v, str): encrypted_value = encrypt_value_helper(value=v)
encrypted_value = encrypt_value(value=v, master_key=master_key) # type: ignore model_params.litellm_params[k] = encrypted_value
model_params.litellm_params[k] = base64.b64encode(
encrypted_value
).decode("utf-8")
_data: dict = { _data: dict = {
"model_id": model_params.model_info.id, "model_id": model_params.model_info.id,
"model_name": model_params.model_name, "model_name": model_params.model_name,
@ -6065,11 +6067,8 @@ async def update_model(
### ENCRYPT PARAMS ### ### ENCRYPT PARAMS ###
for k, v in _new_litellm_params_dict.items(): for k, v in _new_litellm_params_dict.items():
if isinstance(v, str): encrypted_value = encrypt_value_helper(value=v)
encrypted_value = encrypt_value(value=v, master_key=master_key) # type: ignore model_params.litellm_params[k] = encrypted_value
model_params.litellm_params[k] = base64.b64encode(
encrypted_value
).decode("utf-8")
### MERGE WITH EXISTING DATA ### ### MERGE WITH EXISTING DATA ###
merged_dictionary = {} merged_dictionary = {}
@ -7187,10 +7186,9 @@ async def google_login(request: Request):
) )
####### Detect DB + MASTER KEY in .env ####### ####### Detect DB + MASTER KEY in .env #######
if prisma_client is None or master_key is None: missing_env_vars = show_missing_vars_in_env()
from fastapi.responses import HTMLResponse if missing_env_vars is not None:
return missing_env_vars
return HTMLResponse(content=missing_keys_html_form, status_code=200)
# get url from request # get url from request
redirect_url = os.getenv("PROXY_BASE_URL", str(request.base_url)) redirect_url = os.getenv("PROXY_BASE_URL", str(request.base_url))
@ -8393,11 +8391,8 @@ async def update_config(config_info: ConfigYAML):
# encrypt updated_environment_variables # # encrypt updated_environment_variables #
for k, v in _updated_environment_variables.items(): for k, v in _updated_environment_variables.items():
if isinstance(v, str): encrypted_value = encrypt_value_helper(value=v)
encrypted_value = encrypt_value(value=v, master_key=master_key) # type: ignore _updated_environment_variables[k] = encrypted_value
_updated_environment_variables[k] = base64.b64encode(
encrypted_value
).decode("utf-8")
_existing_env_variables = config["environment_variables"] _existing_env_variables = config["environment_variables"]
@ -8814,11 +8809,8 @@ async def get_config():
env_vars_dict[_var] = None env_vars_dict[_var] = None
else: else:
# decode + decrypt the value # decode + decrypt the value
decoded_b64 = base64.b64decode(env_variable) decrypted_value = decrypt_value_helper(value=env_variable)
_decrypted_value = decrypt_value( env_vars_dict[_var] = decrypted_value
value=decoded_b64, master_key=master_key
)
env_vars_dict[_var] = _decrypted_value
_data_to_return.append({"name": _callback, "variables": env_vars_dict}) _data_to_return.append({"name": _callback, "variables": env_vars_dict})
elif _callback == "langfuse": elif _callback == "langfuse":
@ -8834,11 +8826,8 @@ async def get_config():
_langfuse_env_vars[_var] = None _langfuse_env_vars[_var] = None
else: else:
# decode + decrypt the value # decode + decrypt the value
decoded_b64 = base64.b64decode(env_variable) decrypted_value = decrypt_value_helper(value=env_variable)
_decrypted_value = decrypt_value( _langfuse_env_vars[_var] = decrypted_value
value=decoded_b64, master_key=master_key
)
_langfuse_env_vars[_var] = _decrypted_value
_data_to_return.append( _data_to_return.append(
{"name": _callback, "variables": _langfuse_env_vars} {"name": _callback, "variables": _langfuse_env_vars}
@ -8859,10 +8848,7 @@ async def get_config():
_slack_env_vars[_var] = _value _slack_env_vars[_var] = _value
else: else:
# decode + decrypt the value # decode + decrypt the value
decoded_b64 = base64.b64decode(env_variable) _decrypted_value = decrypt_value_helper(value=env_variable)
_decrypted_value = decrypt_value(
value=decoded_b64, master_key=master_key
)
_slack_env_vars[_var] = _decrypted_value _slack_env_vars[_var] = _decrypted_value
_alerting_types = proxy_logging_obj.slack_alerting_instance.alert_types _alerting_types = proxy_logging_obj.slack_alerting_instance.alert_types
@ -8898,10 +8884,7 @@ async def get_config():
_email_env_vars[_var] = None _email_env_vars[_var] = None
else: else:
# decode + decrypt the value # decode + decrypt the value
decoded_b64 = base64.b64decode(env_variable) _decrypted_value = decrypt_value_helper(value=env_variable)
_decrypted_value = decrypt_value(
value=decoded_b64, master_key=master_key
)
_email_env_vars[_var] = _decrypted_value _email_env_vars[_var] = _decrypted_value
alerting_data.append( alerting_data.append(
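Net effect of the Router changes in this file, as a sketch (the model entry is a placeholder): the proxy now constructs routers with RouterGeneralSettings(async_only_mode=True), so only async clients are initialized.

import litellm
from litellm.types.router import RouterGeneralSettings

router = litellm.Router(
    model_list=[
        {"model_name": "gpt-3.5-turbo", "litellm_params": {"model": "gpt-3.5-turbo"}}
    ],
    router_general_settings=RouterGeneralSettings(async_only_mode=True),  # skip sync clients
)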

View file

@ -821,6 +821,14 @@ async def get_global_spend_report(
default="team", default="team",
description="Group spend by internal team or customer or api_key", description="Group spend by internal team or customer or api_key",
), ),
api_key: Optional[str] = fastapi.Query(
default=None,
description="View spend for a specific api_key. Example api_key='sk-1234",
),
internal_user_id: Optional[str] = fastapi.Query(
default=None,
description="View spend for a specific internal_user_id. Example internal_user_id='1234",
),
): ):
""" """
Get Daily Spend per Team, based on specific startTime and endTime. Per team, view usage by each key, model Get Daily Spend per Team, based on specific startTime and endTime. Per team, view usage by each key, model
@ -873,6 +881,96 @@ async def get_global_spend_report(
raise ValueError( raise ValueError(
"/spend/report endpoint " + CommonProxyErrors.not_premium_user.value "/spend/report endpoint " + CommonProxyErrors.not_premium_user.value
) )
if api_key is not None:
verbose_proxy_logger.debug("Getting /spend for api_key: %s", api_key)
if api_key.startswith("sk-"):
api_key = hash_token(token=api_key)
sql_query = """
WITH SpendByModelApiKey AS (
SELECT
sl.api_key,
sl.model,
SUM(sl.spend) AS model_cost,
SUM(sl.prompt_tokens) AS model_input_tokens,
SUM(sl.completion_tokens) AS model_output_tokens
FROM
"LiteLLM_SpendLogs" sl
WHERE
sl."startTime" BETWEEN $1::date AND $2::date AND sl.api_key = $3
GROUP BY
sl.api_key,
sl.model
)
SELECT
api_key,
SUM(model_cost) AS total_cost,
SUM(model_input_tokens) AS total_input_tokens,
SUM(model_output_tokens) AS total_output_tokens,
jsonb_agg(jsonb_build_object(
'model', model,
'total_cost', model_cost,
'total_input_tokens', model_input_tokens,
'total_output_tokens', model_output_tokens
)) AS model_details
FROM
SpendByModelApiKey
GROUP BY
api_key
ORDER BY
total_cost DESC;
"""
db_response = await prisma_client.db.query_raw(
sql_query, start_date_obj, end_date_obj, api_key
)
if db_response is None:
return []
return db_response
elif internal_user_id is not None:
verbose_proxy_logger.debug(
"Getting /spend for internal_user_id: %s", internal_user_id
)
sql_query = """
WITH SpendByModelApiKey AS (
SELECT
sl.api_key,
sl.model,
SUM(sl.spend) AS model_cost,
SUM(sl.prompt_tokens) AS model_input_tokens,
SUM(sl.completion_tokens) AS model_output_tokens
FROM
"LiteLLM_SpendLogs" sl
WHERE
sl."startTime" BETWEEN $1::date AND $2::date AND sl.user = $3
GROUP BY
sl.api_key,
sl.model
)
SELECT
api_key,
SUM(model_cost) AS total_cost,
SUM(model_input_tokens) AS total_input_tokens,
SUM(model_output_tokens) AS total_output_tokens,
jsonb_agg(jsonb_build_object(
'model', model,
'total_cost', model_cost,
'total_input_tokens', model_input_tokens,
'total_output_tokens', model_output_tokens
)) AS model_details
FROM
SpendByModelApiKey
GROUP BY
api_key
ORDER BY
total_cost DESC;
"""
db_response = await prisma_client.db.query_raw(
sql_query, start_date_obj, end_date_obj, internal_user_id
)
if db_response is None:
return []
return db_response
if group_by == "team": if group_by == "team":
# first get data from spend logs -> SpendByModelApiKey # first get data from spend logs -> SpendByModelApiKey
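A sketch of calling the extended endpoint; the host and keys are placeholders, and per the hunk above, raw "sk-" keys are hashed server-side before the lookup:

import requests

resp = requests.get(
    "http://localhost:4000/global/spend/report",
    params={
        "start_date": "2024-06-01",
        "end_date": "2024-06-30",
        "api_key": "sk-1234",  # or internal_user_id="1234"
    },
    headers={"Authorization": "Bearer sk-<master-key>"},
)
print(resp.json())  # [{"api_key": ..., "total_cost": ..., "model_details": [...]}]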

View file

@ -353,7 +353,7 @@ class ProxyLogging:
raise HTTPException( raise HTTPException(
status_code=400, detail={"error": response} status_code=400, detail={"error": response}
) )
print_verbose(f"final data being sent to {call_type} call: {data}")
return data return data
except Exception as e: except Exception as e:
raise e raise e
@ -2705,178 +2705,6 @@ def _is_valid_team_configs(team_id=None, team_config=None, request_data=None):
return return
def encrypt_value(value: str, master_key: str):
import hashlib
import nacl.secret
import nacl.utils
# get 32 byte master key #
hash_object = hashlib.sha256(master_key.encode())
hash_bytes = hash_object.digest()
# initialize secret box #
box = nacl.secret.SecretBox(hash_bytes)
# encode message #
value_bytes = value.encode("utf-8")
encrypted = box.encrypt(value_bytes)
return encrypted
def decrypt_value(value: bytes, master_key: str) -> str:
import hashlib
import nacl.secret
import nacl.utils
# get 32 byte master key #
hash_object = hashlib.sha256(master_key.encode())
hash_bytes = hash_object.digest()
# initialize secret box #
box = nacl.secret.SecretBox(hash_bytes)
# Convert the bytes object to a string
plaintext = box.decrypt(value)
plaintext = plaintext.decode("utf-8") # type: ignore
return plaintext # type: ignore
# LiteLLM Admin UI - Non SSO Login
url_to_redirect_to = os.getenv("PROXY_BASE_URL", "")
url_to_redirect_to += "/login"
html_form = f"""
<!DOCTYPE html>
<html>
<head>
<title>LiteLLM Login</title>
<style>
body {{
font-family: Arial, sans-serif;
background-color: #f4f4f4;
margin: 0;
padding: 0;
display: flex;
justify-content: center;
align-items: center;
height: 100vh;
}}
form {{
background-color: #fff;
padding: 20px;
border-radius: 8px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
}}
label {{
display: block;
margin-bottom: 8px;
}}
input {{
width: 100%;
padding: 8px;
margin-bottom: 16px;
box-sizing: border-box;
border: 1px solid #ccc;
border-radius: 4px;
}}
input[type="submit"] {{
background-color: #4caf50;
color: #fff;
cursor: pointer;
}}
input[type="submit"]:hover {{
background-color: #45a049;
}}
</style>
</head>
<body>
<form action="{url_to_redirect_to}" method="post">
<h2>LiteLLM Login</h2>
<p>By default Username is "admin" and Password is your set LiteLLM Proxy `MASTER_KEY`</p>
<p>If you need to set UI credentials / SSO docs here: <a href="https://docs.litellm.ai/docs/proxy/ui" target="_blank">https://docs.litellm.ai/docs/proxy/ui</a></p>
<br>
<label for="username">Username:</label>
<input type="text" id="username" name="username" required>
<label for="password">Password:</label>
<input type="password" id="password" name="password" required>
<input type="submit" value="Submit">
</form>
"""
missing_keys_html_form = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<style>
body {
font-family: Arial, sans-serif;
background-color: #f4f4f9;
color: #333;
margin: 20px;
line-height: 1.6;
}
.container {
max-width: 600px;
margin: auto;
padding: 20px;
background: #fff;
border: 1px solid #ddd;
border-radius: 5px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
}
h1 {
font-size: 24px;
margin-bottom: 20px;
}
pre {
background: #f8f8f8;
padding: 10px;
border: 1px solid #ccc;
border-radius: 4px;
overflow-x: auto;
font-size: 14px;
}
.env-var {
font-weight: normal;
}
.comment {
font-weight: normal;
color: #777;
}
</style>
<title>Environment Setup Instructions</title>
</head>
<body>
<div class="container">
<h1>Environment Setup Instructions</h1>
<p>Please add the following configurations to your environment variables:</p>
<pre>
<span class="env-var">LITELLM_MASTER_KEY="sk-1234"</span> <span class="comment"># make this unique. must start with `sk-`.</span>
<span class="env-var">DATABASE_URL="postgres://..."</span> <span class="comment"># Need a postgres database? (Check out Supabase, Neon, etc)</span>
<span class="comment">## OPTIONAL ##</span>
<span class="env-var">PORT=4000</span> <span class="comment"># DO THIS FOR RENDER/RAILWAY</span>
<span class="env-var">STORE_MODEL_IN_DB="True"</span> <span class="comment"># Allow storing models in db</span>
</pre>
</div>
</body>
</html>
"""
def _to_ns(dt): def _to_ns(dt):
return int(dt.timestamp() * 1e9) return int(dt.timestamp() * 1e9)
@ -2888,6 +2716,11 @@ def get_error_message_str(e: Exception) -> str:
error_message = e.detail error_message = e.detail
elif isinstance(e.detail, dict): elif isinstance(e.detail, dict):
error_message = json.dumps(e.detail) error_message = json.dumps(e.detail)
elif hasattr(e, "message"):
if isinstance(e.message, str):
error_message = e.message
elif isinstance(e.message, dict):
error_message = json.dumps(e.message)
else: else:
error_message = str(e) error_message = str(e)
else: else:
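With the isinstance fix above, the extended fallback order in get_error_message_str is: e.detail (str) → e.detail (dict, JSON-dumped) → e.message (str) → e.message (dict, JSON-dumped) → str(e). A minimal illustration of the new branch (FakeError is hypothetical):

import json

class FakeError(Exception):
    message = {"error": "over budget"}

def message_fallback(e: Exception) -> str:
    # mirrors the new `elif hasattr(e, "message")` branch
    msg = getattr(e, "message", None)
    if isinstance(msg, str):
        return msg
    if isinstance(msg, dict):
        return json.dumps(msg)
    return str(e)

assert message_fallback(FakeError()) == '{"error": "over budget"}'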

View file

@ -46,15 +46,15 @@ from litellm._logging import verbose_router_logger
from litellm.caching import DualCache, InMemoryCache, RedisCache from litellm.caching import DualCache, InMemoryCache, RedisCache
from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.azure import get_azure_ad_token_from_oidc from litellm.llms.azure import get_azure_ad_token_from_oidc
from litellm.llms.custom_httpx.azure_dall_e_2 import (
AsyncCustomHTTPTransport,
CustomHTTPTransport,
)
from litellm.router_strategy.least_busy import LeastBusyLoggingHandler from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
from litellm.router_strategy.lowest_tpm_rpm import LowestTPMLoggingHandler from litellm.router_strategy.lowest_tpm_rpm import LowestTPMLoggingHandler
from litellm.router_strategy.lowest_tpm_rpm_v2 import LowestTPMLoggingHandler_v2 from litellm.router_strategy.lowest_tpm_rpm_v2 import LowestTPMLoggingHandler_v2
from litellm.router_utils.client_initalization_utils import (
set_client,
should_initialize_sync_client,
)
from litellm.router_utils.handle_error import send_llm_exception_alert from litellm.router_utils.handle_error import send_llm_exception_alert
from litellm.scheduler import FlowItem, Scheduler from litellm.scheduler import FlowItem, Scheduler
from litellm.types.llms.openai import ( from litellm.types.llms.openai import (
@ -79,6 +79,7 @@ from litellm.types.router import (
ModelInfo, ModelInfo,
RetryPolicy, RetryPolicy,
RouterErrors, RouterErrors,
RouterGeneralSettings,
updateDeployment, updateDeployment,
updateLiteLLMParams, updateLiteLLMParams,
) )
@ -88,6 +89,7 @@ from litellm.utils import (
ModelResponse, ModelResponse,
_is_region_eu, _is_region_eu,
calculate_max_parallel_requests, calculate_max_parallel_requests,
create_proxy_transport_and_mounts,
get_utc_datetime, get_utc_datetime,
) )
@ -169,6 +171,7 @@ class Router:
routing_strategy_args: dict = {}, # just for latency-based routing routing_strategy_args: dict = {}, # just for latency-based routing
semaphore: Optional[asyncio.Semaphore] = None, semaphore: Optional[asyncio.Semaphore] = None,
alerting_config: Optional[AlertingConfig] = None, alerting_config: Optional[AlertingConfig] = None,
router_general_settings: Optional[RouterGeneralSettings] = None,
) -> None: ) -> None:
""" """
Initialize the Router class with the given parameters for caching, reliability, and routing strategy. Initialize the Router class with the given parameters for caching, reliability, and routing strategy.
@ -246,6 +249,9 @@ class Router:
verbose_router_logger.setLevel(logging.INFO) verbose_router_logger.setLevel(logging.INFO)
elif debug_level == "DEBUG": elif debug_level == "DEBUG":
verbose_router_logger.setLevel(logging.DEBUG) verbose_router_logger.setLevel(logging.DEBUG)
self.router_general_settings: Optional[RouterGeneralSettings] = (
router_general_settings
)
self.assistants_config = assistants_config self.assistants_config = assistants_config
self.deployment_names: List = ( self.deployment_names: List = (
@ -3247,520 +3253,6 @@ class Router:
except Exception as e: except Exception as e:
raise e raise e
def set_client(self, model: dict):
"""
- Initializes Azure/OpenAI clients. Stores them in cache, b/c of this - https://github.com/BerriAI/litellm/issues/1278
- Initializes Semaphore for client w/ rpm. Stores them in cache. b/c of this - https://github.com/BerriAI/litellm/issues/2994
"""
client_ttl = self.client_ttl
litellm_params = model.get("litellm_params", {})
model_name = litellm_params.get("model")
model_id = model["model_info"]["id"]
# ### IF RPM SET - initialize a semaphore ###
rpm = litellm_params.get("rpm", None)
tpm = litellm_params.get("tpm", None)
max_parallel_requests = litellm_params.get("max_parallel_requests", None)
calculated_max_parallel_requests = calculate_max_parallel_requests(
rpm=rpm,
max_parallel_requests=max_parallel_requests,
tpm=tpm,
default_max_parallel_requests=self.default_max_parallel_requests,
)
if calculated_max_parallel_requests:
semaphore = asyncio.Semaphore(calculated_max_parallel_requests)
cache_key = f"{model_id}_max_parallel_requests_client"
self.cache.set_cache(
key=cache_key,
value=semaphore,
local_only=True,
)
#### for OpenAI / Azure we need to initialize the Client for High Traffic ########
custom_llm_provider = litellm_params.get("custom_llm_provider")
custom_llm_provider = custom_llm_provider or model_name.split("/", 1)[0] or ""
default_api_base = None
default_api_key = None
if custom_llm_provider in litellm.openai_compatible_providers:
_, custom_llm_provider, api_key, api_base = litellm.get_llm_provider(
model=model_name
)
default_api_base = api_base
default_api_key = api_key
if (
model_name in litellm.open_ai_chat_completion_models
or custom_llm_provider in litellm.openai_compatible_providers
or custom_llm_provider == "azure"
or custom_llm_provider == "azure_text"
or custom_llm_provider == "custom_openai"
or custom_llm_provider == "openai"
or custom_llm_provider == "text-completion-openai"
or "ft:gpt-3.5-turbo" in model_name
or model_name in litellm.open_ai_embedding_models
):
is_azure_ai_studio_model: bool = False
if custom_llm_provider == "azure":
if litellm.utils._is_non_openai_azure_model(model_name):
is_azure_ai_studio_model = True
custom_llm_provider = "openai"
# remove azure prefix from model_name
model_name = model_name.replace("azure/", "")
# glorified / complicated reading of configs
# user can pass vars directly or they can pass os.environ/AZURE_API_KEY, in which case we will read the env
# we do this here because we init clients for Azure, OpenAI and we need to set the right key
api_key = litellm_params.get("api_key") or default_api_key
if (
api_key
and isinstance(api_key, str)
and api_key.startswith("os.environ/")
):
api_key_env_name = api_key.replace("os.environ/", "")
api_key = litellm.get_secret(api_key_env_name)
litellm_params["api_key"] = api_key
api_base = litellm_params.get("api_base")
base_url = litellm_params.get("base_url")
api_base = (
api_base or base_url or default_api_base
) # allow users to pass in `api_base` or `base_url` for azure
if api_base and api_base.startswith("os.environ/"):
api_base_env_name = api_base.replace("os.environ/", "")
api_base = litellm.get_secret(api_base_env_name)
litellm_params["api_base"] = api_base
## AZURE AI STUDIO MISTRAL CHECK ##
"""
Make sure api base ends in /v1/
if not, add it - https://github.com/BerriAI/litellm/issues/2279
"""
if (
is_azure_ai_studio_model is True
and api_base is not None
and isinstance(api_base, str)
and not api_base.endswith("/v1/")
):
# check if it ends with a trailing slash
if api_base.endswith("/"):
api_base += "v1/"
elif api_base.endswith("/v1"):
api_base += "/"
else:
api_base += "/v1/"
api_version = litellm_params.get("api_version")
if api_version and api_version.startswith("os.environ/"):
api_version_env_name = api_version.replace("os.environ/", "")
api_version = litellm.get_secret(api_version_env_name)
litellm_params["api_version"] = api_version
timeout = litellm_params.pop("timeout", None) or litellm.request_timeout
if isinstance(timeout, str) and timeout.startswith("os.environ/"):
timeout_env_name = timeout.replace("os.environ/", "")
timeout = litellm.get_secret(timeout_env_name)
litellm_params["timeout"] = timeout
stream_timeout = litellm_params.pop(
"stream_timeout", timeout
) # if no stream_timeout is set, default to timeout
if isinstance(stream_timeout, str) and stream_timeout.startswith(
"os.environ/"
):
stream_timeout_env_name = stream_timeout.replace("os.environ/", "")
stream_timeout = litellm.get_secret(stream_timeout_env_name)
litellm_params["stream_timeout"] = stream_timeout
max_retries = litellm_params.pop(
"max_retries", 0
) # router handles retry logic
if isinstance(max_retries, str) and max_retries.startswith("os.environ/"):
max_retries_env_name = max_retries.replace("os.environ/", "")
max_retries = litellm.get_secret(max_retries_env_name)
litellm_params["max_retries"] = max_retries
# proxy support
import os
import httpx
# Check if the HTTP_PROXY and HTTPS_PROXY environment variables are set and use them accordingly.
http_proxy = os.getenv("HTTP_PROXY", None)
https_proxy = os.getenv("HTTPS_PROXY", None)
no_proxy = os.getenv("NO_PROXY", None)
# Create the proxies dictionary only if the environment variables are set.
sync_proxy_mounts = None
async_proxy_mounts = None
if http_proxy is not None and https_proxy is not None:
sync_proxy_mounts = {
"http://": httpx.HTTPTransport(proxy=httpx.Proxy(url=http_proxy)),
"https://": httpx.HTTPTransport(proxy=httpx.Proxy(url=https_proxy)),
}
async_proxy_mounts = {
"http://": httpx.AsyncHTTPTransport(
proxy=httpx.Proxy(url=http_proxy)
),
"https://": httpx.AsyncHTTPTransport(
proxy=httpx.Proxy(url=https_proxy)
),
}
# assume no_proxy is a list of comma separated urls
if no_proxy is not None and isinstance(no_proxy, str):
no_proxy_urls = no_proxy.split(",")
for url in no_proxy_urls: # set no-proxy support for specific urls
sync_proxy_mounts[url] = None # type: ignore
async_proxy_mounts[url] = None # type: ignore
organization = litellm_params.get("organization", None)
if isinstance(organization, str) and organization.startswith("os.environ/"):
organization_env_name = organization.replace("os.environ/", "")
organization = litellm.get_secret(organization_env_name)
litellm_params["organization"] = organization
if custom_llm_provider == "azure" or custom_llm_provider == "azure_text":
if api_base is None or not isinstance(api_base, str):
filtered_litellm_params = {
k: v
for k, v in model["litellm_params"].items()
if k != "api_key"
}
_filtered_model = {
"model_name": model["model_name"],
"litellm_params": filtered_litellm_params,
}
raise ValueError(
f"api_base is required for Azure OpenAI. Set it on your config. Model - {_filtered_model}"
)
azure_ad_token = litellm_params.get("azure_ad_token")
if azure_ad_token is not None:
if azure_ad_token.startswith("oidc/"):
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
if api_version is None:
api_version = litellm.AZURE_DEFAULT_API_VERSION
if "gateway.ai.cloudflare.com" in api_base:
if not api_base.endswith("/"):
api_base += "/"
azure_model = model_name.replace("azure/", "")
api_base += f"{azure_model}"
cache_key = f"{model_id}_async_client"
_client = openai.AsyncAzureOpenAI(
api_key=api_key,
azure_ad_token=azure_ad_token,
base_url=api_base,
api_version=api_version,
timeout=timeout,
max_retries=max_retries,
http_client=httpx.AsyncClient(
transport=AsyncCustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=async_proxy_mounts,
), # type: ignore
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
cache_key = f"{model_id}_client"
_client = openai.AzureOpenAI( # type: ignore
api_key=api_key,
azure_ad_token=azure_ad_token,
base_url=api_base,
api_version=api_version,
timeout=timeout,
max_retries=max_retries,
http_client=httpx.Client(
transport=CustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=sync_proxy_mounts,
), # type: ignore
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
# streaming clients can have diff timeouts
cache_key = f"{model_id}_stream_async_client"
_client = openai.AsyncAzureOpenAI( # type: ignore
api_key=api_key,
azure_ad_token=azure_ad_token,
base_url=api_base,
api_version=api_version,
timeout=stream_timeout,
max_retries=max_retries,
http_client=httpx.AsyncClient(
transport=AsyncCustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=async_proxy_mounts,
), # type: ignore
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
cache_key = f"{model_id}_stream_client"
_client = openai.AzureOpenAI( # type: ignore
api_key=api_key,
azure_ad_token=azure_ad_token,
base_url=api_base,
api_version=api_version,
timeout=stream_timeout,
max_retries=max_retries,
http_client=httpx.Client(
transport=CustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=sync_proxy_mounts,
), # type: ignore
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
else:
_api_key = api_key
if _api_key is not None and isinstance(_api_key, str):
# only show first 8 chars of api_key
_api_key = _api_key[:8] + "*" * 15
verbose_router_logger.debug(
f"Initializing Azure OpenAI Client for {model_name}, Api Base: {str(api_base)}, Api Key:{_api_key}"
)
azure_client_params = {
"api_key": api_key,
"azure_endpoint": api_base,
"api_version": api_version,
"azure_ad_token": azure_ad_token,
}
from litellm.llms.azure import select_azure_base_url_or_endpoint
# this decides if we should set azure_endpoint or base_url on Azure OpenAI Client
# required to support GPT-4 vision enhancements, since base_url needs to be set on Azure OpenAI Client
azure_client_params = select_azure_base_url_or_endpoint(
azure_client_params
)
cache_key = f"{model_id}_async_client"
_client = openai.AsyncAzureOpenAI( # type: ignore
**azure_client_params,
timeout=timeout,
max_retries=max_retries,
http_client=httpx.AsyncClient(
transport=AsyncCustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=async_proxy_mounts,
), # type: ignore
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
cache_key = f"{model_id}_client"
_client = openai.AzureOpenAI( # type: ignore
**azure_client_params,
timeout=timeout,
max_retries=max_retries,
http_client=httpx.Client(
transport=CustomHTTPTransport(
verify=litellm.ssl_verify,
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
),
mounts=sync_proxy_mounts,
), # type: ignore
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
# streaming clients should have diff timeouts
cache_key = f"{model_id}_stream_async_client"
_client = openai.AsyncAzureOpenAI( # type: ignore
**azure_client_params,
timeout=stream_timeout,
max_retries=max_retries,
http_client=httpx.AsyncClient(
transport=AsyncCustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=async_proxy_mounts,
),
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
cache_key = f"{model_id}_stream_client"
_client = openai.AzureOpenAI( # type: ignore
**azure_client_params,
timeout=stream_timeout,
max_retries=max_retries,
http_client=httpx.Client(
transport=CustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=sync_proxy_mounts,
),
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
else:
_api_key = api_key # type: ignore
if _api_key is not None and isinstance(_api_key, str):
# only show first 8 chars of api_key
_api_key = _api_key[:8] + "*" * 15
verbose_router_logger.debug(
f"Initializing OpenAI Client for {model_name}, Api Base:{str(api_base)}, Api Key:{_api_key}"
)
cache_key = f"{model_id}_async_client"
_client = openai.AsyncOpenAI( # type: ignore
api_key=api_key,
base_url=api_base,
timeout=timeout,
max_retries=max_retries,
organization=organization,
http_client=httpx.AsyncClient(
transport=AsyncCustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=async_proxy_mounts,
), # type: ignore
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
cache_key = f"{model_id}_client"
_client = openai.OpenAI( # type: ignore
api_key=api_key,
base_url=api_base,
timeout=timeout,
max_retries=max_retries,
organization=organization,
http_client=httpx.Client(
transport=CustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=sync_proxy_mounts,
), # type: ignore
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
# streaming clients should have diff timeouts
cache_key = f"{model_id}_stream_async_client"
_client = openai.AsyncOpenAI( # type: ignore
api_key=api_key,
base_url=api_base,
timeout=stream_timeout,
max_retries=max_retries,
organization=organization,
http_client=httpx.AsyncClient(
transport=AsyncCustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=async_proxy_mounts,
), # type: ignore
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
# streaming clients should have diff timeouts
cache_key = f"{model_id}_stream_client"
_client = openai.OpenAI( # type: ignore
api_key=api_key,
base_url=api_base,
timeout=stream_timeout,
max_retries=max_retries,
organization=organization,
http_client=httpx.Client(
transport=CustomHTTPTransport(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
mounts=sync_proxy_mounts,
), # type: ignore
)
self.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
    def _generate_model_id(self, model_group: str, litellm_params: dict):
        """
        Helper function to consistently generate the same id for a deployment
@@ -3904,7 +3396,9 @@ class Router:
            raise Exception(f"Unsupported provider - {custom_llm_provider}")

        # init OpenAI, Azure clients
-       self.set_client(model=deployment.to_json(exclude_none=True))
+       set_client(
+           litellm_router_instance=self, model=deployment.to_json(exclude_none=True)
+       )

        # set region (if azure model) ## PREVIEW FEATURE ##
        if litellm.enable_preview_features == True:
@@ -4432,7 +3926,7 @@ class Router:
                """
                Re-initialize the client
                """
-               self.set_client(model=deployment)
+               set_client(litellm_router_instance=self, model=deployment)
                client = self.cache.get_cache(key=cache_key, local_only=True)
                return client
            else:
@@ -4442,7 +3936,7 @@ class Router:
                """
                Re-initialize the client
                """
-               self.set_client(model=deployment)
+               set_client(litellm_router_instance=self, model=deployment)
                client = self.cache.get_cache(key=cache_key, local_only=True)
                return client
            else:
@@ -4453,7 +3947,7 @@ class Router:
                """
                Re-initialize the client
                """
-               self.set_client(model=deployment)
+               set_client(litellm_router_instance=self, model=deployment)
                client = self.cache.get_cache(key=cache_key)
                return client
            else:
@@ -4463,7 +3957,7 @@ class Router:
                """
                Re-initialize the client
                """
-               self.set_client(model=deployment)
+               set_client(litellm_router_instance=self, model=deployment)
                client = self.cache.get_cache(key=cache_key)
                return client
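Reviewer note: the refactor above moves `set_client` off the `Router` class into a free function that takes the router explicitly. A minimal sketch of the new call pattern, with the import path assumed since the diff viewer suppressed filenames:

from litellm.router_utils.client_initalization_utils import set_client  # assumed path

def reinitialize_deployment_client(router, deployment: dict):
    # rebuild the cached clients for one deployment, then read one back
    set_client(litellm_router_instance=router, model=deployment)
    model_id = deployment["model_info"]["id"]
    return router.cache.get_cache(key=f"{model_id}_async_client", local_only=True)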


@@ -0,0 +1,495 @@
import asyncio
import os
import traceback
from typing import TYPE_CHECKING, Any
import httpx
import openai
import litellm
from litellm._logging import verbose_router_logger
from litellm.llms.azure import get_azure_ad_token_from_oidc
from litellm.utils import calculate_max_parallel_requests
if TYPE_CHECKING:
from litellm.router import Router as _Router
LitellmRouter = _Router
else:
LitellmRouter = Any
def should_initialize_sync_client(
litellm_router_instance: LitellmRouter,
) -> bool:
"""
Returns if Sync OpenAI, Azure Clients should be initialized.
Do not init sync clients when router.router_general_settings.async_only_mode is True
"""
if litellm_router_instance is None:
return False
if litellm_router_instance.router_general_settings is not None:
if (
hasattr(litellm_router_instance, "router_general_settings")
and hasattr(
litellm_router_instance.router_general_settings, "async_only_mode"
)
and litellm_router_instance.router_general_settings.async_only_mode is True
):
return False
return True
def set_client(litellm_router_instance: LitellmRouter, model: dict):
"""
- Initializes Azure/OpenAI clients. Stores them in cache, b/c of this - https://github.com/BerriAI/litellm/issues/1278
- Initializes Semaphore for client w/ rpm. Stores them in cache. b/c of this - https://github.com/BerriAI/litellm/issues/2994
"""
client_ttl = litellm_router_instance.client_ttl
litellm_params = model.get("litellm_params", {})
model_name = litellm_params.get("model")
model_id = model["model_info"]["id"]
# ### IF RPM SET - initialize a semaphore ###
rpm = litellm_params.get("rpm", None)
tpm = litellm_params.get("tpm", None)
max_parallel_requests = litellm_params.get("max_parallel_requests", None)
calculated_max_parallel_requests = calculate_max_parallel_requests(
rpm=rpm,
max_parallel_requests=max_parallel_requests,
tpm=tpm,
default_max_parallel_requests=litellm_router_instance.default_max_parallel_requests,
)
if calculated_max_parallel_requests:
semaphore = asyncio.Semaphore(calculated_max_parallel_requests)
cache_key = f"{model_id}_max_parallel_requests_client"
litellm_router_instance.cache.set_cache(
key=cache_key,
value=semaphore,
local_only=True,
)
#### for OpenAI / Azure we need to initialize the Client for High Traffic ########
custom_llm_provider = litellm_params.get("custom_llm_provider")
custom_llm_provider = custom_llm_provider or model_name.split("/", 1)[0] or ""
default_api_base = None
default_api_key = None
if custom_llm_provider in litellm.openai_compatible_providers:
_, custom_llm_provider, api_key, api_base = litellm.get_llm_provider(
model=model_name
)
default_api_base = api_base
default_api_key = api_key
if (
model_name in litellm.open_ai_chat_completion_models
or custom_llm_provider in litellm.openai_compatible_providers
or custom_llm_provider == "azure"
or custom_llm_provider == "azure_text"
or custom_llm_provider == "custom_openai"
or custom_llm_provider == "openai"
or custom_llm_provider == "text-completion-openai"
or "ft:gpt-3.5-turbo" in model_name
or model_name in litellm.open_ai_embedding_models
):
is_azure_ai_studio_model: bool = False
if custom_llm_provider == "azure":
if litellm.utils._is_non_openai_azure_model(model_name):
is_azure_ai_studio_model = True
custom_llm_provider = "openai"
# remove azure prefix from model_name
model_name = model_name.replace("azure/", "")
# glorified / complicated reading of configs
# user can pass vars directly or they can pass os.environ/AZURE_API_KEY, in which case we will read the env
# we do this here because we init clients for Azure, OpenAI and we need to set the right key
api_key = litellm_params.get("api_key") or default_api_key
if api_key and isinstance(api_key, str) and api_key.startswith("os.environ/"):
api_key_env_name = api_key.replace("os.environ/", "")
api_key = litellm.get_secret(api_key_env_name)
litellm_params["api_key"] = api_key
api_base = litellm_params.get("api_base")
base_url = litellm_params.get("base_url")
api_base = (
api_base or base_url or default_api_base
) # allow users to pass in `api_base` or `base_url` for azure
if api_base and api_base.startswith("os.environ/"):
api_base_env_name = api_base.replace("os.environ/", "")
api_base = litellm.get_secret(api_base_env_name)
litellm_params["api_base"] = api_base
## AZURE AI STUDIO MISTRAL CHECK ##
"""
Make sure api base ends in /v1/
if not, add it - https://github.com/BerriAI/litellm/issues/2279
"""
if (
is_azure_ai_studio_model is True
and api_base is not None
and isinstance(api_base, str)
and not api_base.endswith("/v1/")
):
# check if it ends with a trailing slash
if api_base.endswith("/"):
api_base += "v1/"
elif api_base.endswith("/v1"):
api_base += "/"
else:
api_base += "/v1/"
api_version = litellm_params.get("api_version")
if api_version and api_version.startswith("os.environ/"):
api_version_env_name = api_version.replace("os.environ/", "")
api_version = litellm.get_secret(api_version_env_name)
litellm_params["api_version"] = api_version
timeout = litellm_params.pop("timeout", None) or litellm.request_timeout
if isinstance(timeout, str) and timeout.startswith("os.environ/"):
timeout_env_name = timeout.replace("os.environ/", "")
timeout = litellm.get_secret(timeout_env_name)
litellm_params["timeout"] = timeout
stream_timeout = litellm_params.pop(
"stream_timeout", timeout
) # if no stream_timeout is set, default to timeout
if isinstance(stream_timeout, str) and stream_timeout.startswith("os.environ/"):
stream_timeout_env_name = stream_timeout.replace("os.environ/", "")
stream_timeout = litellm.get_secret(stream_timeout_env_name)
litellm_params["stream_timeout"] = stream_timeout
max_retries = litellm_params.pop("max_retries", 0) # router handles retry logic
if isinstance(max_retries, str) and max_retries.startswith("os.environ/"):
max_retries_env_name = max_retries.replace("os.environ/", "")
max_retries = litellm.get_secret(max_retries_env_name)
litellm_params["max_retries"] = max_retries
organization = litellm_params.get("organization", None)
if isinstance(organization, str) and organization.startswith("os.environ/"):
organization_env_name = organization.replace("os.environ/", "")
organization = litellm.get_secret(organization_env_name)
litellm_params["organization"] = organization
if custom_llm_provider == "azure" or custom_llm_provider == "azure_text":
if api_base is None or not isinstance(api_base, str):
filtered_litellm_params = {
k: v for k, v in model["litellm_params"].items() if k != "api_key"
}
_filtered_model = {
"model_name": model["model_name"],
"litellm_params": filtered_litellm_params,
}
raise ValueError(
f"api_base is required for Azure OpenAI. Set it on your config. Model - {_filtered_model}"
)
azure_ad_token = litellm_params.get("azure_ad_token")
if azure_ad_token is not None:
if azure_ad_token.startswith("oidc/"):
azure_ad_token = get_azure_ad_token_from_oidc(azure_ad_token)
if api_version is None:
api_version = litellm.AZURE_DEFAULT_API_VERSION
if "gateway.ai.cloudflare.com" in api_base:
if not api_base.endswith("/"):
api_base += "/"
azure_model = model_name.replace("azure/", "")
api_base += f"{azure_model}"
cache_key = f"{model_id}_async_client"
_client = openai.AsyncAzureOpenAI(
api_key=api_key,
azure_ad_token=azure_ad_token,
base_url=api_base,
api_version=api_version,
timeout=timeout,
max_retries=max_retries,
http_client=httpx.AsyncClient(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
), # type: ignore
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
if should_initialize_sync_client(
litellm_router_instance=litellm_router_instance
):
cache_key = f"{model_id}_client"
_client = openai.AzureOpenAI( # type: ignore
api_key=api_key,
azure_ad_token=azure_ad_token,
base_url=api_base,
api_version=api_version,
timeout=timeout,
max_retries=max_retries,
http_client=httpx.Client(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
), # type: ignore
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
# streaming clients can have diff timeouts
cache_key = f"{model_id}_stream_async_client"
_client = openai.AsyncAzureOpenAI( # type: ignore
api_key=api_key,
azure_ad_token=azure_ad_token,
base_url=api_base,
api_version=api_version,
timeout=stream_timeout,
max_retries=max_retries,
http_client=httpx.AsyncClient(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
), # type: ignore
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
if should_initialize_sync_client(
litellm_router_instance=litellm_router_instance
):
cache_key = f"{model_id}_stream_client"
_client = openai.AzureOpenAI( # type: ignore
api_key=api_key,
azure_ad_token=azure_ad_token,
base_url=api_base,
api_version=api_version,
timeout=stream_timeout,
max_retries=max_retries,
http_client=httpx.Client(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
), # type: ignore
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
else:
_api_key = api_key
if _api_key is not None and isinstance(_api_key, str):
# only show first 8 chars of api_key
_api_key = _api_key[:8] + "*" * 15
verbose_router_logger.debug(
f"Initializing Azure OpenAI Client for {model_name}, Api Base: {str(api_base)}, Api Key:{_api_key}"
)
azure_client_params = {
"api_key": api_key,
"azure_endpoint": api_base,
"api_version": api_version,
"azure_ad_token": azure_ad_token,
}
from litellm.llms.azure import select_azure_base_url_or_endpoint
# this decides if we should set azure_endpoint or base_url on Azure OpenAI Client
# required to support GPT-4 vision enhancements, since base_url needs to be set on Azure OpenAI Client
azure_client_params = select_azure_base_url_or_endpoint(
azure_client_params
)
cache_key = f"{model_id}_async_client"
_client = openai.AsyncAzureOpenAI( # type: ignore
**azure_client_params,
timeout=timeout,
max_retries=max_retries,
http_client=httpx.AsyncClient(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
), # type: ignore
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
if should_initialize_sync_client(
litellm_router_instance=litellm_router_instance
):
cache_key = f"{model_id}_client"
_client = openai.AzureOpenAI( # type: ignore
**azure_client_params,
timeout=timeout,
max_retries=max_retries,
http_client=httpx.Client(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
), # type: ignore
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
# streaming clients should have diff timeouts
cache_key = f"{model_id}_stream_async_client"
_client = openai.AsyncAzureOpenAI( # type: ignore
**azure_client_params,
timeout=stream_timeout,
max_retries=max_retries,
http_client=httpx.AsyncClient(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
if should_initialize_sync_client(
litellm_router_instance=litellm_router_instance
):
cache_key = f"{model_id}_stream_client"
_client = openai.AzureOpenAI( # type: ignore
**azure_client_params,
timeout=stream_timeout,
max_retries=max_retries,
http_client=httpx.Client(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
),
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
else:
_api_key = api_key # type: ignore
if _api_key is not None and isinstance(_api_key, str):
# only show first 5 chars of api_key
_api_key = _api_key[:8] + "*" * 15
verbose_router_logger.debug(
f"Initializing OpenAI Client for {model_name}, Api Base:{str(api_base)}, Api Key:{_api_key}"
)
cache_key = f"{model_id}_async_client"
_client = openai.AsyncOpenAI( # type: ignore
api_key=api_key,
base_url=api_base,
timeout=timeout,
max_retries=max_retries,
organization=organization,
http_client=httpx.AsyncClient(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
), # type: ignore
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
if should_initialize_sync_client(
litellm_router_instance=litellm_router_instance
):
cache_key = f"{model_id}_client"
_client = openai.OpenAI( # type: ignore
api_key=api_key,
base_url=api_base,
timeout=timeout,
max_retries=max_retries,
organization=organization,
http_client=httpx.Client(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
), # type: ignore
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
# streaming clients should have diff timeouts
cache_key = f"{model_id}_stream_async_client"
_client = openai.AsyncOpenAI( # type: ignore
api_key=api_key,
base_url=api_base,
timeout=stream_timeout,
max_retries=max_retries,
organization=organization,
http_client=httpx.AsyncClient(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
), # type: ignore
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
if should_initialize_sync_client(
litellm_router_instance=litellm_router_instance
):
# streaming clients should have diff timeouts
cache_key = f"{model_id}_stream_client"
_client = openai.OpenAI( # type: ignore
api_key=api_key,
base_url=api_base,
timeout=stream_timeout,
max_retries=max_retries,
organization=organization,
http_client=httpx.Client(
limits=httpx.Limits(
max_connections=1000, max_keepalive_connections=100
),
verify=litellm.ssl_verify,
), # type: ignore
)
litellm_router_instance.cache.set_cache(
key=cache_key,
value=_client,
ttl=client_ttl,
local_only=True,
) # cache for 1 hr
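Taken together, `set_client` leaves up to four entries per deployment in the router's in-memory cache: `{model_id}_client`, `{model_id}_async_client`, `{model_id}_stream_client`, and `{model_id}_stream_async_client`. A minimal sketch of inspecting them after Router init, mirroring the new router-init tests further down (all deployment values are placeholders):

from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "azure/my-deployment",  # placeholder
                "api_key": "sk-placeholder",  # placeholder
                "api_base": "https://example.openai.azure.com",  # placeholder
                "api_version": "2024-02-01",
            },
        }
    ]
)

model_id = router.model_list[0]["model_info"]["id"]
for suffix in ("_client", "_async_client", "_stream_client", "_stream_async_client"):
    print(suffix, router.cache.get_cache(f"{model_id}{suffix}") is not None)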


@@ -1607,7 +1607,17 @@ def test_caching_redis_simple(caplog):
    print(m)
    print(time.time() - s2)

+   redis_async_caching_error = False
+   redis_service_logging_error = False
    captured_logs = [rec.message for rec in caplog.records]
-   assert "LiteLLM Redis Caching: async set" not in captured_logs
-   assert "ServiceLogging.async_service_success_hook" not in captured_logs
+   print(f"captured_logs: {captured_logs}")
+   for item in captured_logs:
+       if "Error connecting to Async Redis client" in item:
+           redis_async_caching_error = True
+       if "ServiceLogging.async_service_success_hook" in item:
+           redis_service_logging_error = True
+
+   assert redis_async_caching_error is False
+   assert redis_service_logging_error is False


@@ -712,7 +712,6 @@ def test_vertex_ai_claude_completion_cost():
    assert cost == predicted_cost

@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_completion_cost_hidden_params(sync_mode):
@@ -732,6 +731,7 @@ async def test_completion_cost_hidden_params(sync_mode):
    assert "response_cost" in response._hidden_params
    assert isinstance(response._hidden_params["response_cost"], float)

def test_vertex_ai_gemini_predict_cost():
    model = "gemini-1.5-flash"
    messages = [{"role": "user", "content": "Hey, hows it going???"}]
@@ -739,3 +739,16 @@ def test_vertex_ai_gemini_predict_cost():
    assert predictive_cost > 0
@pytest.mark.parametrize("model", ["openai/tts-1", "azure/tts-1"])
def test_completion_cost_tts(model):
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
cost = completion_cost(
model=model,
prompt="the quick brown fox jumped over the lazy dogs",
call_type="speech",
)
assert cost > 0


@@ -2,23 +2,30 @@
## Unit tests for ProxyConfig class

-import sys, os
+import os
+import sys
import traceback

from dotenv import load_dotenv

load_dotenv()
-import os, io
+import io
+import os

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
-import pytest, litellm
-from pydantic import BaseModel, ConfigDict
-from litellm.proxy.proxy_server import ProxyConfig
-from litellm.proxy.utils import encrypt_value, ProxyLogging, DualCache
-from litellm.types.router import Deployment, LiteLLM_Params, ModelInfo
from typing import Literal

+import pytest
+from pydantic import BaseModel, ConfigDict
+
+import litellm
+from litellm.proxy.common_utils.encrypt_decrypt_utils import encrypt_value
+from litellm.proxy.proxy_server import ProxyConfig
+from litellm.proxy.utils import DualCache, ProxyLogging
+from litellm.types.router import Deployment, LiteLLM_Params, ModelInfo

class DBModel(BaseModel):
    model_id: str
@@ -28,6 +35,7 @@ class DBModel(BaseModel):
    model_config = ConfigDict(protected_namespaces=())

@pytest.mark.asyncio
async def test_delete_deployment():
    """


@@ -1,8 +1,13 @@
# What is this?
## Unit test for presidio pii masking
-import sys, os, asyncio, time, random
-from datetime import datetime
+import asyncio
+import os
+import random
+import sys
+import time
import traceback
+from datetime import datetime

from dotenv import load_dotenv

load_dotenv()
@@ -12,12 +17,40 @@ sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest

import litellm
-from litellm.proxy.hooks.presidio_pii_masking import _OPTIONAL_PresidioPIIMasking
from litellm import Router, mock_completion
-from litellm.proxy.utils import ProxyLogging
-from litellm.proxy._types import UserAPIKeyAuth
from litellm.caching import DualCache
+from litellm.proxy._types import UserAPIKeyAuth
+from litellm.proxy.hooks.presidio_pii_masking import _OPTIONAL_PresidioPIIMasking
+from litellm.proxy.utils import ProxyLogging
@pytest.mark.parametrize(
"base_url",
[
"presidio-analyzer-s3pa:10000",
"https://presidio-analyzer-s3pa:10000",
"http://presidio-analyzer-s3pa:10000",
],
)
def test_validate_environment_missing_http(base_url):
pii_masking = _OPTIONAL_PresidioPIIMasking(mock_testing=True)
os.environ["PRESIDIO_ANALYZER_API_BASE"] = f"{base_url}/analyze"
os.environ["PRESIDIO_ANONYMIZER_API_BASE"] = f"{base_url}/anonymize"
pii_masking.validate_environment()
expected_url = base_url
if not (base_url.startswith("https://") or base_url.startswith("http://")):
expected_url = "http://" + base_url
assert (
pii_masking.presidio_anonymizer_api_base == f"{expected_url}/anonymize/"
), "Got={}, Expected={}".format(
pii_masking.presidio_anonymizer_api_base, f"{expected_url}/anonymize/"
)
assert pii_masking.presidio_analyzer_api_base == f"{expected_url}/analyze/"
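Put differently, `validate_environment` should default to `http://` when the scheme is missing and guarantee a trailing slash on each route. A standalone sketch of that rule (hypothetical helper, mirroring the assertions above):

def normalize_presidio_base(base_url: str, route: str) -> str:
    # default to http:// when no scheme is given, and always end the route with /
    if not (base_url.startswith("http://") or base_url.startswith("https://")):
        base_url = "http://" + base_url
    return f"{base_url}{route}/"

assert (
    normalize_presidio_base("presidio-analyzer-s3pa:10000", "/analyze")
    == "http://presidio-analyzer-s3pa:10000/analyze/"
)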
@pytest.mark.asyncio


@@ -1894,6 +1894,49 @@ async def test_router_model_usage(mock_response):
        raise e
@pytest.mark.skip(reason="Check if this is causing ci/cd issues.")
@pytest.mark.asyncio
async def test_is_proxy_set():
"""
Assert if proxy is set
"""
from httpx import AsyncHTTPTransport
os.environ["HTTPS_PROXY"] = "https://proxy.example.com:8080"
from openai import AsyncAzureOpenAI
    # Function to check if a proxy is set on the client
def check_proxy(client: httpx.AsyncClient) -> bool:
print(f"client._mounts: {client._mounts}")
assert len(client._mounts) == 1
for k, v in client._mounts.items():
assert isinstance(v, AsyncHTTPTransport)
return True
llm_router = Router(
model_list=[
{
"model_name": "gpt-4",
"litellm_params": {
"model": "azure/gpt-3.5-turbo",
"api_key": "my-key",
"api_base": "my-base",
"mock_response": "hello world",
},
"model_info": {"id": "1"},
}
]
)
_deployment = llm_router.get_deployment(model_id="1")
model_client: AsyncAzureOpenAI = llm_router._get_client(
deployment=_deployment, kwargs={}, client_type="async"
) # type: ignore
assert check_proxy(client=model_client._client)
@pytest.mark.parametrize(
    "model, base_model, llm_provider",
    [


@@ -1,16 +1,22 @@
# this tests if the router is initialized correctly
-import sys, os, time
-import traceback, asyncio
+import asyncio
+import os
+import sys
+import time
+import traceback

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
+from collections import defaultdict
+from concurrent.futures import ThreadPoolExecutor
+
+from dotenv import load_dotenv

import litellm
from litellm import Router
-from concurrent.futures import ThreadPoolExecutor
-from collections import defaultdict
-from dotenv import load_dotenv

load_dotenv()
@@ -24,6 +30,7 @@ load_dotenv()
def test_init_clients():
    litellm.set_verbose = True
    import logging

    from litellm._logging import verbose_router_logger

    verbose_router_logger.setLevel(logging.DEBUG)
@@ -489,6 +496,7 @@ def test_init_clients_azure_command_r_plus():
    # For azure/command-r-plus we need to use openai.OpenAI because of how the Azure provider requires requests being sent
    litellm.set_verbose = True
    import logging

    from litellm._logging import verbose_router_logger

    verbose_router_logger.setLevel(logging.DEBUG)
@@ -585,3 +593,46 @@ async def test_text_completion_with_organization():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
def test_init_clients_async_mode():
litellm.set_verbose = True
import logging
from litellm._logging import verbose_router_logger
from litellm.types.router import RouterGeneralSettings
verbose_router_logger.setLevel(logging.DEBUG)
try:
print("testing init 4 clients with diff timeouts")
model_list = [
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {
"model": "azure/chatgpt-v-2",
"api_key": os.getenv("AZURE_API_KEY"),
"api_version": os.getenv("AZURE_API_VERSION"),
"api_base": os.getenv("AZURE_API_BASE"),
"timeout": 0.01,
"stream_timeout": 0.000_001,
"max_retries": 7,
},
},
]
router = Router(
model_list=model_list,
set_verbose=True,
router_general_settings=RouterGeneralSettings(async_only_mode=True),
)
for elem in router.model_list:
model_id = elem["model_info"]["id"]
# sync clients not initialized in async_only_mode=True
assert router.cache.get_cache(f"{model_id}_client") is None
assert router.cache.get_cache(f"{model_id}_stream_client") is None
# only async clients initialized in async_only_mode=True
assert router.cache.get_cache(f"{model_id}_async_client") is not None
assert router.cache.get_cache(f"{model_id}_stream_async_client") is not None
except Exception as e:
pytest.fail(f"Error occurred: {e}")


@@ -1,15 +1,22 @@
-import sys, os, time
-import traceback, asyncio
+import asyncio
+import os
+import sys
+import time
+import traceback

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
-from litellm import completion, stream_chunk_builder
-import litellm
-import os, dotenv
-from openai import OpenAI
+import os
+
+import dotenv
import pytest
+from openai import OpenAI
+
+import litellm
+from litellm import completion, stream_chunk_builder

dotenv.load_dotenv()
@@ -147,3 +154,45 @@ def test_stream_chunk_builder_litellm_tool_call_regular_message():

# test_stream_chunk_builder_litellm_tool_call_regular_message()
def test_stream_chunk_builder_litellm_usage_chunks():
"""
Checks if stream_chunk_builder is able to correctly rebuild with given metadata from streaming chunks
"""
messages = [
{"role": "user", "content": "Tell me the funniest joke you know."},
{
"role": "assistant",
"content": "Why did the chicken cross the road?\nYou will not guess this one I bet\n",
},
{"role": "user", "content": "I do not know, why?"},
{"role": "assistant", "content": "uhhhh\n\n\nhmmmm.....\nthinking....\n"},
{"role": "user", "content": "\nI am waiting...\n\n...\n"},
]
# make a regular gemini call
response = completion(
model="gemini/gemini-1.5-flash",
messages=messages,
)
usage: litellm.Usage = response.usage
gemini_pt = usage.prompt_tokens
# make a streaming gemini call
response = completion(
model="gemini/gemini-1.5-flash",
messages=messages,
stream=True,
complete_response=True,
stream_options={"include_usage": True},
)
usage: litellm.Usage = response.usage
stream_rebuilt_pt = usage.prompt_tokens
# assert prompt tokens are the same
assert gemini_pt == stream_rebuilt_pt


@@ -12,6 +12,9 @@ from typing import Tuple
import pytest
from pydantic import BaseModel

+import litellm.litellm_core_utils
+import litellm.litellm_core_utils.litellm_logging
+
sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
@@ -1078,7 +1081,6 @@ def test_vertex_ai_stream(provider):
        print(f"completion_response: {complete_response}")
        assert is_finished == True
-       assert False
    except litellm.RateLimitError as e:
        pass
    except Exception as e:
@@ -3034,8 +3036,11 @@ def test_completion_claude_3_function_call_with_streaming():
        pytest.fail(f"Error occurred: {e}")

+@pytest.mark.parametrize(
+    "model", ["gemini/gemini-1.5-flash"]
+)  # "claude-3-opus-20240229",
@pytest.mark.asyncio
-async def test_acompletion_claude_3_function_call_with_streaming():
+async def test_acompletion_claude_3_function_call_with_streaming(model):
    litellm.set_verbose = True
    tools = [
        {
@@ -3066,7 +3071,7 @@ async def test_acompletion_claude_3_function_call_with_streaming():
    try:
        # test without max tokens
        response = await acompletion(
-           model="claude-3-opus-20240229",
+           model=model,
            messages=messages,
            tools=tools,
            tool_choice="required",
@@ -3453,3 +3458,55 @@ def test_aamazing_unit_test_custom_stream_wrapper_n():
        assert (
            chunk_dict == chunks[idx]
        ), f"idx={idx} translated chunk = {chunk_dict} != openai chunk = {chunks[idx]}"
def test_unit_test_custom_stream_wrapper_function_call():
"""
Test if model returns a tool call, the finish reason is correctly set to 'tool_calls'
"""
from litellm.types.llms.openai import ChatCompletionDeltaChunk
litellm.set_verbose = False
delta: ChatCompletionDeltaChunk = {
"content": None,
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": '"}'},
"type": "function",
"index": 0,
}
],
}
chunk = {
"id": "chatcmpl-123",
"object": "chat.completion.chunk",
"created": 1694268190,
"model": "gpt-3.5-turbo-0125",
"system_fingerprint": "fp_44709d6fcb",
"choices": [{"index": 0, "delta": delta, "finish_reason": "stop"}],
}
chunk = litellm.ModelResponse(**chunk, stream=True)
completion_stream = ModelResponseIterator(model_response=chunk)
response = litellm.CustomStreamWrapper(
completion_stream=completion_stream,
model="gpt-3.5-turbo",
custom_llm_provider="cached_response",
logging_obj=litellm.litellm_core_utils.litellm_logging.Logging(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hey"}],
stream=True,
call_type="completion",
start_time=time.time(),
litellm_call_id="12345",
function_id="1245",
),
)
finish_reason: Optional[str] = None
for chunk in response:
if chunk.choices[0].finish_reason is not None:
finish_reason = chunk.choices[0].finish_reason
assert finish_reason == "tool_calls"


@@ -300,7 +300,7 @@ class ListBatchRequest(TypedDict, total=False):
    timeout: Optional[float]

-class ChatCompletionToolCallFunctionChunk(TypedDict):
+class ChatCompletionToolCallFunctionChunk(TypedDict, total=False):
    name: Optional[str]
    arguments: str
@@ -312,7 +312,7 @@ class ChatCompletionToolCallChunk(TypedDict):
    index: int

-class ChatCompletionDeltaToolCallChunk(TypedDict):
+class ChatCompletionDeltaToolCallChunk(TypedDict, total=False):
    id: str
    type: Literal["function"]
    function: ChatCompletionToolCallFunctionChunk
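The move to `total=False` matters for streaming: a mid-stream delta often carries only the next slice of `arguments`, with `id`, `type`, and `name` absent. A small sketch (the import path is assumed from the test imports elsewhere in this commit):

from litellm.types.llms.openai import (
    ChatCompletionDeltaToolCallChunk,
    ChatCompletionToolCallFunctionChunk,
)

# Valid under total=False even though most keys are omitted mid-stream:
partial_fn: ChatCompletionToolCallFunctionChunk = {"arguments": '"}'}
partial_call: ChatCompletionDeltaToolCallChunk = {"function": partial_fn}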


@@ -324,7 +324,12 @@ class DeploymentTypedDict(TypedDict):
    litellm_params: LiteLLMParamsTypedDict

-SPECIAL_MODEL_INFO_PARAMS = ["input_cost_per_token", "output_cost_per_token"]
+SPECIAL_MODEL_INFO_PARAMS = [
+    "input_cost_per_token",
+    "output_cost_per_token",
+    "input_cost_per_character",
+    "output_cost_per_character",
+]

class Deployment(BaseModel):
@@ -517,3 +522,9 @@ class CustomRoutingStrategyBase:
    """
    pass
class RouterGeneralSettings(BaseModel):
async_only_mode: bool = Field(
default=False
) # this will only initialize async clients. Good for memory utils
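The flag is opt-in. A compact sketch of enabling it, same shape as the new `test_init_clients_async_mode` test earlier in this diff (placeholder params):

from litellm import Router
from litellm.types.router import RouterGeneralSettings

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "sk-placeholder"},
        }
    ],
    router_general_settings=RouterGeneralSettings(async_only_mode=True),
)
# Only the {model_id}_async_client / {model_id}_stream_async_client entries are
# cached; the two sync clients are skipped, halving client objects per deployment.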


@@ -42,6 +42,8 @@ import httpx
import openai
import requests
import tiktoken
+from httpx import Proxy
+from httpx._utils import get_environment_proxies
from pydantic import BaseModel
from tokenizers import Tokenizer
@@ -2555,6 +2557,24 @@ def get_optional_params(
            message=f"Function calling is not supported by {custom_llm_provider}.",
        )

+   if "tools" in non_default_params:
+       tools = non_default_params["tools"]
+       for (
+           tool
+       ) in (
+           tools
+       ):  # clean out 'additionalProperties = False'. Causes vertexai/gemini OpenAI API Schema errors - https://github.com/langchain-ai/langchainjs/issues/5240
+           tool_function = tool.get("function", {})
+           parameters = tool_function.get("parameters", None)
+           if parameters is not None:
+               new_parameters = copy.deepcopy(parameters)
+               if (
+                   "additionalProperties" in new_parameters
+                   and new_parameters["additionalProperties"] is False
+               ):
+                   new_parameters.pop("additionalProperties", None)
+               tool_function["parameters"] = new_parameters
+
    def _check_valid_arg(supported_params):
        verbose_logger.debug(
            f"\nLiteLLM completion() model= {model}; provider = {custom_llm_provider}"
@@ -4707,7 +4727,9 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
        )
    except Exception:
        raise Exception(
-           "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
+           "This model isn't mapped yet. model={}, custom_llm_provider={}. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json".format(
+               model, custom_llm_provider
+           )
        )
@@ -4893,6 +4915,34 @@ def get_provider_fields(custom_llm_provider: str) -> List[ProviderField]:
    return []

+def create_proxy_transport_and_mounts():
+    proxies = {
+        key: None if url is None else Proxy(url=url)
+        for key, url in get_environment_proxies().items()
+    }
+
+    sync_proxy_mounts = {}
+    async_proxy_mounts = {}
+
+    # Retrieve NO_PROXY environment variable
+    no_proxy = os.getenv("NO_PROXY", None)
+    no_proxy_urls = no_proxy.split(",") if no_proxy else []
+
+    for key, proxy in proxies.items():
+        if proxy is None:
+            sync_proxy_mounts[key] = httpx.HTTPTransport()
+            async_proxy_mounts[key] = httpx.AsyncHTTPTransport()
+        else:
+            sync_proxy_mounts[key] = httpx.HTTPTransport(proxy=proxy)
+            async_proxy_mounts[key] = httpx.AsyncHTTPTransport(proxy=proxy)
+
+    for url in no_proxy_urls:
+        sync_proxy_mounts[url] = httpx.HTTPTransport()
+        async_proxy_mounts[url] = httpx.AsyncHTTPTransport()
+
+    return sync_proxy_mounts, async_proxy_mounts
+
def validate_environment(model: Optional[str] = None) -> dict:
    """
    Checks if the environment variables are valid for the given model.
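A short sketch of how the returned mounts plug into httpx clients (assumes `create_proxy_transport_and_mounts` is exported from `litellm.utils`, and uses an example proxy URL):

import os
import httpx
from litellm.utils import create_proxy_transport_and_mounts  # assumed export

os.environ["HTTPS_PROXY"] = "https://proxy.example.com:8080"  # example value

sync_mounts, async_mounts = create_proxy_transport_and_mounts()
client = httpx.Client(mounts=sync_mounts)  # https:// traffic goes via the proxy
aclient = httpx.AsyncClient(mounts=async_mounts)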
@@ -7519,7 +7569,7 @@ def exception_type(
            if original_exception.status_code == 400:
                exception_mapping_worked = True
                raise BadRequestError(
-                   message=f"{exception_provider} - {message}",
+                   message=f"{exception_provider} - {error_str}",
                    llm_provider=custom_llm_provider,
                    model=model,
                    response=original_exception.response,
@@ -7528,7 +7578,7 @@ def exception_type(
            elif original_exception.status_code == 401:
                exception_mapping_worked = True
                raise AuthenticationError(
-                   message=f"AuthenticationError: {exception_provider} - {message}",
+                   message=f"AuthenticationError: {exception_provider} - {error_str}",
                    llm_provider=custom_llm_provider,
                    model=model,
                    response=original_exception.response,
@@ -7537,7 +7587,7 @@ def exception_type(
            elif original_exception.status_code == 404:
                exception_mapping_worked = True
                raise NotFoundError(
-                   message=f"NotFoundError: {exception_provider} - {message}",
+                   message=f"NotFoundError: {exception_provider} - {error_str}",
                    model=model,
                    llm_provider=custom_llm_provider,
                    response=original_exception.response,
@@ -7546,7 +7596,7 @@ def exception_type(
            elif original_exception.status_code == 408:
                exception_mapping_worked = True
                raise Timeout(
-                   message=f"Timeout Error: {exception_provider} - {message}",
+                   message=f"Timeout Error: {exception_provider} - {error_str}",
                    model=model,
                    llm_provider=custom_llm_provider,
                    litellm_debug_info=extra_information,
@@ -7554,7 +7604,7 @@ def exception_type(
            elif original_exception.status_code == 422:
                exception_mapping_worked = True
                raise BadRequestError(
-                   message=f"BadRequestError: {exception_provider} - {message}",
+                   message=f"BadRequestError: {exception_provider} - {error_str}",
                    model=model,
                    llm_provider=custom_llm_provider,
                    response=original_exception.response,
@@ -7563,7 +7613,7 @@ def exception_type(
            elif original_exception.status_code == 429:
                exception_mapping_worked = True
                raise RateLimitError(
-                   message=f"RateLimitError: {exception_provider} - {message}",
+                   message=f"RateLimitError: {exception_provider} - {error_str}",
                    model=model,
                    llm_provider=custom_llm_provider,
                    response=original_exception.response,
@@ -7572,7 +7622,7 @@ def exception_type(
            elif original_exception.status_code == 503:
                exception_mapping_worked = True
                raise ServiceUnavailableError(
-                   message=f"ServiceUnavailableError: {exception_provider} - {message}",
+                   message=f"ServiceUnavailableError: {exception_provider} - {error_str}",
                    model=model,
                    llm_provider=custom_llm_provider,
                    response=original_exception.response,
@@ -7581,7 +7631,7 @@ def exception_type(
            elif original_exception.status_code == 504:  # gateway timeout error
                exception_mapping_worked = True
                raise Timeout(
-                   message=f"Timeout Error: {exception_provider} - {message}",
+                   message=f"Timeout Error: {exception_provider} - {error_str}",
                    model=model,
                    llm_provider=custom_llm_provider,
                    litellm_debug_info=extra_information,
@@ -7590,7 +7640,7 @@ def exception_type(
                exception_mapping_worked = True
                raise APIError(
                    status_code=original_exception.status_code,
-                   message=f"APIError: {exception_provider} - {message}",
+                   message=f"APIError: {exception_provider} - {error_str}",
                    llm_provider=custom_llm_provider,
                    model=model,
                    request=original_exception.request,
@@ -7599,7 +7649,7 @@ def exception_type(
            else:
                # if no status code then it is an APIConnectionError: https://github.com/openai/openai-python#handling-errors
                raise APIConnectionError(
-                   message=f"APIConnectionError: {exception_provider} - {message}",
+                   message=f"APIConnectionError: {exception_provider} - {error_str}",
                    llm_provider=custom_llm_provider,
                    model=model,
                    litellm_debug_info=extra_information,
@@ -7950,6 +8000,7 @@ class CustomStreamWrapper:
        )
        self.messages = getattr(logging_obj, "messages", None)
        self.sent_stream_usage = False
+       self.tool_call = False
        self.chunks: List = (
            []
        )  # keep track of the returned chunks - used for calculating the input/output tokens for stream options
@@ -9192,9 +9243,16 @@ class CustomStreamWrapper:
                    "is_finished": True,
                    "finish_reason": chunk.choices[0].finish_reason,
                    "original_chunk": chunk,
+                   "tool_calls": (
+                       chunk.choices[0].delta.tool_calls
+                       if hasattr(chunk.choices[0].delta, "tool_calls")
+                       else None
+                   ),
                }
                completion_obj["content"] = response_obj["text"]
+               if response_obj["tool_calls"] is not None:
+                   completion_obj["tool_calls"] = response_obj["tool_calls"]
                print_verbose(f"completion obj content: {completion_obj['content']}")
                if hasattr(chunk, "id"):
                    model_response.id = chunk.id
@@ -9352,6 +9410,10 @@ class CustomStreamWrapper:
                )
                print_verbose(f"self.sent_first_chunk: {self.sent_first_chunk}")

+           ## CHECK FOR TOOL USE
+           if "tool_calls" in completion_obj and len(completion_obj["tool_calls"]) > 0:
+               self.tool_call = True
+
            ## RETURN ARG
            if (
                "content" in completion_obj
@@ -9530,6 +9592,12 @@ class CustomStreamWrapper:
                )
            else:
                model_response.choices[0].finish_reason = "stop"

+           ## if tool use
+           if (
+               model_response.choices[0].finish_reason == "stop" and self.tool_call
+           ):  # don't overwrite for other - potential error finish reasons
+               model_response.choices[0].finish_reason = "tool_calls"
+
            return model_response

    def __next__(self):
@@ -9583,7 +9651,7 @@ class CustomStreamWrapper:
                return response
            except StopIteration:
-               if self.sent_last_chunk == True:
+               if self.sent_last_chunk is True:
                    if (
                        self.sent_stream_usage == False
                        and self.stream_options is not None


@@ -2022,10 +2022,10 @@
        "max_tokens": 8192,
        "max_input_tokens": 2097152,
        "max_output_tokens": 8192,
-       "input_cost_per_token": 0.00000035,
-       "input_cost_per_token_above_128k_tokens": 0.0000007,
-       "output_cost_per_token": 0.00000105,
-       "output_cost_per_token_above_128k_tokens": 0.0000021,
+       "input_cost_per_token": 0.0000035,
+       "input_cost_per_token_above_128k_tokens": 0.000007,
+       "output_cost_per_token": 0.0000105,
+       "output_cost_per_token_above_128k_tokens": 0.000021,
        "litellm_provider": "gemini",
        "mode": "chat",
        "supports_system_messages": true,
@@ -2033,16 +2033,16 @@
        "supports_vision": true,
        "supports_tool_choice": true,
        "supports_response_schema": true,
-       "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models"
+       "source": "https://ai.google.dev/pricing"
    },
    "gemini/gemini-1.5-pro-latest": {
        "max_tokens": 8192,
        "max_input_tokens": 1048576,
        "max_output_tokens": 8192,
-       "input_cost_per_token": 0.00000035,
-       "input_cost_per_token_above_128k_tokens": 0.0000007,
+       "input_cost_per_token": 0.0000035,
+       "input_cost_per_token_above_128k_tokens": 0.000007,
        "output_cost_per_token": 0.00000105,
-       "output_cost_per_token_above_128k_tokens": 0.0000021,
+       "output_cost_per_token_above_128k_tokens": 0.000021,
        "litellm_provider": "gemini",
        "mode": "chat",
        "supports_system_messages": true,
@@ -2050,7 +2050,7 @@
        "supports_vision": true,
        "supports_tool_choice": true,
        "supports_response_schema": true,
-       "source": "https://ai.google.dev/models/gemini"
+       "source": "https://ai.google.dev/pricing"
    },
    "gemini/gemini-pro-vision": {
        "max_tokens": 2048,

poetry.lock generated

@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.

[[package]]
name = "aiohttp"
@@ -2115,6 +2115,32 @@ dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pyte
docs = ["sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"]
tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"]
[[package]]
name = "pynacl"
version = "1.5.0"
description = "Python binding to the Networking and Cryptography (NaCl) library"
optional = true
python-versions = ">=3.6"
files = [
{file = "PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1"},
{file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92"},
{file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394"},
{file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d"},
{file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858"},
{file = "PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b"},
{file = "PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff"},
{file = "PyNaCl-1.5.0-cp36-abi3-win32.whl", hash = "sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543"},
{file = "PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93"},
{file = "PyNaCl-1.5.0.tar.gz", hash = "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba"},
]
[package.dependencies]
cffi = ">=1.4.1"
[package.extras]
docs = ["sphinx (>=1.6.5)", "sphinx-rtd-theme"]
tests = ["hypothesis (>=3.27.0)", "pytest (>=3.2.1,!=3.3.0)"]
[[package]] [[package]]
name = "pytest" name = "pytest"
version = "7.4.4" version = "7.4.4"
@ -3381,10 +3407,10 @@ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.link
testing = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] testing = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
[extras] [extras]
extra-proxy = ["azure-identity", "azure-keyvault-secrets", "google-cloud-kms", "prisma", "resend"] extra-proxy = ["azure-identity", "azure-keyvault-secrets", "google-cloud-kms", "prisma", "pynacl", "resend"]
proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi-sso", "gunicorn", "orjson", "python-multipart", "pyyaml", "rq", "uvicorn"] proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi-sso", "gunicorn", "orjson", "python-multipart", "pyyaml", "rq", "uvicorn"]
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = ">=3.8.1,<4.0, !=3.9.7" python-versions = ">=3.8.1,<4.0, !=3.9.7"
content-hash = "925b604bed171282827c8b046191ad858ce37fa3b011a393345382f8ff86e68c" content-hash = "6025cae7749c94755d17362f77adf76f834863dba2126501cd3111d53a9c5779"
View file
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "1.41.8" version = "1.41.11"
description = "Library to easily interface with LLM API providers" description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"] authors = ["BerriAI"]
license = "MIT" license = "MIT"
@ -46,6 +46,7 @@ azure-identity = {version = "^1.15.0", optional = true}
azure-keyvault-secrets = {version = "^4.8.0", optional = true} azure-keyvault-secrets = {version = "^4.8.0", optional = true}
google-cloud-kms = {version = "^2.21.3", optional = true} google-cloud-kms = {version = "^2.21.3", optional = true}
resend = {version = "^0.8.0", optional = true} resend = {version = "^0.8.0", optional = true}
+pynacl = {version = "^1.5.0", optional = true}
[tool.poetry.extras]
proxy = [
@@ -90,7 +91,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"

[tool.commitizen]
version = "1.41.8" version = "1.41.11"
version_files = [
    "pyproject.toml:^version"
]
View file
@@ -42,7 +42,7 @@ tokenizers==0.14.0 # for calculating usage
click==8.1.7 # for proxy cli
jinja2==3.1.4 # for prompt templates
certifi==2024.7.4 # [TODO] clean up
-aiohttp==3.9.0 # for network calls
+aiohttp==3.9.4 # for network calls
aioboto3==12.3.0 # for async sagemaker calls
tenacity==8.2.3 # for retrying requests, when litellm.num_retries set
pydantic==2.7.1 # proxy + openai req.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
View file
File diff suppressed because one or more lines are too long
View file
@@ -1,7 +1,7 @@
2:I[77831,[],""]
-3:I[48951,["665","static/chunks/3014691f-589a5f4865c3822f.js","936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-83a8bdddfe32b5d9.js","777","static/chunks/777-f76791513e294b30.js","931","static/chunks/app/page-da7d95729f2529b5.js"],""]
+3:I[48951,["665","static/chunks/3014691f-589a5f4865c3822f.js","936","static/chunks/2f6dbc85-052c4579f80d66ae.js","294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-19b05e5ce40fa85d.js","684","static/chunks/684-bb2d2f93d92acb0b.js","759","static/chunks/759-d7572f2a46f911d5.js","777","static/chunks/777-906d7dd6a5bf7be4.js","931","static/chunks/app/page-567f85145e7f0f35.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
-0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
+0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["__PAGE__",{}]},"$undefined","$undefined",true],["",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
File diff suppressed because one or more lines are too long
View file
@@ -1,7 +1,7 @@
2:I[77831,[],""]
-3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-6a03368053f9d26d.js","777","static/chunks/777-f76791513e294b30.js","418","static/chunks/app/model_hub/page-ba7819b59161aa64.js"],""]
+3:I[87494,["294","static/chunks/294-0e35509d5ca95267.js","131","static/chunks/131-19b05e5ce40fa85d.js","777","static/chunks/777-906d7dd6a5bf7be4.js","418","static/chunks/app/model_hub/page-ba7819b59161aa64.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
-0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
+0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["model_hub",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["model_hub",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","model_hub","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
File diff suppressed because one or more lines are too long
View file
@@ -1,7 +1,7 @@
2:I[77831,[],""]
-3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-f76791513e294b30.js","461","static/chunks/app/onboarding/page-1ed08595d570934e.js"],""]
+3:I[667,["665","static/chunks/3014691f-589a5f4865c3822f.js","294","static/chunks/294-0e35509d5ca95267.js","684","static/chunks/684-bb2d2f93d92acb0b.js","777","static/chunks/777-906d7dd6a5bf7be4.js","461","static/chunks/app/onboarding/page-1ed08595d570934e.js"],""]
4:I[5613,[],""]
5:I[31778,[],""]
-0:["0gt3_bF2KkdKeE61mic4M",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
+0:["RDLpeUaSstfmeQiKITNBo",[[["",{"children":["onboarding",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],["",{"children":["onboarding",{"children":["__PAGE__",{},["$L1",["$","$L2",null,{"propsForComponent":{"params":{}},"Component":"$3","isStaticGeneration":true}],null]]},["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children","onboarding","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","styles":null}]]},[null,["$","html",null,{"lang":"en","children":["$","body",null,{"className":"__className_12bbc4","children":["$","$L4",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$L5",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"styles":null}]}]}],null]],[[["$","link","0",{"rel":"stylesheet","href":"/ui/_next/static/css/0f6908625573deae.css","precedence":"next","crossOrigin":""}]],"$L6"]]]]
6:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"LiteLLM Dashboard"}],["$","meta","3",{"name":"description","content":"LiteLLM Proxy Admin UI"}],["$","link","4",{"rel":"icon","href":"/ui/favicon.ico","type":"image/x-icon","sizes":"16x16"}],["$","meta","5",{"name":"next-size-adjust"}]]
1:null
View file
@@ -743,7 +743,7 @@ const ModelDashboard: React.FC<ModelDashboardProps> = ({
  }

  const fetchModelMap = async () => {
-    const data = await modelCostMap();
+    const data = await modelCostMap(accessToken);
    console.log(`received model cost map data: ${Object.keys(data)}`);
    setModelMap(data);
  };
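The call site above now threads the dashboard's accessToken into modelCostMap, whose new implementation appears in the networkMap.tsx hunks below. Read together, the request it ends up issuing looks roughly like this (a minimal sketch, not the verbatim source; the module-level `proxyBaseUrl` constant and the untyped JSON return are assumptions based on the surrounding diff):

// Sketch: modelCostMap now requires the caller's token and forwards it as a
// Bearer credential, so /get/litellm_model_cost_map can sit behind proxy auth.
declare const proxyBaseUrl: string | null; // assumed module-level constant

export const modelCostMap = async (accessToken: string): Promise<any> => {
  const url = proxyBaseUrl
    ? `${proxyBaseUrl}/get/litellm_model_cost_map`
    : `/get/litellm_model_cost_map`;
  const response = await fetch(url, {
    method: "GET",
    headers: {
      Authorization: `Bearer ${accessToken}`, // token threaded in from the dashboard
      "Content-Type": "application/json",
    },
  });
  return response.json();
};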
View file
@@ -12,11 +12,19 @@ export interface Model {
  model_info: Object | null;
}

-export const modelCostMap = async () => {
+export const modelCostMap = async (
+  accessToken: string,
+) => {
  try {
    const url = proxyBaseUrl ? `${proxyBaseUrl}/get/litellm_model_cost_map` : `/get/litellm_model_cost_map`;
    const response = await fetch(
-      url
+      url, {
+        method: "GET",
+        headers: {
+          Authorization: `Bearer ${accessToken}`,
+          "Content-Type": "application/json",
+        },
+      }
    );
    const jsonData = await response.json();
    console.log(`received litellm model cost data: ${jsonData}`);
@@ -693,6 +701,9 @@ export const claimOnboardingToken = async (
    throw error;
  }
};
+let ModelListerrorShown = false;
+let errorTimer: NodeJS.Timeout | null = null;
+
export const modelInfoCall = async (
  accessToken: String,
  userID: String,
@@ -714,8 +725,21 @@ export const modelInfoCall = async (
    });

    if (!response.ok) {
-      const errorData = await response.text();
-      message.error(errorData, 10);
+      let errorData = await response.text();
+      errorData += `error shown=${ModelListerrorShown}`
+      if (!ModelListerrorShown) {
+        if (errorData.includes("No model list passed")) {
+          errorData = "No Models Exist. Click Add Model to get started.";
+        }
+        message.info(errorData, 10);
+        ModelListerrorShown = true;
+        if (errorTimer) clearTimeout(errorTimer);
+        errorTimer = setTimeout(() => {
+          ModelListerrorShown = false;
+        }, 10000);
+      }
      throw new Error("Network response was not ok");
    }
@@ -750,7 +774,6 @@ export const modelHubCall = async (accessToken: String) => {
  if (!response.ok) {
    const errorData = await response.text();
-    message.error(errorData, 10);
    throw new Error("Network response was not ok");
  }
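The modelInfoCall hunk above guards the toast behind a module-level flag plus a reset timer, so a burst of failed requests surfaces at most one message per ten-second window (modelHubCall's unconditional message.error is dropped in the last hunk for the same reason). Distilled into a standalone helper, the pattern is roughly this (a minimal sketch; `notifyOnce` is a hypothetical name and the `notify` parameter stands in for antd's `message.info`):

// Fire at most one notification per cooldown window, then re-arm.
// Mirrors the ModelListerrorShown / errorTimer pair added in modelInfoCall.
let shown = false;
let timer: ReturnType<typeof setTimeout> | null = null;

export function notifyOnce(
  notify: (msg: string, durationSecs?: number) => void, // e.g. antd message.info
  msg: string,
  cooldownMs: number = 10000,
): void {
  if (shown) return; // still inside the cooldown window; drop the duplicate
  notify(msg, 10);
  shown = true;
  if (timer) clearTimeout(timer); // collapse overlapping timers
  timer = setTimeout(() => {
    shown = false; // allow the next failure to surface again
  }, cooldownMs);
}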
View file
@@ -32,7 +32,6 @@ import {
  allTagNamesCall,
  modelMetricsCall,
  modelAvailableCall,
-  modelInfoCall,
  adminspendByProvider,
  adminGlobalActivity,
  adminGlobalActivityPerModel,