forked from phoenix/litellm-mirror

Merge branch 'main' into litellm_no_store_cache_control

commit ce415a243d

7 changed files with 266 additions and 31 deletions
@@ -31,6 +31,18 @@ general_settings:
 
 ## 2. Setup SSO/Auth for UI
 
 <Tabs>
 
+<TabItem value="username" label="Quick Start - Username, Password">
+
+Set the following in your .env on the Proxy
+
+```shell
+UI_USERNAME=ishaan-litellm
+UI_PASSWORD=langchain
+```
+
+On accessing the LiteLLM UI, you will be prompted to enter your username, password
+
+</TabItem>
 
 <TabItem value="google" label="Google SSO">
@@ -73,6 +85,7 @@ MICROSOFT_TENANT="5a39737
 ```
 
 </TabItem>
 
 </Tabs>
+
 
 ## 4. Use UI

@@ -76,6 +76,7 @@ from litellm.proxy.utils import (
     get_logging_payload,
     reset_budget,
     hash_token,
+    html_form,
 )
 from litellm.proxy.secret_managers.google_kms import load_google_kms
 import pydantic

@@ -94,6 +95,7 @@ from fastapi import (
     BackgroundTasks,
     Header,
     Response,
+    Form,
 )
 from fastapi.routing import APIRouter
 from fastapi.security import OAuth2PasswordBearer
@@ -1268,7 +1270,7 @@ async def generate_key_helper_fn(
     key_alias: Optional[str] = None,
     allowed_cache_controls: Optional[list] = [],
 ):
-    global prisma_client, custom_db_client
+    global prisma_client, custom_db_client, user_api_key_cache
 
     if prisma_client is None and custom_db_client is None:
         raise Exception(
@@ -1361,6 +1363,18 @@ async def generate_key_helper_fn(
     }
     if general_settings.get("allow_user_auth", False) == True:
         key_data["key_name"] = f"sk-...{token[-4:]}"
+    saved_token = copy.deepcopy(key_data)
+    if isinstance(saved_token["aliases"], str):
+        saved_token["aliases"] = json.loads(saved_token["aliases"])
+    if isinstance(saved_token["config"], str):
+        saved_token["config"] = json.loads(saved_token["config"])
+    if isinstance(saved_token["metadata"], str):
+        saved_token["metadata"] = json.loads(saved_token["metadata"])
+    user_api_key_cache.set_cache(
+        key=key_data["token"],
+        value=LiteLLM_VerificationToken(**saved_token),  # type: ignore
+        ttl=60,
+    )
     if prisma_client is not None:
         ## CREATE USER (If necessary)
         verbose_proxy_logger.debug(f"prisma_client: Creating User={user_data}")
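
The cache write above lets a freshly minted key be validated without a round-trip to the database for its first minute of life. A minimal sketch of the TTL-cache pattern, assuming a `set_cache`/`get_cache` interface like the proxy's `user_api_key_cache` (the class below is illustrative, not LiteLLM's implementation):

```python
import time
from typing import Any, Dict, Optional, Tuple

class InMemoryTTLCache:
    """Toy stand-in for user_api_key_cache: a dict with per-key expiry."""

    def __init__(self) -> None:
        self._store: Dict[str, Tuple[Any, float]] = {}

    def set_cache(self, key: str, value: Any, ttl: int) -> None:
        self._store[key] = (value, time.time() + ttl)

    def get_cache(self, key: str) -> Optional[Any]:
        hit = self._store.get(key)
        if hit is None or time.time() > hit[1]:
            return None  # miss or expired -> caller falls back to the DB
        return hit[0]

cache = InMemoryTTLCache()
cache.set_cache(key="hashed-sk-token", value={"user_id": "u1"}, ttl=60)
assert cache.get_cache("hashed-sk-token") is not None  # auth check hits the cache
```
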
@@ -1675,14 +1689,16 @@ async def startup_event():
 
     if prisma_client is not None and master_key is not None:
         # add master key to db
-        await generate_key_helper_fn(
-            duration=None,
-            models=[],
-            aliases={},
-            config={},
-            spend=0,
-            token=master_key,
-            user_id="default_user_id",
-        )
+        asyncio.create_task(
+            generate_key_helper_fn(
+                duration=None,
+                models=[],
+                aliases={},
+                config={},
+                spend=0,
+                token=master_key,
+                user_id="default_user_id",
+            )
+        )
 
     if prisma_client is not None and litellm.max_budget > 0:
@@ -1692,20 +1708,22 @@ async def startup_event():
         )
 
         # add proxy budget to db in the user table
-        await generate_key_helper_fn(
-            user_id=litellm_proxy_budget_name,
-            duration=None,
-            models=[],
-            aliases={},
-            config={},
-            spend=0,
-            max_budget=litellm.max_budget,
-            budget_duration=litellm.budget_duration,
-            query_type="update_data",
-            update_key_values={
-                "max_budget": litellm.max_budget,
-                "budget_duration": litellm.budget_duration,
-            },
-        )
+        asyncio.create_task(
+            generate_key_helper_fn(
+                user_id=litellm_proxy_budget_name,
+                duration=None,
+                models=[],
+                aliases={},
+                config={},
+                spend=0,
+                max_budget=litellm.max_budget,
+                budget_duration=litellm.budget_duration,
+                query_type="update_data",
+                update_key_values={
+                    "max_budget": litellm.max_budget,
+                    "budget_duration": litellm.budget_duration,
+                },
+            )
+        )
 
     verbose_proxy_logger.debug(
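
This hunk, like the master-key hunk above, swaps a blocking `await` for `asyncio.create_task(...)`, so `startup_event` returns without waiting on these DB writes. A minimal sketch of the fire-and-forget pattern (`seed_db` is a hypothetical stand-in for `generate_key_helper_fn`):

```python
import asyncio

async def seed_db() -> None:
    # hypothetical stand-in for generate_key_helper_fn's DB upsert
    await asyncio.sleep(1)
    print("db seeded")

async def startup_event() -> None:
    task = asyncio.create_task(seed_db())  # scheduled, not awaited
    print("startup returns immediately")
    await task  # demo only: keeps the loop alive so the task can finish

asyncio.run(startup_event())
```

One caveat with this pattern: a task created without keeping any reference can be garbage-collected before it completes, so holding the returned task object (as the demo does) is the safer form.
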
@@ -2962,6 +2980,60 @@ async def google_login(request: Request):
         )
         with microsoft_sso:
             return await microsoft_sso.get_login_redirect()
+    else:
+        # No Google, Microsoft SSO
+        # Use UI Credentials set in .env
+        from fastapi.responses import HTMLResponse
+
+        return HTMLResponse(content=html_form, status_code=200)
+
+
+@router.post(
+    "/login", include_in_schema=False
+)  # hidden since this is a helper for UI sso login
+async def login(request: Request):
+    try:
+        import multipart
+    except ImportError:
+        subprocess.run(["pip", "install", "python-multipart"])
+
+    form = await request.form()
+    username = str(form.get("username"))
+    password = form.get("password")
+    ui_username = os.getenv("UI_USERNAME")
+    ui_password = os.getenv("UI_PASSWORD")
+
+    if username == ui_username and password == ui_password:
+        user_id = username
+        response = await generate_key_helper_fn(
+            **{"duration": "24hr", "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": user_id, "team_id": "litellm-dashboard"}  # type: ignore
+        )
+
+        key = response["token"]  # type: ignore
+        user_id = response["user_id"]  # type: ignore
+        litellm_dashboard_ui = "https://litellm-dashboard.vercel.app/"
+
+        # if user set LITELLM_UI_LINK in .env, use that
+        litellm_ui_link_in_env = os.getenv("LITELLM_UI_LINK", None)
+        if litellm_ui_link_in_env is not None:
+            litellm_dashboard_ui = litellm_ui_link_in_env
+
+        litellm_dashboard_ui += (
+            "?userID="
+            + user_id
+            + "&accessToken="
+            + key
+            + "&proxyBaseUrl="
+            + os.getenv("PROXY_BASE_URL")
+        )
+        return RedirectResponse(url=litellm_dashboard_ui)
+    else:
+        raise ProxyException(
+            message=f"Invalid credentials used to access UI. Passed in username: {username}, passed in password: {password}.\nCheck 'UI_USERNAME', 'UI_PASSWORD' in .env file",
+            type="auth_error",
+            param="invalid_credentials",
+            code=status.HTTP_401_UNAUTHORIZED,
+        )
+
+
 @app.get("/sso/callback", tags=["experimental"])
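
The new `/login` route accepts the form fields rendered by `html_form` and redirects to the dashboard with the generated key. A hedged usage sketch with `requests` (host, port, and credentials are illustrative):

```python
import requests

resp = requests.post(
    "http://localhost:4000/login",  # assumes the proxy is running locally on :4000
    data={"username": "ishaan-litellm", "password": "langchain"},
    allow_redirects=False,
)
print(resp.status_code)              # 307 redirect on valid credentials
print(resp.headers.get("location"))  # dashboard URL with userID & accessToken params
```
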
@@ -1211,3 +1211,67 @@ async def reset_budget(prisma_client: PrismaClient):
         await prisma_client.update_data(
             query_type="update_many", data_list=users_to_reset, table_name="user"
         )
+
+
+# LiteLLM Admin UI - Non SSO Login
+html_form = """
+<!DOCTYPE html>
+<html>
+<head>
+    <title>LiteLLM Login</title>
+    <style>
+        body {
+            font-family: Arial, sans-serif;
+            background-color: #f4f4f4;
+            margin: 0;
+            padding: 0;
+            display: flex;
+            justify-content: center;
+            align-items: center;
+            height: 100vh;
+        }
+
+        form {
+            background-color: #fff;
+            padding: 20px;
+            border-radius: 8px;
+            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
+        }
+
+        label {
+            display: block;
+            margin-bottom: 8px;
+        }
+
+        input {
+            width: 100%;
+            padding: 8px;
+            margin-bottom: 16px;
+            box-sizing: border-box;
+            border: 1px solid #ccc;
+            border-radius: 4px;
+        }
+
+        input[type="submit"] {
+            background-color: #4caf50;
+            color: #fff;
+            cursor: pointer;
+        }
+
+        input[type="submit"]:hover {
+            background-color: #45a049;
+        }
+    </style>
+</head>
+<body>
+    <form action="/login" method="post">
+        <h2>LiteLLM Login</h2>
+        <label for="username">Username:</label>
+        <input type="text" id="username" name="username" required>
+        <label for="password">Password:</label>
+        <input type="password" id="password" name="password" required>
+        <input type="submit" value="Submit">
+    </form>
+</body>
+</html>
+"""
@@ -289,11 +289,7 @@ class Router:
             timeout = kwargs.get("request_timeout", self.timeout)
             kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)
             kwargs.setdefault("metadata", {}).update({"model_group": model})
-            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
-                # Submit the function to the executor with a timeout
-                future = executor.submit(self.function_with_fallbacks, **kwargs)
-                response = future.result(timeout=timeout)  # type: ignore
-
+            response = self.function_with_fallbacks(**kwargs)
             return response
         except Exception as e:
             raise e
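
The removed block enforced the router-level timeout by running the call in a single-worker thread pool: `future.result(timeout=...)` raises after the deadline, but the worker thread keeps running and the executor's shutdown blocks on it. A small sketch of that failure mode (illustrative only, not Router code):

```python
import concurrent.futures
import time

def slow_call() -> str:
    time.sleep(5)  # stand-in for a slow LLM request
    return "done"

with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
    future = executor.submit(slow_call)
    try:
        future.result(timeout=1)  # raises after 1s...
    except concurrent.futures.TimeoutError:
        print("timed out, but slow_call is still running in the worker")
    # ...and exiting the with-block still waits for slow_call to finish
```

Calling `self.function_with_fallbacks(**kwargs)` directly instead leaves timeout enforcement to the underlying request, which the new test below exercises.
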
87  litellm/tests/test_router_timeout.py  Normal file

@@ -0,0 +1,87 @@
+#### What this tests ####
+# This tests the router's timeout error handling during fallbacks
+
+import sys, os, time
+import traceback, asyncio
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+
+
+import os
+
+import litellm
+from litellm import Router
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+def test_router_timeouts():
+    # Model list for OpenAI and Anthropic models
+    model_list = [
+        {
+            "model_name": "openai-gpt-4",
+            "litellm_params": {
+                "model": "azure/chatgpt-v-2",
+                "api_key": "os.environ/AZURE_API_KEY",
+                "api_base": "os.environ/AZURE_API_BASE",
+                "api_version": "os.environ/AZURE_API_VERSION",
+            },
+            "tpm": 80000,
+        },
+        {
+            "model_name": "anthropic-claude-instant-1.2",
+            "litellm_params": {
+                "model": "claude-instant-1",
+                "api_key": "os.environ/ANTHROPIC_API_KEY",
+            },
+            "tpm": 20000,
+        },
+    ]
+
+    fallbacks_list = [
+        {"openai-gpt-4": ["anthropic-claude-instant-1.2"]},
+    ]
+
+    # Configure router
+    router = Router(
+        model_list=model_list,
+        fallbacks=fallbacks_list,
+        routing_strategy="usage-based-routing",
+        debug_level="INFO",
+        set_verbose=True,
+        redis_host=os.getenv("REDIS_HOST"),
+        redis_password=os.getenv("REDIS_PASSWORD"),
+        redis_port=int(os.getenv("REDIS_PORT")),
+        timeout=10,
+    )
+
+    print("***** TPM SETTINGS *****")
+    for model_object in model_list:
+        print(f"{model_object['model_name']}: {model_object['tpm']} TPM")
+
+    # Sample list of questions
+    questions_list = [
+        {"content": "Tell me a very long joke.", "modality": "voice"},
+    ]
+
+    total_tokens_used = 0
+
+    # Process each question
+    for question in questions_list:
+        messages = [{"content": question["content"], "role": "user"}]
+
+        prompt_tokens = litellm.token_counter(text=question["content"], model="gpt-4")
+        print("prompt_tokens = ", prompt_tokens)
+
+        response = router.completion(
+            model="openai-gpt-4", messages=messages, timeout=5, num_retries=0
+        )
+
+        total_tokens_used += response.usage.total_tokens
+
+        print("Response:", response)
+        print("********** TOKENS USED SO FAR = ", total_tokens_used)
@@ -7490,7 +7490,10 @@ class CustomStreamWrapper:
             logprobs = None
             original_chunk = None  # this is used for function/tool calling
             if len(str_line.choices) > 0:
-                if str_line.choices[0].delta.content is not None:
+                if (
+                    str_line.choices[0].delta is not None
+                    and str_line.choices[0].delta.content is not None
+                ):
                     text = str_line.choices[0].delta.content
                 else:  # function/tool calling chunk - when content is None. in this case we just return the original chunk from openai
                     original_chunk = str_line
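
The widened guard accounts for OpenAI-style tool/function-call streams, where a chunk's `delta.content` is `None` (or the delta itself is missing) and the payload arrives under `tool_calls` instead, so dereferencing `.content` unconditionally can fail. The shapes below are illustrative plain dicts, not actual API objects:

```python
# text chunk: delta.content carries the streamed token
text_chunk = {"choices": [{"delta": {"content": "Hello"}}]}

# tool-call chunk: content is None; arguments stream under tool_calls,
# so the wrapper passes the original chunk through instead of reading .content
tool_chunk = {
    "choices": [
        {
            "delta": {
                "content": None,
                "tool_calls": [
                    {"function": {"name": "get_weather", "arguments": '{"city"'}}
                ],
            }
        }
    ]
}
```
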
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.20.6"
+version = "1.20.7"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"

@@ -63,7 +63,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.20.6"
+version = "1.20.7"
 version_files = [
     "pyproject.toml:^version"
 ]