diff --git a/docs/my-website/docs/proxy/ui.md b/docs/my-website/docs/proxy/ui.md
index 0a19c427c2..6538793c1f 100644
--- a/docs/my-website/docs/proxy/ui.md
+++ b/docs/my-website/docs/proxy/ui.md
@@ -31,6 +31,18 @@ general_settings:
## 2. Setup SSO/Auth for UI
+
+
+Set the following in your `.env` on the Proxy:
+
+```shell
+UI_USERNAME=ishaan-litellm
+UI_PASSWORD=langchain
+```
+
+On accessing the LiteLLM UI, you will be prompted to enter your username and password.
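+
+Under the hood, the login page posts these credentials to the proxy's `/login` endpoint, which issues a temporary dashboard key and redirects you to the UI. A minimal sketch of that request (assuming the proxy is running locally on port 4000):
+
+```shell
+curl -i -X POST http://0.0.0.0:4000/login \
+  -d "username=ishaan-litellm" \
+  -d "password=langchain"
+```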
+
+
@@ -73,6 +85,7 @@ MICROSOFT_TENANT="5a39737
```
+
## 4. Use UI
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 6109b824d7..a2d9117994 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -76,6 +76,7 @@ from litellm.proxy.utils import (
get_logging_payload,
reset_budget,
hash_token,
+ html_form,
)
from litellm.proxy.secret_managers.google_kms import load_google_kms
import pydantic
@@ -94,6 +95,7 @@ from fastapi import (
BackgroundTasks,
Header,
Response,
+ Form,
)
from fastapi.routing import APIRouter
from fastapi.security import OAuth2PasswordBearer
@@ -1268,7 +1270,7 @@ async def generate_key_helper_fn(
key_alias: Optional[str] = None,
allowed_cache_controls: Optional[list] = [],
):
- global prisma_client, custom_db_client
+ global prisma_client, custom_db_client, user_api_key_cache
if prisma_client is None and custom_db_client is None:
raise Exception(
@@ -1361,6 +1363,18 @@ async def generate_key_helper_fn(
}
if general_settings.get("allow_user_auth", False) == True:
key_data["key_name"] = f"sk-...{token[-4:]}"
+ saved_token = copy.deepcopy(key_data)
+ if isinstance(saved_token["aliases"], str):
+ saved_token["aliases"] = json.loads(saved_token["aliases"])
+ if isinstance(saved_token["config"], str):
+ saved_token["config"] = json.loads(saved_token["config"])
+ if isinstance(saved_token["metadata"], str):
+ saved_token["metadata"] = json.loads(saved_token["metadata"])
+ user_api_key_cache.set_cache(
+ key=key_data["token"],
+ value=LiteLLM_VerificationToken(**saved_token), # type: ignore
+ ttl=60,
+ )
if prisma_client is not None:
## CREATE USER (If necessary)
verbose_proxy_logger.debug(f"prisma_client: Creating User={user_data}")
@@ -1675,14 +1689,16 @@ async def startup_event():
if prisma_client is not None and master_key is not None:
# add master key to db
- await generate_key_helper_fn(
- duration=None,
- models=[],
- aliases={},
- config={},
- spend=0,
- token=master_key,
- user_id="default_user_id",
+ asyncio.create_task(
+ generate_key_helper_fn(
+ duration=None,
+ models=[],
+ aliases={},
+ config={},
+ spend=0,
+ token=master_key,
+ user_id="default_user_id",
+ )
)
if prisma_client is not None and litellm.max_budget > 0:
@@ -1692,20 +1708,22 @@ async def startup_event():
)
# add proxy budget to db in the user table
- await generate_key_helper_fn(
- user_id=litellm_proxy_budget_name,
- duration=None,
- models=[],
- aliases={},
- config={},
- spend=0,
- max_budget=litellm.max_budget,
- budget_duration=litellm.budget_duration,
- query_type="update_data",
- update_key_values={
- "max_budget": litellm.max_budget,
- "budget_duration": litellm.budget_duration,
- },
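+        # likewise run in the background; startup does not wait for the proxy-budget row to be written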
+ asyncio.create_task(
+ generate_key_helper_fn(
+ user_id=litellm_proxy_budget_name,
+ duration=None,
+ models=[],
+ aliases={},
+ config={},
+ spend=0,
+ max_budget=litellm.max_budget,
+ budget_duration=litellm.budget_duration,
+ query_type="update_data",
+ update_key_values={
+ "max_budget": litellm.max_budget,
+ "budget_duration": litellm.budget_duration,
+ },
+ )
)
verbose_proxy_logger.debug(
@@ -2962,6 +2980,60 @@ async def google_login(request: Request):
)
with microsoft_sso:
return await microsoft_sso.get_login_redirect()
+ else:
+        # no Google / Microsoft SSO configured
+        # fall back to the UI credentials set in .env
+ from fastapi.responses import HTMLResponse
+
+ return HTMLResponse(content=html_form, status_code=200)
+
+
+@router.post(
+ "/login", include_in_schema=False
+)  # hidden since this is a helper for the UI username/password login
+async def login(request: Request):
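+    # ensure python-multipart is installed - it's required to parse the form data posted by the login page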
+ try:
+ import multipart
+ except ImportError:
+ subprocess.run(["pip", "install", "python-multipart"])
+
+ form = await request.form()
+ username = str(form.get("username"))
+ password = form.get("password")
+ ui_username = os.getenv("UI_USERNAME")
+ ui_password = os.getenv("UI_PASSWORD")
+
+ if username == ui_username and password == ui_password:
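+        # credentials match - issue a 24-hour key on the litellm-dashboard team, then redirect to the dashboard with it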
+ user_id = username
+ response = await generate_key_helper_fn(
+ **{"duration": "24hr", "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": user_id, "team_id": "litellm-dashboard"} # type: ignore
+ )
+
+ key = response["token"] # type: ignore
+ user_id = response["user_id"] # type: ignore
+ litellm_dashboard_ui = "https://litellm-dashboard.vercel.app/"
+
+ # if user set LITELLM_UI_LINK in .env, use that
+ litellm_ui_link_in_env = os.getenv("LITELLM_UI_LINK", None)
+ if litellm_ui_link_in_env is not None:
+ litellm_dashboard_ui = litellm_ui_link_in_env
+
+ litellm_dashboard_ui += (
+ "?userID="
+ + user_id
+ + "&accessToken="
+ + key
+ + "&proxyBaseUrl="
+ + os.getenv("PROXY_BASE_URL")
+ )
+ return RedirectResponse(url=litellm_dashboard_ui)
+ else:
+ raise ProxyException(
+ message=f"Invalid credentials used to access UI. Passed in username: {username}, passed in password: {password}.\nCheck 'UI_USERNAME', 'UI_PASSWORD' in .env file",
+ type="auth_error",
+ param="invalid_credentials",
+ code=status.HTTP_401_UNAUTHORIZED,
+ )
@app.get("/sso/callback", tags=["experimental"])
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 9983150d97..824fa4378c 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -1211,3 +1211,67 @@ async def reset_budget(prisma_client: PrismaClient):
await prisma_client.update_data(
query_type="update_many", data_list=users_to_reset, table_name="user"
)
+
+
+# LiteLLM Admin UI - Non SSO Login
+html_form = """
+
+
+
+ LiteLLM Login
+
+
+
+
+
+
+"""
diff --git a/litellm/router.py b/litellm/router.py
index 0d6d108e4d..bf5781c563 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -289,11 +289,7 @@ class Router:
timeout = kwargs.get("request_timeout", self.timeout)
kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)
kwargs.setdefault("metadata", {}).update({"model_group": model})
- with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
- # Submit the function to the executor with a timeout
- future = executor.submit(self.function_with_fallbacks, **kwargs)
- response = future.result(timeout=timeout) # type: ignore
-
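+            # call the fallbacks wrapper directly instead of enforcing the timeout via a separate thread pool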
+ response = self.function_with_fallbacks(**kwargs)
return response
except Exception as e:
raise e
diff --git a/litellm/tests/test_router_timeout.py b/litellm/tests/test_router_timeout.py
new file mode 100644
index 0000000000..1f93a60a68
--- /dev/null
+++ b/litellm/tests/test_router_timeout.py
@@ -0,0 +1,87 @@
+#### What this tests ####
+# This tests the router's timeout error handling during fallbacks
+
+import sys, os, time
+import traceback, asyncio
+import pytest
+
+sys.path.insert(
+ 0, os.path.abspath("../..")
+) # Adds the parent directory to the system path
+
+
+import os
+
+import litellm
+from litellm import Router
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+def test_router_timeouts():
+ # Model list for OpenAI and Anthropic models
+ model_list = [
+ {
+ "model_name": "openai-gpt-4",
+ "litellm_params": {
+ "model": "azure/chatgpt-v-2",
+ "api_key": "os.environ/AZURE_API_KEY",
+ "api_base": "os.environ/AZURE_API_BASE",
+ "api_version": "os.environ/AZURE_API_VERSION",
+ },
+ "tpm": 80000,
+ },
+ {
+ "model_name": "anthropic-claude-instant-1.2",
+ "litellm_params": {
+ "model": "claude-instant-1",
+ "api_key": "os.environ/ANTHROPIC_API_KEY",
+ },
+ "tpm": 20000,
+ },
+ ]
+
+ fallbacks_list = [
+ {"openai-gpt-4": ["anthropic-claude-instant-1.2"]},
+ ]
+
+ # Configure router
+ router = Router(
+ model_list=model_list,
+ fallbacks=fallbacks_list,
+ routing_strategy="usage-based-routing",
+ debug_level="INFO",
+ set_verbose=True,
+ redis_host=os.getenv("REDIS_HOST"),
+ redis_password=os.getenv("REDIS_PASSWORD"),
+ redis_port=int(os.getenv("REDIS_PORT")),
+ timeout=10,
+ )
+
+ print("***** TPM SETTINGS *****")
+ for model_object in model_list:
+ print(f"{model_object['model_name']}: {model_object['tpm']} TPM")
+
+ # Sample list of questions
+ questions_list = [
+ {"content": "Tell me a very long joke.", "modality": "voice"},
+ ]
+
+ total_tokens_used = 0
+
+ # Process each question
+ for question in questions_list:
+ messages = [{"content": question["content"], "role": "user"}]
+
+ prompt_tokens = litellm.token_counter(text=question["content"], model="gpt-4")
+ print("prompt_tokens = ", prompt_tokens)
+
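+        # 5s per-request timeout with retries disabled - exercises the router's timeout handling during fallbacks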
+ response = router.completion(
+ model="openai-gpt-4", messages=messages, timeout=5, num_retries=0
+ )
+
+ total_tokens_used += response.usage.total_tokens
+
+ print("Response:", response)
+ print("********** TOKENS USED SO FAR = ", total_tokens_used)
diff --git a/litellm/utils.py b/litellm/utils.py
index d91d262c5b..ed7d52125f 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -7490,7 +7490,10 @@ class CustomStreamWrapper:
logprobs = None
original_chunk = None # this is used for function/tool calling
if len(str_line.choices) > 0:
- if str_line.choices[0].delta.content is not None:
+ if (
+ str_line.choices[0].delta is not None
+ and str_line.choices[0].delta.content is not None
+ ):
text = str_line.choices[0].delta.content
else: # function/tool calling chunk - when content is None. in this case we just return the original chunk from openai
original_chunk = str_line
diff --git a/pyproject.toml b/pyproject.toml
index 48b641bc1b..2855deeda8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
-version = "1.20.6"
+version = "1.20.7"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT"
@@ -63,7 +63,7 @@ requires = ["poetry-core", "wheel"]
build-backend = "poetry.core.masonry.api"
[tool.commitizen]
-version = "1.20.6"
+version = "1.20.7"
version_files = [
"pyproject.toml:^version"
]