forked from phoenix/litellm-mirror

Merge branch 'main' into litellm_no_store_cache_control

commit ce415a243d

7 changed files with 266 additions and 31 deletions
@@ -31,6 +31,18 @@ general_settings:
 
 ## 2. Setup SSO/Auth for UI
 
 <Tabs>
 
+<TabItem value="username" label="Quick Start - Username, Password">
+
+Set the following in your .env on the Proxy
+
+```shell
+UI_USERNAME=ishaan-litellm
+UI_PASSWORD=langchain
+```
+
+On accessing the LiteLLM UI, you will be prompted to enter your username, password
+
+</TabItem>
 
 <TabItem value="google" label="Google SSO">
@@ -73,6 +85,7 @@ MICROSOFT_TENANT="5a39737
 ```
 
 </TabItem>
 
 </Tabs>
+
 
 ## 4. Use UI

@@ -76,6 +76,7 @@ from litellm.proxy.utils import (
     get_logging_payload,
     reset_budget,
     hash_token,
+    html_form,
 )
 from litellm.proxy.secret_managers.google_kms import load_google_kms
 import pydantic

@@ -94,6 +95,7 @@ from fastapi import (
     BackgroundTasks,
     Header,
     Response,
+    Form,
 )
 from fastapi.routing import APIRouter
 from fastapi.security import OAuth2PasswordBearer
@@ -1268,7 +1270,7 @@ async def generate_key_helper_fn(
     key_alias: Optional[str] = None,
     allowed_cache_controls: Optional[list] = [],
 ):
-    global prisma_client, custom_db_client
+    global prisma_client, custom_db_client, user_api_key_cache
 
     if prisma_client is None and custom_db_client is None:
         raise Exception(
@@ -1361,6 +1363,18 @@ async def generate_key_helper_fn(
     }
     if general_settings.get("allow_user_auth", False) == True:
         key_data["key_name"] = f"sk-...{token[-4:]}"
+    saved_token = copy.deepcopy(key_data)
+    if isinstance(saved_token["aliases"], str):
+        saved_token["aliases"] = json.loads(saved_token["aliases"])
+    if isinstance(saved_token["config"], str):
+        saved_token["config"] = json.loads(saved_token["config"])
+    if isinstance(saved_token["metadata"], str):
+        saved_token["metadata"] = json.loads(saved_token["metadata"])
+    user_api_key_cache.set_cache(
+        key=key_data["token"],
+        value=LiteLLM_VerificationToken(**saved_token),  # type: ignore
+        ttl=60,
+    )
     if prisma_client is not None:
         ## CREATE USER (If necessary)
         verbose_proxy_logger.debug(f"prisma_client: Creating User={user_data}")
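
The cache write above lets a freshly minted key be validated without a round-trip to the database for its first minute of life. A minimal sketch of the TTL-cache pattern, assuming a `set_cache`/`get_cache` interface like the proxy's `user_api_key_cache` (the class below is illustrative, not LiteLLM's implementation):

```python
import time
from typing import Any, Dict, Optional, Tuple

class InMemoryTTLCache:
    """Toy stand-in for user_api_key_cache: a dict with per-key expiry."""

    def __init__(self) -> None:
        self._store: Dict[str, Tuple[Any, float]] = {}

    def set_cache(self, key: str, value: Any, ttl: int) -> None:
        self._store[key] = (value, time.time() + ttl)

    def get_cache(self, key: str) -> Optional[Any]:
        hit = self._store.get(key)
        if hit is None or time.time() > hit[1]:
            return None  # miss or expired -> caller falls back to the DB
        return hit[0]

cache = InMemoryTTLCache()
cache.set_cache(key="hashed-sk-token", value={"user_id": "u1"}, ttl=60)
assert cache.get_cache("hashed-sk-token") is not None  # auth check hits the cache
```
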
@@ -1675,14 +1689,16 @@ async def startup_event():
 
     if prisma_client is not None and master_key is not None:
         # add master key to db
-        await generate_key_helper_fn(
-            duration=None,
-            models=[],
-            aliases={},
-            config={},
-            spend=0,
-            token=master_key,
-            user_id="default_user_id",
-        )
+        asyncio.create_task(
+            generate_key_helper_fn(
+                duration=None,
+                models=[],
+                aliases={},
+                config={},
+                spend=0,
+                token=master_key,
+                user_id="default_user_id",
+            )
+        )
 
     if prisma_client is not None and litellm.max_budget > 0:
@@ -1692,20 +1708,22 @@ async def startup_event():
         )
 
         # add proxy budget to db in the user table
-        await generate_key_helper_fn(
-            user_id=litellm_proxy_budget_name,
-            duration=None,
-            models=[],
-            aliases={},
-            config={},
-            spend=0,
-            max_budget=litellm.max_budget,
-            budget_duration=litellm.budget_duration,
-            query_type="update_data",
-            update_key_values={
-                "max_budget": litellm.max_budget,
-                "budget_duration": litellm.budget_duration,
-            },
-        )
+        asyncio.create_task(
+            generate_key_helper_fn(
+                user_id=litellm_proxy_budget_name,
+                duration=None,
+                models=[],
+                aliases={},
+                config={},
+                spend=0,
+                max_budget=litellm.max_budget,
+                budget_duration=litellm.budget_duration,
+                query_type="update_data",
+                update_key_values={
+                    "max_budget": litellm.max_budget,
+                    "budget_duration": litellm.budget_duration,
+                },
+            )
+        )
 
     verbose_proxy_logger.debug(
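
This hunk, like the master-key hunk above, swaps a blocking `await` for `asyncio.create_task(...)`, so `startup_event` returns without waiting on these DB writes. A minimal sketch of the fire-and-forget pattern (`seed_db` is a hypothetical stand-in for `generate_key_helper_fn`):

```python
import asyncio

async def seed_db() -> None:
    # hypothetical stand-in for generate_key_helper_fn's DB upsert
    await asyncio.sleep(1)
    print("db seeded")

async def startup_event() -> None:
    task = asyncio.create_task(seed_db())  # scheduled, not awaited
    print("startup returns immediately")
    await task  # demo only: keeps the loop alive so the task can finish

asyncio.run(startup_event())
```

One caveat with this pattern: a task created without keeping any reference can be garbage-collected before it completes, so holding the returned task object (as the demo does) is the safer form.
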
@@ -2962,6 +2980,60 @@ async def google_login(request: Request):
         )
         with microsoft_sso:
             return await microsoft_sso.get_login_redirect()
+    else:
+        # No Google, Microsoft SSO
+        # Use UI Credentials set in .env
+        from fastapi.responses import HTMLResponse
+
+        return HTMLResponse(content=html_form, status_code=200)
+
+
+@router.post(
+    "/login", include_in_schema=False
+)  # hidden since this is a helper for UI sso login
+async def login(request: Request):
+    try:
+        import multipart
+    except ImportError:
+        subprocess.run(["pip", "install", "python-multipart"])
+
+    form = await request.form()
+    username = str(form.get("username"))
+    password = form.get("password")
+    ui_username = os.getenv("UI_USERNAME")
+    ui_password = os.getenv("UI_PASSWORD")
+
+    if username == ui_username and password == ui_password:
+        user_id = username
+        response = await generate_key_helper_fn(
+            **{"duration": "24hr", "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": user_id, "team_id": "litellm-dashboard"}  # type: ignore
+        )
+
+        key = response["token"]  # type: ignore
+        user_id = response["user_id"]  # type: ignore
+        litellm_dashboard_ui = "https://litellm-dashboard.vercel.app/"
+
+        # if user set LITELLM_UI_LINK in .env, use that
+        litellm_ui_link_in_env = os.getenv("LITELLM_UI_LINK", None)
+        if litellm_ui_link_in_env is not None:
+            litellm_dashboard_ui = litellm_ui_link_in_env
+
+        litellm_dashboard_ui += (
+            "?userID="
+            + user_id
+            + "&accessToken="
+            + key
+            + "&proxyBaseUrl="
+            + os.getenv("PROXY_BASE_URL")
+        )
+        return RedirectResponse(url=litellm_dashboard_ui)
+    else:
+        raise ProxyException(
+            message=f"Invalid credentials used to access UI. Passed in username: {username}, passed in password: {password}.\nCheck 'UI_USERNAME', 'UI_PASSWORD' in .env file",
+            type="auth_error",
+            param="invalid_credentials",
+            code=status.HTTP_401_UNAUTHORIZED,
+        )
+
+
 @app.get("/sso/callback", tags=["experimental"])
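
The new `/login` route accepts the form fields rendered by `html_form` and redirects to the dashboard with the generated key. A hedged usage sketch with `requests` (host, port, and credentials are illustrative):

```python
import requests

resp = requests.post(
    "http://localhost:4000/login",  # assumes the proxy is running locally on :4000
    data={"username": "ishaan-litellm", "password": "langchain"},
    allow_redirects=False,
)
print(resp.status_code)              # 307 redirect on valid credentials
print(resp.headers.get("location"))  # dashboard URL with userID & accessToken params
```
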
@@ -1211,3 +1211,67 @@ async def reset_budget(prisma_client: PrismaClient):
         await prisma_client.update_data(
             query_type="update_many", data_list=users_to_reset, table_name="user"
         )
+
+
+# LiteLLM Admin UI - Non SSO Login
+html_form = """
+<!DOCTYPE html>
+<html>
+<head>
+    <title>LiteLLM Login</title>
+    <style>
+        body {
+            font-family: Arial, sans-serif;
+            background-color: #f4f4f4;
+            margin: 0;
+            padding: 0;
+            display: flex;
+            justify-content: center;
+            align-items: center;
+            height: 100vh;
+        }
+
+        form {
+            background-color: #fff;
+            padding: 20px;
+            border-radius: 8px;
+            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
+        }
+
+        label {
+            display: block;
+            margin-bottom: 8px;
+        }
+
+        input {
+            width: 100%;
+            padding: 8px;
+            margin-bottom: 16px;
+            box-sizing: border-box;
+            border: 1px solid #ccc;
+            border-radius: 4px;
+        }
+
+        input[type="submit"] {
+            background-color: #4caf50;
+            color: #fff;
+            cursor: pointer;
+        }
+
+        input[type="submit"]:hover {
+            background-color: #45a049;
+        }
+    </style>
+</head>
+<body>
+    <form action="/login" method="post">
+        <h2>LiteLLM Login</h2>
+        <label for="username">Username:</label>
+        <input type="text" id="username" name="username" required>
+        <label for="password">Password:</label>
+        <input type="password" id="password" name="password" required>
+        <input type="submit" value="Submit">
+    </form>
+</body>
+</html>
+"""
@@ -289,11 +289,7 @@ class Router:
             timeout = kwargs.get("request_timeout", self.timeout)
             kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)
             kwargs.setdefault("metadata", {}).update({"model_group": model})
-            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
-                # Submit the function to the executor with a timeout
-                future = executor.submit(self.function_with_fallbacks, **kwargs)
-                response = future.result(timeout=timeout)  # type: ignore
-
+            response = self.function_with_fallbacks(**kwargs)
             return response
         except Exception as e:
             raise e
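
The removed block enforced the router-level timeout by running the call in a single-worker thread pool: `future.result(timeout=...)` raises after the deadline, but the worker thread keeps running and the executor's shutdown blocks on it. A small sketch of that failure mode (illustrative only, not Router code):

```python
import concurrent.futures
import time

def slow_call() -> str:
    time.sleep(5)  # stand-in for a slow LLM request
    return "done"

with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
    future = executor.submit(slow_call)
    try:
        future.result(timeout=1)  # raises after 1s...
    except concurrent.futures.TimeoutError:
        print("timed out, but slow_call is still running in the worker")
    # ...and exiting the with-block still waits for slow_call to finish
```

Calling `self.function_with_fallbacks(**kwargs)` directly instead leaves timeout enforcement to the underlying request, which the new test below exercises.
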
87  litellm/tests/test_router_timeout.py  Normal file

@@ -0,0 +1,87 @@
+#### What this tests ####
+# This tests the router's timeout error handling during fallbacks
+
+import sys, os, time
+import traceback, asyncio
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+
+
+import os
+
+import litellm
+from litellm import Router
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+def test_router_timeouts():
+    # Model list for OpenAI and Anthropic models
+    model_list = [
+        {
+            "model_name": "openai-gpt-4",
+            "litellm_params": {
+                "model": "azure/chatgpt-v-2",
+                "api_key": "os.environ/AZURE_API_KEY",
+                "api_base": "os.environ/AZURE_API_BASE",
+                "api_version": "os.environ/AZURE_API_VERSION",
+            },
+            "tpm": 80000,
+        },
+        {
+            "model_name": "anthropic-claude-instant-1.2",
+            "litellm_params": {
+                "model": "claude-instant-1",
+                "api_key": "os.environ/ANTHROPIC_API_KEY",
+            },
+            "tpm": 20000,
+        },
+    ]
+
+    fallbacks_list = [
+        {"openai-gpt-4": ["anthropic-claude-instant-1.2"]},
+    ]
+
+    # Configure router
+    router = Router(
+        model_list=model_list,
+        fallbacks=fallbacks_list,
+        routing_strategy="usage-based-routing",
+        debug_level="INFO",
+        set_verbose=True,
+        redis_host=os.getenv("REDIS_HOST"),
+        redis_password=os.getenv("REDIS_PASSWORD"),
+        redis_port=int(os.getenv("REDIS_PORT")),
+        timeout=10,
+    )
+
+    print("***** TPM SETTINGS *****")
+    for model_object in model_list:
+        print(f"{model_object['model_name']}: {model_object['tpm']} TPM")
+
+    # Sample list of questions
+    questions_list = [
+        {"content": "Tell me a very long joke.", "modality": "voice"},
+    ]
+
+    total_tokens_used = 0
+
+    # Process each question
+    for question in questions_list:
+        messages = [{"content": question["content"], "role": "user"}]
+
+        prompt_tokens = litellm.token_counter(text=question["content"], model="gpt-4")
+        print("prompt_tokens = ", prompt_tokens)
+
+        response = router.completion(
+            model="openai-gpt-4", messages=messages, timeout=5, num_retries=0
+        )
+
+        total_tokens_used += response.usage.total_tokens
+
+        print("Response:", response)
+        print("********** TOKENS USED SO FAR = ", total_tokens_used)
@@ -7490,7 +7490,10 @@ class CustomStreamWrapper:
             logprobs = None
             original_chunk = None  # this is used for function/tool calling
             if len(str_line.choices) > 0:
-                if str_line.choices[0].delta.content is not None:
+                if (
+                    str_line.choices[0].delta is not None
+                    and str_line.choices[0].delta.content is not None
+                ):
                     text = str_line.choices[0].delta.content
                 else:  # function/tool calling chunk - when content is None. in this case we just return the original chunk from openai
                     original_chunk = str_line
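
The widened guard accounts for OpenAI-style tool/function-call streams, where a chunk's `delta.content` is `None` (or the delta itself is missing) and the payload arrives under `tool_calls` instead, so dereferencing `.content` unconditionally can fail. The shapes below are illustrative plain dicts, not actual API objects:

```python
# text chunk: delta.content carries the streamed token
text_chunk = {"choices": [{"delta": {"content": "Hello"}}]}

# tool-call chunk: content is None; arguments stream under tool_calls,
# so the wrapper passes the original chunk through instead of reading .content
tool_chunk = {
    "choices": [
        {
            "delta": {
                "content": None,
                "tool_calls": [
                    {"function": {"name": "get_weather", "arguments": '{"city"'}}
                ],
            }
        }
    ]
}
```
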
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.20.6"
+version = "1.20.7"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"

@@ -63,7 +63,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.20.6"
+version = "1.20.7"
 version_files = [
     "pyproject.toml:^version"
 ]