From b01f31c41fbe19c16a74fde91bcd837e8f7005ca Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Jan 2024 15:34:38 -0800
Subject: [PATCH 01/12] fix(utils.py): check if delta is none

---
 litellm/utils.py | 26 ++------------------------
 1 file changed, 2 insertions(+), 24 deletions(-)

diff --git a/litellm/utils.py b/litellm/utils.py
index bbc4e651c..3aaf53514 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2929,32 +2929,10 @@ def cost_per_token(
         model_with_provider_and_region in model_cost_ref
     ):  # use region based pricing, if it's available
         model_with_provider = model_with_provider_and_region
+    if model_with_provider in model_cost_ref:
+        model = model_with_provider
     # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
     print_verbose(f"Looking up model={model} in model_cost_map")
-    if model_with_provider in model_cost_ref:
-        print_verbose(
-            f"Success: model={model_with_provider} in model_cost_map - {model_cost_ref[model_with_provider]}"
-        )
-        print_verbose(
-            f"applying cost={model_cost_ref[model_with_provider].get('input_cost_per_token', None)} for prompt_tokens={prompt_tokens}"
-        )
-        prompt_tokens_cost_usd_dollar = (
-            model_cost_ref[model_with_provider]["input_cost_per_token"] * prompt_tokens
-        )
-        print_verbose(
-            f"calculated prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}"
-        )
-        print_verbose(
-            f"applying cost={model_cost_ref[model_with_provider].get('output_cost_per_token', None)} for completion_tokens={completion_tokens}"
-        )
-        completion_tokens_cost_usd_dollar = (
-            model_cost_ref[model_with_provider]["output_cost_per_token"]
-            * completion_tokens
-        )
-        print_verbose(
-            f"calculated completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}"
-        )
-        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     if model in model_cost_ref:
         print_verbose(f"Success: model={model} in model_cost_map")
         print_verbose(

From 60cc3d6b309c3059474c6bd1483a0399bc1422bf Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Jan 2024 15:35:23 -0800
Subject: [PATCH 02/12] fix(utils.py): fix streaming delta content being none
 edge-case

---
 litellm/utils.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/litellm/utils.py b/litellm/utils.py
index 3aaf53514..8f00c115d 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -7487,7 +7487,10 @@ class CustomStreamWrapper:
             logprobs = None
             original_chunk = None  # this is used for function/tool calling
             if len(str_line.choices) > 0:
-                if str_line.choices[0].delta.content is not None:
+                if (
+                    str_line.choices[0].delta is not None
+                    and str_line.choices[0].delta.content is not None
+                ):
                     text = str_line.choices[0].delta.content
                 else:  # function/tool calling chunk - when content is None. in this case we just return the original chunk from openai
                     original_chunk = str_line

From 2019347f0da335512ff999c6bf8027234ea7c1e4 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Jan 2024 16:17:23 -0800
Subject: [PATCH 03/12] fix(proxy_server.py): speed up proxy startup time

---
 litellm/proxy/proxy_server.py | 62 ++++++++++++++++++++++-------------
 1 file changed, 39 insertions(+), 23 deletions(-)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index f1ec2744c..0f55cbd59 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1267,7 +1267,7 @@ async def generate_key_helper_fn(
     update_key_values: Optional[dict] = None,
     key_alias: Optional[str] = None,
 ):
-    global prisma_client, custom_db_client
+    global prisma_client, custom_db_client, user_api_key_cache
 
     if prisma_client is None and custom_db_client is None:
         raise Exception(
@@ -1357,6 +1357,18 @@ async def generate_key_helper_fn(
         }
         if general_settings.get("allow_user_auth", False) == True:
             key_data["key_name"] = f"sk-...{token[-4:]}"
+        saved_token = copy.deepcopy(key_data)
+        if isinstance(saved_token["aliases"], str):
+            saved_token["aliases"] = json.loads(saved_token["aliases"])
+        if isinstance(saved_token["config"], str):
+            saved_token["config"] = json.loads(saved_token["config"])
+        if isinstance(saved_token["metadata"], str):
+            saved_token["metadata"] = json.loads(saved_token["metadata"])
+        user_api_key_cache.set_cache(
+            key=key_data["token"],
+            value=LiteLLM_VerificationToken(**saved_token),  # type: ignore
+            ttl=60,
+        )
         if prisma_client is not None:
             ## CREATE USER (If necessary)
             verbose_proxy_logger.debug(f"prisma_client: Creating User={user_data}")
@@ -1671,14 +1683,16 @@ async def startup_event():
 
     if prisma_client is not None and master_key is not None:
         # add master key to db
-        await generate_key_helper_fn(
-            duration=None,
-            models=[],
-            aliases={},
-            config={},
-            spend=0,
-            token=master_key,
-            user_id="default_user_id",
+        asyncio.create_task(
+            generate_key_helper_fn(
+                duration=None,
+                models=[],
+                aliases={},
+                config={},
+                spend=0,
+                token=master_key,
+                user_id="default_user_id",
+            )
         )
 
     if prisma_client is not None and litellm.max_budget > 0:
@@ -1688,20 +1702,22 @@ async def startup_event():
         )
 
         # add proxy budget to db in the user table
-        await generate_key_helper_fn(
-            user_id=litellm_proxy_budget_name,
-            duration=None,
-            models=[],
-            aliases={},
-            config={},
-            spend=0,
-            max_budget=litellm.max_budget,
-            budget_duration=litellm.budget_duration,
-            query_type="update_data",
-            update_key_values={
-                "max_budget": litellm.max_budget,
-                "budget_duration": litellm.budget_duration,
-            },
+        asyncio.create_task(
+            generate_key_helper_fn(
+                user_id=litellm_proxy_budget_name,
+                duration=None,
+                models=[],
+                aliases={},
+                config={},
+                spend=0,
+                max_budget=litellm.max_budget,
+                budget_duration=litellm.budget_duration,
+                query_type="update_data",
+                update_key_values={
+                    "max_budget": litellm.max_budget,
+                    "budget_duration": litellm.budget_duration,
+                },
+            )
         )
 
     verbose_proxy_logger.debug(

From f966cce26ab660293c673293561d91e40f1c04fc Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Tue, 30 Jan 2024 16:47:12 -0800
Subject: [PATCH 04/12] (feat) add litellm login to proxy

---
 litellm/proxy/proxy_server.py | 78 +++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index f1ec2744c..3ed91684a 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -3103,6 +3103,84 @@ async def user_info(
     )
 
 
+html_form = """
+<!DOCTYPE html>
+<html>
+<head>
+    <title>LiteLLM Login</title>
+</head>
+<body>
+    <form action="/login" method="post">
+        <h2>LiteLLM Login</h2>
+        <label for="username">Username:</label>
+        <input type="text" id="username" name="username" required>
+        <label for="password">Password:</label>
+        <input type="password" id="password" name="password" required>
+        <input type="submit" value="Submit">
+    </form>
+</body>
+</html>
+"""
+from fastapi import FastAPI, Form
+from fastapi.responses import HTMLResponse
+
+
+@router.get("/login/page")
+async def login_page():
+    return HTMLResponse(content=html_form, status_code=200)
+
+
+@router.get("/login")
+async def login(username: str = Form(...), password: str = Form(...)):
+    # Here you can perform authentication logic
+    # For simplicity, let's just print the received credentials
+    # print(f"Received username: {username}, password: {password}")
+    return {"message": "Login successful"}
+
+
 @router.post(
     "/user/update", tags=["user management"], dependencies=[Depends(user_api_key_auth)]
 )

From 8daabe16f4f282e6419ead7a3e3fbc0d7a4bcce4 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Tue, 30 Jan 2024 17:00:40 -0800
Subject: [PATCH 05/12] (feat) allow users to use UI without SSO

---
 litellm/proxy/proxy_server.py | 126 +++++++++++++---------------------
 litellm/proxy/utils.py        |  64 +++++++++++++++++
 2 files changed, 112 insertions(+), 78 deletions(-)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 3ed91684a..9bd33413c 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -76,6 +76,7 @@ from litellm.proxy.utils import (
     get_logging_payload,
     reset_budget,
     hash_token,
+    html_form,
 )
 from litellm.proxy.secret_managers.google_kms import load_google_kms
 import pydantic
@@ -94,6 +95,7 @@ from fastapi import (
     BackgroundTasks,
     Header,
     Response,
+    Form,
 )
 from fastapi.routing import APIRouter
 from fastapi.security import OAuth2PasswordBearer
@@ -2958,6 +2960,52 @@ async def google_login(request: Request):
         )
         with microsoft_sso:
             return await microsoft_sso.get_login_redirect()
+    else:
+        # No Google, Microsoft SSO
+        # Use UI Credentials set in .env
+        from fastapi.responses import HTMLResponse
+
+        return HTMLResponse(content=html_form, status_code=200)
+
+
+@router.post(
+    "/login", include_in_schema=False
+)  # hidden since this is a helper for UI sso login
+async def login(username: str = Form(...), password: str = Form(...)):
+    ui_username = os.getenv("UI_USERNAME")
+    ui_password = os.getenv("UI_PASSWORD")
+
+    if username == ui_username and password == ui_password:
+        user_id = username
+        response = await generate_key_helper_fn(
+            **{"duration": "24hr", "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": user_id, "team_id": "litellm-dashboard"}  # type: ignore
+        )
+
+        key = response["token"]  # type: ignore
+        user_id = response["user_id"]  # type: ignore
+        litellm_dashboard_ui = "https://litellm-dashboard.vercel.app/"
+
+        # if user set LITELLM_UI_LINK in .env, use that
+        litellm_ui_link_in_env = os.getenv("LITELLM_UI_LINK", None)
+        if litellm_ui_link_in_env is not None:
+            litellm_dashboard_ui = litellm_ui_link_in_env
+
+        litellm_dashboard_ui += (
+            "?userID="
+            + user_id
+            + "&accessToken="
+            + key
+            + "&proxyBaseUrl="
+            + os.getenv("PROXY_BASE_URL")
+        )
+        return RedirectResponse(url=litellm_dashboard_ui)
+    else:
+        raise ProxyException(
+            message=f"Invalid credentials used to access UI. Passed in username: {username}, passed in password: {password}.\nCheck 'UI_USERNAME', 'UI_PASSWORD' in .env file",
+            type="auth_error",
+            param="invalid_credentials",
+            code=status.HTTP_401_UNAUTHORIZED,
+        )
 
 
 @app.get("/sso/callback", tags=["experimental"])
@@ -3103,84 +3151,6 @@ async def user_info(
     )
 
 
-html_form = """
-<!DOCTYPE html>
-<html>
-<head>
-    <title>LiteLLM Login</title>
-</head>
-<body>
-    <form action="/login" method="post">
-        <h2>LiteLLM Login</h2>
-        <label for="username">Username:</label>
-        <input type="text" id="username" name="username" required>
-        <label for="password">Password:</label>
-        <input type="password" id="password" name="password" required>
-        <input type="submit" value="Submit">
-    </form>
-</body>
-</html>
-"""
-from fastapi import FastAPI, Form
-from fastapi.responses import HTMLResponse
-
-
-@router.get("/login/page")
-async def login_page():
-    return HTMLResponse(content=html_form, status_code=200)
-
-
-@router.get("/login")
-async def login(username: str = Form(...), password: str = Form(...)):
-    # Here you can perform authentication logic
-    # For simplicity, let's just print the received credentials
-    # print(f"Received username: {username}, password: {password}")
-    return {"message": "Login successful"}
-
-
 @router.post(
     "/user/update", tags=["user management"], dependencies=[Depends(user_api_key_auth)]
 )
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 3ec45203f..d9194e712 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -1208,3 +1208,67 @@ async def reset_budget(prisma_client: PrismaClient):
         await prisma_client.update_data(
             query_type="update_many", data_list=users_to_reset, table_name="user"
         )
+
+
+# LiteLLM Admin UI - Non SSO Login
+html_form = """
+<!DOCTYPE html>
+<html>
+<head>
+    <title>LiteLLM Login</title>
+</head>
+<body>
+    <form action="/login" method="post">
+        <h2>LiteLLM Login</h2>
+        <label for="username">Username:</label>
+        <input type="text" id="username" name="username" required>
+        <label for="password">Password:</label>
+        <input type="password" id="password" name="password" required>
+        <input type="submit" value="Submit">
+    </form>
+</body>
+</html>
+"""

From cfa69c31847bb39a15e8eed1c24c750a884d8872 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Tue, 30 Jan 2024 17:05:36 -0800
Subject: [PATCH 06/12] (docs) UI - no sso

---
 docs/my-website/docs/proxy/ui.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/docs/my-website/docs/proxy/ui.md b/docs/my-website/docs/proxy/ui.md
index 0a19c427c..6538793c1 100644
--- a/docs/my-website/docs/proxy/ui.md
+++ b/docs/my-website/docs/proxy/ui.md
@@ -31,6 +31,18 @@ general_settings:
 
 ## 2. Setup SSO/Auth for UI
 
+<Tabs>
+
+<TabItem value="username" label="Quick Start - Username, Password">
+
+Set the following in your .env on the Proxy
+
+```shell
+UI_USERNAME=ishaan-litellm
+UI_PASSWORD=langchain
+```
+
+On accessing the LiteLLM UI, you will be prompted to enter your username, password
+
+</TabItem>
 
 
@@ -73,6 +85,7 @@ MICROSOFT_TENANT="5a39737
 ```
 
+</Tabs>
 
 ## 4. Use UI

From 97805891732e34164a1fde20b2dc112e2de1e661 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Tue, 30 Jan 2024 18:13:53 -0800
Subject: [PATCH 07/12] (fix) dependencies in /sso/key/generate

---
 litellm/proxy/proxy_server.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 9bd33413c..4b13160d7 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -2971,7 +2971,10 @@ async def google_login(request: Request):
 @router.post(
     "/login", include_in_schema=False
 )  # hidden since this is a helper for UI sso login
-async def login(username: str = Form(...), password: str = Form(...)):
+async def login(request: Request):
+    form = await request.form()
+    username = str(form.get("username"))
+    password = form.get("password")
     ui_username = os.getenv("UI_USERNAME")
     ui_password = os.getenv("UI_PASSWORD")
 

From 069976daed3e4d410cdd4874dc6db1d4ec783d8b Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Tue, 30 Jan 2024 18:19:49 -0800
Subject: [PATCH 08/12] (fix) install python-multipart if missing

---
 litellm/proxy/proxy_server.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 4b13160d7..edd4232f7 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -2972,6 +2972,11 @@ async def google_login(request: Request):
     "/login", include_in_schema=False
 )  # hidden since this is a helper for UI sso login
 async def login(request: Request):
+    try:
+        import multipart
+    except ImportError:
+        subprocess.run(["pip", "install", "python-multipart"])
+
     form = await request.form()
     username = str(form.get("username"))
     password = form.get("password")

From a27858c5c87d40a07b2f6e25c588654f67dc1696 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Jan 2024 18:58:54 -0800
Subject: [PATCH 09/12] fix(_types.py): support datetime as a type for expires
 field

---
 litellm/proxy/_types.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index 22565eb2b..1431128ae 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -325,17 +325,18 @@ class LiteLLM_VerificationToken(LiteLLMBase):
     key_alias: Optional[str] = None
     spend: float = 0.0
     max_budget: Optional[float] = None
-    expires: Union[str, None]
+    expires: Union[datetime, str, None]
     models: List[str]
-    aliases: Dict[str, str] = {}
-    config: Dict[str, str] = {}
-    user_id: Union[str, None]
-    max_parallel_requests: Union[int, None]
-    metadata: Dict[str, str] = {}
+    aliases: Dict = {}
+    config: Dict = {}
+    user_id: Optional[str] = None
+    max_parallel_requests: Optional[int] = None
+    metadata: Dict = {}
     tpm_limit: Optional[int] = None
     rpm_limit: Optional[int] = None
     budget_duration: Optional[str] = None
     budget_reset_at: Optional[datetime] = None
+    team_id: Optional[str] = None
 
 
 class LiteLLM_Config(LiteLLMBase):

From a07f3ec2d4a638e7509dc9907ece8c323dc10313 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Jan 2024 21:11:55 -0800
Subject: [PATCH 10/12] fix(router.py): remove wrapping of router.completion()
 let clients handle this

---
 litellm/router.py                    |  6 +-
 litellm/tests/test_router_timeout.py | 87 ++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+), 5 deletions(-)
 create mode 100644 litellm/tests/test_router_timeout.py

diff --git a/litellm/router.py b/litellm/router.py
index 0d6d108e4..bf5781c56 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -289,11 +289,7 @@ class Router:
             timeout = kwargs.get("request_timeout", self.timeout)
             kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)
             kwargs.setdefault("metadata", {}).update({"model_group": model})
-            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
-                # Submit the function to the executor with a timeout
-                future = executor.submit(self.function_with_fallbacks, **kwargs)
-                response = future.result(timeout=timeout)  # type: ignore
-
+            response = self.function_with_fallbacks(**kwargs)
             return response
         except Exception as e:
             raise e
diff --git a/litellm/tests/test_router_timeout.py b/litellm/tests/test_router_timeout.py
new file mode 100644
index 000000000..1f93a60a6
--- /dev/null
+++ b/litellm/tests/test_router_timeout.py
@@ -0,0 +1,87 @@
+#### What this tests ####
+# This tests if the router timeout error handling during fallbacks
+
+import sys, os, time
+import traceback, asyncio
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+
+
+import os
+
+import litellm
+from litellm import Router
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+def test_router_timeouts():
+    # Model list for OpenAI and Anthropic models
+    model_list = [
+        {
+            "model_name": "openai-gpt-4",
+            "litellm_params": {
+                "model": "azure/chatgpt-v-2",
+                "api_key": "os.environ/AZURE_API_KEY",
+                "api_base": "os.environ/AZURE_API_BASE",
+                "api_version": "os.environ/AZURE_API_VERSION",
+            },
+            "tpm": 80000,
+        },
+        {
+            "model_name": "anthropic-claude-instant-1.2",
+            "litellm_params": {
+                "model": "claude-instant-1",
+                "api_key": "os.environ/ANTHROPIC_API_KEY",
+            },
+            "tpm": 20000,
+        },
+    ]
+
+    fallbacks_list = [
+        {"openai-gpt-4": ["anthropic-claude-instant-1.2"]},
+    ]
+
+    # Configure router
+    router = Router(
+        model_list=model_list,
+        fallbacks=fallbacks_list,
+        routing_strategy="usage-based-routing",
+        debug_level="INFO",
+        set_verbose=True,
+        redis_host=os.getenv("REDIS_HOST"),
+        redis_password=os.getenv("REDIS_PASSWORD"),
+        redis_port=int(os.getenv("REDIS_PORT")),
+        timeout=10,
+    )
+
+    print("***** TPM SETTINGS *****")
+    for model_object in model_list:
+        print(f"{model_object['model_name']}: {model_object['tpm']} TPM")
+
+    # Sample list of questions
+    questions_list = [
+        {"content": "Tell me a very long joke.", "modality": "voice"},
+    ]
+
+    total_tokens_used = 0
+
+    # Process each question
+    for question in questions_list:
+        messages = [{"content": question["content"], "role": "user"}]
+
+        prompt_tokens = litellm.token_counter(text=question["content"], model="gpt-4")
+        print("prompt_tokens = ", prompt_tokens)
+
+        response = router.completion(
+            model="openai-gpt-4", messages=messages, timeout=5, num_retries=0
+        )
+
+        total_tokens_used += response.usage.total_tokens
+
+        print("Response:", response)
+        print("********** TOKENS USED SO FAR = ", total_tokens_used)

From 114c2f82d4510696e47600bdbe6252d410ff28d9 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Jan 2024 21:15:34 -0800
Subject: [PATCH 11/12] =?UTF-8?q?bump:=20version=201.20.6=20=E2=86=92=201.?=
 =?UTF-8?q?20.7?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 48b641bc1..2855deeda 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.20.6"
+version = "1.20.7"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -63,7 +63,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.20.6"
+version = "1.20.7"
 version_files = [
     "pyproject.toml:^version"
 ]

From de223d0059853cdc0cb85269805c89f177747378 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Jan 2024 21:17:01 -0800
Subject: [PATCH 12/12] build(schema.prisma): update prisma schema with
 allowed_cache_controls param

---
 litellm/proxy/schema.prisma | 2 ++
 schema.prisma               | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma
index 02e4114e5..da2857075 100644
--- a/litellm/proxy/schema.prisma
+++ b/litellm/proxy/schema.prisma
@@ -20,6 +20,7 @@ model LiteLLM_UserTable {
   rpm_limit          BigInt?
   budget_duration    String?
   budget_reset_at    DateTime?
+  allowed_cache_controls String[] @default([])
 }
 
 // Generate Tokens for Proxy
@@ -41,6 +42,7 @@ model LiteLLM_VerificationToken {
   max_budget           Float?
   budget_duration      String?
   budget_reset_at      DateTime?
+  allowed_cache_controls String[] @default([])
 }
 
 // store proxy config.yaml
diff --git a/schema.prisma b/schema.prisma
index 02e4114e5..da2857075 100644
--- a/schema.prisma
+++ b/schema.prisma
@@ -20,6 +20,7 @@ model LiteLLM_UserTable {
   rpm_limit          BigInt?
   budget_duration    String?
   budget_reset_at    DateTime?
+  allowed_cache_controls String[] @default([])
 }
 
 // Generate Tokens for Proxy
@@ -41,6 +42,7 @@ model LiteLLM_VerificationToken {
   max_budget           Float?
   budget_duration      String?
   budget_reset_at      DateTime?
+  allowed_cache_controls String[] @default([])
 }
 
 // store proxy config.yaml