From b01f31c41fbe19c16a74fde91bcd837e8f7005ca Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Jan 2024 15:34:38 -0800
Subject: [PATCH 01/12] fix(utils.py): check if delta is none

---
 litellm/utils.py | 26 ++------------------------
 1 file changed, 2 insertions(+), 24 deletions(-)

diff --git a/litellm/utils.py b/litellm/utils.py
index bbc4e651c..3aaf53514 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2929,32 +2929,10 @@ def cost_per_token(
         model_with_provider_and_region in model_cost_ref
     ):  # use region based pricing, if it's available
         model_with_provider = model_with_provider_and_region
+    if model_with_provider in model_cost_ref:
+        model = model_with_provider
     # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
     print_verbose(f"Looking up model={model} in model_cost_map")
-    if model_with_provider in model_cost_ref:
-        print_verbose(
-            f"Success: model={model_with_provider} in model_cost_map - {model_cost_ref[model_with_provider]}"
-        )
-        print_verbose(
-            f"applying cost={model_cost_ref[model_with_provider].get('input_cost_per_token', None)} for prompt_tokens={prompt_tokens}"
-        )
-        prompt_tokens_cost_usd_dollar = (
-            model_cost_ref[model_with_provider]["input_cost_per_token"] * prompt_tokens
-        )
-        print_verbose(
-            f"calculated prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}"
-        )
-        print_verbose(
-            f"applying cost={model_cost_ref[model_with_provider].get('output_cost_per_token', None)} for completion_tokens={completion_tokens}"
-        )
-        completion_tokens_cost_usd_dollar = (
-            model_cost_ref[model_with_provider]["output_cost_per_token"]
-            * completion_tokens
-        )
-        print_verbose(
-            f"calculated completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}"
-        )
-        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     if model in model_cost_ref:
         print_verbose(f"Success: model={model} in model_cost_map")
         print_verbose(

From 60cc3d6b309c3059474c6bd1483a0399bc1422bf Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Jan 2024 15:35:23 -0800
Subject: [PATCH 02/12] fix(utils.py): fix streaming delta content being none
 edge-case

---
 litellm/utils.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/litellm/utils.py b/litellm/utils.py
index 3aaf53514..8f00c115d 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -7487,7 +7487,10 @@ class CustomStreamWrapper:
             logprobs = None
             original_chunk = None  # this is used for function/tool calling
             if len(str_line.choices) > 0:
-                if str_line.choices[0].delta.content is not None:
+                if (
+                    str_line.choices[0].delta is not None
+                    and str_line.choices[0].delta.content is not None
+                ):
                     text = str_line.choices[0].delta.content
                 else:  # function/tool calling chunk - when content is None. in this case we just return the original chunk from openai
                     original_chunk = str_line

From 2019347f0da335512ff999c6bf8027234ea7c1e4 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Jan 2024 16:17:23 -0800
Subject: [PATCH 03/12] fix(proxy_server.py): speed up proxy startup time

---
 litellm/proxy/proxy_server.py | 62 ++++++++++++++++++++++-------------
 1 file changed, 39 insertions(+), 23 deletions(-)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index f1ec2744c..0f55cbd59 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1267,7 +1267,7 @@ async def generate_key_helper_fn(
     update_key_values: Optional[dict] = None,
     key_alias: Optional[str] = None,
 ):
-    global prisma_client, custom_db_client
+    global prisma_client, custom_db_client, user_api_key_cache
 
     if prisma_client is None and custom_db_client is None:
         raise Exception(
@@ -1357,6 +1357,18 @@ async def generate_key_helper_fn(
         }
         if general_settings.get("allow_user_auth", False) == True:
             key_data["key_name"] = f"sk-...{token[-4:]}"
+        saved_token = copy.deepcopy(key_data)
+        if isinstance(saved_token["aliases"], str):
+            saved_token["aliases"] = json.loads(saved_token["aliases"])
+        if isinstance(saved_token["config"], str):
+            saved_token["config"] = json.loads(saved_token["config"])
+        if isinstance(saved_token["metadata"], str):
+            saved_token["metadata"] = json.loads(saved_token["metadata"])
+        user_api_key_cache.set_cache(
+            key=key_data["token"],
+            value=LiteLLM_VerificationToken(**saved_token),  # type: ignore
+            ttl=60,
+        )
         if prisma_client is not None:
             ## CREATE USER (If necessary)
             verbose_proxy_logger.debug(f"prisma_client: Creating User={user_data}")
@@ -1671,14 +1683,16 @@ async def startup_event():
 
     if prisma_client is not None and master_key is not None:
         # add master key to db
-        await generate_key_helper_fn(
-            duration=None,
-            models=[],
-            aliases={},
-            config={},
-            spend=0,
-            token=master_key,
-            user_id="default_user_id",
+        asyncio.create_task(
+            generate_key_helper_fn(
+                duration=None,
+                models=[],
+                aliases={},
+                config={},
+                spend=0,
+                token=master_key,
+                user_id="default_user_id",
+            )
         )
 
     if prisma_client is not None and litellm.max_budget > 0:
@@ -1688,20 +1702,22 @@ async def startup_event():
         )
 
         # add proxy budget to db in the user table
-        await generate_key_helper_fn(
-            user_id=litellm_proxy_budget_name,
-            duration=None,
-            models=[],
-            aliases={},
-            config={},
-            spend=0,
-            max_budget=litellm.max_budget,
-            budget_duration=litellm.budget_duration,
-            query_type="update_data",
-            update_key_values={
-                "max_budget": litellm.max_budget,
-                "budget_duration": litellm.budget_duration,
-            },
+        asyncio.create_task(
+            generate_key_helper_fn(
+                user_id=litellm_proxy_budget_name,
+                duration=None,
+                models=[],
+                aliases={},
+                config={},
+                spend=0,
+                max_budget=litellm.max_budget,
+                budget_duration=litellm.budget_duration,
+                query_type="update_data",
+                update_key_values={
+                    "max_budget": litellm.max_budget,
+                    "budget_duration": litellm.budget_duration,
+                },
+            )
         )
 
     verbose_proxy_logger.debug(

From f966cce26ab660293c673293561d91e40f1c04fc Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Tue, 30 Jan 2024 16:47:12 -0800
Subject: [PATCH 04/12] (feat) add litellm login to proxy

---
 litellm/proxy/proxy_server.py | 78 +++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index f1ec2744c..3ed91684a 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -3103,6 +3103,84 @@ async def user_info(
     )
 
 
+html_form = """
+<!DOCTYPE html>
+<html>
+<head>
+    <title>LiteLLM Login</title>
+</head>
+<body>
+    <form action="/login" method="post">
+        <h2>LiteLLM Login</h2>
+        <label for="username">Username:</label>
+        <input type="text" id="username" name="username" required>
+        <label for="password">Password:</label>
+        <input type="password" id="password" name="password" required>
+        <input type="submit" value="Submit">
+    </form>
+</body>
+</html>
+"""
+from fastapi import FastAPI, Form
+from fastapi.responses import HTMLResponse
+
+
+@router.get("/login/page")
+async def login_page():
+    return HTMLResponse(content=html_form, status_code=200)
+
+
+@router.get("/login")
+async def login(username: str = Form(...), password: str = Form(...)):
+    # Here you can perform authentication logic
+    # For simplicity, let's just print the received credentials
+    # print(f"Received username: {username}, password: {password}")
+    return {"message": "Login successful"}
+
+
 @router.post(
     "/user/update", tags=["user management"], dependencies=[Depends(user_api_key_auth)]
 )

From 8daabe16f4f282e6419ead7a3e3fbc0d7a4bcce4 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Tue, 30 Jan 2024 17:00:40 -0800
Subject: [PATCH 05/12] (feat) allow users to use UI without SSO

---
 litellm/proxy/proxy_server.py | 126 +++++++++++++---------------------
 litellm/proxy/utils.py        |  64 +++++++++++++++++
 2 files changed, 112 insertions(+), 78 deletions(-)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 3ed91684a..9bd33413c 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -76,6 +76,7 @@ from litellm.proxy.utils import (
     get_logging_payload,
     reset_budget,
     hash_token,
+    html_form,
 )
 from litellm.proxy.secret_managers.google_kms import load_google_kms
 import pydantic
@@ -94,6 +95,7 @@ from fastapi import (
     BackgroundTasks,
     Header,
     Response,
+    Form,
 )
 from fastapi.routing import APIRouter
 from fastapi.security import OAuth2PasswordBearer
@@ -2958,6 +2960,52 @@ async def google_login(request: Request):
         )
         with microsoft_sso:
             return await microsoft_sso.get_login_redirect()
+    else:
+        # No Google, Microsoft SSO
+        # Use UI Credentials set in .env
+        from fastapi.responses import HTMLResponse
+
+        return HTMLResponse(content=html_form, status_code=200)
+
+
+@router.post(
+    "/login", include_in_schema=False
+)  # hidden since this is a helper for UI sso login
+async def login(username: str = Form(...), password: str = Form(...)):
+    ui_username = os.getenv("UI_USERNAME")
+    ui_password = os.getenv("UI_PASSWORD")
+
+    if username == ui_username and password == ui_password:
+        user_id = username
+        response = await generate_key_helper_fn(
+            **{"duration": "24hr", "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": user_id, "team_id": "litellm-dashboard"}  # type: ignore
+        )
+
+        key = response["token"]  # type: ignore
+        user_id = response["user_id"]  # type: ignore
+        litellm_dashboard_ui = "https://litellm-dashboard.vercel.app/"
+
+        # if user set LITELLM_UI_LINK in .env, use that
+        litellm_ui_link_in_env = os.getenv("LITELLM_UI_LINK", None)
+        if litellm_ui_link_in_env is not None:
+            litellm_dashboard_ui = litellm_ui_link_in_env
+
+        litellm_dashboard_ui += (
+            "?userID="
+            + user_id
+            + "&accessToken="
+            + key
+            + "&proxyBaseUrl="
+            + os.getenv("PROXY_BASE_URL")
+        )
+        return RedirectResponse(url=litellm_dashboard_ui)
+    else:
+        raise ProxyException(
+            message=f"Invalid credentials used to access UI. Passed in username: {username}, passed in password: {password}.\nCheck 'UI_USERNAME', 'UI_PASSWORD' in .env file",
+            type="auth_error",
+            param="invalid_credentials",
+            code=status.HTTP_401_UNAUTHORIZED,
+        )
 
 
 @app.get("/sso/callback", tags=["experimental"])
@@ -3103,84 +3151,6 @@ async def user_info(
     )
 
 
-html_form = """
-<!DOCTYPE html>
-<html>
-<head>
-    <title>LiteLLM Login</title>
-</head>
-<body>
-    <form action="/login" method="post">
-        <h2>LiteLLM Login</h2>
-        <label for="username">Username:</label>
-        <input type="text" id="username" name="username" required>
-        <label for="password">Password:</label>
-        <input type="password" id="password" name="password" required>
-        <input type="submit" value="Submit">
-    </form>
-</body>
-</html>
-"""
-from fastapi import FastAPI, Form
-from fastapi.responses import HTMLResponse
-
-
-@router.get("/login/page")
-async def login_page():
-    return HTMLResponse(content=html_form, status_code=200)
-
-
-@router.get("/login")
-async def login(username: str = Form(...), password: str = Form(...)):
-    # Here you can perform authentication logic
-    # For simplicity, let's just print the received credentials
-    # print(f"Received username: {username}, password: {password}")
-    return {"message": "Login successful"}
-
-
 @router.post(
     "/user/update", tags=["user management"], dependencies=[Depends(user_api_key_auth)]
 )
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 3ec45203f..d9194e712 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -1208,3 +1208,67 @@ async def reset_budget(prisma_client: PrismaClient):
         await prisma_client.update_data(
             query_type="update_many", data_list=users_to_reset, table_name="user"
         )
+
+
+# LiteLLM Admin UI - Non SSO Login
+html_form = """
+<!DOCTYPE html>
+<html>
+<head>
+    <title>LiteLLM Login</title>
+</head>
+<body>
+    <form action="/login" method="post">
+        <h2>LiteLLM Login</h2>
+        <label for="username">Username:</label>
+        <input type="text" id="username" name="username" required>
+        <label for="password">Password:</label>
+        <input type="password" id="password" name="password" required>
+        <input type="submit" value="Submit">
+    </form>
+</body>
+</html>
+"""

From cfa69c31847bb39a15e8eed1c24c750a884d8872 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Tue, 30 Jan 2024 17:05:36 -0800
Subject: [PATCH 06/12] (docs) UI - no sso

---
 docs/my-website/docs/proxy/ui.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/docs/my-website/docs/proxy/ui.md b/docs/my-website/docs/proxy/ui.md
index 0a19c427c..6538793c1 100644
--- a/docs/my-website/docs/proxy/ui.md
+++ b/docs/my-website/docs/proxy/ui.md
@@ -31,6 +31,18 @@ general_settings:
 
 ## 2. Setup SSO/Auth for UI
 
+<Tabs>
+
+<TabItem value="username" label="Quick Start - Username, Password">
+
+Set the following in your .env on the Proxy
+
+```shell
+UI_USERNAME=ishaan-litellm
+UI_PASSWORD=langchain
+```
+
+On accessing the LiteLLM UI, you will be prompted to enter your username, password
+
+</TabItem>
 
 
@@ -73,6 +85,7 @@ MICROSOFT_TENANT="5a39737
 ```
 
+</Tabs>
 
 ## 4. Use UI

From 97805891732e34164a1fde20b2dc112e2de1e661 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Tue, 30 Jan 2024 18:13:53 -0800
Subject: [PATCH 07/12] (fix) dependencies in /sso/key/generate

---
 litellm/proxy/proxy_server.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 9bd33413c..4b13160d7 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -2971,7 +2971,10 @@ async def google_login(request: Request):
 @router.post(
     "/login", include_in_schema=False
 )  # hidden since this is a helper for UI sso login
-async def login(username: str = Form(...), password: str = Form(...)):
+async def login(request: Request):
+    form = await request.form()
+    username = str(form.get("username"))
+    password = form.get("password")
     ui_username = os.getenv("UI_USERNAME")
     ui_password = os.getenv("UI_PASSWORD")
 

From 069976daed3e4d410cdd4874dc6db1d4ec783d8b Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Tue, 30 Jan 2024 18:19:49 -0800
Subject: [PATCH 08/12] (fix) install python-multipart if missing

---
 litellm/proxy/proxy_server.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 4b13160d7..edd4232f7 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -2972,6 +2972,11 @@ async def google_login(request: Request):
     "/login", include_in_schema=False
 )  # hidden since this is a helper for UI sso login
 async def login(request: Request):
+    try:
+        import multipart
+    except ImportError:
+        subprocess.run(["pip", "install", "python-multipart"])
+
     form = await request.form()
     username = str(form.get("username"))
     password = form.get("password")

From a27858c5c87d40a07b2f6e25c588654f67dc1696 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Jan 2024 18:58:54 -0800
Subject: [PATCH 09/12] fix(_types.py): support datetime as a type for expires
 field

---
 litellm/proxy/_types.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index 22565eb2b..1431128ae 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -325,17 +325,18 @@ class LiteLLM_VerificationToken(LiteLLMBase):
     key_alias: Optional[str] = None
     spend: float = 0.0
     max_budget: Optional[float] = None
-    expires: Union[str, None]
+    expires: Union[datetime, str, None]
     models: List[str]
-    aliases: Dict[str, str] = {}
-    config: Dict[str, str] = {}
-    user_id: Union[str, None]
-    max_parallel_requests: Union[int, None]
-    metadata: Dict[str, str] = {}
+    aliases: Dict = {}
+    config: Dict = {}
+    user_id: Optional[str] = None
+    max_parallel_requests: Optional[int] = None
+    metadata: Dict = {}
     tpm_limit: Optional[int] = None
     rpm_limit: Optional[int] = None
     budget_duration: Optional[str] = None
     budget_reset_at: Optional[datetime] = None
+    team_id: Optional[str] = None
 
 
 class LiteLLM_Config(LiteLLMBase):

From a07f3ec2d4a638e7509dc9907ece8c323dc10313 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Jan 2024 21:11:55 -0800
Subject: [PATCH 10/12] fix(router.py): remove wrapping of router.completion()
 let clients handle this

---
 litellm/router.py                    |  6 +-
 litellm/tests/test_router_timeout.py | 87 ++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+), 5 deletions(-)
 create mode 100644 litellm/tests/test_router_timeout.py

diff --git a/litellm/router.py b/litellm/router.py
index 0d6d108e4..bf5781c56 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -289,11 +289,7 @@ class Router:
             timeout = kwargs.get("request_timeout", self.timeout)
             kwargs["num_retries"] = kwargs.get("num_retries", self.num_retries)
             kwargs.setdefault("metadata", {}).update({"model_group": model})
-            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
-                # Submit the function to the executor with a timeout
-                future = executor.submit(self.function_with_fallbacks, **kwargs)
-                response = future.result(timeout=timeout)  # type: ignore
-
+            response = self.function_with_fallbacks(**kwargs)
             return response
         except Exception as e:
             raise e
diff --git a/litellm/tests/test_router_timeout.py b/litellm/tests/test_router_timeout.py
new file mode 100644
index 000000000..1f93a60a6
--- /dev/null
+++ b/litellm/tests/test_router_timeout.py
@@ -0,0 +1,87 @@
+#### What this tests ####
+# This tests if the router timeout error handling during fallbacks
+
+import sys, os, time
+import traceback, asyncio
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+
+
+import os
+
+import litellm
+from litellm import Router
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+def test_router_timeouts():
+    # Model list for OpenAI and Anthropic models
+    model_list = [
+        {
+            "model_name": "openai-gpt-4",
+            "litellm_params": {
+                "model": "azure/chatgpt-v-2",
+                "api_key": "os.environ/AZURE_API_KEY",
+                "api_base": "os.environ/AZURE_API_BASE",
+                "api_version": "os.environ/AZURE_API_VERSION",
+            },
+            "tpm": 80000,
+        },
+        {
+            "model_name": "anthropic-claude-instant-1.2",
+            "litellm_params": {
+                "model": "claude-instant-1",
+                "api_key": "os.environ/ANTHROPIC_API_KEY",
+            },
+            "tpm": 20000,
+        },
+    ]
+
+    fallbacks_list = [
+        {"openai-gpt-4": ["anthropic-claude-instant-1.2"]},
+    ]
+
+    # Configure router
+    router = Router(
+        model_list=model_list,
+        fallbacks=fallbacks_list,
+        routing_strategy="usage-based-routing",
+        debug_level="INFO",
+        set_verbose=True,
+        redis_host=os.getenv("REDIS_HOST"),
+        redis_password=os.getenv("REDIS_PASSWORD"),
+        redis_port=int(os.getenv("REDIS_PORT")),
+        timeout=10,
+    )
+
+    print("***** TPM SETTINGS *****")
+    for model_object in model_list:
+        print(f"{model_object['model_name']}: {model_object['tpm']} TPM")
+
+    # Sample list of questions
+    questions_list = [
+        {"content": "Tell me a very long joke.", "modality": "voice"},
+    ]
+
+    total_tokens_used = 0
+
+    # Process each question
+    for question in questions_list:
+        messages = [{"content": question["content"], "role": "user"}]
+
+        prompt_tokens = litellm.token_counter(text=question["content"], model="gpt-4")
+        print("prompt_tokens = ", prompt_tokens)
+
+        response = router.completion(
+            model="openai-gpt-4", messages=messages, timeout=5, num_retries=0
+        )
+
+        total_tokens_used += response.usage.total_tokens
+
+        print("Response:", response)
+        print("********** TOKENS USED SO FAR = ", total_tokens_used)

From 114c2f82d4510696e47600bdbe6252d410ff28d9 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Jan 2024 21:15:34 -0800
Subject: [PATCH 11/12] =?UTF-8?q?bump:=20version=201.20.6=20=E2=86=92=201.?=
 =?UTF-8?q?20.7?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 48b641bc1..2855deeda 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.20.6"
+version = "1.20.7"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -63,7 +63,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.20.6"
+version = "1.20.7"
 version_files = [
     "pyproject.toml:^version"
 ]

From de223d0059853cdc0cb85269805c89f177747378 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Jan 2024 21:17:01 -0800
Subject: [PATCH 12/12] build(schema.prisma): update prisma schema with
 allowed_cache_controls param

---
 litellm/proxy/schema.prisma | 2 ++
 schema.prisma               | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/litellm/proxy/schema.prisma b/litellm/proxy/schema.prisma
index 02e4114e5..da2857075 100644
--- a/litellm/proxy/schema.prisma
+++ b/litellm/proxy/schema.prisma
@@ -20,6 +20,7 @@ model LiteLLM_UserTable {
   rpm_limit          BigInt?
   budget_duration    String?
   budget_reset_at    DateTime?
+  allowed_cache_controls String[] @default([])
 }
 
 // Generate Tokens for Proxy
@@ -41,6 +42,7 @@ model LiteLLM_VerificationToken {
   max_budget           Float?
   budget_duration      String?
   budget_reset_at      DateTime?
+  allowed_cache_controls String[] @default([])
 }
 
 // store proxy config.yaml
diff --git a/schema.prisma b/schema.prisma
index 02e4114e5..da2857075 100644
--- a/schema.prisma
+++ b/schema.prisma
@@ -20,6 +20,7 @@ model LiteLLM_UserTable {
   rpm_limit          BigInt?
   budget_duration    String?
   budget_reset_at    DateTime?
+  allowed_cache_controls String[] @default([])
 }
 
 // Generate Tokens for Proxy
@@ -41,6 +42,7 @@ model LiteLLM_VerificationToken {
   max_budget           Float?
   budget_duration      String?
   budget_reset_at      DateTime?
+  allowed_cache_controls String[] @default([])
 }
 
 // store proxy config.yaml