From b6066d1eced2738c2b33ccb013e753063d191795 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Thu, 20 Jun 2024 13:49:44 -0700
Subject: [PATCH 1/4] feat - set custom routing strategy

---
 ...odel_prices_and_context_window_backup.json | 11 +++++++++-
 litellm/router.py                             | 13 +++++++++++
 litellm/types/router.py                       | 22 +++++++++++++++++++
 3 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index d1d221b45..1441d92a2 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -865,7 +865,7 @@
     },
     "deepseek-coder": {
         "max_tokens": 4096,
-        "max_input_tokens": 16000,
+        "max_input_tokens": 32000,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00000014,
         "output_cost_per_token": 0.00000028,
@@ -1984,6 +1984,15 @@
         "litellm_provider": "replicate",
         "mode": "chat"
     },
+    "openrouter/deepseek/deepseek-coder": {
+        "max_tokens": 4096,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000014,
+        "output_cost_per_token": 0.00000028,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
     "openrouter/microsoft/wizardlm-2-8x22b:nitro": {
         "max_tokens": 65536,
         "input_cost_per_token": 0.000001,
diff --git a/litellm/router.py b/litellm/router.py
index 9200089d5..08efbc414 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -69,6 +69,7 @@ from litellm.types.router import (
     AlertingConfig,
     AllowedFailsPolicy,
     AssistantsTypedDict,
+    CustomRoutingStrategy,
     Deployment,
     DeploymentTypedDict,
     LiteLLM_Params,
@@ -4814,6 +4815,18 @@ class Router:
             except Exception as e:
                 pass
 
+    def set_custom_routing_strategy(self, CustomRoutingStrategy: CustomRoutingStrategy):
+        setattr(
+            self,
+            "get_available_deployment",
+            CustomRoutingStrategy.get_available_deployment,
+        )
+        setattr(
+            self,
+            "async_get_available_deployment",
+            CustomRoutingStrategy.async_get_available_deployment,
+        )
+
     def flush_cache(self):
         litellm.cache = None
         self.cache.flush_cache()
diff --git a/litellm/types/router.py b/litellm/types/router.py
index da3c999dc..25b1b5c9c 100644
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@@ -451,3 +451,25 @@ class ModelGroupInfo(BaseModel):
 class AssistantsTypedDict(TypedDict):
     custom_llm_provider: Literal["azure", "openai"]
     litellm_params: LiteLLMParamsTypedDict
+
+
+class CustomRoutingStrategy:
+    async def async_get_available_deployment(
+        self,
+        model: str,
+        messages: Optional[List[Dict[str, str]]] = None,
+        input: Optional[Union[str, List]] = None,
+        specific_deployment: Optional[bool] = False,
+        request_kwargs: Optional[Dict] = None,
+    ):
+        pass
+
+    def get_available_deployment(
+        self,
+        model: str,
+        messages: Optional[List[Dict[str, str]]] = None,
+        input: Optional[Union[str, List]] = None,
+        specific_deployment: Optional[bool] = False,
+        request_kwargs: Optional[Dict] = None,
+    ):
+        pass

From 7fb4e12b9f676c074941f6d2ea820d0b91fe464f Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Thu, 20 Jun 2024 14:11:33 -0700
Subject: [PATCH 2/4] test custom routing strat

---
 litellm/tests/test_router_custom_routing.py | 126 ++++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 litellm/tests/test_router_custom_routing.py

diff --git a/litellm/tests/test_router_custom_routing.py b/litellm/tests/test_router_custom_routing.py
new file mode 100644
index 000000000..d66c304be
--- /dev/null
+++ b/litellm/tests/test_router_custom_routing.py
@@ -0,0 +1,126 @@
+import asyncio
+import os
+import random
+import sys
+import time
+import traceback
+from datetime import datetime, timedelta
+
+from dotenv import load_dotenv
+
+load_dotenv()
+import copy
+import os
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+from typing import Dict, List, Optional, Union
+
+import pytest
+
+import litellm
+from litellm import Router
+from litellm.caching import DualCache
+from litellm.router import CustomRoutingStrategy as BaseCustomRoutingStrategy
+from litellm.router import Deployment, LiteLLM_Params
+from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
+
+router = Router(
+    model_list=[
+        {
+            "model_name": "azure-model",
+            "litellm_params": {
+                "model": "openai/very-special-endpoint",
+                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",  # If you are Krrish, this is OpenAI Endpoint3 on our Railway endpoint :)
+                "api_key": "fake-key",
+            },
+            "model_info": {"id": "very-special-endpoint"},
+        },
+        {
+            "model_name": "azure-model",
+            "litellm_params": {
+                "model": "openai/fast-endpoint",
+                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                "api_key": "fake-key",
+            },
+            "model_info": {"id": "fast-endpoint"},
+        },
+    ],
+    set_verbose=True,
+    debug_level="DEBUG",
+    timeout=1,
+)  # type: ignore
+
+
+class CustomRoutingStrategy(BaseCustomRoutingStrategy):
+    async def async_get_available_deployment(
+        self,
+        model: str,
+        messages: Optional[List[Dict[str, str]]] = None,
+        input: Optional[Union[str, List]] = None,
+        specific_deployment: Optional[bool] = False,
+        request_kwargs: Optional[Dict] = None,
+    ):
+        print("In CUSTOM async get available deployment")
+        model_list = router.model_list
+        print("router model list=", model_list)
+        for model in model_list:
+            if isinstance(model, dict):
+                if model["litellm_params"]["model"] == "openai/very-special-endpoint":
+                    return model
+        pass
+
+    def get_available_deployment(
+        self,
+        model: str,
+        messages: Optional[List[Dict[str, str]]] = None,
+        input: Optional[Union[str, List]] = None,
+        specific_deployment: Optional[bool] = False,
+        request_kwargs: Optional[Dict] = None,
+    ):
+        # used for router.completion() calls
+        pass
+
+
+@pytest.mark.asyncio
+async def test_custom_routing():
+    import litellm
+
+    litellm.set_verbose = True
+    router.set_custom_routing_strategy(CustomRoutingStrategy())
+
+    # make 4 requests
+    for _ in range(4):
+        try:
+            response = await router.acompletion(
+                model="azure-model", messages=[{"role": "user", "content": "hello"}]
+            )
+            print(response)
+        except Exception as e:
+            print("got exception", e)
+
+    await asyncio.sleep(1)
+    print("done sending initial requests")
+    """
+    Note: for debugging
+    - By this point, the custom routing strategy should have been called for each of the 4 requests
+    - The next 10 requests should all be routed to very-special-endpoint
+    """
+
+    deployments = {}
+    # make 10 requests
+    for _ in range(10):
+        response = await router.acompletion(
+            model="azure-model", messages=[{"role": "user", "content": "hello"}]
+        )
+        print(response)
+        _picked_model_id = response._hidden_params["model_id"]
+        if _picked_model_id not in deployments:
+            deployments[_picked_model_id] = 1
+        else:
+            deployments[_picked_model_id] += 1
+    print("deployments", deployments)
+
+    # ALL the requests should have been routed to very-special-endpoint
+    # assert deployments["very-special-endpoint"] == 10

From 91d9d59717b1cd57e5e39054652ec442f3920faa Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Thu, 20 Jun 2024 14:32:52 -0700
Subject: [PATCH 3/4] docs - routing

---
 docs/my-website/docs/routing.md | 123 ++++++++++++++++++++++++++++++--
 1 file changed, 119 insertions(+), 4 deletions(-)

diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md
index 63fac9456..fd4fb8658 100644
--- a/docs/my-website/docs/routing.md
+++ b/docs/my-website/docs/routing.md
@@ -95,7 +95,7 @@ print(response)
 - `router.image_generation()` - completion calls in OpenAI `/v1/images/generations` endpoint format
 - `router.aimage_generation()` - async image generation calls
 
-## Advanced - Routing Strategies
+## Advanced - Routing Strategies ⭐️
 #### Routing Strategies - Weighted Pick, Rate Limit Aware, Least Busy, Latency Based, Cost Based
 
 Router provides 4 strategies for routing your calls across multiple deployments:
@@ -262,7 +262,7 @@ if response is not None:
     )
 ```
 
-### Set Time Window
+#### Set Time Window
 
 Set time window for how far back to consider when averaging latency for a deployment.
 
@@ -278,7 +278,7 @@ router_settings:
   routing_strategy_args: {"ttl": 10}
 ```
 
-### Set Lowest Latency Buffer
+#### Set Lowest Latency Buffer
 
 Set a buffer within which deployments are candidates for making calls to.
 
@@ -468,6 +468,122 @@ asyncio.run(router_acompletion())
 ```
 
 
+
+**Plug in a custom routing strategy to select deployments**
+
+
+Step 1. Define your custom routing strategy
+
+```python
+
+from litellm.router import CustomRoutingStrategyBase
+class CustomRoutingStrategy(CustomRoutingStrategyBase):
+    async def async_get_available_deployment(
+        self,
+        model: str,
+        messages: Optional[List[Dict[str, str]]] = None,
+        input: Optional[Union[str, List]] = None,
+        specific_deployment: Optional[bool] = False,
+        request_kwargs: Optional[Dict] = None,
+    ):
+        """
+        Asynchronously retrieves the available deployment based on the given parameters.
+
+        Args:
+            model (str): The name of the model.
+            messages (Optional[List[Dict[str, str]]], optional): The list of messages for a given request. Defaults to None.
+            input (Optional[Union[str, List]], optional): The input for a given embedding request. Defaults to None.
+            specific_deployment (Optional[bool], optional): Whether to retrieve a specific deployment. Defaults to False.
+            request_kwargs (Optional[Dict], optional): Additional request keyword arguments. Defaults to None.
+
+        Returns:
+            Returns an element from litellm.router.model_list
+
+        """
+        print("In CUSTOM async get available deployment")
+        model_list = router.model_list
+        print("router model list=", model_list)
+        for model in model_list:
+            if isinstance(model, dict):
+                if model["litellm_params"]["model"] == "openai/very-special-endpoint":
+                    return model
+        pass
+
+    def get_available_deployment(
+        self,
+        model: str,
+        messages: Optional[List[Dict[str, str]]] = None,
+        input: Optional[Union[str, List]] = None,
+        specific_deployment: Optional[bool] = False,
+        request_kwargs: Optional[Dict] = None,
+    ):
+        """
+        Synchronously retrieves the available deployment based on the given parameters.
+
+        Args:
+            model (str): The name of the model.
+            messages (Optional[List[Dict[str, str]]], optional): The list of messages for a given request. Defaults to None.
+            input (Optional[Union[str, List]], optional): The input for a given embedding request. Defaults to None.
+            specific_deployment (Optional[bool], optional): Whether to retrieve a specific deployment. Defaults to False.
+            request_kwargs (Optional[Dict], optional): Additional request keyword arguments. Defaults to None.
+
+        Returns:
+            Returns an element from litellm.router.model_list
+
+        """
+        pass
+```
+
+Step 2. Initialize Router with custom routing strategy
+```python
+from litellm import Router
+
+router = Router(
+    model_list=[
+        {
+            "model_name": "azure-model",
+            "litellm_params": {
+                "model": "openai/very-special-endpoint",
+                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",  # If you are Krrish, this is OpenAI Endpoint3 on our Railway endpoint :)
+                "api_key": "fake-key",
+            },
+            "model_info": {"id": "very-special-endpoint"},
+        },
+        {
+            "model_name": "azure-model",
+            "litellm_params": {
+                "model": "openai/fast-endpoint",
+                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                "api_key": "fake-key",
+            },
+            "model_info": {"id": "fast-endpoint"},
+        },
+    ],
+    set_verbose=True,
+    debug_level="DEBUG",
+    timeout=1,
+)  # type: ignore
+
+router.set_custom_routing_strategy(CustomRoutingStrategy())  # 👈 Set your routing strategy here
+```
+
+Step 3. Test your routing strategy. Expect your custom routing strategy to be called when running `router.acompletion` requests.
+```python
+for _ in range(10):
+    response = await router.acompletion(
+        model="azure-model", messages=[{"role": "user", "content": "hello"}]
+    )
+    print(response)
+    _picked_model_id = response._hidden_params["model_id"]
+    print("picked model=", _picked_model_id)
+```
+
+
+
+
 
 Picks a deployment based on the lowest cost
@@ -563,7 +679,6 @@ asyncio.run(router_acompletion())
 ```
 
 
-
 ## Basic Reliability
 

From cdc1e952ac10df63a5a5c7ca624cc7b42338c845 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Thu, 20 Jun 2024 14:36:51 -0700
Subject: [PATCH 4/4] router - add doc string

---
 litellm/router.py                           | 15 ++++++--
 litellm/tests/test_router_custom_routing.py | 40 ++++++++++++++++-----
 litellm/types/router.py                     | 30 +++++++++++++++-
 3 files changed, 74 insertions(+), 11 deletions(-)

diff --git a/litellm/router.py b/litellm/router.py
index 08efbc414..b4589c9f0 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -69,7 +69,7 @@ from litellm.types.router import (
     AlertingConfig,
     AllowedFailsPolicy,
     AssistantsTypedDict,
-    CustomRoutingStrategy,
+    CustomRoutingStrategyBase,
     Deployment,
     DeploymentTypedDict,
     LiteLLM_Params,
@@ -4815,7 +4815,18 @@ class Router:
             except Exception as e:
                 pass
 
-    def set_custom_routing_strategy(self, CustomRoutingStrategy: CustomRoutingStrategy):
+    def set_custom_routing_strategy(
+        self, CustomRoutingStrategy: CustomRoutingStrategyBase
+    ):
+        """
+        Sets get_available_deployment and async_get_available_deployment on an instance of litellm.Router
+
+        Use this to set your custom routing strategy
+
+        Args:
+            CustomRoutingStrategy: litellm.router.CustomRoutingStrategyBase
+        """
+
         setattr(
             self,
             "get_available_deployment",
diff --git a/litellm/tests/test_router_custom_routing.py b/litellm/tests/test_router_custom_routing.py
index d66c304be..afd602b93 100644
--- a/litellm/tests/test_router_custom_routing.py
+++ b/litellm/tests/test_router_custom_routing.py
@@ -21,10 +21,6 @@ import pytest
 
 import litellm
 from litellm import Router
-from litellm.caching import DualCache
-from litellm.router import CustomRoutingStrategy as BaseCustomRoutingStrategy
-from litellm.router import Deployment, LiteLLM_Params
-from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
 
 router = Router(
     model_list=[
@@ -49,11 +45,12 @@ router = Router(
     ],
     set_verbose=True,
     debug_level="DEBUG",
-    timeout=1,
-)  # type: ignore
+)
+
+from litellm.router import CustomRoutingStrategyBase
 
 
-class CustomRoutingStrategy(BaseCustomRoutingStrategy):
+class CustomRoutingStrategy(CustomRoutingStrategyBase):
     async def async_get_available_deployment(
         self,
         model: str,
@@ -62,6 +59,20 @@ class CustomRoutingStrategy(BaseCustomRoutingStrategy):
         specific_deployment: Optional[bool] = False,
         request_kwargs: Optional[Dict] = None,
     ):
+        """
+        Asynchronously retrieves the available deployment based on the given parameters.
+
+        Args:
+            model (str): The name of the model.
+            messages (Optional[List[Dict[str, str]]], optional): The list of messages for a given request. Defaults to None.
+            input (Optional[Union[str, List]], optional): The input for a given embedding request. Defaults to None.
+            specific_deployment (Optional[bool], optional): Whether to retrieve a specific deployment. Defaults to False.
+            request_kwargs (Optional[Dict], optional): Additional request keyword arguments. Defaults to None.
+
+        Returns:
+            Returns an element from litellm.router.model_list
+
+        """
         print("In CUSTOM async get available deployment")
         model_list = router.model_list
         print("router model list=", model_list)
@@ -79,7 +90,20 @@ class CustomRoutingStrategy(BaseCustomRoutingStrategy):
         specific_deployment: Optional[bool] = False,
         request_kwargs: Optional[Dict] = None,
     ):
-        # used for router.completion() calls
+        """
+        Synchronously retrieves the available deployment based on the given parameters.
+
+        Args:
+            model (str): The name of the model.
+            messages (Optional[List[Dict[str, str]]], optional): The list of messages for a given request. Defaults to None.
+            input (Optional[Union[str, List]], optional): The input for a given embedding request. Defaults to None.
+            specific_deployment (Optional[bool], optional): Whether to retrieve a specific deployment. Defaults to False.
+            request_kwargs (Optional[Dict], optional): Additional request keyword arguments. Defaults to None.
+
+        Returns:
+            Returns an element from litellm.router.model_list
+
+        """
         pass
 
 
diff --git a/litellm/types/router.py b/litellm/types/router.py
index 25b1b5c9c..206216ef0 100644
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@@ -453,7 +453,7 @@ class AssistantsTypedDict(TypedDict):
     litellm_params: LiteLLMParamsTypedDict
 
 
-class CustomRoutingStrategy:
+class CustomRoutingStrategyBase:
     async def async_get_available_deployment(
         self,
         model: str,
@@ -462,6 +462,20 @@ class CustomRoutingStrategy:
         specific_deployment: Optional[bool] = False,
         request_kwargs: Optional[Dict] = None,
     ):
+        """
+        Asynchronously retrieves the available deployment based on the given parameters.
+
+        Args:
+            model (str): The name of the model.
+            messages (Optional[List[Dict[str, str]]], optional): The list of messages for a given request. Defaults to None.
+            input (Optional[Union[str, List]], optional): The input for a given embedding request. Defaults to None.
+            specific_deployment (Optional[bool], optional): Whether to retrieve a specific deployment. Defaults to False.
+            request_kwargs (Optional[Dict], optional): Additional request keyword arguments. Defaults to None.
+
+        Returns:
+            Returns an element from litellm.router.model_list
+
+        """
         pass
 
     def get_available_deployment(
@@ -472,4 +486,18 @@ class CustomRoutingStrategy:
         specific_deployment: Optional[bool] = False,
         request_kwargs: Optional[Dict] = None,
     ):
+        """
+        Synchronously retrieves the available deployment based on the given parameters.
+
+        Args:
+            model (str): The name of the model.
+            messages (Optional[List[Dict[str, str]]], optional): The list of messages for a given request. Defaults to None.
+            input (Optional[Union[str, List]], optional): The input for a given embedding request. Defaults to None.
+            specific_deployment (Optional[bool], optional): Whether to retrieve a specific deployment. Defaults to False.
+            request_kwargs (Optional[Dict], optional): Additional request keyword arguments. Defaults to None.
+
+        Returns:
+            Returns an element from litellm.router.model_list
+
+        """
         pass
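
For illustration, here is a minimal sketch of an alternative selection policy built on the new `CustomRoutingStrategyBase` hook: a weighted-random pick across the deployments registered for the requested model group. The `weight` key read from `model_info` is a hypothetical field used only in this sketch (nothing in these patches interprets it); only `CustomRoutingStrategyBase`, `router.model_list`, and `set_custom_routing_strategy` come from the patch series itself.

```python
import random
from typing import Dict, List, Optional, Union

from litellm import Router
from litellm.router import CustomRoutingStrategyBase


class WeightedRandomStrategy(CustomRoutingStrategyBase):
    """Sketch: pick a deployment at random, biased by a hypothetical
    model_info["weight"] field (treated as 1 when absent)."""

    def __init__(self, router: Router):
        # keep a handle on the router instead of relying on a module-level global
        self.router = router

    def _pick(self, model: str):
        # only consider deployments registered under the requested model group
        candidates = [
            d
            for d in self.router.model_list
            if isinstance(d, dict) and d.get("model_name") == model
        ]
        if not candidates:
            return None
        weights = [d.get("model_info", {}).get("weight", 1) for d in candidates]
        return random.choices(candidates, weights=weights, k=1)[0]

    async def async_get_available_deployment(
        self,
        model: str,
        messages: Optional[List[Dict[str, str]]] = None,
        input: Optional[Union[str, List]] = None,
        specific_deployment: Optional[bool] = False,
        request_kwargs: Optional[Dict] = None,
    ):
        # used for router.acompletion() / async calls
        return self._pick(model)

    def get_available_deployment(
        self,
        model: str,
        messages: Optional[List[Dict[str, str]]] = None,
        input: Optional[Union[str, List]] = None,
        specific_deployment: Optional[bool] = False,
        request_kwargs: Optional[Dict] = None,
    ):
        # used for router.completion() / sync calls
        return self._pick(model)
```

With the router from the tests above, `router.set_custom_routing_strategy(WeightedRandomStrategy(router))` would spread traffic across `very-special-endpoint` and `fast-endpoint` in proportion to their (hypothetical) weights, rather than always returning the first match.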