From b6066d1eced2738c2b33ccb013e753063d191795 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Thu, 20 Jun 2024 13:49:44 -0700
Subject: [PATCH 1/4] feat - set custom routing strategy

---
 ...odel_prices_and_context_window_backup.json | 11 +++++++++-
 litellm/router.py                             | 13 +++++++++++
 litellm/types/router.py                       | 22 +++++++++++++++++++
 3 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index d1d221b45..1441d92a2 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -865,7 +865,7 @@
     },
     "deepseek-coder": {
         "max_tokens": 4096,
-        "max_input_tokens": 16000,
+        "max_input_tokens": 32000,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00000014,
         "output_cost_per_token": 0.00000028,
@@ -1984,6 +1984,15 @@
         "litellm_provider": "replicate",
         "mode": "chat"
     },
+    "openrouter/deepseek/deepseek-coder": {
+        "max_tokens": 4096,
+        "max_input_tokens": 32000,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.00000014,
+        "output_cost_per_token": 0.00000028,
+        "litellm_provider": "openrouter",
+        "mode": "chat"
+    },
     "openrouter/microsoft/wizardlm-2-8x22b:nitro": {
         "max_tokens": 65536,
         "input_cost_per_token": 0.000001,
diff --git a/litellm/router.py b/litellm/router.py
index 9200089d5..08efbc414 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -69,6 +69,7 @@ from litellm.types.router import (
     AlertingConfig,
     AllowedFailsPolicy,
     AssistantsTypedDict,
+    CustomRoutingStrategy,
     Deployment,
     DeploymentTypedDict,
     LiteLLM_Params,
@@ -4814,6 +4815,18 @@ class Router:
             except Exception as e:
                 pass
 
+    def set_custom_routing_strategy(self, CustomRoutingStrategy: CustomRoutingStrategy):
+        setattr(
+            self,
+            "get_available_deployment",
+            CustomRoutingStrategy.get_available_deployment,
+        )
+        setattr(
+            self,
+            "async_get_available_deployment",
+            CustomRoutingStrategy.async_get_available_deployment,
+        )
+
     def flush_cache(self):
         litellm.cache = None
         self.cache.flush_cache()
diff --git a/litellm/types/router.py b/litellm/types/router.py
index da3c999dc..25b1b5c9c 100644
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@@ -451,3 +451,25 @@ class ModelGroupInfo(BaseModel):
 class AssistantsTypedDict(TypedDict):
     custom_llm_provider: Literal["azure", "openai"]
     litellm_params: LiteLLMParamsTypedDict
+
+
+class CustomRoutingStrategy:
+    async def async_get_available_deployment(
+        self,
+        model: str,
+        messages: Optional[List[Dict[str, str]]] = None,
+        input: Optional[Union[str, List]] = None,
+        specific_deployment: Optional[bool] = False,
+        request_kwargs: Optional[Dict] = None,
+    ):
+        pass
+
+    def get_available_deployment(
+        self,
+        model: str,
+        messages: Optional[List[Dict[str, str]]] = None,
+        input: Optional[Union[str, List]] = None,
+        specific_deployment: Optional[bool] = False,
+        request_kwargs: Optional[Dict] = None,
+    ):
+        pass

From 7fb4e12b9f676c074941f6d2ea820d0b91fe464f Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Thu, 20 Jun 2024 14:11:33 -0700
Subject: [PATCH 2/4] test custom routing strat

---
 litellm/tests/test_router_custom_routing.py | 126 ++++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 litellm/tests/test_router_custom_routing.py

diff --git a/litellm/tests/test_router_custom_routing.py b/litellm/tests/test_router_custom_routing.py
new file mode 100644
index 000000000..d66c304be
--- /dev/null
+++ b/litellm/tests/test_router_custom_routing.py
@@ -0,0 +1,126 @@
+import asyncio
+import os
+import random
+import sys
+import time
+import traceback
+from datetime import datetime, timedelta
+
+from dotenv import load_dotenv
+
+load_dotenv()
+import copy
+import os
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+from typing import Dict, List, Optional, Union
+
+import pytest
+
+import litellm
+from litellm import Router
+from litellm.caching import DualCache
+from litellm.router import CustomRoutingStrategy as BaseCustomRoutingStrategy
+from litellm.router import Deployment, LiteLLM_Params
+from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
+
+router = Router(
+    model_list=[
+        {
+            "model_name": "azure-model",
+            "litellm_params": {
+                "model": "openai/very-special-endpoint",
+                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",  # If you are Krrish, this is OpenAI Endpoint3 on our Railway endpoint :)
+                "api_key": "fake-key",
+            },
+            "model_info": {"id": "very-special-endpoint"},
+        },
+        {
+            "model_name": "azure-model",
+            "litellm_params": {
+                "model": "openai/fast-endpoint",
+                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                "api_key": "fake-key",
+            },
+            "model_info": {"id": "fast-endpoint"},
+        },
+    ],
+    set_verbose=True,
+    debug_level="DEBUG",
+    timeout=1,
+)  # type: ignore
+
+
+class CustomRoutingStrategy(BaseCustomRoutingStrategy):
+    async def async_get_available_deployment(
+        self,
+        model: str,
+        messages: Optional[List[Dict[str, str]]] = None,
+        input: Optional[Union[str, List]] = None,
+        specific_deployment: Optional[bool] = False,
+        request_kwargs: Optional[Dict] = None,
+    ):
+        print("In CUSTOM async get available deployment")
+        model_list = router.model_list
+        print("router model list=", model_list)
+        for model in model_list:
+            if isinstance(model, dict):
+                if model["litellm_params"]["model"] == "openai/very-special-endpoint":
+                    return model
+        pass
+
+    def get_available_deployment(
+        self,
+        model: str,
+        messages: Optional[List[Dict[str, str]]] = None,
+        input: Optional[Union[str, List]] = None,
+        specific_deployment: Optional[bool] = False,
+        request_kwargs: Optional[Dict] = None,
+    ):
+        # used for router.completion() calls
+        pass
+
+
+@pytest.mark.asyncio
+async def test_custom_routing():
+    import litellm
+
+    litellm.set_verbose = True
+    router.set_custom_routing_strategy(CustomRoutingStrategy())
+
+    # make 4 requests
+    for _ in range(4):
+        try:
+            response = await router.acompletion(
+                model="azure-model", messages=[{"role": "user", "content": "hello"}]
+            )
+            print(response)
+        except Exception as e:
+            print("got exception", e)
+
+    await asyncio.sleep(1)
+    print("done sending initial requests")
+    """
+    Note: for debugging
+    - By this point, the custom routing strategy should have been called for each of the 4 requests
+    - The next 10 requests should all be routed to very-special-endpoint
+    """
+
+    deployments = {}
+    # make 10 requests
+    for _ in range(10):
+        response = await router.acompletion(
+            model="azure-model", messages=[{"role": "user", "content": "hello"}]
+        )
+        print(response)
+        _picked_model_id = response._hidden_params["model_id"]
+        if _picked_model_id not in deployments:
+            deployments[_picked_model_id] = 1
+        else:
+            deployments[_picked_model_id] += 1
+    print("deployments", deployments)
+
+    # ALL the requests should have been routed to very-special-endpoint
+    # assert deployments["very-special-endpoint"] == 10

From 91d9d59717b1cd57e5e39054652ec442f3920faa Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Thu, 20 Jun 2024 14:32:52 -0700
Subject: [PATCH 3/4] docs - routing

---
 docs/my-website/docs/routing.md | 123 ++++++++++++++++++++++++++++++--
 1 file changed, 119 insertions(+), 4 deletions(-)

diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md
index 63fac9456..fd4fb8658 100644
--- a/docs/my-website/docs/routing.md
+++ b/docs/my-website/docs/routing.md
@@ -95,7 +95,7 @@ print(response)
 - `router.image_generation()` - completion calls in OpenAI `/v1/images/generations` endpoint format
 - `router.aimage_generation()` - async image generation calls
 
-## Advanced - Routing Strategies
+## Advanced - Routing Strategies ⭐️
 #### Routing Strategies - Weighted Pick, Rate Limit Aware, Least Busy, Latency Based, Cost Based
 
 Router provides 4 strategies for routing your calls across multiple deployments:
@@ -262,7 +262,7 @@ if response is not None:
     )
 ```
 
-### Set Time Window
+#### Set Time Window
 
 Set time window for how far back to consider when averaging latency for a deployment.
 
@@ -278,7 +278,7 @@ router_settings:
   routing_strategy_args: {"ttl": 10}
 ```
 
-### Set Lowest Latency Buffer
+#### Set Lowest Latency Buffer
 
 Set a buffer within which deployments are candidates for making calls to.
 
@@ -468,6 +468,122 @@ asyncio.run(router_acompletion())
 ```
 
 
+
+**Plug in a custom routing strategy to select deployments**
+
+
+Step 1. Define your custom routing strategy
+
+```python
+
+from litellm.router import CustomRoutingStrategyBase
+class CustomRoutingStrategy(CustomRoutingStrategyBase):
+    async def async_get_available_deployment(
+        self,
+        model: str,
+        messages: Optional[List[Dict[str, str]]] = None,
+        input: Optional[Union[str, List]] = None,
+        specific_deployment: Optional[bool] = False,
+        request_kwargs: Optional[Dict] = None,
+    ):
+        """
+        Asynchronously retrieves the available deployment based on the given parameters.
+
+        Args:
+            model (str): The name of the model.
+            messages (Optional[List[Dict[str, str]]], optional): The list of messages for a given request. Defaults to None.
+            input (Optional[Union[str, List]], optional): The input for a given embedding request. Defaults to None.
+            specific_deployment (Optional[bool], optional): Whether to retrieve a specific deployment. Defaults to False.
+            request_kwargs (Optional[Dict], optional): Additional request keyword arguments. Defaults to None.
+
+        Returns:
+            Returns an element from litellm.router.model_list
+
+        """
+        print("In CUSTOM async get available deployment")
+        model_list = router.model_list
+        print("router model list=", model_list)
+        for model in model_list:
+            if isinstance(model, dict):
+                if model["litellm_params"]["model"] == "openai/very-special-endpoint":
+                    return model
+        pass
+
+    def get_available_deployment(
+        self,
+        model: str,
+        messages: Optional[List[Dict[str, str]]] = None,
+        input: Optional[Union[str, List]] = None,
+        specific_deployment: Optional[bool] = False,
+        request_kwargs: Optional[Dict] = None,
+    ):
+        """
+        Synchronously retrieves the available deployment based on the given parameters.
+
+        Args:
+            model (str): The name of the model.
+            messages (Optional[List[Dict[str, str]]], optional): The list of messages for a given request. Defaults to None.
+            input (Optional[Union[str, List]], optional): The input for a given embedding request. Defaults to None.
+            specific_deployment (Optional[bool], optional): Whether to retrieve a specific deployment. Defaults to False.
+            request_kwargs (Optional[Dict], optional): Additional request keyword arguments. Defaults to None.
+
+        Returns:
+            Returns an element from litellm.router.model_list
+
+        """
+        pass
+```
+
+Step 2. Initialize Router with custom routing strategy
+```python
+from litellm import Router
+
+router = Router(
+    model_list=[
+        {
+            "model_name": "azure-model",
+            "litellm_params": {
+                "model": "openai/very-special-endpoint",
+                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",  # If you are Krrish, this is OpenAI Endpoint3 on our Railway endpoint :)
+                "api_key": "fake-key",
+            },
+            "model_info": {"id": "very-special-endpoint"},
+        },
+        {
+            "model_name": "azure-model",
+            "litellm_params": {
+                "model": "openai/fast-endpoint",
+                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                "api_key": "fake-key",
+            },
+            "model_info": {"id": "fast-endpoint"},
+        },
+    ],
+    set_verbose=True,
+    debug_level="DEBUG",
+    timeout=1,
+)  # type: ignore
+
+router.set_custom_routing_strategy(CustomRoutingStrategy())  # 👈 Set your routing strategy here
+```
+
+Step 3. Test your routing strategy. Expect your custom routing strategy to be called when running `router.acompletion` requests.
+```python
+for _ in range(10):
+    response = await router.acompletion(
+        model="azure-model", messages=[{"role": "user", "content": "hello"}]
+    )
+    print(response)
+    _picked_model_id = response._hidden_params["model_id"]
+    print("picked model=", _picked_model_id)
+```
+
+
+
+
 
 Picks a deployment based on the lowest cost
@@ -563,7 +679,6 @@ asyncio.run(router_acompletion())
 ```
 
 
-
 ## Basic Reliability
 

From cdc1e952ac10df63a5a5c7ca624cc7b42338c845 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Thu, 20 Jun 2024 14:36:51 -0700
Subject: [PATCH 4/4] router - add doc string

---
 litellm/router.py                           | 15 ++++++--
 litellm/tests/test_router_custom_routing.py | 40 ++++++++++++++++-----
 litellm/types/router.py                     | 30 +++++++++++++++-
 3 files changed, 74 insertions(+), 11 deletions(-)

diff --git a/litellm/router.py b/litellm/router.py
index 08efbc414..b4589c9f0 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -69,7 +69,7 @@ from litellm.types.router import (
     AlertingConfig,
     AllowedFailsPolicy,
     AssistantsTypedDict,
-    CustomRoutingStrategy,
+    CustomRoutingStrategyBase,
     Deployment,
     DeploymentTypedDict,
     LiteLLM_Params,
@@ -4815,7 +4815,18 @@ class Router:
             except Exception as e:
                 pass
 
-    def set_custom_routing_strategy(self, CustomRoutingStrategy: CustomRoutingStrategy):
+    def set_custom_routing_strategy(
+        self, CustomRoutingStrategy: CustomRoutingStrategyBase
+    ):
+        """
+        Sets get_available_deployment and async_get_available_deployment on an instance of litellm.Router
+
+        Use this to set your custom routing strategy
+
+        Args:
+            CustomRoutingStrategy: litellm.router.CustomRoutingStrategyBase
+        """
+
         setattr(
             self,
             "get_available_deployment",
diff --git a/litellm/tests/test_router_custom_routing.py b/litellm/tests/test_router_custom_routing.py
index d66c304be..afd602b93 100644
--- a/litellm/tests/test_router_custom_routing.py
+++ b/litellm/tests/test_router_custom_routing.py
@@ -21,10 +21,6 @@ import pytest
 
 import litellm
 from litellm import Router
-from litellm.caching import DualCache
-from litellm.router import CustomRoutingStrategy as BaseCustomRoutingStrategy
-from litellm.router import Deployment, LiteLLM_Params
-from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
 
 router = Router(
     model_list=[
@@ -49,11 +45,12 @@ router = Router(
     ],
     set_verbose=True,
     debug_level="DEBUG",
-    timeout=1,
-)  # type: ignore
+)
+
+from litellm.router import CustomRoutingStrategyBase
 
 
-class CustomRoutingStrategy(BaseCustomRoutingStrategy):
+class CustomRoutingStrategy(CustomRoutingStrategyBase):
     async def async_get_available_deployment(
         self,
         model: str,
@@ -62,6 +59,20 @@ class CustomRoutingStrategy(BaseCustomRoutingStrategy):
         specific_deployment: Optional[bool] = False,
         request_kwargs: Optional[Dict] = None,
     ):
+        """
+        Asynchronously retrieves the available deployment based on the given parameters.
+
+        Args:
+            model (str): The name of the model.
+            messages (Optional[List[Dict[str, str]]], optional): The list of messages for a given request. Defaults to None.
+            input (Optional[Union[str, List]], optional): The input for a given embedding request. Defaults to None.
+            specific_deployment (Optional[bool], optional): Whether to retrieve a specific deployment. Defaults to False.
+            request_kwargs (Optional[Dict], optional): Additional request keyword arguments. Defaults to None.
+
+        Returns:
+            Returns an element from litellm.router.model_list
+
+        """
         print("In CUSTOM async get available deployment")
         model_list = router.model_list
         print("router model list=", model_list)
@@ -79,7 +90,20 @@ class CustomRoutingStrategy(BaseCustomRoutingStrategy):
         specific_deployment: Optional[bool] = False,
         request_kwargs: Optional[Dict] = None,
     ):
-        # used for router.completion() calls
+        """
+        Synchronously retrieves the available deployment based on the given parameters.
+
+        Args:
+            model (str): The name of the model.
+            messages (Optional[List[Dict[str, str]]], optional): The list of messages for a given request. Defaults to None.
+            input (Optional[Union[str, List]], optional): The input for a given embedding request. Defaults to None.
+            specific_deployment (Optional[bool], optional): Whether to retrieve a specific deployment. Defaults to False.
+            request_kwargs (Optional[Dict], optional): Additional request keyword arguments. Defaults to None.
+
+        Returns:
+            Returns an element from litellm.router.model_list
+
+        """
         pass
 
 
diff --git a/litellm/types/router.py b/litellm/types/router.py
index 25b1b5c9c..206216ef0 100644
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@@ -453,7 +453,7 @@ class AssistantsTypedDict(TypedDict):
     litellm_params: LiteLLMParamsTypedDict
 
 
-class CustomRoutingStrategy:
+class CustomRoutingStrategyBase:
     async def async_get_available_deployment(
         self,
         model: str,
@@ -462,6 +462,20 @@ class CustomRoutingStrategy:
         specific_deployment: Optional[bool] = False,
         request_kwargs: Optional[Dict] = None,
     ):
+        """
+        Asynchronously retrieves the available deployment based on the given parameters.
+
+        Args:
+            model (str): The name of the model.
+            messages (Optional[List[Dict[str, str]]], optional): The list of messages for a given request. Defaults to None.
+            input (Optional[Union[str, List]], optional): The input for a given embedding request. Defaults to None.
+            specific_deployment (Optional[bool], optional): Whether to retrieve a specific deployment. Defaults to False.
+            request_kwargs (Optional[Dict], optional): Additional request keyword arguments. Defaults to None.
+
+        Returns:
+            Returns an element from litellm.router.model_list
+
+        """
         pass
 
     def get_available_deployment(
@@ -472,4 +486,18 @@ class CustomRoutingStrategy:
         specific_deployment: Optional[bool] = False,
         request_kwargs: Optional[Dict] = None,
     ):
+        """
+        Synchronously retrieves the available deployment based on the given parameters.
+
+        Args:
+            model (str): The name of the model.
+            messages (Optional[List[Dict[str, str]]], optional): The list of messages for a given request. Defaults to None.
+            input (Optional[Union[str, List]], optional): The input for a given embedding request. Defaults to None.
+            specific_deployment (Optional[bool], optional): Whether to retrieve a specific deployment. Defaults to False.
+            request_kwargs (Optional[Dict], optional): Additional request keyword arguments. Defaults to None.
+
+        Returns:
+            Returns an element from litellm.router.model_list
+
+        """
         pass
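
For illustration, here is a minimal sketch of an alternative selection policy built on the new `CustomRoutingStrategyBase` hook: a weighted-random pick across the deployments registered for the requested model group. The `weight` key read from `model_info` is a hypothetical field used only in this sketch (nothing in these patches interprets it); only `CustomRoutingStrategyBase`, `router.model_list`, and `set_custom_routing_strategy` come from the patch series itself.

```python
import random
from typing import Dict, List, Optional, Union

from litellm import Router
from litellm.router import CustomRoutingStrategyBase


class WeightedRandomStrategy(CustomRoutingStrategyBase):
    """Sketch: pick a deployment at random, biased by a hypothetical
    model_info["weight"] field (treated as 1 when absent)."""

    def __init__(self, router: Router):
        # keep a handle on the router instead of relying on a module-level global
        self.router = router

    def _pick(self, model: str):
        # only consider deployments registered under the requested model group
        candidates = [
            d
            for d in self.router.model_list
            if isinstance(d, dict) and d.get("model_name") == model
        ]
        if not candidates:
            return None
        weights = [d.get("model_info", {}).get("weight", 1) for d in candidates]
        return random.choices(candidates, weights=weights, k=1)[0]

    async def async_get_available_deployment(
        self,
        model: str,
        messages: Optional[List[Dict[str, str]]] = None,
        input: Optional[Union[str, List]] = None,
        specific_deployment: Optional[bool] = False,
        request_kwargs: Optional[Dict] = None,
    ):
        # used for router.acompletion() / async calls
        return self._pick(model)

    def get_available_deployment(
        self,
        model: str,
        messages: Optional[List[Dict[str, str]]] = None,
        input: Optional[Union[str, List]] = None,
        specific_deployment: Optional[bool] = False,
        request_kwargs: Optional[Dict] = None,
    ):
        # used for router.completion() / sync calls
        return self._pick(model)
```

With the router from the tests above, `router.set_custom_routing_strategy(WeightedRandomStrategy(router))` would spread traffic across `very-special-endpoint` and `fast-endpoint` in proportion to their (hypothetical) weights, rather than always returning the first match.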