fix(router.py): generate consistent model id's
Having the same id for a deployment lets Redis usage caching work across multiple instances.
parent 180cf9bd5c
commit a47a719caa
4 changed files with 78 additions and 9 deletions
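
The idea, in brief: derive each deployment's id from its configuration rather than minting a fresh one per process, so every router instance that loads the same config computes the same id and therefore reads and writes the same Redis usage counters. A minimal standalone sketch of that idea (not the Router code itself; the helper name and the sort_keys-based serialization are illustrative, while the actual helper in the diff below simply concatenates keys and values):

```python
import hashlib
import json


def deployment_id(model_group: str, litellm_params: dict) -> str:
    # Serialize the config deterministically (sort_keys makes the result
    # independent of dict insertion order), then hash it.
    payload = model_group + json.dumps(litellm_params, sort_keys=True, default=str)
    return hashlib.sha256(payload.encode()).hexdigest()


params = {"model": "openai/my-fake-model", "api_key": "my-fake-key", "stream_timeout": 0.001}

# Two independent processes loading the same config derive the same id,
# so per-deployment usage keys such as f"{id}:tpm:{minute}" line up in Redis.
assert deployment_id("gpt-3.5-turbo", params) == deployment_id("gpt-3.5-turbo", params)
```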
@@ -11,7 +11,7 @@ import copy, httpx
 from datetime import datetime
 from typing import Dict, List, Optional, Union, Literal, Any, BinaryIO
 import random, threading, time, traceback, uuid
-import litellm, openai
+import litellm, openai, hashlib, json
 from litellm.caching import RedisCache, InMemoryCache, DualCache

 import logging, asyncio
@@ -2072,6 +2072,34 @@ class Router:
                    local_only=True,
                )  # cache for 1 hr

+    def _generate_model_id(self, model_group: str, litellm_params: dict):
+        """
+        Helper function to consistently generate the same id for a deployment
+
+        - create a string from all the litellm params
+        - hash
+        - use hash as id
+        """
+        concat_str = model_group
+        for k, v in litellm_params.items():
+            if isinstance(k, str):
+                concat_str += k
+            elif isinstance(k, dict):
+                concat_str += json.dumps(k)
+            else:
+                concat_str += str(k)
+
+            if isinstance(v, str):
+                concat_str += v
+            elif isinstance(v, dict):
+                concat_str += json.dumps(v)
+            else:
+                concat_str += str(v)
+
+        hash_object = hashlib.sha256(concat_str.encode())
+
+        return hash_object.hexdigest()
+
     def set_model_list(self, model_list: list):
         original_model_list = copy.deepcopy(model_list)
         self.model_list = []
@@ -2087,7 +2115,13 @@ class Router:
                    if isinstance(v, str) and v.startswith("os.environ/"):
                        _litellm_params[k] = litellm.get_secret(v)

-            _model_info = model.pop("model_info", {})
+            _model_info: dict = model.pop("model_info", {})
+
+            # check if model info has id
+            if "id" not in _model_info:
+                _id = self._generate_model_id(_model_name, _litellm_params)
+                _model_info["id"] = _id
+
             deployment = Deployment(
                 **model,
                 model_name=_model_name,
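
For contrast, a sketch of the behavior the hash-based id replaces. The assumption here, not shown in this diff, is that a deployment with no explicit id previously fell back to a randomly generated uuid, which differs on every instance:

```python
import uuid

# Random fallback id: each process mints its own value at startup...
id_on_instance_a = str(uuid.uuid4())
id_on_instance_b = str(uuid.uuid4())
assert id_on_instance_a != id_on_instance_b

# ...so usage tracked under f"{id}:tpm:..." / f"{id}:rpm:..." never aggregates
# across instances. _generate_model_id above removes that divergence.
```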
@@ -3,6 +3,7 @@

 import dotenv, os, requests, random
 from typing import Optional, Union, List, Dict
+import datetime as datetime_og
 from datetime import datetime

 dotenv.load_dotenv()  # Loading env variables using dotenv
@@ -59,7 +60,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
         # ------------
         # Setup values
         # ------------
-        current_minute = datetime.now().strftime("%H-%M")
+        current_minute = datetime.now(datetime_og.UTC).strftime("%H-%M")
         tpm_key = f"{model_group}:tpm:{current_minute}"
         rpm_key = f"{model_group}:rpm:{current_minute}"

@@ -109,7 +110,9 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
         # ------------
         # Setup values
         # ------------
-        current_minute = datetime.now().strftime("%H-%M")
+        current_minute = datetime.now(datetime_og.UTC).strftime(
+            "%H-%M"
+        )  # use the same timezone regardless of system clock

         tpm_key = f"{id}:tpm:{current_minute}"
         rpm_key = f"{id}:rpm:{current_minute}"
@@ -162,7 +165,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
             f"get_available_deployments - Usage Based. model_group: {model_group}, healthy_deployments: {healthy_deployments}"
         )

-        current_minute = datetime.now().strftime("%H-%M")
+        current_minute = datetime.now(datetime_og.UTC).strftime("%H-%M")
         tpm_keys = []
         rpm_keys = []
         for m in healthy_deployments:
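
The routing-strategy changes above pin the per-minute window to UTC, so routers running under different system time zones bucket usage into the same minute key. A small sketch of the difference; the deployment id and key format are illustrative, and `datetime_og.UTC` requires Python 3.11+ (older versions can use `datetime_og.timezone.utc`):

```python
import datetime as datetime_og
from datetime import datetime

deployment_id = "abc123"  # illustrative deployment id

# Naive local time: instances in different time zones can disagree on the
# "%H-%M" window and increment different Redis keys for the same minute.
local_minute = datetime.now().strftime("%H-%M")

# UTC: every instance computes the same window for the same wall-clock minute.
utc_minute = datetime.now(datetime_og.UTC).strftime("%H-%M")

tpm_key = f"{deployment_id}:tpm:{utc_minute}"
rpm_key = f"{deployment_id}:rpm:{utc_minute}"
print(tpm_key, rpm_key)
```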
@@ -932,6 +932,35 @@ def test_openai_completion_on_router():
 # test_openai_completion_on_router()


+def test_consistent_model_id():
+    """
+    - For a given model group + litellm params, assert the model id is always the same
+
+    Test on `_generate_model_id`
+
+    Test on `set_model_list`
+
+    Test on `_add_deployment`
+    """
+    model_group = "gpt-3.5-turbo"
+    litellm_params = {
+        "model": "openai/my-fake-model",
+        "api_key": "my-fake-key",
+        "api_base": "https://openai-function-calling-workers.tasslexyz.workers.dev/",
+        "stream_timeout": 0.001,
+    }
+
+    id1 = Router()._generate_model_id(
+        model_group=model_group, litellm_params=litellm_params
+    )
+
+    id2 = Router()._generate_model_id(
+        model_group=model_group, litellm_params=litellm_params
+    )
+
+    assert id1 == id2
+
+
 def test_reading_keys_os_environ():
     import openai

@@ -47,6 +47,7 @@ class RouterConfig(BaseModel):
     class Config:
         protected_namespaces = ()


 class ModelInfo(BaseModel):
     id: Optional[
         str
@@ -132,9 +133,11 @@ class Deployment(BaseModel):
     litellm_params: LiteLLM_Params
     model_info: ModelInfo

-    def __init__(self, model_info: Optional[ModelInfo] = None, **params):
+    def __init__(self, model_info: Optional[Union[ModelInfo, dict]] = None, **params):
         if model_info is None:
             model_info = ModelInfo()
+        elif isinstance(model_info, dict):
+            model_info = ModelInfo(**model_info)
         super().__init__(model_info=model_info, **params)

     def to_json(self, **kwargs):
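
The `Deployment.__init__` change lets `model_info` arrive as a plain dict (for example, straight out of a parsed config) and converts it to a `ModelInfo` instance. A hypothetical usage sketch; the import path and the exact set of accepted fields are assumptions based on this diff:

```python
from litellm.types.router import Deployment  # assumed import path

# model_info passed as a dict is coerced to ModelInfo inside __init__,
# so set_model_list can hand over {"id": <generated hash>} directly.
dep = Deployment(
    model_name="gpt-3.5-turbo",
    litellm_params={"model": "openai/my-fake-model", "api_key": "my-fake-key"},
    model_info={"id": "abc123"},
)
print(dep.model_info.id)  # -> "abc123"
```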