fix(router.py): generate consistent model ids

Having the same id for a deployment lets Redis usage caching work across multiple instances.
Krrish Dholakia 2024-04-10 15:23:57 -07:00
parent 180cf9bd5c
commit a47a719caa
4 changed files with 78 additions and 9 deletions
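
The idea in isolation, as a minimal sketch (the helper name and the use of sort_keys here are illustrative, not litellm's API): derive the deployment id by hashing the model group plus the deployment's params, so every instance that loads the same config computes the same id, and therefore the same Redis keys.

    import hashlib, json

    def stable_deployment_id(model_group: str, litellm_params: dict) -> str:
        # Serialize deterministically, then hash; any process with the same
        # config derives the identical 64-char hex id without coordination.
        payload = model_group + json.dumps(litellm_params, sort_keys=True, default=str)
        return hashlib.sha256(payload.encode()).hexdigest()

    params = {"model": "openai/my-fake-model", "api_key": "my-fake-key", "rpm": 100}
    assert stable_deployment_id("gpt-3.5-turbo", params) == stable_deployment_id(
        "gpt-3.5-turbo", params
    )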

litellm/router.py

@@ -11,7 +11,7 @@ import copy, httpx
 from datetime import datetime
 from typing import Dict, List, Optional, Union, Literal, Any, BinaryIO
 import random, threading, time, traceback, uuid
-import litellm, openai
+import litellm, openai, hashlib, json
 from litellm.caching import RedisCache, InMemoryCache, DualCache
 import logging, asyncio
@@ -2072,6 +2072,34 @@ class Router:
             local_only=True,
         )  # cache for 1 hr

+    def _generate_model_id(self, model_group: str, litellm_params: dict):
+        """
+        Helper function to consistently generate the same id for a deployment
+
+        - create a string from all the litellm params
+        - hash
+        - use hash as id
+        """
+        concat_str = model_group
+        for k, v in litellm_params.items():
+            if isinstance(k, str):
+                concat_str += k
+            elif isinstance(k, dict):
+                concat_str += json.dumps(k)
+            else:
+                concat_str += str(k)
+
+            if isinstance(v, str):
+                concat_str += v
+            elif isinstance(v, dict):
+                concat_str += json.dumps(v)
+            else:
+                concat_str += str(v)
+
+        hash_object = hashlib.sha256(concat_str.encode())
+        return hash_object.hexdigest()
+
     def set_model_list(self, model_list: list):
         original_model_list = copy.deepcopy(model_list)
         self.model_list = []
@@ -2087,7 +2115,13 @@ class Router:
                if isinstance(v, str) and v.startswith("os.environ/"):
                    _litellm_params[k] = litellm.get_secret(v)

-            _model_info = model.pop("model_info", {})
+            _model_info: dict = model.pop("model_info", {})
+
+            # check if model info has id
+            if "id" not in _model_info:
+                _id = self._generate_model_id(_model_name, _litellm_params)
+                _model_info["id"] = _id
+
             deployment = Deployment(
                 **model,
                 model_name=_model_name,
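
One property of `_generate_model_id` worth noting: it concatenates `litellm_params` in dict insertion order, so the id is only stable when every instance builds the params in the same order, which holds when they all load the same config file. A quick standalone illustration of that sensitivity (hypothetical snippet mirroring the concatenation, not litellm code):

    import hashlib

    def concat_hash(model_group: str, params: dict) -> str:
        s = model_group
        for k, v in params.items():
            s += str(k) + str(v)
        return hashlib.sha256(s.encode()).hexdigest()

    a = concat_hash("gpt-3.5-turbo", {"model": "openai/x", "rpm": 100})
    b = concat_hash("gpt-3.5-turbo", {"rpm": 100, "model": "openai/x"})
    assert a != b  # same params, different insertion order -> different id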

litellm/router_strategy/lowest_tpm_rpm_v2.py

@@ -3,6 +3,7 @@
 import dotenv, os, requests, random
 from typing import Optional, Union, List, Dict
+import datetime as datetime_og
 from datetime import datetime

 dotenv.load_dotenv()  # Loading env variables using dotenv
@@ -59,7 +60,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
         # ------------
         # Setup values
         # ------------
-        current_minute = datetime.now().strftime("%H-%M")
+        current_minute = datetime.now(datetime_og.UTC).strftime("%H-%M")
         tpm_key = f"{model_group}:tpm:{current_minute}"
         rpm_key = f"{model_group}:rpm:{current_minute}"
@@ -109,7 +110,9 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
         # ------------
         # Setup values
         # ------------
-        current_minute = datetime.now().strftime("%H-%M")
+        current_minute = datetime.now(datetime_og.UTC).strftime(
+            "%H-%M"
+        )  # use the same timezone regardless of system clock
         tpm_key = f"{id}:tpm:{current_minute}"
         rpm_key = f"{id}:rpm:{current_minute}"
@@ -162,7 +165,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
             f"get_available_deployments - Usage Based. model_group: {model_group}, healthy_deployments: {healthy_deployments}"
         )
-        current_minute = datetime.now().strftime("%H-%M")
+        current_minute = datetime.now(datetime_og.UTC).strftime("%H-%M")
         tpm_keys = []
         rpm_keys = []
         for m in healthy_deployments:
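
The UTC change serves the same cross-instance goal: TPM/RPM counters are bucketed by minute, and two routers running in different timezones must land on the same Redis key. A minimal sketch of the bucketing (key format taken from the diff above; `datetime.UTC` needs Python 3.11+, `datetime.timezone.utc` is the portable spelling):

    import datetime as datetime_og
    from datetime import datetime

    model_group = "gpt-3.5-turbo"
    current_minute = datetime.now(datetime_og.UTC).strftime("%H-%M")

    # Every instance increments the same bucket, regardless of its system timezone.
    tpm_key = f"{model_group}:tpm:{current_minute}"
    rpm_key = f"{model_group}:rpm:{current_minute}"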

litellm/tests/test_router.py

@@ -932,6 +932,35 @@ def test_openai_completion_on_router():
 # test_openai_completion_on_router()
+def test_consistent_model_id():
+    """
+    - For a given model group + litellm params, assert the model id is always the same
+
+    Test on `_generate_model_id`
+    Test on `set_model_list`
+    Test on `_add_deployment`
+    """
+    model_group = "gpt-3.5-turbo"
+    litellm_params = {
+        "model": "openai/my-fake-model",
+        "api_key": "my-fake-key",
+        "api_base": "https://openai-function-calling-workers.tasslexyz.workers.dev/",
+        "stream_timeout": 0.001,
+    }
+
+    id1 = Router()._generate_model_id(
+        model_group=model_group, litellm_params=litellm_params
+    )
+
+    id2 = Router()._generate_model_id(
+        model_group=model_group, litellm_params=litellm_params
+    )
+
+    assert id1 == id2
+
+
 def test_reading_keys_os_environ():
     import openai
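
The same guarantee should extend through `set_model_list`: two routers built from identical config end up with identical deployment ids. A hedged sketch of that check (the `model_list` access pattern is an assumption about this version's internals, not a documented API):

    from litellm import Router

    model_list = [{
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {"model": "openai/my-fake-model", "api_key": "my-fake-key"},
    }]

    r1, r2 = Router(model_list=model_list), Router(model_list=model_list)
    # Assumed accessor; the exact shape of Router.model_list entries may differ.
    id1 = r1.model_list[0]["model_info"]["id"]
    id2 = r2.model_list[0]["model_info"]["id"]
    assert id1 == id2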

litellm/types/router.py

@@ -13,7 +13,7 @@ class ModelConfig(BaseModel):
     rpm: int

     class Config:
         protected_namespaces = ()


 class RouterConfig(BaseModel):
@@ -45,7 +45,8 @@ class RouterConfig(BaseModel):
     ] = "simple-shuffle"

     class Config:
         protected_namespaces = ()


 class ModelInfo(BaseModel):
     id: Optional[
@@ -132,9 +133,11 @@ class Deployment(BaseModel):
     litellm_params: LiteLLM_Params
     model_info: ModelInfo

-    def __init__(self, model_info: Optional[ModelInfo] = None, **params):
+    def __init__(self, model_info: Optional[Union[ModelInfo, dict]] = None, **params):
         if model_info is None:
             model_info = ModelInfo()
+        elif isinstance(model_info, dict):
+            model_info = ModelInfo(**model_info)
         super().__init__(model_info=model_info, **params)

     def to_json(self, **kwargs):
@@ -146,7 +149,7 @@ class Deployment(BaseModel):
     class Config:
         extra = "allow"
         protected_namespaces = ()

     def __contains__(self, key):
         # Define custom behavior for the 'in' operator
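
The `Union[ModelInfo, dict]` widening is what lets `set_model_list` hand the raw `model_info` dict (with the injected id) straight to `Deployment`. A small sketch of the two call shapes that are now equivalent (import path assumed from the class names above):

    from litellm.types.router import Deployment, ModelInfo

    # A ModelInfo instance, as before:
    d1 = Deployment(
        model_name="gpt-3.5-turbo",
        litellm_params={"model": "openai/my-fake-model"},
        model_info=ModelInfo(id="abc123"),
    )

    # A plain dict, now coerced to ModelInfo inside __init__:
    d2 = Deployment(
        model_name="gpt-3.5-turbo",
        litellm_params={"model": "openai/my-fake-model"},
        model_info={"id": "abc123"},
    )
    assert d1.model_info.id == d2.model_info.id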