LiteLLM Minor Fixes & Improvements (01/10/2025) - p1 (#7670)

* test(test_get_model_info.py): add unit test confirming router deployment updates global 'get_model_info'
* fix(get_supported_openai_params.py): fix custom llm provider 'get_supported_openai_params'. Fixes https://github.com/BerriAI/litellm/issues/7668
* docs(azure.md): clarify how azure ad token refresh on proxy works. Closes https://github.com/BerriAI/litellm/issues/7665
2025-04-25 02:34:29 +00:00 · 2025-01-10 17:49:05 -08:00 · 2025-01-10 17:49:05 -08:00 · a3e65c9bcb
commit a3e65c9bcb
parent 8576ca8ccb
6 changed files with 112 additions and 5 deletions
--- a/docs/my-website/docs/providers/azure.md
+++ b/docs/my-website/docs/providers/azure.md
@@ -587,6 +587,16 @@ response = completion(
 </TabItem>
 <TabItem value="proxy" label="PROXY config.yaml">

+1. Add the relevant environment variables
+
+```bash
+export AZURE_TENANT_ID=""
+export AZURE_CLIENT_ID=""
+export AZURE_CLIENT_SECRET=""
+```
+
+2. Set up config.yaml
+
 ```yaml
 model_list:
  - model_name: gpt-3.5-turbo
@@ -598,6 +608,12 @@ litellm_settings:
    enable_azure_ad_token_refresh: true # 👈 KEY CHANGE
 ```

+3. Start the proxy
+
+```bash
+litellm --config /path/to/config.yaml
+```
+
 </TabItem>
 </Tabs>

--- a/litellm/litellm_core_utils/get_supported_openai_params.py
+++ b/litellm/litellm_core_utils/get_supported_openai_params.py
@@ -1,6 +1,7 @@
 from typing import Literal, Optional

 import litellm
+from litellm import LlmProviders
 from litellm.exceptions import BadRequestError


@@ -199,5 +200,15 @@ def get_supported_openai_params(  # noqa: PLR0915
                    model=model
                )
            )
+    elif custom_llm_provider in litellm._custom_providers:
+        if request_type == "chat_completion":
+            provider_config = litellm.ProviderConfigManager.get_provider_chat_config(
+                model=model, provider=LlmProviders.CUSTOM
+            )
+            return provider_config.get_supported_openai_params(model=model)
+        elif request_type == "embeddings":
+            return None
+        elif request_type == "transcription":
+            return None

    return None
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -30,6 +30,7 @@ model_list:
    model_info:
      input_cost_per_token: 0.0000006
      output_cost_per_token: 0.0000006
+  

 # litellm_settings:
 #   key_generation_settings:
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1946,16 +1946,15 @@ def register_model(model_cost: Union[str, dict]):  # noqa: PLR0915
    for key, value in loaded_model_cost.items():
        ## get model info ##
        try:
-            existing_model: Union[ModelInfo, dict] = get_model_info(model=key)
+            existing_model: dict = cast(dict, get_model_info(model=key))
            model_cost_key = existing_model["key"]
        except Exception:
            existing_model = {}
            model_cost_key = key
        ## override / add new keys to the existing model cost dictionary
-        litellm.model_cost.setdefault(model_cost_key, {}).update(
-            _update_dictionary(existing_model, value)  # type: ignore
-        )
-        verbose_logger.debug(f"{key} added to model cost map")
+        updated_dictionary = _update_dictionary(existing_model, value)
+        litellm.model_cost.setdefault(model_cost_key, {}).update(updated_dictionary)
+        verbose_logger.debug(f"{model_cost_key} added to model cost map")
        # add new model names to provider lists
        if value.get("litellm_provider") == "openai":
            if key not in litellm.open_ai_chat_completion_models:
--- a/tests/local_testing/test_custom_llm.py
+++ b/tests/local_testing/test_custom_llm.py
@@ -397,3 +397,58 @@ async def test_image_generation_async_additional_params():
        mock_client.call_args.kwargs["optional_params"] == {
            "my_custom_param": "my-custom-param"
        }
+
+
+def test_get_supported_openai_params():
+
+    class MyCustomLLM(CustomLLM):
+
+        # This is what `get_supported_openai_params` should be returning:
+        def get_supported_openai_params(self, model: str) -> list[str]:
+            return [
+                "tools",
+                "tool_choice",
+                "temperature",
+                "top_p",
+                "top_k",
+                "min_p",
+                "typical_p",
+                "stop",
+                "seed",
+                "response_format",
+                "max_tokens",
+                "presence_penalty",
+                "frequency_penalty",
+                "repeat_penalty",
+                "tfs_z",
+                "mirostat_mode",
+                "mirostat_tau",
+                "mirostat_eta",
+                "logit_bias",
+            ]
+
+        def completion(self, *args, **kwargs) -> litellm.ModelResponse:
+            return litellm.completion(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": "Hello world"}],
+                mock_response="Hi!",
+            )  # type: ignore
+
+    my_custom_llm = MyCustomLLM()
+
+    litellm.custom_provider_map = [  # 👈 KEY STEP - REGISTER HANDLER
+        {"provider": "my-custom-llm", "custom_handler": my_custom_llm}
+    ]
+
+    resp = completion(
+        model="my-custom-llm/my-fake-model",
+        messages=[{"role": "user", "content": "Hello world!"}],
+    )
+
+    assert resp.choices[0].message.content == "Hi!"
+
+    # Get supported openai params
+    from litellm import get_supported_openai_params
+
+    response = get_supported_openai_params(model="my-custom-llm/my-fake-model")
+    assert response is not None
--- a/tests/local_testing/test_get_model_info.py
+++ b/tests/local_testing/test_get_model_info.py
@@ -285,3 +285,28 @@ def test_get_model_info_custom_provider():
    get_model_info(
        model="my-custom-llm/my-fake-model"
    )  # 💥 "Exception: This model isn't mapped yet." in v1.56.10
+
+
+def test_get_model_info_custom_model_router():
+    from litellm import Router
+    from litellm import get_model_info
+
+    litellm._turn_on_debug()
+
+    router = Router(
+        model_list=[
+            {
+                "model_name": "ma-summary",
+                "litellm_params": {
+                    "api_base": "http://ma-mix-llm-serving.cicero.svc.cluster.local/v1",
+                    "input_cost_per_token": 1,
+                    "output_cost_per_token": 1,
+                    "model": "openai/meta-llama/Meta-Llama-3-8B-Instruct",
+                    "model_id": "c20d603e-1166-4e0f-aa65-ed9c476ad4ca",
+                },
+            }
+        ]
+    )
+    info = get_model_info("openai/meta-llama/Meta-Llama-3-8B-Instruct")
+    print("info", info)
+    assert info is not None