feat(proxy_server.py): expose new /model_group/info endpoint

Returns model-group-level info on supported params, max tokens, pricing, etc.
Krrish Dholakia 2024-05-26 14:07:35 -07:00
parent bec13d465a
commit 22b6b99b34
6 changed files with 191 additions and 16 deletions
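To illustrate what the new route exposes, here is a minimal client-side sketch. The proxy base URL, API key, and response field names ("data", "model_group", "max_input_tokens") are assumptions for illustration and are not taken from this diff:

    import requests

    # Hypothetical base URL and key for a locally running LiteLLM proxy;
    # adjust both to match your deployment.
    PROXY_BASE_URL = "http://0.0.0.0:4000"
    headers = {"Authorization": "Bearer sk-1234"}

    # Query the new model-group level info endpoint added by this commit.
    resp = requests.get(f"{PROXY_BASE_URL}/model_group/info", headers=headers)
    resp.raise_for_status()

    # Assumed response shape: a list of model groups, each carrying
    # supported params, max tokens, and pricing.
    for group in resp.json().get("data", []):
        print(group.get("model_group"), group.get("max_input_tokens"))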


@@ -34,7 +34,7 @@ from dataclasses import (
 import litellm._service_logger  # for storing API inputs, outputs, and metadata
 from litellm.llms.custom_httpx.http_handler import HTTPHandler
 from litellm.caching import DualCache
-from litellm.types.utils import CostPerToken, ProviderField
+from litellm.types.utils import CostPerToken, ProviderField, ModelInfo
 
 oidc_cache = DualCache()
@@ -7092,7 +7092,7 @@ def get_max_tokens(model: str):
         )
-def get_model_info(model: str):
+def get_model_info(model: str) -> ModelInfo:
     """
     Get a dict for the maximum tokens (context window),
     input_cost_per_token, output_cost_per_token for a given model.
@@ -7154,7 +7154,7 @@ def get_model_info(model: str):
         if custom_llm_provider == "huggingface":
             max_tokens = _get_max_position_embeddings(model_name=model)
             return {
-                "max_tokens": max_tokens,
+                "max_tokens": max_tokens,  # type: ignore
                 "input_cost_per_token": 0,
                 "output_cost_per_token": 0,
                 "litellm_provider": "huggingface",