fix(utils.py): support get_max_tokens() call with same model_name as completion

Closes https://github.com/BerriAI/litellm/issues/3921
Author: Krrish Dholakia
Date:   2024-05-31 21:37:25 -07:00
Parent: b8df5d1a01
Commit: 7523f803d2
2 changed files with 19 additions and 0 deletions

litellm/utils.py

@@ -7065,6 +7065,11 @@ def get_max_tokens(model: str):
         if custom_llm_provider == "huggingface":
             max_tokens = _get_max_position_embeddings(model_name=model)
             return max_tokens
+        if model in litellm.model_cost:  # check if extracted model is in model_list
+            if "max_output_tokens" in litellm.model_cost[model]:
+                return litellm.model_cost[model]["max_output_tokens"]
+            elif "max_tokens" in litellm.model_cost[model]:
+                return litellm.model_cost[model]["max_tokens"]
         else:
             raise Exception()
     except:
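
For context (not part of the commit): a minimal usage sketch of what this change enables, assuming the fix lets get_max_tokens() resolve the same model string that completion() accepts, provided the model appears in litellm.model_cost. The model name and printed value below are illustrative.

import litellm

# get_max_tokens() now accepts the same model string used for completion(),
# returning the model's "max_output_tokens" (falling back to "max_tokens")
# from litellm.model_cost.
print(litellm.get_max_tokens("gpt-3.5-turbo"))  # e.g. 4096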