mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)

Merge pull request #5431 from BerriAI/litellm_Add_fireworks_ai_health_check

[Fix-Proxy] /health check for provider wildcard models (fireworks/*)

Commit: 5851a8f901
4 changed files with 71 additions and 2 deletions
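For orientation, a minimal sketch of how the new behavior can be exercised directly; it simply mirrors the test added at the end of this commit and assumes FIREWORKS_AI_API_KEY is set in the environment.

# Illustrative sketch only: call the async health check with a provider wildcard
# model, which is the case this PR fixes (mirrors test_fireworks_health_check below).
import asyncio
import os

import litellm


async def main():
    response = await litellm.ahealth_check(
        model_params={
            "api_key": os.environ.get("FIREWORKS_AI_API_KEY"),
            "model": "fireworks_ai/*",
            "messages": [{"role": "user", "content": "What's 1 + 1?"}],
        },
        mode=None,
        prompt="What's 1 + 1?",
        input=["test from litellm"],
        default_timeout=6000,
    )
    print(response)  # expected: {} when the provider responds successfully


asyncio.run(main())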
@@ -363,6 +363,7 @@ ai21_models: List = []
 nlp_cloud_models: List = []
 aleph_alpha_models: List = []
 bedrock_models: List = []
+fireworks_ai_models: List = []
 deepinfra_models: List = []
 perplexity_models: List = []
 watsonx_models: List = []
@@ -423,6 +424,8 @@ for key, value in model_cost.items():
         watsonx_models.append(key)
     elif value.get("litellm_provider") == "gemini":
         gemini_models.append(key)
+    elif value.get("litellm_provider") == "fireworks_ai":
+        fireworks_ai_models.append(key)
 # known openai compatible endpoints - we'll eventually move this list to the model_prices_and_context_window.json dictionary
 openai_compatible_endpoints: List = [
     "api.perplexity.ai",
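To make the classification loop above concrete, a small self-contained sketch; the model name and per-token prices below are invented for illustration and are not taken from model_prices_and_context_window.json.

# Hypothetical pricing entry; only the "litellm_provider" key matters for the loop above.
model_cost = {
    "fireworks_ai/accounts/fireworks/models/example-model": {
        "litellm_provider": "fireworks_ai",
        "input_cost_per_token": 2e-07,
        "output_cost_per_token": 2e-07,
    }
}

fireworks_ai_models = []
for key, value in model_cost.items():
    if value.get("litellm_provider") == "fireworks_ai":
        fireworks_ai_models.append(key)

print(fireworks_ai_models)
# ['fireworks_ai/accounts/fireworks/models/example-model']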
@@ -726,6 +729,7 @@ models_by_provider: dict = {
     "maritalk": maritalk_models,
     "watsonx": watsonx_models,
     "gemini": gemini_models,
+    "fireworks_ai": fireworks_ai_models,
 }
 
 # mapping for those models which have larger equivalents
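With the mapping above in place, expanding a provider wildcard reduces to a dictionary lookup. A quick check (the concrete model names depend on litellm's bundled pricing data, so the output will vary):

import litellm

fireworks_models = litellm.models_by_provider.get("fireworks_ai", [])
print(len(fireworks_models), fireworks_models[:3])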
@@ -1,5 +1,7 @@
 from typing import Dict, Optional
 
+import litellm
+
 
 def _ensure_extra_body_is_safe(extra_body: Optional[Dict]) -> Optional[Dict]:
     """
@@ -26,3 +28,26 @@ def _ensure_extra_body_is_safe(extra_body: Optional[Dict]) -> Optional[Dict]:
         extra_body["metadata"]["prompt"] = _prompt.__dict__
 
     return extra_body
+
+
+def pick_cheapest_model_from_llm_provider(custom_llm_provider: str):
+    """
+    Pick the cheapest model from the LLM provider, ranked by input + output cost per token.
+    """
+    if custom_llm_provider not in litellm.models_by_provider:
+        raise ValueError(f"Unknown LLM provider: {custom_llm_provider}")
+
+    known_models = litellm.models_by_provider.get(custom_llm_provider, [])
+    min_cost = float("inf")
+    cheapest_model = None
+    for model in known_models:
+        model_info = litellm.get_model_info(
+            model=model, custom_llm_provider=custom_llm_provider
+        )
+        _cost = model_info.get("input_cost_per_token", 0) + model_info.get(
+            "output_cost_per_token", 0
+        )
+        if _cost < min_cost:
+            min_cost = _cost
+            cheapest_model = model
+    return cheapest_model
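A minimal usage sketch of the helper added above; selecting the cheapest known model keeps wildcard health probes inexpensive while still exercising real provider credentials. It assumes litellm's bundled pricing data has populated models_by_provider for fireworks_ai at import time.

import litellm
from litellm.litellm_core_utils.llm_request_utils import (
    pick_cheapest_model_from_llm_provider,
)

# Returns the fireworks_ai model with the lowest combined cost per token.
cheapest = pick_cheapest_model_from_llm_provider(custom_llm_provider="fireworks_ai")
print(cheapest)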
@@ -5076,6 +5076,18 @@ async def ahealth_check(
             model_params["prompt"] = prompt
             await litellm.aimage_generation(**model_params)
             response = {}
+        elif "*" in model:
+            from litellm.litellm_core_utils.llm_request_utils import (
+                pick_cheapest_model_from_llm_provider,
+            )
+
+            # this is a wildcard model - pick the cheapest model from the provider
+            cheapest_model = pick_cheapest_model_from_llm_provider(
+                custom_llm_provider=custom_llm_provider
+            )
+            model_params["model"] = cheapest_model
+            await acompletion(**model_params)
+            response = {}  # args like remaining ratelimit etc.
         else:  # default to completion calls
             await acompletion(**model_params)
             response = {}  # args like remaining ratelimit etc.
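A condensed, stand-alone restatement of the new branch above (not the full ahealth_check flow): when the configured model contains "*", the health check swaps in the provider's cheapest concrete model before issuing an ordinary completion call.

from litellm.litellm_core_utils.llm_request_utils import (
    pick_cheapest_model_from_llm_provider,
)

model = "fireworks_ai/*"
custom_llm_provider = "fireworks_ai"
model_params = {"model": model, "messages": [{"role": "user", "content": "ping"}]}

if "*" in model:
    # substitute a concrete model so the provider accepts the request
    model_params["model"] = pick_cheapest_model_from_llm_provider(
        custom_llm_provider=custom_llm_provider
    )
# litellm.acompletion(**model_params) would then probe the concrete model.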
@@ -1,14 +1,18 @@
 #### What this tests ####
 # This tests if ahealth_check() actually works
-import sys, os
+
+import os
+import sys
 import traceback
 
 import pytest
 
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
-import litellm, asyncio
+import asyncio
+
+import litellm
 
 
 @pytest.mark.asyncio

@@ -105,3 +109,27 @@ async def test_sagemaker_embedding_health_check():
 
 
 # asyncio.run(test_sagemaker_embedding_health_check())
+
+
+@pytest.mark.asyncio
+async def test_fireworks_health_check():
+    """
+    This should not fail
+
+    ensure that provider wildcard model passes health check
+    """
+    response = await litellm.ahealth_check(
+        model_params={
+            "api_key": os.environ.get("FIREWORKS_AI_API_KEY"),
+            "model": "fireworks_ai/*",
+            "messages": [{"role": "user", "content": "What's 1 + 1?"}],
+        },
+        mode=None,
+        prompt="What's 1 + 1?",
+        input=["test from litellm"],
+        default_timeout=6000,
+    )
+    print(f"response: {response}")
+    assert response == {}
+
+    return response
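The added test can be run on its own with pytest, e.g. "pytest -k test_fireworks_health_check" from the directory containing the health-check test module (the file path is not shown in this diff), with FIREWORKS_AI_API_KEY exported; per the assertion above, a passing run means ahealth_check returned an empty dict.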