Merge pull request #1785 from BerriAI/litellm_fix_health_check

fix(main.py): for health checks, don't use cached responses
Krish Dholakia 2024-02-02 17:47:15 -08:00 committed by GitHub
commit a1c567a47d
2 changed files with 22 additions and 0 deletions
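
The fix lands in ahealth_check in main.py. For orientation, here is a minimal sketch of invoking that entry point; the parameter names model_params and mode come from the hunk below, while the model name and the "completion" mode value are illustrative assumptions:

import asyncio
import litellm

async def main():
    # ahealth_check issues a real (and, after this fix, uncached) request
    # against the deployment described by model_params.
    response = await litellm.ahealth_check(
        model_params={
            "model": "gpt-3.5-turbo",  # assumed model, for illustration only
            "messages": [{"role": "user", "content": "ping"}],
        },
        mode="completion",  # "embedding" is the mode visible in the hunk; "completion" is assumed
    )
    print(response)

asyncio.run(main())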

main.py

@@ -3259,6 +3259,9 @@ async def ahealth_check(
                organization=organization,
            )
        else:
+           model_params["cache"] = {
+               "no-cache": True
+           }  # don't use cached responses for making health check calls
            if mode == "embedding":
                model_params.pop("messages", None)
                model_params["input"] = input


@@ -1694,6 +1694,25 @@ def test_completion_anyscale_api():
# test_completion_anyscale_api()


+def test_completion_cohere():
+    try:
+        # litellm.set_verbose=True
+        messages = [
+            {"role": "system", "content": "You're a good bot"},
+            {
+                "role": "user",
+                "content": "Hey",
+            },
+        ]
+        response = completion(
+            model="command-nightly",
+            messages=messages,
+        )
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
def test_azure_cloudflare_api():
    litellm.set_verbose = True
    try:
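
The second hunk adds a Cohere smoke test alongside the existing ones. To run it standalone in the file's own style (compare the commented-out # test_completion_anyscale_api() call), a real Cohere key is needed, since command-nightly routes to Cohere; the snippet below is a sketch under that assumption:

import os

# command-nightly is a Cohere model; litellm reads COHERE_API_KEY from the
# environment, so the call fails without a valid key.
assert os.environ.get("COHERE_API_KEY"), "set COHERE_API_KEY before running"

test_completion_cohere()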