fix(bedrock_httpx.py): move bedrock ai21 calls to being async

2025-04-26 03:04:13 +00:00 · 2024-05-16 22:21:30 -07:00 · 2024-05-16 22:21:30 -07:00 · 0293f7766a
commit 0293f7766a
parent 180bc46ca4
5 changed files with 88 additions and 71 deletions
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -2665,7 +2665,12 @@ def response_format_tests(response: litellm.ModelResponse):
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.parametrize(
    "model",
-    ["bedrock/cohere.command-r-plus-v1:0", "anthropic.claude-3-sonnet-20240229-v1:0"],
+    [
+        "bedrock/cohere.command-r-plus-v1:0",
+        "anthropic.claude-3-sonnet-20240229-v1:0",
+        "anthropic.claude-instant-v1",
+        "bedrock/ai21.j2-mid",
+    ],
 )
@pytest.mark.asyncio
 async def test_completion_bedrock_httpx_models(sync_mode, model):
@@ -2675,6 +2680,8 @@ async def test_completion_bedrock_httpx_models(sync_mode, model):
        response = completion(
            model=model,
            messages=[{"role": "user", "content": "Hey! how's it going?"}],
+            temperature=0.2,
+            max_tokens=200,
        )

        assert isinstance(response, litellm.ModelResponse)
@@ -2684,6 +2691,8 @@ async def test_completion_bedrock_httpx_models(sync_mode, model):
        response = await litellm.acompletion(
            model=model,
            messages=[{"role": "user", "content": "Hey! how's it going?"}],
+            temperature=0.2,
+            max_tokens=200,
        )

        assert isinstance(response, litellm.ModelResponse)
@@ -2740,48 +2749,9 @@ def test_completion_bedrock_titan():
 # test_completion_bedrock_titan()


-def test_completion_bedrock_claude():
-    print("calling claude")
-    try:
-        response = completion(
-            model="anthropic.claude-instant-v1",
-            messages=messages,
-            max_tokens=10,
-            temperature=0.1,
-            logger_fn=logger_fn,
-        )
-        # Add any assertions here to check the response
-        print(response)
-    except RateLimitError:
-        pass
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-
 # test_completion_bedrock_claude()


-def test_completion_bedrock_cohere():
-    print("calling bedrock cohere")
-    litellm.set_verbose = True
-    try:
-        response = completion(
-            model="bedrock/cohere.command-text-v14",
-            messages=[{"role": "user", "content": "hi"}],
-            temperature=0.1,
-            max_tokens=10,
-            stream=True,
-        )
-        # Add any assertions here to check the response
-        print(response)
-        for chunk in response:
-            print(chunk)
-    except RateLimitError:
-        pass
-    except Exception as e:
-        pytest.fail(f"Error occurred: {e}")
-
-
 # test_completion_bedrock_cohere()


@@ -2804,23 +2774,6 @@ def test_completion_bedrock_cohere():
 #         pytest.fail(f"Error occurred: {e}")
 # test_completion_bedrock_claude_stream()

-# def test_completion_bedrock_ai21():
-#     try:
-#         litellm.set_verbose = False
-#         response = completion(
-#             model="bedrock/ai21.j2-mid",
-#             messages=messages,
-#             temperature=0.2,
-#             top_p=0.2,
-#             max_tokens=20
-#         )
-#         # Add any assertions here to check the response
-#         print(response)
-#     except RateLimitError:
-#         pass
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
-

 ######## Test VLLM ########
 # def test_completion_vllm():