fix(bedrock_httpx.py): move bedrock ai21 calls to being async

This commit is contained in:
Krrish Dholakia 2024-05-16 22:21:30 -07:00
parent 180bc46ca4
commit 0293f7766a
5 changed files with 88 additions and 71 deletions

View file

@ -2665,7 +2665,12 @@ def response_format_tests(response: litellm.ModelResponse):
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.parametrize(
"model",
["bedrock/cohere.command-r-plus-v1:0", "anthropic.claude-3-sonnet-20240229-v1:0"],
[
"bedrock/cohere.command-r-plus-v1:0",
"anthropic.claude-3-sonnet-20240229-v1:0",
"anthropic.claude-instant-v1",
"bedrock/ai21.j2-mid",
],
)
@pytest.mark.asyncio
async def test_completion_bedrock_httpx_models(sync_mode, model):
@ -2675,6 +2680,8 @@ async def test_completion_bedrock_httpx_models(sync_mode, model):
response = completion(
model=model,
messages=[{"role": "user", "content": "Hey! how's it going?"}],
temperature=0.2,
max_tokens=200,
)
assert isinstance(response, litellm.ModelResponse)
@ -2684,6 +2691,8 @@ async def test_completion_bedrock_httpx_models(sync_mode, model):
response = await litellm.acompletion(
model=model,
messages=[{"role": "user", "content": "Hey! how's it going?"}],
temperature=0.2,
max_tokens=200,
)
assert isinstance(response, litellm.ModelResponse)
@ -2740,48 +2749,9 @@ def test_completion_bedrock_titan():
# test_completion_bedrock_titan()
def test_completion_bedrock_claude():
    """Smoke-test a completion call against ``anthropic.claude-instant-v1``.

    The response is only printed, not asserted on. A ``RateLimitError`` is
    tolerated silently; any other ``Exception`` fails the test with the
    error text.
    """
    print("calling claude")
    # Gather the request in one place so the call site stays short.
    request_kwargs = {
        "model": "anthropic.claude-instant-v1",
        "messages": messages,
        "max_tokens": 10,
        "temperature": 0.1,
        "logger_fn": logger_fn,
    }
    try:
        reply = completion(**request_kwargs)
        # Add any assertions here to check the response
        print(reply)
    except RateLimitError:
        # Being throttled by the provider is not treated as a test failure.
        return
    except Exception as err:
        pytest.fail(f"Error occurred: {err}")
# test_completion_bedrock_claude()
def test_completion_bedrock_cohere():
    """Smoke-test a streamed completion from ``bedrock/cohere.command-text-v14``.

    Sets ``litellm.set_verbose`` to ``True``, requests the completion with
    ``stream=True``, then prints the returned object and every chunk it
    yields. A ``RateLimitError`` is tolerated silently; any other
    ``Exception`` fails the test with the error text.
    """
    print("calling bedrock cohere")
    litellm.set_verbose = True
    prompt = [{"role": "user", "content": "hi"}]
    try:
        stream = completion(
            model="bedrock/cohere.command-text-v14",
            messages=prompt,
            temperature=0.1,
            max_tokens=10,
            stream=True,
        )
        # Add any assertions here to check the response
        print(stream)
        # Iterate the returned object so each chunk is actually consumed.
        for piece in stream:
            print(piece)
    except RateLimitError:
        # Being throttled by the provider is not treated as a test failure.
        return
    except Exception as err:
        pytest.fail(f"Error occurred: {err}")
# test_completion_bedrock_cohere()
@ -2804,23 +2774,6 @@ def test_completion_bedrock_cohere():
# pytest.fail(f"Error occurred: {e}")
# test_completion_bedrock_claude_stream()
# def test_completion_bedrock_ai21():
# try:
# litellm.set_verbose = False
# response = completion(
# model="bedrock/ai21.j2-mid",
# messages=messages,
# temperature=0.2,
# top_p=0.2,
# max_tokens=20
# )
# # Add any assertions here to check the response
# print(response)
# except RateLimitError:
# pass
# except Exception as e:
# pytest.fail(f"Error occurred: {e}")
######## Test VLLM ########
# def test_completion_vllm():