diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py index 28cafe02c..6505d432d 100644 --- a/litellm/tests/test_embedding.py +++ b/litellm/tests/test_embedding.py @@ -187,11 +187,15 @@ def test_cohere_embedding3(): def test_bedrock_embedding_titan(): try: # this tests if we support str input for bedrock embedding - litellm.set_verbose = False + litellm.set_verbose = True + litellm.enable_cache() + import time + + current_time = str(time.time()) # DO NOT MAKE THE INPUT A LIST in this test response = embedding( - model="amazon.titan-embed-text-v1", - input="good morning from litellm, attempting to embed data", # input should always be a string in this test + model="bedrock/amazon.titan-embed-text-v1", + input=f"good morning from litellm, attempting to embed data {current_time}", # input should always be a string in this test ) print(f"response:", response) assert isinstance( @@ -201,8 +205,23 @@ def test_bedrock_embedding_titan(): assert all( isinstance(x, float) for x in response["data"][0]["embedding"] ), "Expected response to be a list of floats" - print("Response Usage", response.usage) - assert response.usage.prompt_tokens == 11 + + # this also tests if we can return a cache response for this scenario + import time + + start_time = time.time() + + response = embedding( + model="bedrock/amazon.titan-embed-text-v1", + input=f"good morning from litellm, attempting to embed data {current_time}", # input should always be a string in this test + ) + print(response) + + end_time = time.time() + print(f"Embedding 2 response time: {end_time - start_time} seconds") + + assert end_time - start_time < 0.1 + litellm.disable_cache() except Exception as e: pytest.fail(f"Error occurred: {e}")