From ea952a57b039d3060d800b459bae62a45cb392f6 Mon Sep 17 00:00:00 2001
From: Simon Sanchez Viloria
Date: Sun, 7 Jul 2024 17:56:01 +0200
Subject: [PATCH] (test - watsonx) Added tests for watsonx embeddings with
 mocked endpoints

---
 litellm/tests/test_embedding.py | 67 ++++++++++++++++++++++++++++++---
 1 file changed, 62 insertions(+), 5 deletions(-)

diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py
index 30988dba1c..94fe407d1a 100644
--- a/litellm/tests/test_embedding.py
+++ b/litellm/tests/test_embedding.py
@@ -11,6 +11,7 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 import litellm
 from litellm import embedding, completion, completion_cost
+from unittest.mock import MagicMock, patch

 litellm.set_verbose = False

@@ -484,14 +485,70 @@ def test_mistral_embeddings():
         pytest.fail(f"Error occurred: {e}")


-@pytest.mark.skip(reason="local test")
 def test_watsonx_embeddings():
+
+    def mock_wx_embed_request(method: str, url: str, **kwargs):
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.headers = {"Content-Type": "application/json"}
+        mock_response.json.return_value = {
+            "model_id": "ibm/slate-30m-english-rtrvr",
+            "created_at": "2024-01-01T00:00:00.00Z",
+            "results": [{"embedding": [0.0] * 254}],
+            "input_token_count": 8,
+        }
+        return mock_response
+
     try:
         litellm.set_verbose = True
-        response = litellm.embedding(
-            model="watsonx/ibm/slate-30m-english-rtrvr",
-            input=["good morning from litellm"],
-        )
+        with patch("requests.request", side_effect=mock_wx_embed_request):
+            response = litellm.embedding(
+                model="watsonx/ibm/slate-30m-english-rtrvr",
+                input=["good morning from litellm"],
+                token="secret-token",
+            )
+        print(f"response: {response}")
+        assert isinstance(response.usage, litellm.Usage)
+    except litellm.RateLimitError as e:
+        pass
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
+@pytest.mark.asyncio
+async def test_watsonx_aembeddings():
+
+    def mock_async_client(*args, **kwargs):
+
+        class MockedAsyncClient:
+            def __init__(self, *args, **kwargs):
+                pass
+
+            async def send(self, request, *args, stream: bool = False, **kwargs):
+                mock_response = MagicMock()
+                mock_response.status_code = 200
+                mock_response.headers = {"Content-Type": "application/json"}
+                mock_response.json.return_value = {
+                    "model_id": "ibm/slate-30m-english-rtrvr",
+                    "created_at": "2024-01-01T00:00:00.00Z",
+                    "results": [{"embedding": [0.0] * 254}],
+                    "input_token_count": 8,
+                }
+                mock_response.is_error = False
+                return mock_response
+
+            def build_request(self, *args, **kwargs):
+                pass
+
+        return MockedAsyncClient(*args, **kwargs)
+
+    try:
+        litellm.set_verbose = True
+        with patch("httpx.AsyncClient", side_effect=mock_async_client):
+            response = await litellm.aembedding(
+                model="watsonx/ibm/slate-30m-english-rtrvr",
+                input=["good morning from litellm"],
+                token="secret-token",
+            )
         print(f"response: {response}")
         assert isinstance(response.usage, litellm.Usage)
     except litellm.RateLimitError as e:
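
Notes (reviewer commentary, not part of the patch):

Both tests stub the watsonx endpoint at the HTTP transport layer rather than mocking litellm internals: the synchronous path is intercepted at requests.request and the async path at httpx.AsyncClient, so litellm's request building and response parsing for the watsonx provider still run end to end against the canned payload. MockedAsyncClient only implements the send/build_request surface the async handler is expected to touch; if the handler ever calls other httpx.AsyncClient methods, the mock has to grow with it. The @pytest.mark.asyncio marker requires the pytest-asyncio plugin. To run just these tests:

    pytest litellm/tests/test_embedding.py -k watsonx

For reference, a minimal self-contained sketch of the interception technique the synchronous test relies on (the URL and payload below are illustrative placeholders mirroring the canned response in the patch, not taken from watsonx documentation):

    import requests
    from unittest.mock import MagicMock, patch

    def fake_request(method, url, **kwargs):
        # Return a canned HTTP response regardless of the endpoint requested.
        response = MagicMock()
        response.status_code = 200
        response.json.return_value = {"results": [{"embedding": [0.0] * 254}]}
        return response

    # While the patch is active, any code path that calls requests.request
    # receives fake_request's canned response instead of doing network I/O.
    with patch("requests.request", side_effect=fake_request):
        data = requests.request("POST", "https://example.com/embeddings").json()
        assert data["results"][0]["embedding"] == [0.0] * 254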