(test - watsonx) Added tests for watsonx embeddings with mocked endpoints

2025-04-25 18:54:30 +00:00 · 2024-07-07 17:56:01 +02:00 · 2024-07-07 17:56:01 +02:00 · ea952a57b0
commit ea952a57b0
parent 06e6f52358
1 changed file with 62 additions and 5 deletions
--- a/litellm/tests/test_embedding.py
+++ b/litellm/tests/test_embedding.py
@@ -11,6 +11,7 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 import litellm
 from litellm import embedding, completion, completion_cost
+from unittest.mock import MagicMock, patch

 litellm.set_verbose = False

@@ -484,14 +485,70 @@ def test_mistral_embeddings():
        pytest.fail(f"Error occurred: {e}")


-@pytest.mark.skip(reason="local test")
 def test_watsonx_embeddings():
+    
+    def mock_wx_embed_request(method:str, url:str, **kwargs):
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.headers = {"Content-Type": "application/json"}
+        mock_response.json.return_value = {
+                "model_id": "ibm/slate-30m-english-rtrvr",
+                "created_at": "2024-01-01T00:00:00.00Z",
+                "results": [ {"embedding": [0.0]*254} ],
+                "input_token_count": 8
+        }
+        return mock_response
+
    try:
        litellm.set_verbose = True
-        response = litellm.embedding(
-            model="watsonx/ibm/slate-30m-english-rtrvr",
-            input=["good morning from litellm"],
-        )
+        with patch("requests.request", side_effect=mock_wx_embed_request):
+            response = litellm.embedding(
+                model="watsonx/ibm/slate-30m-english-rtrvr",
+                input=["good morning from litellm"],
+                token="secret-token"
+            )
+        print(f"response: {response}")
+        assert isinstance(response.usage, litellm.Usage)
+    except litellm.RateLimitError as e:
+        pass
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+@pytest.mark.asyncio
+async def test_watsonx_aembeddings():
+
+    def mock_async_client(*args, **kwargs):
+        
+        class MockedAsyncClient:
+            def __init__(*args, **kwargs):
+                pass
+
+            async def send(self, request, *args, stream: bool = False, **kwags):
+                mock_response = MagicMock()
+                mock_response.status_code = 200
+                mock_response.headers = {"Content-Type": "application/json"}
+                mock_response.json.return_value = {
+                        "model_id": "ibm/slate-30m-english-rtrvr",
+                        "created_at": "2024-01-01T00:00:00.00Z",
+                        "results": [ {"embedding": [0.0]*254} ],
+                        "input_token_count": 8
+                }
+                mock_response.is_error = False
+                return mock_response
+            
+            def build_request(*args, **kwargs):
+                pass
+
+        return MockedAsyncClient(*args, **kwargs)
+
+    try:
+        litellm.set_verbose = True
+        with patch("httpx.AsyncClient", side_effect=mock_async_client):
+            response = await litellm.aembedding(
+                model="watsonx/ibm/slate-30m-english-rtrvr",
+                input=["good morning from litellm"],
+                token="secret-token"
+            )
        print(f"response: {response}")
        assert isinstance(response.usage, litellm.Usage)
    except litellm.RateLimitError as e: