From ea952a57b039d3060d800b459bae62a45cb392f6 Mon Sep 17 00:00:00 2001
From: Simon Sanchez Viloria
Date: Sun, 7 Jul 2024 17:56:01 +0200
Subject: [PATCH] (test - watsonx) Added tests for watsonx embeddings with
 mocked endpoints

---
 litellm/tests/test_embedding.py | 67 ++++++++++++++++++++++++++++++---
 1 file changed, 62 insertions(+), 5 deletions(-)

diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py
index 30988dba1c..94fe407d1a 100644
--- a/litellm/tests/test_embedding.py
+++ b/litellm/tests/test_embedding.py
@@ -11,6 +11,7 @@ sys.path.insert(
 )  # Adds the parent directory to the system path
 import litellm
 from litellm import embedding, completion, completion_cost
+from unittest.mock import MagicMock, patch

 litellm.set_verbose = False

@@ -484,14 +485,70 @@ def test_mistral_embeddings():
         pytest.fail(f"Error occurred: {e}")


-@pytest.mark.skip(reason="local test")
 def test_watsonx_embeddings():
+
+    def mock_wx_embed_request(method: str, url: str, **kwargs):
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.headers = {"Content-Type": "application/json"}
+        mock_response.json.return_value = {
+            "model_id": "ibm/slate-30m-english-rtrvr",
+            "created_at": "2024-01-01T00:00:00.00Z",
+            "results": [{"embedding": [0.0] * 254}],
+            "input_token_count": 8,
+        }
+        return mock_response
+
     try:
         litellm.set_verbose = True
-        response = litellm.embedding(
-            model="watsonx/ibm/slate-30m-english-rtrvr",
-            input=["good morning from litellm"],
-        )
+        with patch("requests.request", side_effect=mock_wx_embed_request):
+            response = litellm.embedding(
+                model="watsonx/ibm/slate-30m-english-rtrvr",
+                input=["good morning from litellm"],
+                token="secret-token",
+            )
+        print(f"response: {response}")
+        assert isinstance(response.usage, litellm.Usage)
+    except litellm.RateLimitError as e:
+        pass
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
+@pytest.mark.asyncio
+async def test_watsonx_aembeddings():
+
+    def mock_async_client(*args, **kwargs):
+
+        class MockedAsyncClient:
+            def __init__(self, *args, **kwargs):
+                pass
+
+            async def send(self, request, *args, stream: bool = False, **kwargs):
+                mock_response = MagicMock()
+                mock_response.status_code = 200
+                mock_response.headers = {"Content-Type": "application/json"}
+                mock_response.json.return_value = {
+                    "model_id": "ibm/slate-30m-english-rtrvr",
+                    "created_at": "2024-01-01T00:00:00.00Z",
+                    "results": [{"embedding": [0.0] * 254}],
+                    "input_token_count": 8,
+                }
+                mock_response.is_error = False
+                return mock_response
+
+            def build_request(self, *args, **kwargs):
+                pass
+
+        return MockedAsyncClient(*args, **kwargs)
+
+    try:
+        litellm.set_verbose = True
+        with patch("httpx.AsyncClient", side_effect=mock_async_client):
+            response = await litellm.aembedding(
+                model="watsonx/ibm/slate-30m-english-rtrvr",
+                input=["good morning from litellm"],
+                token="secret-token",
+            )
         print(f"response: {response}")
         assert isinstance(response.usage, litellm.Usage)
     except litellm.RateLimitError as e:
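
Notes (reviewer commentary, not part of the patch):

Both tests stub the watsonx endpoint at the HTTP transport layer rather than mocking litellm internals: the synchronous path is intercepted at requests.request and the async path at httpx.AsyncClient, so litellm's request building and response parsing for the watsonx provider still run end to end against the canned payload. MockedAsyncClient only implements the send/build_request surface the async handler is expected to touch; if the handler ever calls other httpx.AsyncClient methods, the mock has to grow with it. The @pytest.mark.asyncio marker requires the pytest-asyncio plugin. To run just these tests:

    pytest litellm/tests/test_embedding.py -k watsonx

For reference, a minimal self-contained sketch of the interception technique the synchronous test relies on (the URL and payload below are illustrative placeholders mirroring the canned response in the patch, not taken from watsonx documentation):

    import requests
    from unittest.mock import MagicMock, patch

    def fake_request(method, url, **kwargs):
        # Return a canned HTTP response regardless of the endpoint requested.
        response = MagicMock()
        response.status_code = 200
        response.json.return_value = {"results": [{"embedding": [0.0] * 254}]}
        return response

    # While the patch is active, any code path that calls requests.request
    # receives fake_request's canned response instead of doing network I/O.
    with patch("requests.request", side_effect=fake_request):
        data = requests.request("POST", "https://example.com/embeddings").json()
        assert data["results"][0]["embedding"] == [0.0] * 254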