(test - watsonx) Added tests for watsonx embeddings with mocked endpoints

This commit is contained in:
Simon Sanchez Viloria 2024-07-07 17:56:01 +02:00
parent 06e6f52358
commit ea952a57b0

View file

@ -11,6 +11,7 @@ sys.path.insert(
) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion, completion_cost
from unittest.mock import MagicMock, patch
litellm.set_verbose = False
@ -484,14 +485,70 @@ def test_mistral_embeddings():
pytest.fail(f"Error occurred: {e}")
@pytest.mark.skip(reason="local test")
def test_watsonx_embeddings():
def mock_wx_embed_request(method:str, url:str, **kwargs):
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.headers = {"Content-Type": "application/json"}
mock_response.json.return_value = {
"model_id": "ibm/slate-30m-english-rtrvr",
"created_at": "2024-01-01T00:00:00.00Z",
"results": [ {"embedding": [0.0]*254} ],
"input_token_count": 8
}
return mock_response
try:
litellm.set_verbose = True
response = litellm.embedding(
model="watsonx/ibm/slate-30m-english-rtrvr",
input=["good morning from litellm"],
)
with patch("requests.request", side_effect=mock_wx_embed_request):
response = litellm.embedding(
model="watsonx/ibm/slate-30m-english-rtrvr",
input=["good morning from litellm"],
token="secret-token"
)
print(f"response: {response}")
assert isinstance(response.usage, litellm.Usage)
except litellm.RateLimitError as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
@pytest.mark.asyncio
async def test_watsonx_aembeddings():
def mock_async_client(*args, **kwargs):
class MockedAsyncClient:
def __init__(*args, **kwargs):
pass
async def send(self, request, *args, stream: bool = False, **kwags):
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.headers = {"Content-Type": "application/json"}
mock_response.json.return_value = {
"model_id": "ibm/slate-30m-english-rtrvr",
"created_at": "2024-01-01T00:00:00.00Z",
"results": [ {"embedding": [0.0]*254} ],
"input_token_count": 8
}
mock_response.is_error = False
return mock_response
def build_request(*args, **kwargs):
pass
return MockedAsyncClient(*args, **kwargs)
try:
litellm.set_verbose = True
with patch("httpx.AsyncClient", side_effect=mock_async_client):
response = await litellm.aembedding(
model="watsonx/ibm/slate-30m-english-rtrvr",
input=["good morning from litellm"],
token="secret-token"
)
print(f"response: {response}")
assert isinstance(response.usage, litellm.Usage)
except litellm.RateLimitError as e: