Complete 'requests' library removal (#7350)

* refactor: initial commit moving watsonx_text to base_llm_http_handler + clarifying new provider directory structure * refactor(watsonx/completion/handler.py): move to using base llm http handler removes 'requests' library usage * fix(watsonx_text/transformation.py): fix result transformation migrates to transformation.py, for usage with base llm http handler * fix(streaming_handler.py): migrate watsonx streaming to transformation.py ensures streaming works with base llm http handler * fix(streaming_handler.py): fix streaming linting errors and remove watsonx conditional logic * fix(watsonx/): fix chat route post completion route refactor * refactor(watsonx/embed): refactor watsonx to use base llm http handler for embedding calls as well * refactor(base.py): remove requests library usage from litellm * build(pyproject.toml): remove requests library usage * fix: fix linting errors * fix: fix linting errors * fix(types/utils.py): fix validation errors for modelresponsestream * fix(replicate/handler.py): fix linting errors * fix(litellm_logging.py): handle modelresponsestream object * fix(streaming_handler.py): fix modelresponsestream args * fix: remove unused imports * test: fix test * fix: fix test * test: fix test * test: fix tests * test: fix test * test: fix patch target * test: fix test
2025-04-25 18:54:30 +00:00 · 2024-12-22 07:21:25 -08:00 · 2024-12-22 07:21:25 -08:00 · 3671829e39
commit 3671829e39
parent 8b1ea40e7b
39 changed files with 2147 additions and 2279 deletions
--- a/tests/local_testing/test_embedding.py
+++ b/tests/local_testing/test_embedding.py
@ -801,8 +801,11 @@ def test_fireworks_embeddings():


 def test_watsonx_embeddings():
+    from litellm.llms.custom_httpx.http_handler import HTTPHandler

-    def mock_wx_embed_request(method: str, url: str, **kwargs):
+    client = HTTPHandler()
+
+    def mock_wx_embed_request(url: str, **kwargs):
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.headers = {"Content-Type": "application/json"}
@ -816,12 +819,14 @@ def test_watsonx_embeddings():

    try:
        litellm.set_verbose = True
-        with patch("requests.request", side_effect=mock_wx_embed_request):
+        with patch.object(client, "post", side_effect=mock_wx_embed_request):
            response = litellm.embedding(
                model="watsonx/ibm/slate-30m-english-rtrvr",
                input=["good morning from litellm"],
                token="secret-token",
+                client=client,
            )
+
        print(f"response: {response}")
        assert isinstance(response.usage, litellm.Usage)
    except litellm.RateLimitError as e:
@ -832,6 +837,9 @@ def test_watsonx_embeddings():

@pytest.mark.asyncio
 async def test_watsonx_aembeddings():
+    from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
+
+    client = AsyncHTTPHandler()

    def mock_async_client(*args, **kwargs):

@ -856,12 +864,14 @@ async def test_watsonx_aembeddings():

    try:
        litellm.set_verbose = True
-        with patch("httpx.AsyncClient", side_effect=mock_async_client):
+        with patch.object(client, "post", side_effect=mock_async_client) as mock_client:
            response = await litellm.aembedding(
                model="watsonx/ibm/slate-30m-english-rtrvr",
                input=["good morning from litellm"],
                token="secret-token",
+                client=client,
            )
+            mock_client.assert_called_once()
        print(f"response: {response}")
        assert isinstance(response.usage, litellm.Usage)
    except litellm.RateLimitError as e: