Complete 'requests' library removal (#7350)
All checks were successful
Read Version from pyproject.toml / read-version (push) Successful in 12s

* refactor: initial commit moving watsonx_text to base_llm_http_handler + clarifying new provider directory structure

* refactor(watsonx/completion/handler.py): move to using base llm http handler

removes 'requests' library usage

* fix(watsonx_text/transformation.py): fix result transformation

migrates to transformation.py, for usage with base llm http handler

* fix(streaming_handler.py): migrate watsonx streaming to transformation.py

ensures streaming works with base llm http handler

* fix(streaming_handler.py): fix streaming linting errors and remove watsonx conditional logic

* fix(watsonx/): fix chat route after the completion route refactor

* refactor(watsonx/embed): refactor watsonx to use base llm http handler for embedding calls as well

* refactor(base.py): remove requests library usage from litellm

* build(pyproject.toml): remove requests library usage

* fix: fix linting errors

* fix: fix linting errors

* fix(types/utils.py): fix validation errors for modelresponsestream

* fix(replicate/handler.py): fix linting errors

* fix(litellm_logging.py): handle modelresponsestream object

* fix(streaming_handler.py): fix modelresponsestream args

* fix: remove unused imports

* test: fix test

* fix: fix test

* test: fix test

* test: fix tests

* test: fix test

* test: fix patch target

* test: fix test
This commit is contained in:
Krish Dholakia 2024-12-22 07:21:25 -08:00 committed by GitHub
parent 8b1ea40e7b
commit 3671829e39
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
39 changed files with 2147 additions and 2279 deletions

View file

@@ -801,8 +801,11 @@ def test_fireworks_embeddings():
def test_watsonx_embeddings():
from litellm.llms.custom_httpx.http_handler import HTTPHandler
def mock_wx_embed_request(method: str, url: str, **kwargs):
client = HTTPHandler()
def mock_wx_embed_request(url: str, **kwargs):
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.headers = {"Content-Type": "application/json"}
@@ -816,12 +819,14 @@ def test_watsonx_embeddings():
try:
litellm.set_verbose = True
with patch("requests.request", side_effect=mock_wx_embed_request):
with patch.object(client, "post", side_effect=mock_wx_embed_request):
response = litellm.embedding(
model="watsonx/ibm/slate-30m-english-rtrvr",
input=["good morning from litellm"],
token="secret-token",
client=client,
)
print(f"response: {response}")
assert isinstance(response.usage, litellm.Usage)
except litellm.RateLimitError as e:
@@ -832,6 +837,9 @@ def test_watsonx_embeddings():
@pytest.mark.asyncio
async def test_watsonx_aembeddings():
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
client = AsyncHTTPHandler()
def mock_async_client(*args, **kwargs):
@@ -856,12 +864,14 @@ async def test_watsonx_aembeddings():
try:
litellm.set_verbose = True
with patch("httpx.AsyncClient", side_effect=mock_async_client):
with patch.object(client, "post", side_effect=mock_async_client) as mock_client:
response = await litellm.aembedding(
model="watsonx/ibm/slate-30m-english-rtrvr",
input=["good morning from litellm"],
token="secret-token",
client=client,
)
mock_client.assert_called_once()
print(f"response: {response}")
assert isinstance(response.usage, litellm.Usage)
except litellm.RateLimitError as e: