Complete 'requests' library removal (#7350)

* refactor: initial commit moving watsonx_text to base_llm_http_handler + clarifying new provider directory structure

* refactor(watsonx/completion/handler.py): move to using base llm http handler

removes 'requests' library usage

* fix(watsonx_text/transformation.py): fix result transformation

migrates to transformation.py, for usage with base llm http handler

* fix(streaming_handler.py): migrate watsonx streaming to transformation.py

ensures streaming works with base llm http handler

* fix(streaming_handler.py): fix streaming linting errors and remove watsonx conditional logic

* fix(watsonx/): fix chat route after the completion route refactor

* refactor(watsonx/embed): refactor watsonx to use base llm http handler for embedding calls as well

* refactor(base.py): remove requests library usage from litellm

* build(pyproject.toml): remove requests library usage

* fix: fix linting errors

* fix: fix linting errors

* fix(types/utils.py): fix validation errors for modelresponsestream

* fix(replicate/handler.py): fix linting errors

* fix(litellm_logging.py): handle modelresponsestream object

* fix(streaming_handler.py): fix modelresponsestream args

* fix: remove unused imports

* test: fix test

* fix: fix test

* test: fix test

* test: fix tests

* test: fix test

* test: fix patch target

* test: fix test
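
The "base llm http handler" commits above replace direct `requests` calls with litellm's shared httpx clients. A minimal sketch of that replacement, assuming the helper import path, URL, and payload (illustrative values, not copied from this PR):

# Illustrative sketch only -- import path, URL, and payload are assumptions.
import litellm
from litellm.llms.custom_httpx.http_handler import (
    _get_httpx_client,
    get_async_httpx_client,
)

# Before (removed by this PR): blocking call through the 'requests' package.
#   import requests
#   resp = requests.post(url, headers=headers, json=payload, timeout=600)

# After: synchronous call through the shared httpx client.
client = _get_httpx_client()
resp = client.post(
    url="https://us-south.ml.cloud.ibm.com/ml/v1/text/generation",  # example URL
    headers={"Authorization": "Bearer <token>"},
    json={"model_id": "ibm/granite-13b-chat-v2", "input": "Hello"},
)

# After: async call through the shared async httpx client (mirrors the
# get_async_httpx_client(...) usage visible in the Replicate diff below).
async_client = get_async_httpx_client(
    llm_provider=litellm.LlmProviders.WATSONX,  # enum member assumed to exist
    params={"timeout": 600.0},
)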
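
The transformation.py commits move provider-specific request and response shaping into a config object that the shared handler calls into. A rough sketch of that pattern; the class name, method signatures, and response shape are assumptions for illustration, not the actual watsonx config:

# Hypothetical shape of a provider transformation config -- names and
# signatures are illustrative, not the PR's exact code.
from typing import List, Optional

from litellm.types.utils import ModelResponse


class ExampleTextConfig:
    def get_complete_url(
        self, api_base: Optional[str], model: str, optional_params: dict
    ) -> str:
        # Build the provider endpoint; same keyword style as the diff below.
        return f"{api_base}/ml/v1/text/generation"

    def transform_request(
        self, model: str, messages: List[dict], optional_params: dict
    ) -> dict:
        # Map the OpenAI-style messages onto the provider's request body.
        prompt = "\n".join(str(m.get("content", "")) for m in messages)
        return {"model_id": model, "input": prompt, "parameters": optional_params}

    def transform_response(
        self, raw_response: dict, model_response: ModelResponse
    ) -> ModelResponse:
        # Copy the generated text back onto litellm's ModelResponse object.
        model_response.choices[0].message.content = (  # type: ignore[attr-defined]
            raw_response["results"][0]["generated_text"]
        )
        return model_response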
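
For the streaming commits, the same idea applies to server-sent events: the handler streams over httpx rather than `requests`, and the provider transformation parses each data line. A minimal, self-contained sketch using httpx directly; the endpoint, headers, and field names are illustrative:

# Minimal SSE streaming sketch over httpx (not the PR's code).
import json
from typing import Iterator

import httpx


def stream_generated_text(url: str, headers: dict, body: dict) -> Iterator[str]:
    with httpx.Client(timeout=600.0) as client:
        with client.stream("POST", url, headers=headers, json=body) as response:
            for line in response.iter_lines():
                if not line.startswith("data:"):
                    continue
                payload = line[len("data:"):].strip()
                if payload == "[DONE]":
                    break
                chunk = json.loads(payload)
                # Provider-specific parsing would live in transformation.py;
                # the field below is a watsonx-style shape, used illustratively.
                yield chunk.get("results", [{}])[0].get("generated_text", "")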
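
Several of the later fixes deal with the ModelResponseStream type in types/utils.py and how logging and the streaming handler accept it. A hedged example of constructing a single streaming chunk; the field names are assumptions based on litellm's streaming types and are not verified against this PR:

# Hedged example -- field names assumed, not taken from the PR diff.
from litellm.types.utils import Delta, ModelResponseStream, StreamingChoices

chunk = ModelResponseStream(
    choices=[
        StreamingChoices(
            index=0,
            delta=Delta(content="Hello"),
            finish_reason=None,
        )
    ],
)
print(chunk.choices[0].delta.content)  # -> "Hello"
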
Krish Dholakia 2024-12-22 07:21:25 -08:00 committed by GitHub
parent 8b1ea40e7b
commit 3671829e39
39 changed files with 2147 additions and 2279 deletions


@@ -168,7 +168,9 @@ def completion(
         time.time()
     )  # for pricing this must remain right before calling api
-    prediction_url = replicate_config.get_complete_url(api_base, model)
+    prediction_url = replicate_config.get_complete_url(
+        api_base=api_base, model=model, optional_params=optional_params
+    )
     ## COMPLETION CALL
     httpx_client = _get_httpx_client(
@@ -235,7 +237,9 @@ async def async_completion(
     headers: dict,
 ) -> Union[ModelResponse, CustomStreamWrapper]:
-    prediction_url = replicate_config.get_complete_url(api_base=api_base, model=model)
+    prediction_url = replicate_config.get_complete_url(
+        api_base=api_base, model=model, optional_params=optional_params
+    )
     async_handler = get_async_httpx_client(
         llm_provider=litellm.LlmProviders.REPLICATE,
         params={"timeout": 600.0},