(code refactor) - Add BaseRerankConfig. Use BaseRerankConfig for cohere/rerank and azure_ai/rerank (#7319)

* add base rerank config
* working sync cohere rerank
* update rerank types
* update base rerank config
* remove old rerank
* add new cohere handler.py
* add cohere rerank transform
* add get_provider_rerank_config
* add rerank to base llm http handler
* add rerank utils
* add arerank to llm http handler.py
* add AzureAIRerankConfig
* updates rerank config
* update test rerank
* fix unused imports
* update get_provider_rerank_config
* test_basic_rerank_caching
* fix unused import
* test rerank
2025-04-26 03:04:13 +00:00 · 2024-12-19 17:03:34 -08:00 · 2024-12-19 17:03:34 -08:00 · 5f15b0aa20
commit 5f15b0aa20
parent a790d43116
19 changed files with 645 additions and 425 deletions
--- a/litellm/llms/custom_httpx/llm_http_handler.py
+++ b/litellm/llms/custom_httpx/llm_http_handler.py
@@ -9,12 +9,14 @@ import litellm.types
 import litellm.types.utils
 from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
 from litellm.llms.base_llm.embedding.transformation import BaseEmbeddingConfig
+from litellm.llms.base_llm.rerank.transformation import BaseRerankConfig
 from litellm.llms.custom_httpx.http_handler import (
    AsyncHTTPHandler,
    HTTPHandler,
    _get_httpx_client,
    get_async_httpx_client,
 )
+from litellm.types.rerank import OptionalRerankParams, RerankResponse
 from litellm.types.utils import EmbeddingResponse
 from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager

@@ -524,7 +526,138 @@ class BaseLLMHTTPHandler:
            request_data=request_data,
        )

-    def _handle_error(self, e: Exception, provider_config: BaseConfig):
+    def rerank(
+        self,
+        model: str,
+        custom_llm_provider: str,
+        logging_obj: LiteLLMLoggingObj,
+        optional_rerank_params: OptionalRerankParams,
+        timeout: Optional[Union[float, httpx.Timeout]],
+        model_response: RerankResponse,
+        _is_async: bool = False,
+        headers: Optional[dict] = None,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+    ) -> RerankResponse:
+        """
+        Build a rerank request via the provider's rerank config and send it.
+
+        The provider config (looked up from `model` and `custom_llm_provider`)
+        supplies the request headers, the complete URL, the request body and
+        the response transformation; this method only wires those together
+        around the HTTP POST.
+
+        Args:
+            model: Model name passed to every provider-config hook.
+            custom_llm_provider: Provider name; converted to `litellm.LlmProviders`.
+            logging_obj: Logging object; `pre_call` is invoked before the request.
+            optional_rerank_params: Rerank parameters; its "query" entry
+                (default "") is logged as the call input.
+            timeout: Timeout forwarded to the HTTP client's `post`.
+            model_response: Response object handed to `transform_rerank_response`.
+            _is_async: When True, delegate to `arerank` and return its coroutine
+                (un-awaited) instead of a `RerankResponse`.
+            headers: Caller-supplied headers; `None` means no extra headers.
+            api_key: API key passed to `validate_environment`, logging and the
+                response transformation.
+            api_base: Base URL passed to `get_complete_url`.
+            client: HTTP client to reuse. Ignored on the sync path unless it is
+                an `HTTPHandler`.
+
+        Returns:
+            The result of `provider_config.transform_rerank_response`, or the
+            `arerank` coroutine when `_is_async` is True.
+
+        Raises:
+            Any exception raised while posting the request is routed through
+            `self._handle_error`.
+        """
+        # Use a fresh dict per call: a `{}` default in the signature is created
+        # once and shared by every call, so headers could leak between requests
+        # if anything downstream mutates it.
+        if headers is None:
+            headers = {}
+
+        # get config from model, custom llm provider
+        provider_config = ProviderConfigManager.get_provider_rerank_config(
+            model=model, provider=litellm.LlmProviders(custom_llm_provider)
+        )
+        headers = provider_config.validate_environment(
+            api_key=api_key,
+            headers=headers,
+            model=model,
+        )
+
+        api_base = provider_config.get_complete_url(
+            api_base=api_base,
+            model=model,
+        )
+
+        data = provider_config.transform_rerank_request(
+            model=model,
+            optional_rerank_params=optional_rerank_params,
+            headers=headers,
+        )
+
+        ## LOGGING
+        logging_obj.pre_call(
+            input=optional_rerank_params.get("query", ""),
+            api_key=api_key,
+            additional_args={
+                "complete_input_dict": data,
+                "api_base": api_base,
+                "headers": headers,
+            },
+        )
+
+        if _is_async is True:
+            # Returns a coroutine; the caller is responsible for awaiting it.
+            return self.arerank(  # type: ignore
+                model=model,
+                request_data=data,
+                custom_llm_provider=custom_llm_provider,
+                provider_config=provider_config,
+                logging_obj=logging_obj,
+                model_response=model_response,
+                api_base=api_base,
+                headers=headers,
+                api_key=api_key,
+                timeout=timeout,
+                client=client,
+            )
+
+        # An async client cannot be used on the sync path; fall back to a sync one.
+        if client is None or not isinstance(client, HTTPHandler):
+            sync_httpx_client = _get_httpx_client()
+        else:
+            sync_httpx_client = client
+
+        try:
+            response = sync_httpx_client.post(
+                url=api_base,
+                headers=headers,
+                data=json.dumps(data),
+                timeout=timeout,
+            )
+        except Exception as e:
+            raise self._handle_error(
+                e=e,
+                provider_config=provider_config,
+            )
+
+        return provider_config.transform_rerank_response(
+            model=model,
+            raw_response=response,
+            model_response=model_response,
+            logging_obj=logging_obj,
+            api_key=api_key,
+            request_data=data,
+        )
+
+    async def arerank(
+        self,
+        model: str,
+        request_data: dict,
+        custom_llm_provider: str,
+        provider_config: BaseRerankConfig,
+        logging_obj: LiteLLMLoggingObj,
+        model_response: RerankResponse,
+        api_base: str,
+        headers: dict,
+        api_key: Optional[str] = None,
+        timeout: Optional[Union[float, httpx.Timeout]] = None,
+        client: Optional[Union[HTTPHandler, AsyncHTTPHandler]] = None,
+    ) -> RerankResponse:
+        """
+        Async path of `rerank`: POST an already-built rerank request.
+
+        `request_data`, `api_base` and `headers` arrive fully prepared; this
+        method JSON-encodes the body, awaits the POST and passes the raw
+        response to `provider_config.transform_rerank_response`.
+
+        A supplied `client` is reused only when it is an `AsyncHTTPHandler`;
+        otherwise one is obtained from `get_async_httpx_client` for the
+        provider. Any exception raised while sending the request is routed
+        through `self._handle_error`.
+        """
+        # isinstance() is False for None, so this also covers "no client given".
+        if isinstance(client, AsyncHTTPHandler):
+            http_client = client
+        else:
+            http_client = get_async_httpx_client(
+                llm_provider=litellm.LlmProviders(custom_llm_provider)
+            )
+
+        try:
+            raw_response = await http_client.post(
+                url=api_base,
+                headers=headers,
+                data=json.dumps(request_data),
+                timeout=timeout,
+            )
+        except Exception as err:
+            raise self._handle_error(e=err, provider_config=provider_config)
+
+        return provider_config.transform_rerank_response(
+            model=model,
+            raw_response=raw_response,
+            model_response=model_response,
+            logging_obj=logging_obj,
+            api_key=api_key,
+            request_data=request_data,
+        )
+
+    def _handle_error(
+        self, e: Exception, provider_config: Union[BaseConfig, BaseRerankConfig]
+    ):
        status_code = getattr(e, "status_code", 500)
        error_headers = getattr(e, "headers", None)
        error_text = getattr(e, "text", str(e))