Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-17 21:47:33 +00:00)
feat(api): introduce /rerank
# What does this PR do?
## Test Plan
parent e69acbafbf
commit 306625025a
8 changed files with 336 additions and 1 deletion
@@ -33,6 +33,9 @@ from llama_stack.apis.inference import (
     InterleavedContent,
     LogProbConfig,
     Message,
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
+    RerankResponse,
     ResponseFormat,
     SamplingParams,
     StopReason,
@@ -442,6 +445,15 @@ class MetaReferenceInferenceImpl(
         results = await self._nonstream_chat_completion(request_batch)
         return BatchChatCompletionResponse(batch=results)
 
+    async def rerank(
+        self,
+        model: str,
+        query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam,
+        items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam],
+        max_num_results: int | None = None,
+    ) -> RerankResponse:
+        raise NotImplementedError("Reranking is not supported for Meta Reference")
+
     async def _nonstream_chat_completion(
         self, request_batch: list[ChatCompletionRequest]
     ) -> list[ChatCompletionResponse]:
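Meta Reference only stubs the new method out, so for orientation here is a minimal, self-contained sketch of the contract a concrete provider would be expected to satisfy: score each candidate item against the query, order by descending relevance, and honor `max_num_results`. The lexical-overlap scorer and the plain `(index, score)` tuples below are illustrative assumptions, not llama-stack's schema; the real method returns a `RerankResponse` from `llama_stack.apis.inference`.

```python
# A toy sketch of rerank semantics. The scoring function and the
# (original_index, relevance_score) result shape are illustrative
# stand-ins for whatever RerankResponse actually carries.

def toy_rerank(
    query: str,
    items: list[str],
    max_num_results: int | None = None,
) -> list[tuple[int, float]]:
    """Return (original_index, relevance_score) pairs, best match first."""
    query_terms = set(query.lower().split())

    def score(item: str) -> float:
        # Crude lexical overlap; a real provider would call a rerank model.
        item_terms = set(item.lower().split())
        return len(query_terms & item_terms) / max(len(query_terms), 1)

    ranked = sorted(
        ((i, score(item)) for i, item in enumerate(items)),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return ranked if max_num_results is None else ranked[:max_num_results]


if __name__ == "__main__":
    docs = ["llama stack api", "reranking with cross encoders", "unrelated text"]
    print(toy_rerank("rerank api", docs, max_num_results=2))
```

Running the module prints `[(0, 0.5), (1, 0.0)]`: item 0 shares the term "api" with the query, and only the top two of the three candidates survive `max_num_results=2`.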