mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-18 00:19:47 +00:00
feat(api): introduce /rerank
# What does this PR do? ## Test Plan
This commit is contained in:
parent
e69acbafbf
commit
306625025a
8 changed files with 336 additions and 1 deletions
|
|
@ -473,6 +473,28 @@ class EmbeddingsResponse(BaseModel):
|
|||
embeddings: list[list[float]]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RerankData(BaseModel):
|
||||
"""A single rerank result from a reranking response.
|
||||
|
||||
:param index: The original index of the document in the input list
|
||||
:param relevance_score: The relevance score from the model output. Values are inverted when applicable so that higher scores indicate greater relevance.
|
||||
"""
|
||||
|
||||
index: int
|
||||
relevance_score: float
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RerankResponse(BaseModel):
|
||||
"""Response from a reranking request.
|
||||
|
||||
:param data: List of rerank result objects, sorted by relevance score (descending)
|
||||
"""
|
||||
|
||||
data: list[RerankData]
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class OpenAIChatCompletionContentPartTextParam(BaseModel):
|
||||
"""Text content part for OpenAI-compatible chat completion messages.
|
||||
|
|
@ -1131,6 +1153,24 @@ class InferenceProvider(Protocol):
|
|||
"""
|
||||
...
|
||||
|
||||
@webmethod(route="/inference/rerank", method="POST", experimental=True)
|
||||
async def rerank(
|
||||
self,
|
||||
model: str,
|
||||
query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam,
|
||||
items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam],
|
||||
max_num_results: int | None = None,
|
||||
) -> RerankResponse:
|
||||
"""Rerank a list of documents based on their relevance to a query.
|
||||
|
||||
:param model: The identifier of the reranking model to use.
|
||||
:param query: The search query to rank items against. Can be a string, text content part, or image content part. The input must not exceed the model's max input token length.
|
||||
:param items: List of items to rerank. Each item can be a string, text content part, or image content part. Each input must not exceed the model's max input token length.
|
||||
:param max_num_results: (Optional) Maximum number of results to return. Default: returns all.
|
||||
:returns: RerankResponse with indices sorted by relevance score (descending).
|
||||
"""
|
||||
raise NotImplementedError("Reranking is not implemented")
|
||||
|
||||
@webmethod(route="/openai/v1/completions", method="POST")
|
||||
async def openai_completion(
|
||||
self,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue