mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 04:04:14 +00:00
feat(api): introduce /rerank (#2940)
Some checks failed
Integration Tests (Replay) / Integration Tests (, , , client=, vision=) (push) Failing after 1s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Vector IO Integration Tests / test-matrix (push) Failing after 6s
Pre-commit / pre-commit (push) Failing after 7s
Test Llama Stack Build / build-single-provider (push) Failing after 6s
Python Package Build Test / build (3.13) (push) Failing after 8s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 9s
Python Package Build Test / build (3.12) (push) Failing after 9s
Unit Tests / unit-tests (3.12) (push) Failing after 8s
Test External API and Providers / test-external (venv) (push) Failing after 10s
Update ReadTheDocs / update-readthedocs (push) Failing after 11s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 14s
Unit Tests / unit-tests (3.13) (push) Failing after 12s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 19s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 19s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 21s
Test Llama Stack Build / generate-matrix (push) Failing after 21s
Test Llama Stack Build / build (push) Has been skipped
UI Tests / ui-tests (22) (push) Failing after 21s
Some checks failed
Integration Tests (Replay) / Integration Tests (, , , client=, vision=) (push) Failing after 1s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Vector IO Integration Tests / test-matrix (push) Failing after 6s
Pre-commit / pre-commit (push) Failing after 7s
Test Llama Stack Build / build-single-provider (push) Failing after 6s
Python Package Build Test / build (3.13) (push) Failing after 8s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 9s
Python Package Build Test / build (3.12) (push) Failing after 9s
Unit Tests / unit-tests (3.12) (push) Failing after 8s
Test External API and Providers / test-external (venv) (push) Failing after 10s
Update ReadTheDocs / update-readthedocs (push) Failing after 11s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 14s
Unit Tests / unit-tests (3.13) (push) Failing after 12s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 19s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 19s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 21s
Test Llama Stack Build / generate-matrix (push) Failing after 21s
Test Llama Stack Build / build (push) Has been skipped
UI Tests / ui-tests (22) (push) Failing after 21s
# What does this PR do?

Context: https://github.com/meta-llama/llama-stack/issues/2937

The API design is inspired by existing offerings, but is not exactly the same:

* `top_n` is the parameter that controls the number of results, instead of `top_k`, since `n` is the conventional name for a count.
* A `truncation` bool is used instead of `max_token_per_doc`, since we should handle truncation automatically depending on model capability, rather than have the user set the context length manually.
* The response uses a `data` field, to be consistent with other OpenAI APIs (though they don't have a rerank API). It is also one less name to learn in the API.

## Test Plan

Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>
This commit is contained in:
parent
d78ac434bd
commit
c5e2e269e2
8 changed files with 336 additions and 1 deletions
132
docs/_static/llama-stack-spec.html
vendored
132
docs/_static/llama-stack-spec.html
vendored
|
@ -4605,6 +4605,49 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/inference/rerank": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "RerankResponse with indices sorted by relevance score (descending).",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/RerankResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"description": "Rerank a list of documents based on their relevance to a query.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/RerankRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume": {
|
||||
"post": {
|
||||
"responses": {
|
||||
|
@ -16587,6 +16630,95 @@
|
|||
],
|
||||
"title": "RegisterVectorDbRequest"
|
||||
},
|
||||
"RerankRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The identifier of the reranking model to use."
|
||||
},
|
||||
"query": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
|
||||
}
|
||||
],
|
||||
"description": "The search query to rank items against. Can be a string, text content part, or image content part. The input must not exceed the model's max input token length."
|
||||
},
|
||||
"items": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "List of items to rerank. Each item can be a string, text content part, or image content part. Each input must not exceed the model's max input token length."
|
||||
},
|
||||
"max_num_results": {
|
||||
"type": "integer",
|
||||
"description": "(Optional) Maximum number of results to return. Default: returns all."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"model",
|
||||
"query",
|
||||
"items"
|
||||
],
|
||||
"title": "RerankRequest"
|
||||
},
|
||||
"RerankData": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "The original index of the document in the input list"
|
||||
},
|
||||
"relevance_score": {
|
||||
"type": "number",
|
||||
"description": "The relevance score from the model output. Values are inverted when applicable so that higher scores indicate greater relevance."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"index",
|
||||
"relevance_score"
|
||||
],
|
||||
"title": "RerankData",
|
||||
"description": "A single rerank result from a reranking response."
|
||||
},
|
||||
"RerankResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/RerankData"
|
||||
},
|
||||
"description": "List of rerank result objects, sorted by relevance score (descending)"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"data"
|
||||
],
|
||||
"title": "RerankResponse",
|
||||
"description": "Response from a reranking request."
|
||||
},
|
||||
"ResumeAgentTurnRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue