From bab9d7aaea9ab3fd7d68cabed6e22345f1d7f739 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Wed, 3 Sep 2025 17:34:05 -0700 Subject: [PATCH 01/18] Add rerank API for NVIDIA Inference Provider --- docs/docs/providers/inference/index.mdx | 2 + docs/static/llama-stack-spec.html | 4992 +++++++++++++++++ docs/static/llama-stack-spec.yaml | 3724 ++++++++++++ example.py | 257 + llama_stack/apis/inference/inference.py | 2 +- llama_stack/apis/models/models.py | 2 + llama_stack/core/routers/inference.py | 24 + .../remote/inference/nvidia/models.py | 131 + .../remote/inference/nvidia/nvidia.py | 80 + 9 files changed, 9213 insertions(+), 1 deletion(-) create mode 100644 example.py create mode 100644 llama_stack/providers/remote/inference/nvidia/models.py diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index ebbaf1be1..e96169cad 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -4,6 +4,7 @@ description: "Llama Stack Inference API for generating completions, chat complet -This API provides the raw interface to the underlying models. Two kinds of models are supported: +This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. -- Embedding models: these models generate embeddings to be used for semantic search." +- Embedding models: these models generate embeddings to be used for semantic search. +- Rerank models: these models rerank documents by relevance to a query." sidebar_label: Inference title: Inference --- @@ -17,5 +18,6 @@ Llama Stack Inference API for generating completions, chat completions, and embe -This API provides the raw interface to the underlying models. Two kinds of models are supported: +This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. +- Rerank models: these models rerank documents by relevance to a query. This section contains documentation for all available providers for the **inference** API. diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 96e97035f..b260f01a7 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -4819,6 +4819,2834 @@ "title": "OpenAIUserMessageParam", "description": "A message from the user in an OpenAI-compatible chat completion request."
}, + "OpenAICompletionWithInputMessages": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The ID of the chat completion" + }, + "choices": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChoice" + }, + "description": "List of choices" + }, + "object": { + "type": "string", + "const": "chat.completion", + "default": "chat.completion", + "description": "The object type, which will be \"chat.completion\"" + }, + "created": { + "type": "integer", + "description": "The Unix timestamp in seconds when the chat completion was created" + }, + "model": { + "type": "string", + "description": "The model that was used to generate the chat completion" + }, + "input_messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIMessageParam" + } + } + }, + "additionalProperties": false, + "required": [ + "id", + "choices", + "object", + "created", + "model", + "input_messages" + ], + "title": "OpenAICompletionWithInputMessages" + }, + "DataSource": { + "oneOf": [ + { + "$ref": "#/components/schemas/URIDataSource" + }, + { + "$ref": "#/components/schemas/RowsDataSource" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "uri": "#/components/schemas/URIDataSource", + "rows": "#/components/schemas/RowsDataSource" + } + } + }, + "Dataset": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "dataset", + "default": "dataset", + "description": "Type of resource, always 'dataset' for datasets" + }, + "purpose": { + "type": "string", + "enum": [ + "post-training/messages", + "eval/question-answer", + "eval/messages-answer" + ], + "description": "Purpose of the dataset indicating its intended use" + }, + "source": { + "$ref": "#/components/schemas/DataSource", + "description": "Data source configuration for the dataset" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Additional metadata for the dataset" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type", + "purpose", + "source", + "metadata" + ], + "title": "Dataset", + "description": "Dataset resource for storing and accessing training or evaluation data." + }, + "RowsDataSource": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "rows", + "default": "rows" + }, + "rows": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "The dataset is stored in rows. E.g. - [ {\"messages\": [{\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}]} ]" + } + }, + "additionalProperties": false, + "required": [ + "type", + "rows" + ], + "title": "RowsDataSource", + "description": "A dataset stored in rows." 
+ }, + "URIDataSource": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "uri", + "default": "uri" + }, + "uri": { + "type": "string", + "description": "The dataset can be obtained from a URI. E.g. - \"https://mywebsite.com/mydata.jsonl\" - \"lsfs://mydata.jsonl\" - \"data:csv;base64,{base64_content}\"" + } + }, + "additionalProperties": false, + "required": [ + "type", + "uri" + ], + "title": "URIDataSource", + "description": "A dataset that can be obtained from a URI." + }, + "Model": { + "type": "object", + "properties": { + "identifier": { + "type": "string", + "description": "Unique identifier for this resource in llama stack" + }, + "provider_resource_id": { + "type": "string", + "description": "Unique identifier for this resource in the provider" + }, + "provider_id": { + "type": "string", + "description": "ID of the provider that owns this resource" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "model", + "default": "model", + "description": "The resource type, always 'model' for model resources" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Any additional metadata for this model" + }, + "model_type": { + "$ref": "#/components/schemas/ModelType", + "default": "llm", + "description": "The type of model (LLM or embedding model)" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type", + "metadata", + "model_type" + ], + "title": "Model", + "description": "A model resource representing an AI model registered in Llama Stack." + }, + "ModelType": { + "type": "string", + "enum": [ + "llm", + "embedding", + "rerank" + ], + "title": "ModelType", + "description": "Enumeration of supported model types in Llama Stack." + }, + "AgentTurnInputType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "agent_turn_input", + "default": "agent_turn_input", + "description": "Discriminator type. Always \"agent_turn_input\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "AgentTurnInputType", + "description": "Parameter type for agent turn input." + }, + "ArrayType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "array", + "default": "array", + "description": "Discriminator type. Always \"array\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "ArrayType", + "description": "Parameter type for array values." + }, + "BooleanType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "boolean", + "default": "boolean", + "description": "Discriminator type. Always \"boolean\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "BooleanType", + "description": "Parameter type for boolean values." + }, + "ChatCompletionInputType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "chat_completion_input", + "default": "chat_completion_input", + "description": "Discriminator type. 
Always \"chat_completion_input\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "ChatCompletionInputType", + "description": "Parameter type for chat completion input." + }, + "CompletionInputType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "completion_input", + "default": "completion_input", + "description": "Discriminator type. Always \"completion_input\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "CompletionInputType", + "description": "Parameter type for completion input." + }, + "JsonType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "json", + "default": "json", + "description": "Discriminator type. Always \"json\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "JsonType", + "description": "Parameter type for JSON values." + }, + "NumberType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "number", + "default": "number", + "description": "Discriminator type. Always \"number\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "NumberType", + "description": "Parameter type for numeric values." + }, + "ObjectType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "object", + "default": "object", + "description": "Discriminator type. Always \"object\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "ObjectType", + "description": "Parameter type for object values." + }, + "ParamType": { + "oneOf": [ + { + "$ref": "#/components/schemas/StringType" + }, + { + "$ref": "#/components/schemas/NumberType" + }, + { + "$ref": "#/components/schemas/BooleanType" + }, + { + "$ref": "#/components/schemas/ArrayType" + }, + { + "$ref": "#/components/schemas/ObjectType" + }, + { + "$ref": "#/components/schemas/JsonType" + }, + { + "$ref": "#/components/schemas/UnionType" + }, + { + "$ref": "#/components/schemas/ChatCompletionInputType" + }, + { + "$ref": "#/components/schemas/CompletionInputType" + }, + { + "$ref": "#/components/schemas/AgentTurnInputType" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "string": "#/components/schemas/StringType", + "number": "#/components/schemas/NumberType", + "boolean": "#/components/schemas/BooleanType", + "array": "#/components/schemas/ArrayType", + "object": "#/components/schemas/ObjectType", + "json": "#/components/schemas/JsonType", + "union": "#/components/schemas/UnionType", + "chat_completion_input": "#/components/schemas/ChatCompletionInputType", + "completion_input": "#/components/schemas/CompletionInputType", + "agent_turn_input": "#/components/schemas/AgentTurnInputType" + } + } + }, + "ScoringFn": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "scoring_function", + "default": "scoring_function", + "description": "The resource type, always scoring_function" + }, + "description": { + "type": "string" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + 
"type": "array" + }, + { + "type": "object" + } + ] + } + }, + "return_type": { + "$ref": "#/components/schemas/ParamType" + }, + "params": { + "$ref": "#/components/schemas/ScoringFnParams" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type", + "metadata", + "return_type" + ], + "title": "ScoringFn", + "description": "A scoring function resource for evaluating model outputs." + }, + "StringType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "string", + "default": "string", + "description": "Discriminator type. Always \"string\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "StringType", + "description": "Parameter type for string values." + }, + "UnionType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "union", + "default": "union", + "description": "Discriminator type. Always \"union\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "UnionType", + "description": "Parameter type for union values." + }, + "Shield": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "shield", + "default": "shield", + "description": "The resource type, always shield" + }, + "params": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Configuration parameters for the shield" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type" + ], + "title": "Shield", + "description": "A safety shield resource that can be used to check content." + }, + "Span": { + "type": "object", + "properties": { + "span_id": { + "type": "string", + "description": "Unique identifier for the span" + }, + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace this span belongs to" + }, + "parent_span_id": { + "type": "string", + "description": "(Optional) Unique identifier for the parent span, if this is a child span" + }, + "name": { + "type": "string", + "description": "Human-readable name describing the operation this span represents" + }, + "start_time": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the operation began" + }, + "end_time": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the operation finished, if completed" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Key-value pairs containing additional metadata about the span" + } + }, + "additionalProperties": false, + "required": [ + "span_id", + "trace_id", + "name", + "start_time" + ], + "title": "Span", + "description": "A span representing a single operation within a trace." 
+ }, + "GetSpanTreeRequest": { + "type": "object", + "properties": { + "attributes_to_return": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The attributes to return in the tree." + }, + "max_depth": { + "type": "integer", + "description": "The maximum depth of the tree." + } + }, + "additionalProperties": false, + "title": "GetSpanTreeRequest" + }, + "SpanStatus": { + "type": "string", + "enum": [ + "ok", + "error" + ], + "title": "SpanStatus", + "description": "The status of a span indicating whether it completed successfully or with an error." + }, + "SpanWithStatus": { + "type": "object", + "properties": { + "span_id": { + "type": "string", + "description": "Unique identifier for the span" + }, + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace this span belongs to" + }, + "parent_span_id": { + "type": "string", + "description": "(Optional) Unique identifier for the parent span, if this is a child span" + }, + "name": { + "type": "string", + "description": "Human-readable name describing the operation this span represents" + }, + "start_time": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the operation began" + }, + "end_time": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the operation finished, if completed" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Key-value pairs containing additional metadata about the span" + }, + "status": { + "$ref": "#/components/schemas/SpanStatus", + "description": "(Optional) The current status of the span" + } + }, + "additionalProperties": false, + "required": [ + "span_id", + "trace_id", + "name", + "start_time" + ], + "title": "SpanWithStatus", + "description": "A span that includes status information." + }, + "QuerySpanTreeResponse": { + "type": "object", + "properties": { + "data": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/SpanWithStatus" + }, + "description": "Dictionary mapping span IDs to spans with status information" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "QuerySpanTreeResponse", + "description": "Response containing a tree structure of spans." 
+ }, + "Tool": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "tool", + "default": "tool", + "description": "Type of resource, always 'tool'" + }, + "toolgroup_id": { + "type": "string", + "description": "ID of the tool group this tool belongs to" + }, + "description": { + "type": "string", + "description": "Human-readable description of what the tool does" + }, + "parameters": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolParameter" + }, + "description": "List of parameters this tool accepts" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Additional metadata about the tool" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type", + "toolgroup_id", + "description", + "parameters" + ], + "title": "Tool", + "description": "A tool that can be invoked by agents." + }, + "ToolGroup": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "tool_group", + "default": "tool_group", + "description": "Type of resource, always 'tool_group'" + }, + "mcp_endpoint": { + "$ref": "#/components/schemas/URL", + "description": "(Optional) Model Context Protocol endpoint for remote tools" + }, + "args": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Additional arguments for the tool group" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type" + ], + "title": "ToolGroup", + "description": "A group of related tools managed together." + }, + "Trace": { + "type": "object", + "properties": { + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace" + }, + "root_span_id": { + "type": "string", + "description": "Unique identifier for the root span that started this trace" + }, + "start_time": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the trace began" + }, + "end_time": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the trace finished, if completed" + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "root_span_id", + "start_time" + ], + "title": "Trace", + "description": "A trace representing the complete execution path of a request across multiple operations." 
+ }, + "Checkpoint": { + "type": "object", + "properties": { + "identifier": { + "type": "string", + "description": "Unique identifier for the checkpoint" + }, + "created_at": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the checkpoint was created" + }, + "epoch": { + "type": "integer", + "description": "Training epoch when the checkpoint was saved" + }, + "post_training_job_id": { + "type": "string", + "description": "Identifier of the training job that created this checkpoint" + }, + "path": { + "type": "string", + "description": "File system path where the checkpoint is stored" + }, + "training_metrics": { + "$ref": "#/components/schemas/PostTrainingMetric", + "description": "(Optional) Training metrics associated with this checkpoint" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "created_at", + "epoch", + "post_training_job_id", + "path" + ], + "title": "Checkpoint", + "description": "Checkpoint created during training runs." + }, + "PostTrainingJobArtifactsResponse": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string", + "description": "Unique identifier for the training job" + }, + "checkpoints": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Checkpoint" + }, + "description": "List of model checkpoints created during training" + } + }, + "additionalProperties": false, + "required": [ + "job_uuid", + "checkpoints" + ], + "title": "PostTrainingJobArtifactsResponse", + "description": "Artifacts of a finetuning job." + }, + "PostTrainingMetric": { + "type": "object", + "properties": { + "epoch": { + "type": "integer", + "description": "Training epoch number" + }, + "train_loss": { + "type": "number", + "description": "Loss value on the training dataset" + }, + "validation_loss": { + "type": "number", + "description": "Loss value on the validation dataset" + }, + "perplexity": { + "type": "number", + "description": "Perplexity metric indicating model confidence" + } + }, + "additionalProperties": false, + "required": [ + "epoch", + "train_loss", + "validation_loss", + "perplexity" + ], + "title": "PostTrainingMetric", + "description": "Training metrics captured during post-training jobs." 
+ }, + "PostTrainingJobStatusResponse": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string", + "description": "Unique identifier for the training job" + }, + "status": { + "type": "string", + "enum": [ + "completed", + "in_progress", + "failed", + "scheduled", + "cancelled" + ], + "description": "Current status of the training job" + }, + "scheduled_at": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the job was scheduled" + }, + "started_at": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the job execution began" + }, + "completed_at": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the job finished, if completed" + }, + "resources_allocated": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Information about computational resources allocated to the job" + }, + "checkpoints": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Checkpoint" + }, + "description": "List of model checkpoints created during training" + } + }, + "additionalProperties": false, + "required": [ + "job_uuid", + "status", + "checkpoints" + ], + "title": "PostTrainingJobStatusResponse", + "description": "Status of a finetuning job." + }, + "ListPostTrainingJobsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "job_uuid" + ], + "title": "PostTrainingJob" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListPostTrainingJobsResponse" + }, + "VectorDB": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "vector_db", + "default": "vector_db", + "description": "Type of resource, always 'vector_db' for vector databases" + }, + "embedding_model": { + "type": "string", + "description": "Name of the embedding model to use for vector generation" + }, + "embedding_dimension": { + "type": "integer", + "description": "Dimension of the embedding vectors" + }, + "vector_db_name": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type", + "embedding_model", + "embedding_dimension" + ], + "title": "VectorDB", + "description": "Vector database resource for storing and querying vector embeddings." + }, + "HealthInfo": { + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": [ + "OK", + "Error", + "Not Implemented" + ], + "description": "Current health status of the service" + } + }, + "additionalProperties": false, + "required": [ + "status" + ], + "title": "HealthInfo", + "description": "Health status information for the service." + }, + "RAGDocument": { + "type": "object", + "properties": { + "document_id": { + "type": "string", + "description": "The unique identifier for the document." 
+ }, + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/InterleavedContentItem" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/InterleavedContentItem" + } + }, + { + "$ref": "#/components/schemas/URL" + } + ], + "description": "The content of the document." + }, + "mime_type": { + "type": "string", + "description": "The MIME type of the document." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Additional metadata for the document." + } + }, + "additionalProperties": false, + "required": [ + "document_id", + "content", + "metadata" + ], + "title": "RAGDocument", + "description": "A document to be used for document ingestion in the RAG Tool." + }, + "InsertRequest": { + "type": "object", + "properties": { + "documents": { + "type": "array", + "items": { + "$ref": "#/components/schemas/RAGDocument" + }, + "description": "List of documents to index in the RAG system" + }, + "vector_db_id": { + "type": "string", + "description": "ID of the vector database to store the document embeddings" + }, + "chunk_size_in_tokens": { + "type": "integer", + "description": "(Optional) Size in tokens for document chunking during indexing" + } + }, + "additionalProperties": false, + "required": [ + "documents", + "vector_db_id", + "chunk_size_in_tokens" + ], + "title": "InsertRequest" + }, + "Chunk": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "The content of the chunk, which can be interleaved text, images, or other types." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Metadata associated with the chunk that will be used in the model context during inference." + }, + "embedding": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Optional embedding for the chunk. If not provided, it will be computed later." + }, + "stored_chunk_id": { + "type": "string", + "description": "The chunk ID that is stored in the vector database. Used for backend functionality." + }, + "chunk_metadata": { + "$ref": "#/components/schemas/ChunkMetadata", + "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality." + } + }, + "additionalProperties": false, + "required": [ + "content", + "metadata" + ], + "title": "Chunk", + "description": "A chunk of content that can be inserted into a vector database." + }, + "ChunkMetadata": { + "type": "object", + "properties": { + "chunk_id": { + "type": "string", + "description": "The ID of the chunk. If not set, it will be generated based on the document ID and content." + }, + "document_id": { + "type": "string", + "description": "The ID of the document this chunk belongs to." + }, + "source": { + "type": "string", + "description": "The source of the content, such as a URL, file path, or other identifier." + }, + "created_timestamp": { + "type": "integer", + "description": "An optional timestamp indicating when the chunk was created." 
+ }, + "updated_timestamp": { + "type": "integer", + "description": "An optional timestamp indicating when the chunk was last updated." + }, + "chunk_window": { + "type": "string", + "description": "The window of the chunk, which can be used to group related chunks together." + }, + "chunk_tokenizer": { + "type": "string", + "description": "The tokenizer used to create the chunk. Default is Tiktoken." + }, + "chunk_embedding_model": { + "type": "string", + "description": "The embedding model used to create the chunk's embedding." + }, + "chunk_embedding_dimension": { + "type": "integer", + "description": "The dimension of the embedding vector for the chunk." + }, + "content_token_count": { + "type": "integer", + "description": "The number of tokens in the content of the chunk." + }, + "metadata_token_count": { + "type": "integer", + "description": "The number of tokens in the metadata of the chunk." + } + }, + "additionalProperties": false, + "title": "ChunkMetadata", + "description": "`ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata` is set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not expected to change after. Use `Chunk.metadata` for metadata that will be used in the context during inference." + }, + "InsertChunksRequest": { + "type": "object", + "properties": { + "vector_db_id": { + "type": "string", + "description": "The identifier of the vector database to insert the chunks into." + }, + "chunks": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Chunk" + }, + "description": "The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types. `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional. If `metadata` is provided, you configure how Llama Stack formats the chunk during generation. If `embedding` is not provided, it will be computed later." + }, + "ttl_seconds": { + "type": "integer", + "description": "The time to live of the chunks." + } + }, + "additionalProperties": false, + "required": [ + "vector_db_id", + "chunks" + ], + "title": "InsertChunksRequest" + }, + "ProviderInfo": { + "type": "object", + "properties": { + "api": { + "type": "string", + "description": "The API name this provider implements" + }, + "provider_id": { + "type": "string", + "description": "Unique identifier for the provider" + }, + "provider_type": { + "type": "string", + "description": "The type of provider implementation" + }, + "config": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Configuration parameters for the provider" + }, + "health": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Current health status of the provider" + } + }, + "additionalProperties": false, + "required": [ + "api", + "provider_id", + "provider_type", + "config", + "health" + ], + "title": "ProviderInfo", + "description": "Information about a registered provider including its configuration and health status." 
+ }, + "InvokeToolRequest": { + "type": "object", + "properties": { + "tool_name": { + "type": "string", + "description": "The name of the tool to invoke." + }, + "kwargs": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "A dictionary of arguments to pass to the tool." + } + }, + "additionalProperties": false, + "required": [ + "tool_name", + "kwargs" + ], + "title": "InvokeToolRequest" + }, + "ToolInvocationResult": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "(Optional) The output content from the tool execution" + }, + "error_message": { + "type": "string", + "description": "(Optional) Error message if the tool execution failed" + }, + "error_code": { + "type": "integer", + "description": "(Optional) Numeric error code if the tool execution failed" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Additional metadata about the tool execution" + } + }, + "additionalProperties": false, + "title": "ToolInvocationResult", + "description": "Result of a tool invocation." + }, + "PaginatedResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "The list of items for the current page" + }, + "has_more": { + "type": "boolean", + "description": "Whether there are more items available after this set" + }, + "url": { + "type": "string", + "description": "The URL for accessing this list" + } + }, + "additionalProperties": false, + "required": [ + "data", + "has_more" + ], + "title": "PaginatedResponse", + "description": "A generic paginated response that follows a simple format." + }, + "Job": { + "type": "object", + "properties": { + "job_id": { + "type": "string", + "description": "Unique identifier for the job" + }, + "status": { + "type": "string", + "enum": [ + "completed", + "in_progress", + "failed", + "scheduled", + "cancelled" + ], + "description": "Current execution status of the job" + } + }, + "additionalProperties": false, + "required": [ + "job_id", + "status" + ], + "title": "Job", + "description": "A job execution instance with status tracking." + }, + "ListBenchmarksResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Benchmark" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListBenchmarksResponse" + }, + "Order": { + "type": "string", + "enum": [ + "asc", + "desc" + ], + "title": "Order", + "description": "Sort order for paginated responses." 
+ }, + "ListOpenAIChatCompletionResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The ID of the chat completion" + }, + "choices": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChoice" + }, + "description": "List of choices" + }, + "object": { + "type": "string", + "const": "chat.completion", + "default": "chat.completion", + "description": "The object type, which will be \"chat.completion\"" + }, + "created": { + "type": "integer", + "description": "The Unix timestamp in seconds when the chat completion was created" + }, + "model": { + "type": "string", + "description": "The model that was used to generate the chat completion" + }, + "input_messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIMessageParam" + } + } + }, + "additionalProperties": false, + "required": [ + "id", + "choices", + "object", + "created", + "model", + "input_messages" + ], + "title": "OpenAICompletionWithInputMessages" + }, + "description": "List of chat completion objects with their input messages" + }, + "has_more": { + "type": "boolean", + "description": "Whether there are more completions available beyond this list" + }, + "first_id": { + "type": "string", + "description": "ID of the first completion in this list" + }, + "last_id": { + "type": "string", + "description": "ID of the last completion in this list" + }, + "object": { + "type": "string", + "const": "list", + "default": "list", + "description": "Must be \"list\" to identify this as a list response" + } + }, + "additionalProperties": false, + "required": [ + "data", + "has_more", + "first_id", + "last_id", + "object" + ], + "title": "ListOpenAIChatCompletionResponse", + "description": "Response from listing OpenAI-compatible chat completions." + }, + "ListDatasetsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Dataset" + }, + "description": "List of datasets" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListDatasetsResponse", + "description": "Response from listing datasets." + }, + "ListModelsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Model" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListModelsResponse" + }, + "ListOpenAIResponseInputItem": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInput" + }, + "description": "List of input items" + }, + "object": { + "type": "string", + "const": "list", + "default": "list", + "description": "Object type identifier, always \"list\"" + } + }, + "additionalProperties": false, + "required": [ + "data", + "object" + ], + "title": "ListOpenAIResponseInputItem", + "description": "List container for OpenAI response input items." 
+ }, + "ListOpenAIResponseObject": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseObjectWithInput" + }, + "description": "List of response objects with their input context" + }, + "has_more": { + "type": "boolean", + "description": "Whether there are more results available beyond this page" + }, + "first_id": { + "type": "string", + "description": "Identifier of the first item in this page" + }, + "last_id": { + "type": "string", + "description": "Identifier of the last item in this page" + }, + "object": { + "type": "string", + "const": "list", + "default": "list", + "description": "Object type identifier, always \"list\"" + } + }, + "additionalProperties": false, + "required": [ + "data", + "has_more", + "first_id", + "last_id", + "object" + ], + "title": "ListOpenAIResponseObject", + "description": "Paginated list of OpenAI response objects with navigation metadata." + }, + "OpenAIResponseObjectWithInput": { + "type": "object", + "properties": { + "created_at": { + "type": "integer", + "description": "Unix timestamp when the response was created" + }, + "error": { + "$ref": "#/components/schemas/OpenAIResponseError", + "description": "(Optional) Error details if the response generation failed" + }, + "id": { + "type": "string", + "description": "Unique identifier for this response" + }, + "model": { + "type": "string", + "description": "Model identifier used for generation" + }, + "object": { + "type": "string", + "const": "response", + "default": "response", + "description": "Object type identifier, always \"response\"" + }, + "output": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseOutput" + }, + "description": "List of generated output items (messages, tool calls, etc.)" + }, + "parallel_tool_calls": { + "type": "boolean", + "default": false, + "description": "Whether tool calls can be executed in parallel" + }, + "previous_response_id": { + "type": "string", + "description": "(Optional) ID of the previous response in a conversation" + }, + "status": { + "type": "string", + "description": "Current status of the response generation" + }, + "temperature": { + "type": "number", + "description": "(Optional) Sampling temperature used for generation" + }, + "text": { + "$ref": "#/components/schemas/OpenAIResponseText", + "description": "Text formatting configuration for the response" + }, + "top_p": { + "type": "number", + "description": "(Optional) Nucleus sampling parameter used for generation" + }, + "truncation": { + "type": "string", + "description": "(Optional) Truncation strategy applied to the response" + }, + "user": { + "type": "string", + "description": "(Optional) User identifier associated with the request" + }, + "input": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInput" + }, + "description": "List of input items that led to this response" + } + }, + "additionalProperties": false, + "required": [ + "created_at", + "id", + "model", + "object", + "output", + "parallel_tool_calls", + "status", + "text", + "input" + ], + "title": "OpenAIResponseObjectWithInput", + "description": "OpenAI response object extended with input context information." 
+ }, + "ListPromptsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Prompt" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListPromptsResponse", + "description": "Response model to list prompts." + }, + "ListProvidersResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ProviderInfo" + }, + "description": "List of provider information objects" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListProvidersResponse", + "description": "Response containing a list of all available providers." + }, + "RouteInfo": { + "type": "object", + "properties": { + "route": { + "type": "string", + "description": "The API endpoint path" + }, + "method": { + "type": "string", + "description": "HTTP method for the route" + }, + "provider_types": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of provider types that implement this route" + } + }, + "additionalProperties": false, + "required": [ + "route", + "method", + "provider_types" + ], + "title": "RouteInfo", + "description": "Information about an API route including its path, method, and implementing providers." + }, + "ListRoutesResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/RouteInfo" + }, + "description": "List of available route information objects" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListRoutesResponse", + "description": "Response containing a list of all available API routes." + }, + "ListToolDefsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolDef" + }, + "description": "List of tool definitions" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListToolDefsResponse", + "description": "Response containing a list of tool definitions." + }, + "ListScoringFunctionsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ScoringFn" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListScoringFunctionsResponse" + }, + "ListShieldsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Shield" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListShieldsResponse" + }, + "ListToolGroupsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolGroup" + }, + "description": "List of tool groups" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListToolGroupsResponse", + "description": "Response containing a list of tool groups." + }, + "ListToolsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Tool" + }, + "description": "List of tools" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListToolsResponse", + "description": "Response containing a list of tools." 
+ }, + "ListVectorDBsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/VectorDB" + }, + "description": "List of vector databases" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListVectorDBsResponse", + "description": "Response from listing vector databases." + }, + "Event": { + "oneOf": [ + { + "$ref": "#/components/schemas/UnstructuredLogEvent" + }, + { + "$ref": "#/components/schemas/MetricEvent" + }, + { + "$ref": "#/components/schemas/StructuredLogEvent" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "unstructured_log": "#/components/schemas/UnstructuredLogEvent", + "metric": "#/components/schemas/MetricEvent", + "structured_log": "#/components/schemas/StructuredLogEvent" + } + } + }, + "EventType": { + "type": "string", + "enum": [ + "unstructured_log", + "structured_log", + "metric" + ], + "title": "EventType", + "description": "The type of telemetry event being logged." + }, + "LogSeverity": { + "type": "string", + "enum": [ + "verbose", + "debug", + "info", + "warn", + "error", + "critical" + ], + "title": "LogSeverity", + "description": "The severity level of a log message." + }, + "MetricEvent": { + "type": "object", + "properties": { + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace this event belongs to" + }, + "span_id": { + "type": "string", + "description": "Unique identifier for the span this event belongs to" + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the event occurred" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ] + }, + "description": "(Optional) Key-value pairs containing additional metadata about the event" + }, + "type": { + "$ref": "#/components/schemas/EventType", + "const": "metric", + "default": "metric", + "description": "Event type identifier set to METRIC" + }, + "metric": { + "type": "string", + "description": "The name of the metric being measured" + }, + "value": { + "oneOf": [ + { + "type": "integer" + }, + { + "type": "number" + } + ], + "description": "The numeric value of the metric measurement" + }, + "unit": { + "type": "string", + "description": "The unit of measurement for the metric value" + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "span_id", + "timestamp", + "type", + "metric", + "value", + "unit" + ], + "title": "MetricEvent", + "description": "A metric event containing a measured value." + }, + "SpanEndPayload": { + "type": "object", + "properties": { + "type": { + "$ref": "#/components/schemas/StructuredLogType", + "const": "span_end", + "default": "span_end", + "description": "Payload type identifier set to SPAN_END" + }, + "status": { + "$ref": "#/components/schemas/SpanStatus", + "description": "The final status of the span indicating success or failure" + } + }, + "additionalProperties": false, + "required": [ + "type", + "status" + ], + "title": "SpanEndPayload", + "description": "Payload for a span end event." 
+ }, + "SpanStartPayload": { + "type": "object", + "properties": { + "type": { + "$ref": "#/components/schemas/StructuredLogType", + "const": "span_start", + "default": "span_start", + "description": "Payload type identifier set to SPAN_START" + }, + "name": { + "type": "string", + "description": "Human-readable name describing the operation this span represents" + }, + "parent_span_id": { + "type": "string", + "description": "(Optional) Unique identifier for the parent span, if this is a child span" + } + }, + "additionalProperties": false, + "required": [ + "type", + "name" + ], + "title": "SpanStartPayload", + "description": "Payload for a span start event." + }, + "StructuredLogEvent": { + "type": "object", + "properties": { + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace this event belongs to" + }, + "span_id": { + "type": "string", + "description": "Unique identifier for the span this event belongs to" + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the event occurred" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ] + }, + "description": "(Optional) Key-value pairs containing additional metadata about the event" + }, + "type": { + "$ref": "#/components/schemas/EventType", + "const": "structured_log", + "default": "structured_log", + "description": "Event type identifier set to STRUCTURED_LOG" + }, + "payload": { + "$ref": "#/components/schemas/StructuredLogPayload", + "description": "The structured payload data for the log event" + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "span_id", + "timestamp", + "type", + "payload" + ], + "title": "StructuredLogEvent", + "description": "A structured log event containing typed payload data." + }, + "StructuredLogPayload": { + "oneOf": [ + { + "$ref": "#/components/schemas/SpanStartPayload" + }, + { + "$ref": "#/components/schemas/SpanEndPayload" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "span_start": "#/components/schemas/SpanStartPayload", + "span_end": "#/components/schemas/SpanEndPayload" + } + } + }, + "StructuredLogType": { + "type": "string", + "enum": [ + "span_start", + "span_end" + ], + "title": "StructuredLogType", + "description": "The type of structured log event payload." 
+ }, + "UnstructuredLogEvent": { + "type": "object", + "properties": { + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace this event belongs to" + }, + "span_id": { + "type": "string", + "description": "Unique identifier for the span this event belongs to" + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the event occurred" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ] + }, + "description": "(Optional) Key-value pairs containing additional metadata about the event" + }, + "type": { + "$ref": "#/components/schemas/EventType", + "const": "unstructured_log", + "default": "unstructured_log", + "description": "Event type identifier set to UNSTRUCTURED_LOG" + }, + "message": { + "type": "string", + "description": "The log message text" + }, + "severity": { + "$ref": "#/components/schemas/LogSeverity", + "description": "The severity level of the log message" + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "span_id", + "timestamp", + "type", + "message", + "severity" + ], + "title": "UnstructuredLogEvent", + "description": "An unstructured log event containing a simple text message." + }, + "LogEventRequest": { + "type": "object", + "properties": { + "event": { + "$ref": "#/components/schemas/Event", + "description": "The event to log." + }, + "ttl_seconds": { + "type": "integer", + "description": "The time to live of the event." + } + }, + "additionalProperties": false, + "required": [ + "event", + "ttl_seconds" + ], + "title": "LogEventRequest" + }, + "VectorStoreChunkingStrategy": { + "oneOf": [ + { + "$ref": "#/components/schemas/VectorStoreChunkingStrategyAuto" + }, + { + "$ref": "#/components/schemas/VectorStoreChunkingStrategyStatic" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "auto": "#/components/schemas/VectorStoreChunkingStrategyAuto", + "static": "#/components/schemas/VectorStoreChunkingStrategyStatic" + } + } + }, + "VectorStoreChunkingStrategyAuto": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "auto", + "default": "auto", + "description": "Strategy type, always \"auto\" for automatic chunking" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "VectorStoreChunkingStrategyAuto", + "description": "Automatic chunking strategy for vector store files." + }, + "VectorStoreChunkingStrategyStatic": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "static", + "default": "static", + "description": "Strategy type, always \"static\" for static chunking" + }, + "static": { + "$ref": "#/components/schemas/VectorStoreChunkingStrategyStaticConfig", + "description": "Configuration parameters for the static chunking strategy" + } + }, + "additionalProperties": false, + "required": [ + "type", + "static" + ], + "title": "VectorStoreChunkingStrategyStatic", + "description": "Static chunking strategy with configurable parameters." 
+ }, + "VectorStoreChunkingStrategyStaticConfig": { + "type": "object", + "properties": { + "chunk_overlap_tokens": { + "type": "integer", + "default": 400, + "description": "Number of tokens to overlap between adjacent chunks" + }, + "max_chunk_size_tokens": { + "type": "integer", + "default": 800, + "description": "Maximum number of tokens per chunk, must be between 100 and 4096" + } + }, + "additionalProperties": false, + "required": [ + "chunk_overlap_tokens", + "max_chunk_size_tokens" + ], + "title": "VectorStoreChunkingStrategyStaticConfig", + "description": "Configuration for static chunking strategy." + }, + "OpenaiAttachFileToVectorStoreRequest": { + "type": "object", + "properties": { + "file_id": { + "type": "string", + "description": "The ID of the file to attach to the vector store." + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The key-value attributes stored with the file, which can be used for filtering." + }, + "chunking_strategy": { + "$ref": "#/components/schemas/VectorStoreChunkingStrategy", + "description": "The chunking strategy to use for the file." + } + }, + "additionalProperties": false, + "required": [ + "file_id" + ], + "title": "OpenaiAttachFileToVectorStoreRequest" + }, + "VectorStoreFileLastError": { + "type": "object", + "properties": { + "code": { + "oneOf": [ + { + "type": "string", + "const": "server_error" + }, + { + "type": "string", + "const": "rate_limit_exceeded" + } + ], + "description": "Error code indicating the type of failure" + }, + "message": { + "type": "string", + "description": "Human-readable error message describing the failure" + } + }, + "additionalProperties": false, + "required": [ + "code", + "message" + ], + "title": "VectorStoreFileLastError", + "description": "Error information for failed vector store file processing." 
+ }, + "VectorStoreFileObject": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the file" + }, + "object": { + "type": "string", + "default": "vector_store.file", + "description": "Object type identifier, always \"vector_store.file\"" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Key-value attributes associated with the file" + }, + "chunking_strategy": { + "$ref": "#/components/schemas/VectorStoreChunkingStrategy", + "description": "Strategy used for splitting the file into chunks" + }, + "created_at": { + "type": "integer", + "description": "Timestamp when the file was added to the vector store" + }, + "last_error": { + "$ref": "#/components/schemas/VectorStoreFileLastError", + "description": "(Optional) Error information if file processing failed" + }, + "status": { + "$ref": "#/components/schemas/VectorStoreFileStatus", + "description": "Current processing status of the file" + }, + "usage_bytes": { + "type": "integer", + "default": 0, + "description": "Storage space used by this file in bytes" + }, + "vector_store_id": { + "type": "string", + "description": "ID of the vector store containing this file" + } + }, + "additionalProperties": false, + "required": [ + "id", + "object", + "attributes", + "chunking_strategy", + "created_at", + "status", + "usage_bytes", + "vector_store_id" + ], + "title": "VectorStoreFileObject", + "description": "OpenAI Vector Store File object." + }, + "VectorStoreFileStatus": { + "oneOf": [ + { + "type": "string", + "const": "completed" + }, + { + "type": "string", + "const": "in_progress" + }, + { + "type": "string", + "const": "cancelled" + }, + { + "type": "string", + "const": "failed" + } + ] + }, "OpenAIJSONSchema": { "type": "object", "properties": { @@ -12782,6 +15610,2170 @@ "title": "VectorStoreSearchResponsePage", "description": "Paginated response from searching a vector store." }, +<<<<<<< HEAD +======= + "OpenaiUpdateVectorStoreRequest": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the vector store." + }, + "expires_after": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The expiration policy for a vector store." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Set of 16 key-value pairs that can be attached to an object." + } + }, + "additionalProperties": false, + "title": "OpenaiUpdateVectorStoreRequest" + }, + "OpenaiUpdateVectorStoreFileRequest": { + "type": "object", + "properties": { + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The updated key-value attributes to store with the file." 
+ } + }, + "additionalProperties": false, + "required": [ + "attributes" + ], + "title": "OpenaiUpdateVectorStoreFileRequest" + }, + "DPOAlignmentConfig": { + "type": "object", + "properties": { + "beta": { + "type": "number", + "description": "Temperature parameter for the DPO loss" + }, + "loss_type": { + "$ref": "#/components/schemas/DPOLossType", + "default": "sigmoid", + "description": "The type of loss function to use for DPO" + } + }, + "additionalProperties": false, + "required": [ + "beta", + "loss_type" + ], + "title": "DPOAlignmentConfig", + "description": "Configuration for Direct Preference Optimization (DPO) alignment." + }, + "DPOLossType": { + "type": "string", + "enum": [ + "sigmoid", + "hinge", + "ipo", + "kto_pair" + ], + "title": "DPOLossType" + }, + "DataConfig": { + "type": "object", + "properties": { + "dataset_id": { + "type": "string", + "description": "Unique identifier for the training dataset" + }, + "batch_size": { + "type": "integer", + "description": "Number of samples per training batch" + }, + "shuffle": { + "type": "boolean", + "description": "Whether to shuffle the dataset during training" + }, + "data_format": { + "$ref": "#/components/schemas/DatasetFormat", + "description": "Format of the dataset (instruct or dialog)" + }, + "validation_dataset_id": { + "type": "string", + "description": "(Optional) Unique identifier for the validation dataset" + }, + "packed": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to pack multiple samples into a single sequence for efficiency" + }, + "train_on_input": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to compute loss on input tokens as well as output tokens" + } + }, + "additionalProperties": false, + "required": [ + "dataset_id", + "batch_size", + "shuffle", + "data_format" + ], + "title": "DataConfig", + "description": "Configuration for training data and data loading." + }, + "DatasetFormat": { + "type": "string", + "enum": [ + "instruct", + "dialog" + ], + "title": "DatasetFormat", + "description": "Format of the training dataset." + }, + "EfficiencyConfig": { + "type": "object", + "properties": { + "enable_activation_checkpointing": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to use activation checkpointing to reduce memory usage" + }, + "enable_activation_offloading": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to offload activations to CPU to save GPU memory" + }, + "memory_efficient_fsdp_wrap": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to use memory-efficient FSDP wrapping" + }, + "fsdp_cpu_offload": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to offload FSDP parameters to CPU" + } + }, + "additionalProperties": false, + "title": "EfficiencyConfig", + "description": "Configuration for memory and compute efficiency optimizations." 
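Illustrative values for the two training-data configs defined above; the numbers are examples only, not recommendations.

# Illustrative DPOAlignmentConfig: beta is the DPO loss temperature and
# loss_type must be one of the DPOLossType values.
dpo_config = {
    "beta": 0.1,             # example temperature, not a recommended value
    "loss_type": "sigmoid",  # schema default
}

# Illustrative DataConfig for an instruct-format training dataset.
data_config = {
    "dataset_id": "my-training-set",  # placeholder dataset ID
    "batch_size": 8,
    "shuffle": True,
    "data_format": "instruct",        # or "dialog"
    "packed": False,                  # optional, default False
    "train_on_input": False,          # optional, default False
}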
+ }, + "OptimizerConfig": { + "type": "object", + "properties": { + "optimizer_type": { + "$ref": "#/components/schemas/OptimizerType", + "description": "Type of optimizer to use (adam, adamw, or sgd)" + }, + "lr": { + "type": "number", + "description": "Learning rate for the optimizer" + }, + "weight_decay": { + "type": "number", + "description": "Weight decay coefficient for regularization" + }, + "num_warmup_steps": { + "type": "integer", + "description": "Number of steps for learning rate warmup" + } + }, + "additionalProperties": false, + "required": [ + "optimizer_type", + "lr", + "weight_decay", + "num_warmup_steps" + ], + "title": "OptimizerConfig", + "description": "Configuration parameters for the optimization algorithm." + }, + "OptimizerType": { + "type": "string", + "enum": [ + "adam", + "adamw", + "sgd" + ], + "title": "OptimizerType", + "description": "Available optimizer algorithms for training." + }, + "TrainingConfig": { + "type": "object", + "properties": { + "n_epochs": { + "type": "integer", + "description": "Number of training epochs to run" + }, + "max_steps_per_epoch": { + "type": "integer", + "default": 1, + "description": "Maximum number of steps to run per epoch" + }, + "gradient_accumulation_steps": { + "type": "integer", + "default": 1, + "description": "Number of steps to accumulate gradients before updating" + }, + "max_validation_steps": { + "type": "integer", + "default": 1, + "description": "(Optional) Maximum number of validation steps per epoch" + }, + "data_config": { + "$ref": "#/components/schemas/DataConfig", + "description": "(Optional) Configuration for data loading and formatting" + }, + "optimizer_config": { + "$ref": "#/components/schemas/OptimizerConfig", + "description": "(Optional) Configuration for the optimization algorithm" + }, + "efficiency_config": { + "$ref": "#/components/schemas/EfficiencyConfig", + "description": "(Optional) Configuration for memory and compute optimizations" + }, + "dtype": { + "type": "string", + "default": "bf16", + "description": "(Optional) Data type for model parameters (bf16, fp16, fp32)" + } + }, + "additionalProperties": false, + "required": [ + "n_epochs", + "max_steps_per_epoch", + "gradient_accumulation_steps" + ], + "title": "TrainingConfig", + "description": "Comprehensive configuration for the training process." + }, + "PreferenceOptimizeRequest": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string", + "description": "The UUID of the job to create." + }, + "finetuned_model": { + "type": "string", + "description": "The model to fine-tune." + }, + "algorithm_config": { + "$ref": "#/components/schemas/DPOAlignmentConfig", + "description": "The algorithm configuration." + }, + "training_config": { + "$ref": "#/components/schemas/TrainingConfig", + "description": "The training configuration." + }, + "hyperparam_search_config": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The hyperparam search configuration." + }, + "logger_config": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The logger configuration." 
+ } + }, + "additionalProperties": false, + "required": [ + "job_uuid", + "finetuned_model", + "algorithm_config", + "training_config", + "hyperparam_search_config", + "logger_config" + ], + "title": "PreferenceOptimizeRequest" + }, + "PostTrainingJob": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "job_uuid" + ], + "title": "PostTrainingJob" + }, + "DefaultRAGQueryGeneratorConfig": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "default", + "default": "default", + "description": "Type of query generator, always 'default'" + }, + "separator": { + "type": "string", + "default": " ", + "description": "String separator used to join query terms" + } + }, + "additionalProperties": false, + "required": [ + "type", + "separator" + ], + "title": "DefaultRAGQueryGeneratorConfig", + "description": "Configuration for the default RAG query generator." + }, + "LLMRAGQueryGeneratorConfig": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "llm", + "default": "llm", + "description": "Type of query generator, always 'llm'" + }, + "model": { + "type": "string", + "description": "Name of the language model to use for query generation" + }, + "template": { + "type": "string", + "description": "Template string for formatting the query generation prompt" + } + }, + "additionalProperties": false, + "required": [ + "type", + "model", + "template" + ], + "title": "LLMRAGQueryGeneratorConfig", + "description": "Configuration for the LLM-based RAG query generator." + }, + "RAGQueryConfig": { + "type": "object", + "properties": { + "query_generator_config": { + "$ref": "#/components/schemas/RAGQueryGeneratorConfig", + "description": "Configuration for the query generator." + }, + "max_tokens_in_context": { + "type": "integer", + "default": 4096, + "description": "Maximum number of tokens in the context." + }, + "max_chunks": { + "type": "integer", + "default": 5, + "description": "Maximum number of chunks to retrieve." + }, + "chunk_template": { + "type": "string", + "default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n", + "description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\"" + }, + "mode": { + "$ref": "#/components/schemas/RAGSearchMode", + "default": "vector", + "description": "Search mode for retrieval—either \"vector\", \"keyword\", or \"hybrid\". Default \"vector\"." + }, + "ranker": { + "$ref": "#/components/schemas/Ranker", + "description": "Configuration for the ranker to use in hybrid search. Defaults to RRF ranker." + } + }, + "additionalProperties": false, + "required": [ + "query_generator_config", + "max_tokens_in_context", + "max_chunks", + "chunk_template" + ], + "title": "RAGQueryConfig", + "description": "Configuration for the RAG query generation." 
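The chunk_template field behaves like an ordinary Python format string; a sketch of rendering one retrieved chunk with the default template, using a stand-in object for whatever chunk type the implementation actually passes.

from types import SimpleNamespace

# Stand-in for a retrieved chunk; it only mirrors the attribute the
# template's {chunk.content} placeholder needs.
chunk = SimpleNamespace(content="Paris is the capital of France.")
metadata = {"document_id": "doc-1"}

template = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"
print(template.format(index=1, chunk=chunk, metadata=metadata))
# Result 1
# Content: Paris is the capital of France.
# Metadata: {'document_id': 'doc-1'}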
+ }, + "RAGQueryGeneratorConfig": { + "oneOf": [ + { + "$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig" + }, + { + "$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "default": "#/components/schemas/DefaultRAGQueryGeneratorConfig", + "llm": "#/components/schemas/LLMRAGQueryGeneratorConfig" + } + } + }, + "RAGSearchMode": { + "type": "string", + "enum": [ + "vector", + "keyword", + "hybrid" + ], + "title": "RAGSearchMode", + "description": "Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search for semantic matching - KEYWORD: Uses keyword-based search for exact matching - HYBRID: Combines both vector and keyword search for better results" + }, + "RRFRanker": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "rrf", + "default": "rrf", + "description": "The type of ranker, always \"rrf\"" + }, + "impact_factor": { + "type": "number", + "default": 60.0, + "description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0" + } + }, + "additionalProperties": false, + "required": [ + "type", + "impact_factor" + ], + "title": "RRFRanker", + "description": "Reciprocal Rank Fusion (RRF) ranker configuration." + }, + "Ranker": { + "oneOf": [ + { + "$ref": "#/components/schemas/RRFRanker" + }, + { + "$ref": "#/components/schemas/WeightedRanker" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "rrf": "#/components/schemas/RRFRanker", + "weighted": "#/components/schemas/WeightedRanker" + } + } + }, + "WeightedRanker": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "weighted", + "default": "weighted", + "description": "The type of ranker, always \"weighted\"" + }, + "alpha": { + "type": "number", + "default": 0.5, + "description": "Weight factor between 0 and 1. 0 means only use keyword scores, 1 means only use vector scores, values in between blend both scores." + } + }, + "additionalProperties": false, + "required": [ + "type", + "alpha" + ], + "title": "WeightedRanker", + "description": "Weighted ranker configuration that combines vector and keyword scores." 
+ }, + "QueryRequest": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "The query content to search for in the indexed documents" + }, + "vector_db_ids": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of vector database IDs to search within" + }, + "query_config": { + "$ref": "#/components/schemas/RAGQueryConfig", + "description": "(Optional) Configuration parameters for the query operation" + } + }, + "additionalProperties": false, + "required": [ + "content", + "vector_db_ids" + ], + "title": "QueryRequest" + }, + "RAGQueryResult": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "(Optional) The retrieved content from the query" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Additional metadata about the query result" + } + }, + "additionalProperties": false, + "required": [ + "metadata" + ], + "title": "RAGQueryResult", + "description": "Result of a RAG query containing retrieved content and metadata." + }, + "QueryChunksRequest": { + "type": "object", + "properties": { + "vector_db_id": { + "type": "string", + "description": "The identifier of the vector database to query." + }, + "query": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "The query to search for." + }, + "params": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The parameters of the query." + } + }, + "additionalProperties": false, + "required": [ + "vector_db_id", + "query" + ], + "title": "QueryChunksRequest" + }, + "QueryChunksResponse": { + "type": "object", + "properties": { + "chunks": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Chunk" + }, + "description": "List of content chunks returned from the query" + }, + "scores": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Relevance scores corresponding to each returned chunk" + } + }, + "additionalProperties": false, + "required": [ + "chunks", + "scores" + ], + "title": "QueryChunksResponse", + "description": "Response from querying chunks in a vector database." + }, + "QueryMetricsRequest": { + "type": "object", + "properties": { + "start_time": { + "type": "integer", + "description": "The start time of the metric to query." + }, + "end_time": { + "type": "integer", + "description": "The end time of the metric to query." + }, + "granularity": { + "type": "string", + "description": "The granularity of the metric to query." + }, + "query_type": { + "type": "string", + "enum": [ + "range", + "instant" + ], + "description": "The type of query to perform." 
+ }, + "label_matchers": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the label to match" + }, + "value": { + "type": "string", + "description": "The value to match against" + }, + "operator": { + "type": "string", + "enum": [ + "=", + "!=", + "=~", + "!~" + ], + "description": "The comparison operator to use for matching", + "default": "=" + } + }, + "additionalProperties": false, + "required": [ + "name", + "value", + "operator" + ], + "title": "MetricLabelMatcher", + "description": "A matcher for filtering metrics by label values." + }, + "description": "The label matchers to apply to the metric." + } + }, + "additionalProperties": false, + "required": [ + "start_time", + "query_type" + ], + "title": "QueryMetricsRequest" + }, + "MetricDataPoint": { + "type": "object", + "properties": { + "timestamp": { + "type": "integer", + "description": "Unix timestamp when the metric value was recorded" + }, + "value": { + "type": "number", + "description": "The numeric value of the metric at this timestamp" + }, + "unit": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "timestamp", + "value", + "unit" + ], + "title": "MetricDataPoint", + "description": "A single data point in a metric time series." + }, + "MetricLabel": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the label" + }, + "value": { + "type": "string", + "description": "The value of the label" + } + }, + "additionalProperties": false, + "required": [ + "name", + "value" + ], + "title": "MetricLabel", + "description": "A label associated with a metric." + }, + "MetricSeries": { + "type": "object", + "properties": { + "metric": { + "type": "string", + "description": "The name of the metric" + }, + "labels": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MetricLabel" + }, + "description": "List of labels associated with this metric series" + }, + "values": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MetricDataPoint" + }, + "description": "List of data points in chronological order" + } + }, + "additionalProperties": false, + "required": [ + "metric", + "labels", + "values" + ], + "title": "MetricSeries", + "description": "A time series of metric data points." + }, + "QueryMetricsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MetricSeries" + }, + "description": "List of metric series matching the query criteria" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "QueryMetricsResponse", + "description": "Response containing metric time series data." + }, + "QueryCondition": { + "type": "object", + "properties": { + "key": { + "type": "string", + "description": "The attribute key to filter on" + }, + "op": { + "$ref": "#/components/schemas/QueryConditionOp", + "description": "The comparison operator to apply" + }, + "value": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ], + "description": "The value to compare against" + } + }, + "additionalProperties": false, + "required": [ + "key", + "op", + "value" + ], + "title": "QueryCondition", + "description": "A condition for filtering query results." 
+ }, + "QueryConditionOp": { + "type": "string", + "enum": [ + "eq", + "ne", + "gt", + "lt" + ], + "title": "QueryConditionOp", + "description": "Comparison operators for query conditions." + }, + "QuerySpansRequest": { + "type": "object", + "properties": { + "attribute_filters": { + "type": "array", + "items": { + "$ref": "#/components/schemas/QueryCondition" + }, + "description": "The attribute filters to apply to the spans." + }, + "attributes_to_return": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The attributes to return in the spans." + }, + "max_depth": { + "type": "integer", + "description": "The maximum depth of the tree." + } + }, + "additionalProperties": false, + "required": [ + "attribute_filters", + "attributes_to_return" + ], + "title": "QuerySpansRequest" + }, + "QuerySpansResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Span" + }, + "description": "List of spans matching the query criteria" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "QuerySpansResponse", + "description": "Response containing a list of spans." + }, + "QueryTracesRequest": { + "type": "object", + "properties": { + "attribute_filters": { + "type": "array", + "items": { + "$ref": "#/components/schemas/QueryCondition" + }, + "description": "The attribute filters to apply to the traces." + }, + "limit": { + "type": "integer", + "description": "The limit of traces to return." + }, + "offset": { + "type": "integer", + "description": "The offset of the traces to return." + }, + "order_by": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The order by of the traces to return." + } + }, + "additionalProperties": false, + "title": "QueryTracesRequest" + }, + "QueryTracesResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Trace" + }, + "description": "List of traces matching the query criteria" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "QueryTracesResponse", + "description": "Response containing a list of traces." + }, + "RegisterBenchmarkRequest": { + "type": "object", + "properties": { + "benchmark_id": { + "type": "string", + "description": "The ID of the benchmark to register." + }, + "dataset_id": { + "type": "string", + "description": "The ID of the dataset to use for the benchmark." + }, + "scoring_functions": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The scoring functions to use for the benchmark." + }, + "provider_benchmark_id": { + "type": "string", + "description": "The ID of the provider benchmark to use for the benchmark." + }, + "provider_id": { + "type": "string", + "description": "The ID of the provider to use for the benchmark." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The metadata to use for the benchmark." 
+ } + }, + "additionalProperties": false, + "required": [ + "benchmark_id", + "dataset_id", + "scoring_functions" + ], + "title": "RegisterBenchmarkRequest" + }, + "RegisterDatasetRequest": { + "type": "object", + "properties": { + "purpose": { + "type": "string", + "enum": [ + "post-training/messages", + "eval/question-answer", + "eval/messages-answer" + ], + "description": "The purpose of the dataset. One of: - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } - \"eval/question-answer\": The dataset contains a question column and an answer column for evaluation. { \"question\": \"What is the capital of France?\", \"answer\": \"Paris\" } - \"eval/messages-answer\": The dataset contains a messages column with list of messages and an answer column for evaluation. { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, my name is John Doe.\"}, {\"role\": \"assistant\", \"content\": \"Hello, John Doe. How can I help you today?\"}, {\"role\": \"user\", \"content\": \"What's my name?\"}, ], \"answer\": \"John Doe\" }" + }, + "source": { + "$ref": "#/components/schemas/DataSource", + "description": "The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"data:csv;base64,{base64_content}\" } - { \"type\": \"uri\", \"uri\": \"huggingface://llamastack/simpleqa?split=train\" } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The metadata for the dataset. - E.g. {\"description\": \"My dataset\"}." + }, + "dataset_id": { + "type": "string", + "description": "The ID of the dataset. If not provided, an ID will be generated." + } + }, + "additionalProperties": false, + "required": [ + "purpose", + "source" + ], + "title": "RegisterDatasetRequest" + }, + "RegisterModelRequest": { + "type": "object", + "properties": { + "model_id": { + "type": "string", + "description": "The identifier of the model to register." + }, + "provider_model_id": { + "type": "string", + "description": "The identifier of the model in the provider." + }, + "provider_id": { + "type": "string", + "description": "The identifier of the provider." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Any additional metadata for this model." + }, + "model_type": { + "$ref": "#/components/schemas/ModelType", + "description": "The type of model to register." 
+ } + }, + "additionalProperties": false, + "required": [ + "model_id" + ], + "title": "RegisterModelRequest" + }, + "RegisterScoringFunctionRequest": { + "type": "object", + "properties": { + "scoring_fn_id": { + "type": "string", + "description": "The ID of the scoring function to register." + }, + "description": { + "type": "string", + "description": "The description of the scoring function." + }, + "return_type": { + "$ref": "#/components/schemas/ParamType", + "description": "The return type of the scoring function." + }, + "provider_scoring_fn_id": { + "type": "string", + "description": "The ID of the provider scoring function to use for the scoring function." + }, + "provider_id": { + "type": "string", + "description": "The ID of the provider to use for the scoring function." + }, + "params": { + "$ref": "#/components/schemas/ScoringFnParams", + "description": "The parameters for the scoring function for benchmark eval, these can be overridden for app eval." + } + }, + "additionalProperties": false, + "required": [ + "scoring_fn_id", + "description", + "return_type" + ], + "title": "RegisterScoringFunctionRequest" + }, + "RegisterShieldRequest": { + "type": "object", + "properties": { + "shield_id": { + "type": "string", + "description": "The identifier of the shield to register." + }, + "provider_shield_id": { + "type": "string", + "description": "The identifier of the shield in the provider." + }, + "provider_id": { + "type": "string", + "description": "The identifier of the provider." + }, + "params": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The parameters of the shield." + } + }, + "additionalProperties": false, + "required": [ + "shield_id" + ], + "title": "RegisterShieldRequest" + }, + "RegisterToolGroupRequest": { + "type": "object", + "properties": { + "toolgroup_id": { + "type": "string", + "description": "The ID of the tool group to register." + }, + "provider_id": { + "type": "string", + "description": "The ID of the provider to use for the tool group." + }, + "mcp_endpoint": { + "$ref": "#/components/schemas/URL", + "description": "The MCP endpoint to use for the tool group." + }, + "args": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "A dictionary of arguments to pass to the tool group." + } + }, + "additionalProperties": false, + "required": [ + "toolgroup_id", + "provider_id" + ], + "title": "RegisterToolGroupRequest" + }, + "RegisterVectorDbRequest": { + "type": "object", + "properties": { + "vector_db_id": { + "type": "string", + "description": "The identifier of the vector database to register." + }, + "embedding_model": { + "type": "string", + "description": "The embedding model to use." + }, + "embedding_dimension": { + "type": "integer", + "description": "The dimension of the embedding model." + }, + "provider_id": { + "type": "string", + "description": "The identifier of the provider." + }, + "vector_db_name": { + "type": "string", + "description": "The name of the vector database." + }, + "provider_vector_db_id": { + "type": "string", + "description": "The identifier of the vector database in the provider." 
+ } + }, + "additionalProperties": false, + "required": [ + "vector_db_id", + "embedding_model" + ], + "title": "RegisterVectorDbRequest" + }, + "RerankRequest": { + "type": "object", + "properties": { + "model": { + "type": "string", + "description": "The identifier of the reranking model to use. The model must be a reranking model registered with Llama Stack and available via the /models endpoint." + }, + "query": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam" + }, + { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam" + } + ], + "description": "The search query to rank items against. Can be a string, text content part, or image content part. The input must not exceed the model's max input token length." + }, + "items": { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam" + }, + { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam" + } + ] + }, + "description": "List of items to rerank. Each item can be a string, text content part, or image content part. Each input must not exceed the model's max input token length." + }, + "max_num_results": { + "type": "integer", + "description": "(Optional) Maximum number of results to return. Default: returns all." + } + }, + "additionalProperties": false, + "required": [ + "model", + "query", + "items" + ], + "title": "RerankRequest" + }, + "RerankData": { + "type": "object", + "properties": { + "index": { + "type": "integer", + "description": "The original index of the document in the input list" + }, + "relevance_score": { + "type": "number", + "description": "The relevance score from the model output. Values are inverted when applicable so that higher scores indicate greater relevance." + } + }, + "additionalProperties": false, + "required": [ + "index", + "relevance_score" + ], + "title": "RerankData", + "description": "A single rerank result from a reranking response." + }, + "RerankResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/RerankData" + }, + "description": "List of rerank result objects, sorted by relevance score (descending)" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "RerankResponse", + "description": "Response from a reranking request." + }, + "ResumeAgentTurnRequest": { + "type": "object", + "properties": { + "tool_responses": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolResponse" + }, + "description": "The tool call responses to resume the turn with." + }, + "stream": { + "type": "boolean", + "description": "Whether to stream the response." + } + }, + "additionalProperties": false, + "required": [ + "tool_responses" + ], + "title": "ResumeAgentTurnRequest" + }, + "RunEvalRequest": { + "type": "object", + "properties": { + "benchmark_config": { + "$ref": "#/components/schemas/BenchmarkConfig", + "description": "The configuration for the benchmark." + } + }, + "additionalProperties": false, + "required": [ + "benchmark_config" + ], + "title": "RunEvalRequest" + }, + "RunModerationRequest": { + "type": "object", + "properties": { + "input": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ], + "description": "Input (or inputs) to classify. 
Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models." + }, + "model": { + "type": "string", + "description": "The content moderation model you would like to use." + } + }, + "additionalProperties": false, + "required": [ + "input", + "model" + ], + "title": "RunModerationRequest" + }, + "ModerationObject": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The unique identifier for the moderation request." + }, + "model": { + "type": "string", + "description": "The model used to generate the moderation results." + }, + "results": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ModerationObjectResults" + }, + "description": "A list of moderation objects" + } + }, + "additionalProperties": false, + "required": [ + "id", + "model", + "results" + ], + "title": "ModerationObject", + "description": "A moderation object." + }, + "ModerationObjectResults": { + "type": "object", + "properties": { + "flagged": { + "type": "boolean", + "description": "Whether any of the below categories are flagged." + }, + "categories": { + "type": "object", + "additionalProperties": { + "type": "boolean" + }, + "description": "A list of the categories, and whether they are flagged or not." + }, + "category_applied_input_types": { + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "type": "string" + } + }, + "description": "A list of the categories along with the input type(s) that the score applies to." + }, + "category_scores": { + "type": "object", + "additionalProperties": { + "type": "number" + }, + "description": "A list of the categories along with their scores as predicted by model." + }, + "user_message": { + "type": "string" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "flagged", + "metadata" + ], + "title": "ModerationObjectResults", + "description": "A moderation object." + }, + "RunShieldRequest": { + "type": "object", + "properties": { + "shield_id": { + "type": "string", + "description": "The identifier of the shield to run." + }, + "messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + }, + "description": "The messages to run the shield on." + }, + "params": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The parameters of the shield." + } + }, + "additionalProperties": false, + "required": [ + "shield_id", + "messages", + "params" + ], + "title": "RunShieldRequest" + }, + "RunShieldResponse": { + "type": "object", + "properties": { + "violation": { + "$ref": "#/components/schemas/SafetyViolation", + "description": "(Optional) Safety violation detected by the shield, if any" + } + }, + "additionalProperties": false, + "title": "RunShieldResponse", + "description": "Response from running a safety shield." + }, + "SaveSpansToDatasetRequest": { + "type": "object", + "properties": { + "attribute_filters": { + "type": "array", + "items": { + "$ref": "#/components/schemas/QueryCondition" + }, + "description": "The attribute filters to apply to the spans." 
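Returning to the RerankRequest/RerankData/RerankResponse schemas defined earlier in this hunk — the surface this patch adds — a sketch of a request body and of mapping the sorted response back onto the input items; the model identifier is a placeholder and no particular client binding is implied.

# Illustrative RerankRequest body; only the payload and response shapes
# follow the schemas above.
rerank_request = {
    "model": "nvidia/llama-3.2-nv-rerankqa-1b-v2",  # placeholder rerank model ID
    "query": "What is the capital of France?",
    "items": [
        "Paris is the capital of France.",
        "Berlin is the capital of Germany.",
        "France is in Europe.",
    ],
    "max_num_results": 2,  # optional; omit to return all items
}

# A RerankResponse carries (index, relevance_score) pairs sorted by
# descending relevance; each index refers back to the input list.
response = {"data": [{"index": 0, "relevance_score": 12.5},
                     {"index": 2, "relevance_score": 3.1}]}
top_items = [rerank_request["items"][d["index"]] for d in response["data"]]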
+ }, + "attributes_to_save": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The attributes to save to the dataset." + }, + "dataset_id": { + "type": "string", + "description": "The ID of the dataset to save the spans to." + }, + "max_depth": { + "type": "integer", + "description": "The maximum depth of the tree." + } + }, + "additionalProperties": false, + "required": [ + "attribute_filters", + "attributes_to_save", + "dataset_id" + ], + "title": "SaveSpansToDatasetRequest" + }, + "ScoreRequest": { + "type": "object", + "properties": { + "input_rows": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "The rows to score." + }, + "scoring_functions": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "$ref": "#/components/schemas/ScoringFnParams" + }, + { + "type": "null" + } + ] + }, + "description": "The scoring functions to use for the scoring." + } + }, + "additionalProperties": false, + "required": [ + "input_rows", + "scoring_functions" + ], + "title": "ScoreRequest" + }, + "ScoreResponse": { + "type": "object", + "properties": { + "results": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ScoringResult" + }, + "description": "A map of scoring function name to ScoringResult." + } + }, + "additionalProperties": false, + "required": [ + "results" + ], + "title": "ScoreResponse", + "description": "The response from scoring." + }, + "ScoreBatchRequest": { + "type": "object", + "properties": { + "dataset_id": { + "type": "string", + "description": "The ID of the dataset to score." + }, + "scoring_functions": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "$ref": "#/components/schemas/ScoringFnParams" + }, + { + "type": "null" + } + ] + }, + "description": "The scoring functions to use for the scoring." + }, + "save_results_dataset": { + "type": "boolean", + "description": "Whether to save the results to a dataset." + } + }, + "additionalProperties": false, + "required": [ + "dataset_id", + "scoring_functions", + "save_results_dataset" + ], + "title": "ScoreBatchRequest" + }, + "ScoreBatchResponse": { + "type": "object", + "properties": { + "dataset_id": { + "type": "string", + "description": "(Optional) The identifier of the dataset that was scored" + }, + "results": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ScoringResult" + }, + "description": "A map of scoring function name to ScoringResult" + } + }, + "additionalProperties": false, + "required": [ + "results" + ], + "title": "ScoreBatchResponse", + "description": "Response from batch scoring operations on datasets." + }, + "SetDefaultVersionRequest": { + "type": "object", + "properties": { + "version": { + "type": "integer", + "description": "The version to set as default." 
+ } + }, + "additionalProperties": false, + "required": [ + "version" + ], + "title": "SetDefaultVersionRequest" + }, + "AlgorithmConfig": { + "oneOf": [ + { + "$ref": "#/components/schemas/LoraFinetuningConfig" + }, + { + "$ref": "#/components/schemas/QATFinetuningConfig" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "LoRA": "#/components/schemas/LoraFinetuningConfig", + "QAT": "#/components/schemas/QATFinetuningConfig" + } + } + }, + "LoraFinetuningConfig": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "LoRA", + "default": "LoRA", + "description": "Algorithm type identifier, always \"LoRA\"" + }, + "lora_attn_modules": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of attention module names to apply LoRA to" + }, + "apply_lora_to_mlp": { + "type": "boolean", + "description": "Whether to apply LoRA to MLP layers" + }, + "apply_lora_to_output": { + "type": "boolean", + "description": "Whether to apply LoRA to output projection layers" + }, + "rank": { + "type": "integer", + "description": "Rank of the LoRA adaptation (lower rank = fewer parameters)" + }, + "alpha": { + "type": "integer", + "description": "LoRA scaling parameter that controls adaptation strength" + }, + "use_dora": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)" + }, + "quantize_base": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to quantize the base model weights" + } + }, + "additionalProperties": false, + "required": [ + "type", + "lora_attn_modules", + "apply_lora_to_mlp", + "apply_lora_to_output", + "rank", + "alpha" + ], + "title": "LoraFinetuningConfig", + "description": "Configuration for Low-Rank Adaptation (LoRA) fine-tuning." + }, + "QATFinetuningConfig": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "QAT", + "default": "QAT", + "description": "Algorithm type identifier, always \"QAT\"" + }, + "quantizer_name": { + "type": "string", + "description": "Name of the quantization algorithm to use" + }, + "group_size": { + "type": "integer", + "description": "Size of groups for grouped quantization" + } + }, + "additionalProperties": false, + "required": [ + "type", + "quantizer_name", + "group_size" + ], + "title": "QATFinetuningConfig", + "description": "Configuration for Quantization-Aware Training (QAT) fine-tuning." + }, + "SupervisedFineTuneRequest": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string", + "description": "The UUID of the job to create." + }, + "training_config": { + "$ref": "#/components/schemas/TrainingConfig", + "description": "The training configuration." + }, + "hyperparam_search_config": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The hyperparam search configuration." + }, + "logger_config": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The logger configuration." + }, + "model": { + "type": "string", + "description": "The model to fine-tune." 
+ }, + "checkpoint_dir": { + "type": "string", + "description": "The directory to save checkpoint(s) to." + }, + "algorithm_config": { + "$ref": "#/components/schemas/AlgorithmConfig", + "description": "The algorithm configuration." + } + }, + "additionalProperties": false, + "required": [ + "job_uuid", + "training_config", + "hyperparam_search_config", + "logger_config" + ], + "title": "SupervisedFineTuneRequest" + }, + "SyntheticDataGenerateRequest": { + "type": "object", + "properties": { + "dialogs": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + }, + "description": "List of conversation messages to use as input for synthetic data generation" + }, + "filtering_function": { + "type": "string", + "enum": [ + "none", + "random", + "top_k", + "top_p", + "top_k_top_p", + "sigmoid" + ], + "description": "Type of filtering to apply to generated synthetic data samples" + }, + "model": { + "type": "string", + "description": "(Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint" + } + }, + "additionalProperties": false, + "required": [ + "dialogs", + "filtering_function" + ], + "title": "SyntheticDataGenerateRequest" + }, + "SyntheticDataGenerationResponse": { + "type": "object", + "properties": { + "synthetic_data": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "List of generated synthetic data samples that passed the filtering criteria" + }, + "statistics": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Statistical information about the generation process and filtering results" + } + }, + "additionalProperties": false, + "required": [ + "synthetic_data" + ], + "title": "SyntheticDataGenerationResponse", + "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold." + }, + "UpdatePromptRequest": { + "type": "object", + "properties": { + "prompt": { + "type": "string", + "description": "The updated prompt text content." + }, + "version": { + "type": "integer", + "description": "The current version of the prompt being updated." + }, + "variables": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Updated list of variable names that can be used in the prompt template." + }, + "set_as_default": { + "type": "boolean", + "description": "Set the new version as the default (default=True)." + } + }, + "additionalProperties": false, + "required": [ + "prompt", + "version", + "set_as_default" + ], + "title": "UpdatePromptRequest" + }, +>>>>>>> f7acfa0f (Add rerank API for NVIDIA Inference Provider) "VersionInfo": { "type": "object", "properties": { diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index b9e03d614..ebe142557 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -3634,6 +3634,2130 @@ components: title: OpenAIUserMessageParam description: >- A message from the user in an OpenAI-compatible chat completion request. 
+<<<<<<< HEAD +======= + OpenAICompletionWithInputMessages: + type: object + properties: + id: + type: string + description: The ID of the chat completion + choices: + type: array + items: + $ref: '#/components/schemas/OpenAIChoice' + description: List of choices + object: + type: string + const: chat.completion + default: chat.completion + description: >- + The object type, which will be "chat.completion" + created: + type: integer + description: >- + The Unix timestamp in seconds when the chat completion was created + model: + type: string + description: >- + The model that was used to generate the chat completion + input_messages: + type: array + items: + $ref: '#/components/schemas/OpenAIMessageParam' + additionalProperties: false + required: + - id + - choices + - object + - created + - model + - input_messages + title: OpenAICompletionWithInputMessages + DataSource: + oneOf: + - $ref: '#/components/schemas/URIDataSource' + - $ref: '#/components/schemas/RowsDataSource' + discriminator: + propertyName: type + mapping: + uri: '#/components/schemas/URIDataSource' + rows: '#/components/schemas/RowsDataSource' + Dataset: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: dataset + default: dataset + description: >- + Type of resource, always 'dataset' for datasets + purpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + description: >- + Purpose of the dataset indicating its intended use + source: + $ref: '#/components/schemas/DataSource' + description: >- + Data source configuration for the dataset + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Additional metadata for the dataset + additionalProperties: false + required: + - identifier + - provider_id + - type + - purpose + - source + - metadata + title: Dataset + description: >- + Dataset resource for storing and accessing training or evaluation data. + RowsDataSource: + type: object + properties: + type: + type: string + const: rows + default: rows + rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user", + "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, + world!"}]} ] + additionalProperties: false + required: + - type + - rows + title: RowsDataSource + description: A dataset stored in rows. + URIDataSource: + type: object + properties: + type: + type: string + const: uri + default: uri + uri: + type: string + description: >- + The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl" + - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}" + additionalProperties: false + required: + - type + - uri + title: URIDataSource + description: >- + A dataset that can be obtained from a URI. 
+ Model: + type: object + properties: + identifier: + type: string + description: >- + Unique identifier for this resource in llama stack + provider_resource_id: + type: string + description: >- + Unique identifier for this resource in the provider + provider_id: + type: string + description: >- + ID of the provider that owns this resource + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: model + default: model + description: >- + The resource type, always 'model' for model resources + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Any additional metadata for this model + model_type: + $ref: '#/components/schemas/ModelType' + default: llm + description: >- + The type of model (LLM or embedding model) + additionalProperties: false + required: + - identifier + - provider_id + - type + - metadata + - model_type + title: Model + description: >- + A model resource representing an AI model registered in Llama Stack. + ModelType: + type: string + enum: + - llm + - embedding + - rerank + title: ModelType + description: >- + Enumeration of supported model types in Llama Stack. + AgentTurnInputType: + type: object + properties: + type: + type: string + const: agent_turn_input + default: agent_turn_input + description: >- + Discriminator type. Always "agent_turn_input" + additionalProperties: false + required: + - type + title: AgentTurnInputType + description: Parameter type for agent turn input. + ArrayType: + type: object + properties: + type: + type: string + const: array + default: array + description: Discriminator type. Always "array" + additionalProperties: false + required: + - type + title: ArrayType + description: Parameter type for array values. + BooleanType: + type: object + properties: + type: + type: string + const: boolean + default: boolean + description: Discriminator type. Always "boolean" + additionalProperties: false + required: + - type + title: BooleanType + description: Parameter type for boolean values. + ChatCompletionInputType: + type: object + properties: + type: + type: string + const: chat_completion_input + default: chat_completion_input + description: >- + Discriminator type. Always "chat_completion_input" + additionalProperties: false + required: + - type + title: ChatCompletionInputType + description: >- + Parameter type for chat completion input. + CompletionInputType: + type: object + properties: + type: + type: string + const: completion_input + default: completion_input + description: >- + Discriminator type. Always "completion_input" + additionalProperties: false + required: + - type + title: CompletionInputType + description: Parameter type for completion input. + JsonType: + type: object + properties: + type: + type: string + const: json + default: json + description: Discriminator type. Always "json" + additionalProperties: false + required: + - type + title: JsonType + description: Parameter type for JSON values. + NumberType: + type: object + properties: + type: + type: string + const: number + default: number + description: Discriminator type. Always "number" + additionalProperties: false + required: + - type + title: NumberType + description: Parameter type for numeric values. + ObjectType: + type: object + properties: + type: + type: string + const: object + default: object + description: Discriminator type. 
Always "object" + additionalProperties: false + required: + - type + title: ObjectType + description: Parameter type for object values. + ParamType: + oneOf: + - $ref: '#/components/schemas/StringType' + - $ref: '#/components/schemas/NumberType' + - $ref: '#/components/schemas/BooleanType' + - $ref: '#/components/schemas/ArrayType' + - $ref: '#/components/schemas/ObjectType' + - $ref: '#/components/schemas/JsonType' + - $ref: '#/components/schemas/UnionType' + - $ref: '#/components/schemas/ChatCompletionInputType' + - $ref: '#/components/schemas/CompletionInputType' + - $ref: '#/components/schemas/AgentTurnInputType' + discriminator: + propertyName: type + mapping: + string: '#/components/schemas/StringType' + number: '#/components/schemas/NumberType' + boolean: '#/components/schemas/BooleanType' + array: '#/components/schemas/ArrayType' + object: '#/components/schemas/ObjectType' + json: '#/components/schemas/JsonType' + union: '#/components/schemas/UnionType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + agent_turn_input: '#/components/schemas/AgentTurnInputType' + ScoringFn: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: scoring_function + default: scoring_function + description: >- + The resource type, always scoring_function + description: + type: string + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + return_type: + $ref: '#/components/schemas/ParamType' + params: + $ref: '#/components/schemas/ScoringFnParams' + additionalProperties: false + required: + - identifier + - provider_id + - type + - metadata + - return_type + title: ScoringFn + description: >- + A scoring function resource for evaluating model outputs. + StringType: + type: object + properties: + type: + type: string + const: string + default: string + description: Discriminator type. Always "string" + additionalProperties: false + required: + - type + title: StringType + description: Parameter type for string values. + UnionType: + type: object + properties: + type: + type: string + const: union + default: union + description: Discriminator type. Always "union" + additionalProperties: false + required: + - type + title: UnionType + description: Parameter type for union values. + Shield: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: shield + default: shield + description: The resource type, always shield + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Configuration parameters for the shield + additionalProperties: false + required: + - identifier + - provider_id + - type + title: Shield + description: >- + A safety shield resource that can be used to check content. 
+ Span: + type: object + properties: + span_id: + type: string + description: Unique identifier for the span + trace_id: + type: string + description: >- + Unique identifier for the trace this span belongs to + parent_span_id: + type: string + description: >- + (Optional) Unique identifier for the parent span, if this is a child span + name: + type: string + description: >- + Human-readable name describing the operation this span represents + start_time: + type: string + format: date-time + description: Timestamp when the operation began + end_time: + type: string + format: date-time + description: >- + (Optional) Timestamp when the operation finished, if completed + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Key-value pairs containing additional metadata about the span + additionalProperties: false + required: + - span_id + - trace_id + - name + - start_time + title: Span + description: >- + A span representing a single operation within a trace. + GetSpanTreeRequest: + type: object + properties: + attributes_to_return: + type: array + items: + type: string + description: The attributes to return in the tree. + max_depth: + type: integer + description: The maximum depth of the tree. + additionalProperties: false + title: GetSpanTreeRequest + SpanStatus: + type: string + enum: + - ok + - error + title: SpanStatus + description: >- + The status of a span indicating whether it completed successfully or with + an error. + SpanWithStatus: + type: object + properties: + span_id: + type: string + description: Unique identifier for the span + trace_id: + type: string + description: >- + Unique identifier for the trace this span belongs to + parent_span_id: + type: string + description: >- + (Optional) Unique identifier for the parent span, if this is a child span + name: + type: string + description: >- + Human-readable name describing the operation this span represents + start_time: + type: string + format: date-time + description: Timestamp when the operation began + end_time: + type: string + format: date-time + description: >- + (Optional) Timestamp when the operation finished, if completed + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Key-value pairs containing additional metadata about the span + status: + $ref: '#/components/schemas/SpanStatus' + description: >- + (Optional) The current status of the span + additionalProperties: false + required: + - span_id + - trace_id + - name + - start_time + title: SpanWithStatus + description: A span that includes status information. + QuerySpanTreeResponse: + type: object + properties: + data: + type: object + additionalProperties: + $ref: '#/components/schemas/SpanWithStatus' + description: >- + Dictionary mapping span IDs to spans with status information + additionalProperties: false + required: + - data + title: QuerySpanTreeResponse + description: >- + Response containing a tree structure of spans. 
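For orientation, a small sketch of the span-tree payloads these schemas describe; all values are invented, and only the field names and `SpanStatus` values come from the `SpanWithStatus`/`QuerySpanTreeResponse` schemas above.

    # Illustrative SpanWithStatus / QuerySpanTreeResponse shapes (values invented).
    span = {
        "span_id": "a1b2c3",
        "trace_id": "t-001",
        "name": "inference.rerank",
        "start_time": "2025-09-03T17:34:05Z",
        "end_time": "2025-09-03T17:34:06Z",
        "status": "ok",                          # SpanStatus: "ok" or "error"
        "attributes": {"model": "my-reranker"},  # hypothetical attribute
    }
    tree = {"data": {span["span_id"]: span}}  # QuerySpanTreeResponse maps span IDs to spans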
+ Tool: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: tool + default: tool + description: Type of resource, always 'tool' + toolgroup_id: + type: string + description: >- + ID of the tool group this tool belongs to + description: + type: string + description: >- + Human-readable description of what the tool does + parameters: + type: array + items: + $ref: '#/components/schemas/ToolParameter' + description: List of parameters this tool accepts + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional metadata about the tool + additionalProperties: false + required: + - identifier + - provider_id + - type + - toolgroup_id + - description + - parameters + title: Tool + description: A tool that can be invoked by agents. + ToolGroup: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: tool_group + default: tool_group + description: Type of resource, always 'tool_group' + mcp_endpoint: + $ref: '#/components/schemas/URL' + description: >- + (Optional) Model Context Protocol endpoint for remote tools + args: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional arguments for the tool group + additionalProperties: false + required: + - identifier + - provider_id + - type + title: ToolGroup + description: >- + A group of related tools managed together. + Trace: + type: object + properties: + trace_id: + type: string + description: Unique identifier for the trace + root_span_id: + type: string + description: >- + Unique identifier for the root span that started this trace + start_time: + type: string + format: date-time + description: Timestamp when the trace began + end_time: + type: string + format: date-time + description: >- + (Optional) Timestamp when the trace finished, if completed + additionalProperties: false + required: + - trace_id + - root_span_id + - start_time + title: Trace + description: >- + A trace representing the complete execution path of a request across multiple + operations. + Checkpoint: + type: object + properties: + identifier: + type: string + description: Unique identifier for the checkpoint + created_at: + type: string + format: date-time + description: >- + Timestamp when the checkpoint was created + epoch: + type: integer + description: >- + Training epoch when the checkpoint was saved + post_training_job_id: + type: string + description: >- + Identifier of the training job that created this checkpoint + path: + type: string + description: >- + File system path where the checkpoint is stored + training_metrics: + $ref: '#/components/schemas/PostTrainingMetric' + description: >- + (Optional) Training metrics associated with this checkpoint + additionalProperties: false + required: + - identifier + - created_at + - epoch + - post_training_job_id + - path + title: Checkpoint + description: Checkpoint created during training runs. 
+ PostTrainingJobArtifactsResponse: + type: object + properties: + job_uuid: + type: string + description: Unique identifier for the training job + checkpoints: + type: array + items: + $ref: '#/components/schemas/Checkpoint' + description: >- + List of model checkpoints created during training + additionalProperties: false + required: + - job_uuid + - checkpoints + title: PostTrainingJobArtifactsResponse + description: Artifacts of a finetuning job. + PostTrainingMetric: + type: object + properties: + epoch: + type: integer + description: Training epoch number + train_loss: + type: number + description: Loss value on the training dataset + validation_loss: + type: number + description: Loss value on the validation dataset + perplexity: + type: number + description: >- + Perplexity metric indicating model confidence + additionalProperties: false + required: + - epoch + - train_loss + - validation_loss + - perplexity + title: PostTrainingMetric + description: >- + Training metrics captured during post-training jobs. + PostTrainingJobStatusResponse: + type: object + properties: + job_uuid: + type: string + description: Unique identifier for the training job + status: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + description: Current status of the training job + scheduled_at: + type: string + format: date-time + description: >- + (Optional) Timestamp when the job was scheduled + started_at: + type: string + format: date-time + description: >- + (Optional) Timestamp when the job execution began + completed_at: + type: string + format: date-time + description: >- + (Optional) Timestamp when the job finished, if completed + resources_allocated: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Information about computational resources allocated to the + job + checkpoints: + type: array + items: + $ref: '#/components/schemas/Checkpoint' + description: >- + List of model checkpoints created during training + additionalProperties: false + required: + - job_uuid + - status + - checkpoints + title: PostTrainingJobStatusResponse + description: Status of a finetuning job. + ListPostTrainingJobsResponse: + type: object + properties: + data: + type: array + items: + type: object + properties: + job_uuid: + type: string + additionalProperties: false + required: + - job_uuid + title: PostTrainingJob + additionalProperties: false + required: + - data + title: ListPostTrainingJobsResponse + VectorDB: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: vector_db + default: vector_db + description: >- + Type of resource, always 'vector_db' for vector databases + embedding_model: + type: string + description: >- + Name of the embedding model to use for vector generation + embedding_dimension: + type: integer + description: Dimension of the embedding vectors + vector_db_name: + type: string + additionalProperties: false + required: + - identifier + - provider_id + - type + - embedding_model + - embedding_dimension + title: VectorDB + description: >- + Vector database resource for storing and querying vector embeddings. 
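A quick sketch of a `VectorDB` resource as described above; the field names come from the schema, while the provider, embedding model name, and dimension are illustrative assumptions.

    # Illustrative VectorDB resource (provider, model name, and dimension assumed).
    vector_db = {
        "identifier": "docs-index",
        "provider_id": "faiss",
        "type": "vector_db",
        "embedding_model": "all-MiniLM-L6-v2",  # hypothetical embedding model
        "embedding_dimension": 384,             # must match the model's output size
    }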
+ HealthInfo: + type: object + properties: + status: + type: string + enum: + - OK + - Error + - Not Implemented + description: Current health status of the service + additionalProperties: false + required: + - status + title: HealthInfo + description: >- + Health status information for the service. + RAGDocument: + type: object + properties: + document_id: + type: string + description: The unique identifier for the document. + content: + oneOf: + - type: string + - $ref: '#/components/schemas/InterleavedContentItem' + - type: array + items: + $ref: '#/components/schemas/InterleavedContentItem' + - $ref: '#/components/schemas/URL' + description: The content of the document. + mime_type: + type: string + description: The MIME type of the document. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Additional metadata for the document. + additionalProperties: false + required: + - document_id + - content + - metadata + title: RAGDocument + description: >- + A document to be used for document ingestion in the RAG Tool. + InsertRequest: + type: object + properties: + documents: + type: array + items: + $ref: '#/components/schemas/RAGDocument' + description: >- + List of documents to index in the RAG system + vector_db_id: + type: string + description: >- + ID of the vector database to store the document embeddings + chunk_size_in_tokens: + type: integer + description: >- + (Optional) Size in tokens for document chunking during indexing + additionalProperties: false + required: + - documents + - vector_db_id + - chunk_size_in_tokens + title: InsertRequest + Chunk: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + The content of the chunk, which can be interleaved text, images, or other + types. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Metadata associated with the chunk that will be used in the model context + during inference. + embedding: + type: array + items: + type: number + description: >- + Optional embedding for the chunk. If not provided, it will be computed + later. + stored_chunk_id: + type: string + description: >- + The chunk ID that is stored in the vector database. Used for backend functionality. + chunk_metadata: + $ref: '#/components/schemas/ChunkMetadata' + description: >- + Metadata for the chunk that will NOT be used in the context during inference. + The `chunk_metadata` is required for backend functionality. + additionalProperties: false + required: + - content + - metadata + title: Chunk + description: >- + A chunk of content that can be inserted into a vector database. + ChunkMetadata: + type: object + properties: + chunk_id: + type: string + description: >- + The ID of the chunk. If not set, it will be generated based on the document + ID and content. + document_id: + type: string + description: >- + The ID of the document this chunk belongs to. + source: + type: string + description: >- + The source of the content, such as a URL, file path, or other identifier. + created_timestamp: + type: integer + description: >- + An optional timestamp indicating when the chunk was created. + updated_timestamp: + type: integer + description: >- + An optional timestamp indicating when the chunk was last updated.
+ chunk_window: + type: string + description: >- + The window of the chunk, which can be used to group related chunks together. + chunk_tokenizer: + type: string + description: >- + The tokenizer used to create the chunk. Default is Tiktoken. + chunk_embedding_model: + type: string + description: >- + The embedding model used to create the chunk's embedding. + chunk_embedding_dimension: + type: integer + description: >- + The dimension of the embedding vector for the chunk. + content_token_count: + type: integer + description: >- + The number of tokens in the content of the chunk. + metadata_token_count: + type: integer + description: >- + The number of tokens in the metadata of the chunk. + additionalProperties: false + title: ChunkMetadata + description: >- + `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional + information about the chunk that will not be used in the context during + inference, but is required for backend functionality. The `ChunkMetadata` is + set during chunk creation in `MemoryToolRuntimeImpl().insert()` and is not + expected to change afterward. Use `Chunk.metadata` for metadata that will + be used in the context during inference. + InsertChunksRequest: + type: object + properties: + vector_db_id: + type: string + description: >- + The identifier of the vector database to insert the chunks into. + chunks: + type: array + items: + $ref: '#/components/schemas/Chunk' + description: >- + The chunks to insert. Each `Chunk` should contain content which can be + interleaved text, images, or other types. `metadata`: `dict[str, Any]` + and `embedding`: `List[float]` are optional. If `metadata` is provided, + it configures how Llama Stack formats the chunk during generation. If + `embedding` is not provided, it will be computed later. + ttl_seconds: + type: integer + description: The time to live of the chunks. + additionalProperties: false + required: + - vector_db_id + - chunks + title: InsertChunksRequest + ProviderInfo: + type: object + properties: + api: + type: string + description: The API name this provider implements + provider_id: + type: string + description: Unique identifier for the provider + provider_type: + type: string + description: The type of provider implementation + config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Configuration parameters for the provider + health: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Current health status of the provider + additionalProperties: false + required: + - api + - provider_id + - provider_type + - config + - health + title: ProviderInfo + description: >- + Information about a registered provider including its configuration and health + status. + InvokeToolRequest: + type: object + properties: + tool_name: + type: string + description: The name of the tool to invoke. + kwargs: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + A dictionary of arguments to pass to the tool.
+ additionalProperties: false + required: + - tool_name + - kwargs + title: InvokeToolRequest + ToolInvocationResult: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + (Optional) The output content from the tool execution + error_message: + type: string + description: >- + (Optional) Error message if the tool execution failed + error_code: + type: integer + description: >- + (Optional) Numeric error code if the tool execution failed + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional metadata about the tool execution + additionalProperties: false + title: ToolInvocationResult + description: Result of a tool invocation. + PaginatedResponse: + type: object + properties: + data: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The list of items for the current page + has_more: + type: boolean + description: >- + Whether there are more items available after this set + url: + type: string + description: The URL for accessing this list + additionalProperties: false + required: + - data + - has_more + title: PaginatedResponse + description: >- + A generic paginated response that follows a simple format. + Job: + type: object + properties: + job_id: + type: string + description: Unique identifier for the job + status: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + description: Current execution status of the job + additionalProperties: false + required: + - job_id + - status + title: Job + description: >- + A job execution instance with status tracking. + ListBenchmarksResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Benchmark' + additionalProperties: false + required: + - data + title: ListBenchmarksResponse + Order: + type: string + enum: + - asc + - desc + title: Order + description: Sort order for paginated responses. 
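A minimal sketch of the tool-invocation request/response pair defined above; the field names come from `InvokeToolRequest`/`ToolInvocationResult`, while the tool name and arguments are hypothetical.

    # Hypothetical InvokeToolRequest payload and the result fields to expect.
    invoke_req = {
        "tool_name": "web_search",                      # hypothetical tool
        "kwargs": {"query": "llama stack rerank API"},  # hypothetical arguments
    }
    # A ToolInvocationResult carries optional `content` and `metadata`;
    # failures are reported through `error_message` / `error_code`.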
+ ListOpenAIChatCompletionResponse: + type: object + properties: + data: + type: array + items: + type: object + properties: + id: + type: string + description: The ID of the chat completion + choices: + type: array + items: + $ref: '#/components/schemas/OpenAIChoice' + description: List of choices + object: + type: string + const: chat.completion + default: chat.completion + description: >- + The object type, which will be "chat.completion" + created: + type: integer + description: >- + The Unix timestamp in seconds when the chat completion was created + model: + type: string + description: >- + The model that was used to generate the chat completion + input_messages: + type: array + items: + $ref: '#/components/schemas/OpenAIMessageParam' + additionalProperties: false + required: + - id + - choices + - object + - created + - model + - input_messages + title: OpenAICompletionWithInputMessages + description: >- + List of chat completion objects with their input messages + has_more: + type: boolean + description: >- + Whether there are more completions available beyond this list + first_id: + type: string + description: ID of the first completion in this list + last_id: + type: string + description: ID of the last completion in this list + object: + type: string + const: list + default: list + description: >- + Must be "list" to identify this as a list response + additionalProperties: false + required: + - data + - has_more + - first_id + - last_id + - object + title: ListOpenAIChatCompletionResponse + description: >- + Response from listing OpenAI-compatible chat completions. + ListDatasetsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Dataset' + description: List of datasets + additionalProperties: false + required: + - data + title: ListDatasetsResponse + description: Response from listing datasets. + ListModelsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Model' + additionalProperties: false + required: + - data + title: ListModelsResponse + ListOpenAIResponseInputItem: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: List of input items + object: + type: string + const: list + default: list + description: Object type identifier, always "list" + additionalProperties: false + required: + - data + - object + title: ListOpenAIResponseInputItem + description: >- + List container for OpenAI response input items. + ListOpenAIResponseObject: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseObjectWithInput' + description: >- + List of response objects with their input context + has_more: + type: boolean + description: >- + Whether there are more results available beyond this page + first_id: + type: string + description: >- + Identifier of the first item in this page + last_id: + type: string + description: Identifier of the last item in this page + object: + type: string + const: list + default: list + description: Object type identifier, always "list" + additionalProperties: false + required: + - data + - has_more + - first_id + - last_id + - object + title: ListOpenAIResponseObject + description: >- + Paginated list of OpenAI response objects with navigation metadata. 
+ OpenAIResponseObjectWithInput: + type: object + properties: + created_at: + type: integer + description: >- + Unix timestamp when the response was created + error: + $ref: '#/components/schemas/OpenAIResponseError' + description: >- + (Optional) Error details if the response generation failed + id: + type: string + description: Unique identifier for this response + model: + type: string + description: Model identifier used for generation + object: + type: string + const: response + default: response + description: >- + Object type identifier, always "response" + output: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseOutput' + description: >- + List of generated output items (messages, tool calls, etc.) + parallel_tool_calls: + type: boolean + default: false + description: >- + Whether tool calls can be executed in parallel + previous_response_id: + type: string + description: >- + (Optional) ID of the previous response in a conversation + status: + type: string + description: >- + Current status of the response generation + temperature: + type: number + description: >- + (Optional) Sampling temperature used for generation + text: + $ref: '#/components/schemas/OpenAIResponseText' + description: >- + Text formatting configuration for the response + top_p: + type: number + description: >- + (Optional) Nucleus sampling parameter used for generation + truncation: + type: string + description: >- + (Optional) Truncation strategy applied to the response + user: + type: string + description: >- + (Optional) User identifier associated with the request + input: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: >- + List of input items that led to this response + additionalProperties: false + required: + - created_at + - id + - model + - object + - output + - parallel_tool_calls + - status + - text + - input + title: OpenAIResponseObjectWithInput + description: >- + OpenAI response object extended with input context information. + ListPromptsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Prompt' + additionalProperties: false + required: + - data + title: ListPromptsResponse + description: Response model to list prompts. + ListProvidersResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ProviderInfo' + description: List of provider information objects + additionalProperties: false + required: + - data + title: ListProvidersResponse + description: >- + Response containing a list of all available providers. + RouteInfo: + type: object + properties: + route: + type: string + description: The API endpoint path + method: + type: string + description: HTTP method for the route + provider_types: + type: array + items: + type: string + description: >- + List of provider types that implement this route + additionalProperties: false + required: + - route + - method + - provider_types + title: RouteInfo + description: >- + Information about an API route including its path, method, and implementing + providers. + ListRoutesResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/RouteInfo' + description: >- + List of available route information objects + additionalProperties: false + required: + - data + title: ListRoutesResponse + description: >- + Response containing a list of all available API routes. 
+ ListToolDefsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ToolDef' + description: List of tool definitions + additionalProperties: false + required: + - data + title: ListToolDefsResponse + description: >- + Response containing a list of tool definitions. + ListScoringFunctionsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ScoringFn' + additionalProperties: false + required: + - data + title: ListScoringFunctionsResponse + ListShieldsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Shield' + additionalProperties: false + required: + - data + title: ListShieldsResponse + ListToolGroupsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ToolGroup' + description: List of tool groups + additionalProperties: false + required: + - data + title: ListToolGroupsResponse + description: >- + Response containing a list of tool groups. + ListToolsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Tool' + description: List of tools + additionalProperties: false + required: + - data + title: ListToolsResponse + description: Response containing a list of tools. + ListVectorDBsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/VectorDB' + description: List of vector databases + additionalProperties: false + required: + - data + title: ListVectorDBsResponse + description: Response from listing vector databases. + Event: + oneOf: + - $ref: '#/components/schemas/UnstructuredLogEvent' + - $ref: '#/components/schemas/MetricEvent' + - $ref: '#/components/schemas/StructuredLogEvent' + discriminator: + propertyName: type + mapping: + unstructured_log: '#/components/schemas/UnstructuredLogEvent' + metric: '#/components/schemas/MetricEvent' + structured_log: '#/components/schemas/StructuredLogEvent' + EventType: + type: string + enum: + - unstructured_log + - structured_log + - metric + title: EventType + description: >- + The type of telemetry event being logged. + LogSeverity: + type: string + enum: + - verbose + - debug + - info + - warn + - error + - critical + title: LogSeverity + description: The severity level of a log message. 
+ MetricEvent: + type: object + properties: + trace_id: + type: string + description: >- + Unique identifier for the trace this event belongs to + span_id: + type: string + description: >- + Unique identifier for the span this event belongs to + timestamp: + type: string + format: date-time + description: Timestamp when the event occurred + attributes: + type: object + additionalProperties: + oneOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + description: >- + (Optional) Key-value pairs containing additional metadata about the event + type: + $ref: '#/components/schemas/EventType' + const: metric + default: metric + description: Event type identifier set to METRIC + metric: + type: string + description: The name of the metric being measured + value: + oneOf: + - type: integer + - type: number + description: >- + The numeric value of the metric measurement + unit: + type: string + description: >- + The unit of measurement for the metric value + additionalProperties: false + required: + - trace_id + - span_id + - timestamp + - type + - metric + - value + - unit + title: MetricEvent + description: >- + A metric event containing a measured value. + SpanEndPayload: + type: object + properties: + type: + $ref: '#/components/schemas/StructuredLogType' + const: span_end + default: span_end + description: Payload type identifier set to SPAN_END + status: + $ref: '#/components/schemas/SpanStatus' + description: >- + The final status of the span indicating success or failure + additionalProperties: false + required: + - type + - status + title: SpanEndPayload + description: Payload for a span end event. + SpanStartPayload: + type: object + properties: + type: + $ref: '#/components/schemas/StructuredLogType' + const: span_start + default: span_start + description: >- + Payload type identifier set to SPAN_START + name: + type: string + description: >- + Human-readable name describing the operation this span represents + parent_span_id: + type: string + description: >- + (Optional) Unique identifier for the parent span, if this is a child span + additionalProperties: false + required: + - type + - name + title: SpanStartPayload + description: Payload for a span start event. + StructuredLogEvent: + type: object + properties: + trace_id: + type: string + description: >- + Unique identifier for the trace this event belongs to + span_id: + type: string + description: >- + Unique identifier for the span this event belongs to + timestamp: + type: string + format: date-time + description: Timestamp when the event occurred + attributes: + type: object + additionalProperties: + oneOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + description: >- + (Optional) Key-value pairs containing additional metadata about the event + type: + $ref: '#/components/schemas/EventType' + const: structured_log + default: structured_log + description: >- + Event type identifier set to STRUCTURED_LOG + payload: + $ref: '#/components/schemas/StructuredLogPayload' + description: >- + The structured payload data for the log event + additionalProperties: false + required: + - trace_id + - span_id + - timestamp + - type + - payload + title: StructuredLogEvent + description: >- + A structured log event containing typed payload data. 
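A sketch of a `MetricEvent` payload per the schema above; it would be submitted through the `LogEventRequest` schema defined below. Values are invented, field names come from the schemas.

    # Illustrative MetricEvent (values invented; field names from the schema).
    metric_event = {
        "type": "metric",
        "trace_id": "t-001",
        "span_id": "a1b2c3",
        "timestamp": "2025-09-03T17:34:05Z",
        "metric": "prompt_tokens",
        "value": 128,
        "unit": "tokens",
    }
    log_request = {"event": metric_event, "ttl_seconds": 3600}  # LogEventRequest shape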
+ StructuredLogPayload: + oneOf: + - $ref: '#/components/schemas/SpanStartPayload' + - $ref: '#/components/schemas/SpanEndPayload' + discriminator: + propertyName: type + mapping: + span_start: '#/components/schemas/SpanStartPayload' + span_end: '#/components/schemas/SpanEndPayload' + StructuredLogType: + type: string + enum: + - span_start + - span_end + title: StructuredLogType + description: >- + The type of structured log event payload. + UnstructuredLogEvent: + type: object + properties: + trace_id: + type: string + description: >- + Unique identifier for the trace this event belongs to + span_id: + type: string + description: >- + Unique identifier for the span this event belongs to + timestamp: + type: string + format: date-time + description: Timestamp when the event occurred + attributes: + type: object + additionalProperties: + oneOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + description: >- + (Optional) Key-value pairs containing additional metadata about the event + type: + $ref: '#/components/schemas/EventType' + const: unstructured_log + default: unstructured_log + description: >- + Event type identifier set to UNSTRUCTURED_LOG + message: + type: string + description: The log message text + severity: + $ref: '#/components/schemas/LogSeverity' + description: The severity level of the log message + additionalProperties: false + required: + - trace_id + - span_id + - timestamp + - type + - message + - severity + title: UnstructuredLogEvent + description: >- + An unstructured log event containing a simple text message. + LogEventRequest: + type: object + properties: + event: + $ref: '#/components/schemas/Event' + description: The event to log. + ttl_seconds: + type: integer + description: The time to live of the event. + additionalProperties: false + required: + - event + - ttl_seconds + title: LogEventRequest + VectorStoreChunkingStrategy: + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + VectorStoreChunkingStrategyAuto: + type: object + properties: + type: + type: string + const: auto + default: auto + description: >- + Strategy type, always "auto" for automatic chunking + additionalProperties: false + required: + - type + title: VectorStoreChunkingStrategyAuto + description: >- + Automatic chunking strategy for vector store files. + VectorStoreChunkingStrategyStatic: + type: object + properties: + type: + type: string + const: static + default: static + description: >- + Strategy type, always "static" for static chunking + static: + $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' + description: >- + Configuration parameters for the static chunking strategy + additionalProperties: false + required: + - type + - static + title: VectorStoreChunkingStrategyStatic + description: >- + Static chunking strategy with configurable parameters. 
+ VectorStoreChunkingStrategyStaticConfig: + type: object + properties: + chunk_overlap_tokens: + type: integer + default: 400 + description: >- + Number of tokens to overlap between adjacent chunks + max_chunk_size_tokens: + type: integer + default: 800 + description: >- + Maximum number of tokens per chunk, must be between 100 and 4096 + additionalProperties: false + required: + - chunk_overlap_tokens + - max_chunk_size_tokens + title: VectorStoreChunkingStrategyStaticConfig + description: >- + Configuration for static chunking strategy. + OpenaiAttachFileToVectorStoreRequest: + type: object + properties: + file_id: + type: string + description: >- + The ID of the file to attach to the vector store. + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The key-value attributes stored with the file, which can be used for filtering. + chunking_strategy: + $ref: '#/components/schemas/VectorStoreChunkingStrategy' + description: >- + The chunking strategy to use for the file. + additionalProperties: false + required: + - file_id + title: OpenaiAttachFileToVectorStoreRequest + VectorStoreFileLastError: + type: object + properties: + code: + oneOf: + - type: string + const: server_error + - type: string + const: rate_limit_exceeded + description: >- + Error code indicating the type of failure + message: + type: string + description: >- + Human-readable error message describing the failure + additionalProperties: false + required: + - code + - message + title: VectorStoreFileLastError + description: >- + Error information for failed vector store file processing. + VectorStoreFileObject: + type: object + properties: + id: + type: string + description: Unique identifier for the file + object: + type: string + default: vector_store.file + description: >- + Object type identifier, always "vector_store.file" + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Key-value attributes associated with the file + chunking_strategy: + $ref: '#/components/schemas/VectorStoreChunkingStrategy' + description: >- + Strategy used for splitting the file into chunks + created_at: + type: integer + description: >- + Timestamp when the file was added to the vector store + last_error: + $ref: '#/components/schemas/VectorStoreFileLastError' + description: >- + (Optional) Error information if file processing failed + status: + $ref: '#/components/schemas/VectorStoreFileStatus' + description: Current processing status of the file + usage_bytes: + type: integer + default: 0 + description: Storage space used by this file in bytes + vector_store_id: + type: string + description: >- + ID of the vector store containing this file + additionalProperties: false + required: + - id + - object + - attributes + - chunking_strategy + - created_at + - status + - usage_bytes + - vector_store_id + title: VectorStoreFileObject + description: OpenAI Vector Store File object. 
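A sketch of `OpenaiAttachFileToVectorStoreRequest` using the static chunking strategy above; the file ID and attributes are hypothetical, while the token limits are the schema defaults.

    # Hypothetical attach-file request with an explicit static chunking strategy.
    attach_req = {
        "file_id": "file_123",                    # hypothetical file ID
        "chunking_strategy": {
            "type": "static",
            "static": {
                "max_chunk_size_tokens": 800,     # schema default (must be 100-4096)
                "chunk_overlap_tokens": 400,      # schema default
            },
        },
        "attributes": {"source": "user-manual"},  # optional filter attributes
    }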
+ VectorStoreFileStatus: + oneOf: + - type: string + const: completed + - type: string + const: in_progress + - type: string + const: cancelled + - type: string + const: failed OpenAIJSONSchema: type: object properties: @@ -9582,6 +11706,1606 @@ components: title: VectorStoreSearchResponsePage description: >- Paginated response from searching a vector store. + OpenaiUpdateVectorStoreRequest: + type: object + properties: + name: + type: string + description: The name of the vector store. + expires_after: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The expiration policy for a vector store. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Set of 16 key-value pairs that can be attached to an object. + additionalProperties: false + title: OpenaiUpdateVectorStoreRequest + OpenaiUpdateVectorStoreFileRequest: + type: object + properties: + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The updated key-value attributes to store with the file. + additionalProperties: false + required: + - attributes + title: OpenaiUpdateVectorStoreFileRequest + DPOAlignmentConfig: + type: object + properties: + beta: + type: number + description: Temperature parameter for the DPO loss + loss_type: + $ref: '#/components/schemas/DPOLossType' + default: sigmoid + description: The type of loss function to use for DPO + additionalProperties: false + required: + - beta + - loss_type + title: DPOAlignmentConfig + description: >- + Configuration for Direct Preference Optimization (DPO) alignment. + DPOLossType: + type: string + enum: + - sigmoid + - hinge + - ipo + - kto_pair + title: DPOLossType + DataConfig: + type: object + properties: + dataset_id: + type: string + description: >- + Unique identifier for the training dataset + batch_size: + type: integer + description: Number of samples per training batch + shuffle: + type: boolean + description: >- + Whether to shuffle the dataset during training + data_format: + $ref: '#/components/schemas/DatasetFormat' + description: >- + Format of the dataset (instruct or dialog) + validation_dataset_id: + type: string + description: >- + (Optional) Unique identifier for the validation dataset + packed: + type: boolean + default: false + description: >- + (Optional) Whether to pack multiple samples into a single sequence for + efficiency + train_on_input: + type: boolean + default: false + description: >- + (Optional) Whether to compute loss on input tokens as well as output tokens + additionalProperties: false + required: + - dataset_id + - batch_size + - shuffle + - data_format + title: DataConfig + description: >- + Configuration for training data and data loading. + DatasetFormat: + type: string + enum: + - instruct + - dialog + title: DatasetFormat + description: Format of the training dataset.
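To make the post-training configs concrete, a sketch of the `DPOAlignmentConfig` and `DataConfig` payloads above; the dataset ID, batch size, and beta value are illustrative, the field names and enum values come from the schemas.

    # Illustrative DPO + data configs (dataset ID, batch size, and beta assumed).
    dpo_config = {"beta": 0.1, "loss_type": "sigmoid"}  # DPOLossType default
    data_config = {
        "dataset_id": "my-preference-data",             # hypothetical dataset
        "batch_size": 8,
        "shuffle": True,
        "data_format": "instruct",                      # DatasetFormat: instruct | dialog
    }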
+ EfficiencyConfig: + type: object + properties: + enable_activation_checkpointing: + type: boolean + default: false + description: >- + (Optional) Whether to use activation checkpointing to reduce memory usage + enable_activation_offloading: + type: boolean + default: false + description: >- + (Optional) Whether to offload activations to CPU to save GPU memory + memory_efficient_fsdp_wrap: + type: boolean + default: false + description: >- + (Optional) Whether to use memory-efficient FSDP wrapping + fsdp_cpu_offload: + type: boolean + default: false + description: >- + (Optional) Whether to offload FSDP parameters to CPU + additionalProperties: false + title: EfficiencyConfig + description: >- + Configuration for memory and compute efficiency optimizations. + OptimizerConfig: + type: object + properties: + optimizer_type: + $ref: '#/components/schemas/OptimizerType' + description: >- + Type of optimizer to use (adam, adamw, or sgd) + lr: + type: number + description: Learning rate for the optimizer + weight_decay: + type: number + description: >- + Weight decay coefficient for regularization + num_warmup_steps: + type: integer + description: Number of steps for learning rate warmup + additionalProperties: false + required: + - optimizer_type + - lr + - weight_decay + - num_warmup_steps + title: OptimizerConfig + description: >- + Configuration parameters for the optimization algorithm. + OptimizerType: + type: string + enum: + - adam + - adamw + - sgd + title: OptimizerType + description: >- + Available optimizer algorithms for training. + TrainingConfig: + type: object + properties: + n_epochs: + type: integer + description: Number of training epochs to run + max_steps_per_epoch: + type: integer + default: 1 + description: Maximum number of steps to run per epoch + gradient_accumulation_steps: + type: integer + default: 1 + description: >- + Number of steps to accumulate gradients before updating + max_validation_steps: + type: integer + default: 1 + description: >- + (Optional) Maximum number of validation steps per epoch + data_config: + $ref: '#/components/schemas/DataConfig' + description: >- + (Optional) Configuration for data loading and formatting + optimizer_config: + $ref: '#/components/schemas/OptimizerConfig' + description: >- + (Optional) Configuration for the optimization algorithm + efficiency_config: + $ref: '#/components/schemas/EfficiencyConfig' + description: >- + (Optional) Configuration for memory and compute optimizations + dtype: + type: string + default: bf16 + description: >- + (Optional) Data type for model parameters (bf16, fp16, fp32) + additionalProperties: false + required: + - n_epochs + - max_steps_per_epoch + - gradient_accumulation_steps + title: TrainingConfig + description: >- + Comprehensive configuration for the training process. + PreferenceOptimizeRequest: + type: object + properties: + job_uuid: + type: string + description: The UUID of the job to create. + finetuned_model: + type: string + description: The model to fine-tune. + algorithm_config: + $ref: '#/components/schemas/DPOAlignmentConfig' + description: The algorithm configuration. + training_config: + $ref: '#/components/schemas/TrainingConfig' + description: The training configuration. + hyperparam_search_config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The hyperparam search configuration. 
+ logger_config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The logger configuration. + additionalProperties: false + required: + - job_uuid + - finetuned_model + - algorithm_config + - training_config + - hyperparam_search_config + - logger_config + title: PreferenceOptimizeRequest + PostTrainingJob: + type: object + properties: + job_uuid: + type: string + additionalProperties: false + required: + - job_uuid + title: PostTrainingJob + DefaultRAGQueryGeneratorConfig: + type: object + properties: + type: + type: string + const: default + default: default + description: >- + Type of query generator, always 'default' + separator: + type: string + default: ' ' + description: >- + String separator used to join query terms + additionalProperties: false + required: + - type + - separator + title: DefaultRAGQueryGeneratorConfig + description: >- + Configuration for the default RAG query generator. + LLMRAGQueryGeneratorConfig: + type: object + properties: + type: + type: string + const: llm + default: llm + description: Type of query generator, always 'llm' + model: + type: string + description: >- + Name of the language model to use for query generation + template: + type: string + description: >- + Template string for formatting the query generation prompt + additionalProperties: false + required: + - type + - model + - template + title: LLMRAGQueryGeneratorConfig + description: >- + Configuration for the LLM-based RAG query generator. + RAGQueryConfig: + type: object + properties: + query_generator_config: + $ref: '#/components/schemas/RAGQueryGeneratorConfig' + description: Configuration for the query generator. + max_tokens_in_context: + type: integer + default: 4096 + description: Maximum number of tokens in the context. + max_chunks: + type: integer + default: 5 + description: Maximum number of chunks to retrieve. + chunk_template: + type: string + default: > + Result {index} + + Content: {chunk.content} + + Metadata: {metadata} + description: >- + Template for formatting each retrieved chunk in the context. Available + placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk + content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: + {chunk.content}\nMetadata: {metadata}\n" + mode: + $ref: '#/components/schemas/RAGSearchMode' + default: vector + description: >- + Search mode for retrieval—either "vector", "keyword", or "hybrid". Default + "vector". + ranker: + $ref: '#/components/schemas/Ranker' + description: >- + Configuration for the ranker to use in hybrid search. Defaults to RRF + ranker. + additionalProperties: false + required: + - query_generator_config + - max_tokens_in_context + - max_chunks + - chunk_template + title: RAGQueryConfig + description: >- + Configuration for the RAG query generation. 
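A sketch of a full `RAGQueryConfig` per the schema above, combining hybrid search mode with the RRF ranker; all values shown are the schema defaults except `mode`.

    # RAGQueryConfig using hybrid search with the default RRF ranker.
    query_config = {
        "query_generator_config": {"type": "default", "separator": " "},
        "max_tokens_in_context": 4096,
        "max_chunks": 5,
        "chunk_template": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n",
        "mode": "hybrid",                                  # schema default is "vector"
        "ranker": {"type": "rrf", "impact_factor": 60.0},  # schema default ranker
    }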
+ RAGQueryGeneratorConfig: + oneOf: + - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' + - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' + discriminator: + propertyName: type + mapping: + default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' + llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' + RAGSearchMode: + type: string + enum: + - vector + - keyword + - hybrid + title: RAGSearchMode + description: >- + Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search + for semantic matching - KEYWORD: Uses keyword-based search for exact matching + - HYBRID: Combines both vector and keyword search for better results + RRFRanker: + type: object + properties: + type: + type: string + const: rrf + default: rrf + description: The type of ranker, always "rrf" + impact_factor: + type: number + default: 60.0 + description: >- + The impact factor for RRF scoring. Higher values give more weight to higher-ranked + results. Must be greater than 0 + additionalProperties: false + required: + - type + - impact_factor + title: RRFRanker + description: >- + Reciprocal Rank Fusion (RRF) ranker configuration. + Ranker: + oneOf: + - $ref: '#/components/schemas/RRFRanker' + - $ref: '#/components/schemas/WeightedRanker' + discriminator: + propertyName: type + mapping: + rrf: '#/components/schemas/RRFRanker' + weighted: '#/components/schemas/WeightedRanker' + WeightedRanker: + type: object + properties: + type: + type: string + const: weighted + default: weighted + description: The type of ranker, always "weighted" + alpha: + type: number + default: 0.5 + description: >- + Weight factor between 0 and 1. 0 means only use keyword scores, 1 means + only use vector scores, values in between blend both scores. + additionalProperties: false + required: + - type + - alpha + title: WeightedRanker + description: >- + Weighted ranker configuration that combines vector and keyword scores. + QueryRequest: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + The query content to search for in the indexed documents + vector_db_ids: + type: array + items: + type: string + description: >- + List of vector database IDs to search within + query_config: + $ref: '#/components/schemas/RAGQueryConfig' + description: >- + (Optional) Configuration parameters for the query operation + additionalProperties: false + required: + - content + - vector_db_ids + title: QueryRequest + RAGQueryResult: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + (Optional) The retrieved content from the query + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Additional metadata about the query result + additionalProperties: false + required: + - metadata + title: RAGQueryResult + description: >- + Result of a RAG query containing retrieved content and metadata. + QueryChunksRequest: + type: object + properties: + vector_db_id: + type: string + description: >- + The identifier of the vector database to query. + query: + $ref: '#/components/schemas/InterleavedContent' + description: The query to search for. + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the query. 
+ additionalProperties: false + required: + - vector_db_id + - query + title: QueryChunksRequest + QueryChunksResponse: + type: object + properties: + chunks: + type: array + items: + $ref: '#/components/schemas/Chunk' + description: >- + List of content chunks returned from the query + scores: + type: array + items: + type: number + description: >- + Relevance scores corresponding to each returned chunk + additionalProperties: false + required: + - chunks + - scores + title: QueryChunksResponse + description: >- + Response from querying chunks in a vector database. + QueryMetricsRequest: + type: object + properties: + start_time: + type: integer + description: The start time of the metric to query. + end_time: + type: integer + description: The end time of the metric to query. + granularity: + type: string + description: The granularity of the metric to query. + query_type: + type: string + enum: + - range + - instant + description: The type of query to perform. + label_matchers: + type: array + items: + type: object + properties: + name: + type: string + description: The name of the label to match + value: + type: string + description: The value to match against + operator: + type: string + enum: + - '=' + - '!=' + - =~ + - '!~' + description: >- + The comparison operator to use for matching + default: '=' + additionalProperties: false + required: + - name + - value + - operator + title: MetricLabelMatcher + description: >- + A matcher for filtering metrics by label values. + description: >- + The label matchers to apply to the metric. + additionalProperties: false + required: + - start_time + - query_type + title: QueryMetricsRequest + MetricDataPoint: + type: object + properties: + timestamp: + type: integer + description: >- + Unix timestamp when the metric value was recorded + value: + type: number + description: >- + The numeric value of the metric at this timestamp + unit: + type: string + additionalProperties: false + required: + - timestamp + - value + - unit + title: MetricDataPoint + description: >- + A single data point in a metric time series. + MetricLabel: + type: object + properties: + name: + type: string + description: The name of the label + value: + type: string + description: The value of the label + additionalProperties: false + required: + - name + - value + title: MetricLabel + description: A label associated with a metric. + MetricSeries: + type: object + properties: + metric: + type: string + description: The name of the metric + labels: + type: array + items: + $ref: '#/components/schemas/MetricLabel' + description: >- + List of labels associated with this metric series + values: + type: array + items: + $ref: '#/components/schemas/MetricDataPoint' + description: >- + List of data points in chronological order + additionalProperties: false + required: + - metric + - labels + - values + title: MetricSeries + description: A time series of metric data points. + QueryMetricsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/MetricSeries' + description: >- + List of metric series matching the query criteria + additionalProperties: false + required: + - data + title: QueryMetricsResponse + description: >- + Response containing metric time series data. 
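A sketch of a `QueryMetricsRequest` with a label matcher, per the schemas above; the time window, label name, and regex value are invented.

    # Hypothetical range query for metrics, filtering by a label matcher.
    metrics_query = {
        "start_time": 1725000000,   # Unix seconds (invented)
        "end_time": 1725003600,
        "query_type": "range",
        "label_matchers": [
            {"name": "model_id", "value": "nvidia/.*", "operator": "=~"}
        ],
    }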
+ QueryCondition: + type: object + properties: + key: + type: string + description: The attribute key to filter on + op: + $ref: '#/components/schemas/QueryConditionOp' + description: The comparison operator to apply + value: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The value to compare against + additionalProperties: false + required: + - key + - op + - value + title: QueryCondition + description: A condition for filtering query results. + QueryConditionOp: + type: string + enum: + - eq + - ne + - gt + - lt + title: QueryConditionOp + description: >- + Comparison operators for query conditions. + QuerySpansRequest: + type: object + properties: + attribute_filters: + type: array + items: + $ref: '#/components/schemas/QueryCondition' + description: >- + The attribute filters to apply to the spans. + attributes_to_return: + type: array + items: + type: string + description: The attributes to return in the spans. + max_depth: + type: integer + description: The maximum depth of the tree. + additionalProperties: false + required: + - attribute_filters + - attributes_to_return + title: QuerySpansRequest + QuerySpansResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Span' + description: >- + List of spans matching the query criteria + additionalProperties: false + required: + - data + title: QuerySpansResponse + description: Response containing a list of spans. + QueryTracesRequest: + type: object + properties: + attribute_filters: + type: array + items: + $ref: '#/components/schemas/QueryCondition' + description: >- + The attribute filters to apply to the traces. + limit: + type: integer + description: The limit of traces to return. + offset: + type: integer + description: The offset of the traces to return. + order_by: + type: array + items: + type: string + description: The order by of the traces to return. + additionalProperties: false + title: QueryTracesRequest + QueryTracesResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Trace' + description: >- + List of traces matching the query criteria + additionalProperties: false + required: + - data + title: QueryTracesResponse + description: Response containing a list of traces. + RegisterBenchmarkRequest: + type: object + properties: + benchmark_id: + type: string + description: The ID of the benchmark to register. + dataset_id: + type: string + description: >- + The ID of the dataset to use for the benchmark. + scoring_functions: + type: array + items: + type: string + description: >- + The scoring functions to use for the benchmark. + provider_benchmark_id: + type: string + description: >- + The ID of the provider benchmark to use for the benchmark. + provider_id: + type: string + description: >- + The ID of the provider to use for the benchmark. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The metadata to use for the benchmark. + additionalProperties: false + required: + - benchmark_id + - dataset_id + - scoring_functions + title: RegisterBenchmarkRequest + RegisterDatasetRequest: + type: object + properties: + purpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + description: >- + The purpose of the dataset. 
One of: - "post-training/messages": The dataset + contains a messages column with list of messages for post-training. { + "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", + "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset + contains a question column and an answer column for evaluation. { "question": + "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer": + The dataset contains a messages column with list of messages and an answer + column for evaluation. { "messages": [ {"role": "user", "content": "Hello, + my name is John Doe."}, {"role": "assistant", "content": "Hello, John + Doe. How can I help you today?"}, {"role": "user", "content": "What's + my name?"}, ], "answer": "John Doe" } + source: + $ref: '#/components/schemas/DataSource' + description: >- + The data source of the dataset. Ensure that the data source schema is + compatible with the purpose of the dataset. Examples: - { "type": "uri", + "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": + "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" + } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" + } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": + "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] + } ] } + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The metadata for the dataset. - E.g. {"description": "My dataset"}. + dataset_id: + type: string + description: >- + The ID of the dataset. If not provided, an ID will be generated. + additionalProperties: false + required: + - purpose + - source + title: RegisterDatasetRequest + RegisterModelRequest: + type: object + properties: + model_id: + type: string + description: The identifier of the model to register. + provider_model_id: + type: string + description: >- + The identifier of the model in the provider. + provider_id: + type: string + description: The identifier of the provider. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Any additional metadata for this model. + model_type: + $ref: '#/components/schemas/ModelType' + description: The type of model to register. + additionalProperties: false + required: + - model_id + title: RegisterModelRequest + RegisterScoringFunctionRequest: + type: object + properties: + scoring_fn_id: + type: string + description: >- + The ID of the scoring function to register. + description: + type: string + description: The description of the scoring function. + return_type: + $ref: '#/components/schemas/ParamType' + description: The return type of the scoring function. + provider_scoring_fn_id: + type: string + description: >- + The ID of the provider scoring function to use for the scoring function. + provider_id: + type: string + description: >- + The ID of the provider to use for the scoring function. + params: + $ref: '#/components/schemas/ScoringFnParams' + description: >- + The parameters for the scoring function for benchmark eval, these can + be overridden for app eval. 
+ additionalProperties: false + required: + - scoring_fn_id + - description + - return_type + title: RegisterScoringFunctionRequest + RegisterShieldRequest: + type: object + properties: + shield_id: + type: string + description: >- + The identifier of the shield to register. + provider_shield_id: + type: string + description: >- + The identifier of the shield in the provider. + provider_id: + type: string + description: The identifier of the provider. + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the shield. + additionalProperties: false + required: + - shield_id + title: RegisterShieldRequest + RegisterToolGroupRequest: + type: object + properties: + toolgroup_id: + type: string + description: The ID of the tool group to register. + provider_id: + type: string + description: >- + The ID of the provider to use for the tool group. + mcp_endpoint: + $ref: '#/components/schemas/URL' + description: >- + The MCP endpoint to use for the tool group. + args: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + A dictionary of arguments to pass to the tool group. + additionalProperties: false + required: + - toolgroup_id + - provider_id + title: RegisterToolGroupRequest + RegisterVectorDbRequest: + type: object + properties: + vector_db_id: + type: string + description: >- + The identifier of the vector database to register. + embedding_model: + type: string + description: The embedding model to use. + embedding_dimension: + type: integer + description: The dimension of the embedding model. + provider_id: + type: string + description: The identifier of the provider. + vector_db_name: + type: string + description: The name of the vector database. + provider_vector_db_id: + type: string + description: >- + The identifier of the vector database in the provider. + additionalProperties: false + required: + - vector_db_id + - embedding_model + title: RegisterVectorDbRequest + RerankRequest: + type: object + properties: + model: + type: string + description: >- + The identifier of the reranking model to use. The model must be a reranking + model registered with Llama Stack and available via the /models endpoint. + query: + oneOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + description: >- + The search query to rank items against. Can be a string, text content + part, or image content part. The input must not exceed the model's max + input token length. + items: + type: array + items: + oneOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + description: >- + List of items to rerank. Each item can be a string, text content part, + or image content part. Each input must not exceed the model's max input + token length. + max_num_results: + type: integer + description: >- + (Optional) Maximum number of results to return. Default: returns all. 
+ additionalProperties: false + required: + - model + - query + - items + title: RerankRequest + RerankData: + type: object + properties: + index: + type: integer + description: >- + The original index of the document in the input list + relevance_score: + type: number + description: >- + The relevance score from the model output. Values are inverted when applicable + so that higher scores indicate greater relevance. + additionalProperties: false + required: + - index + - relevance_score + title: RerankData + description: >- + A single rerank result from a reranking response. + RerankResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/RerankData' + description: >- + List of rerank result objects, sorted by relevance score (descending) + additionalProperties: false + required: + - data + title: RerankResponse + description: Response from a reranking request. + ResumeAgentTurnRequest: + type: object + properties: + tool_responses: + type: array + items: + $ref: '#/components/schemas/ToolResponse' + description: >- + The tool call responses to resume the turn with. + stream: + type: boolean + description: Whether to stream the response. + additionalProperties: false + required: + - tool_responses + title: ResumeAgentTurnRequest + RunEvalRequest: + type: object + properties: + benchmark_config: + $ref: '#/components/schemas/BenchmarkConfig' + description: The configuration for the benchmark. + additionalProperties: false + required: + - benchmark_config + title: RunEvalRequest + RunModerationRequest: + type: object + properties: + input: + oneOf: + - type: string + - type: array + items: + type: string + description: >- + Input (or inputs) to classify. Can be a single string, an array of strings, + or an array of multi-modal input objects similar to other models. + model: + type: string + description: >- + The content moderation model you would like to use. + additionalProperties: false + required: + - input + - model + title: RunModerationRequest + ModerationObject: + type: object + properties: + id: + type: string + description: >- + The unique identifier for the moderation request. + model: + type: string + description: >- + The model used to generate the moderation results. + results: + type: array + items: + $ref: '#/components/schemas/ModerationObjectResults' + description: A list of moderation objects + additionalProperties: false + required: + - id + - model + - results + title: ModerationObject + description: A moderation object. + ModerationObjectResults: + type: object + properties: + flagged: + type: boolean + description: >- + Whether any of the below categories are flagged. + categories: + type: object + additionalProperties: + type: boolean + description: >- + A list of the categories, and whether they are flagged or not. + category_applied_input_types: + type: object + additionalProperties: + type: array + items: + type: string + description: >- + A list of the categories along with the input type(s) that the score applies + to. + category_scores: + type: object + additionalProperties: + type: number + description: >- + A list of the categories along with their scores as predicted by model. + user_message: + type: string + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + additionalProperties: false + required: + - flagged + - metadata + title: ModerationObjectResults + description: A moderation object. 
+ RunShieldRequest: + type: object + properties: + shield_id: + type: string + description: The identifier of the shield to run. + messages: + type: array + items: + $ref: '#/components/schemas/Message' + description: The messages to run the shield on. + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the shield. + additionalProperties: false + required: + - shield_id + - messages + - params + title: RunShieldRequest + RunShieldResponse: + type: object + properties: + violation: + $ref: '#/components/schemas/SafetyViolation' + description: >- + (Optional) Safety violation detected by the shield, if any + additionalProperties: false + title: RunShieldResponse + description: Response from running a safety shield. + SaveSpansToDatasetRequest: + type: object + properties: + attribute_filters: + type: array + items: + $ref: '#/components/schemas/QueryCondition' + description: >- + The attribute filters to apply to the spans. + attributes_to_save: + type: array + items: + type: string + description: The attributes to save to the dataset. + dataset_id: + type: string + description: >- + The ID of the dataset to save the spans to. + max_depth: + type: integer + description: The maximum depth of the tree. + additionalProperties: false + required: + - attribute_filters + - attributes_to_save + - dataset_id + title: SaveSpansToDatasetRequest + ScoreRequest: + type: object + properties: + input_rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The rows to score. + scoring_functions: + type: object + additionalProperties: + oneOf: + - $ref: '#/components/schemas/ScoringFnParams' + - type: 'null' + description: >- + The scoring functions to use for the scoring. + additionalProperties: false + required: + - input_rows + - scoring_functions + title: ScoreRequest + ScoreResponse: + type: object + properties: + results: + type: object + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + description: >- + A map of scoring function name to ScoringResult. + additionalProperties: false + required: + - results + title: ScoreResponse + description: The response from scoring. + ScoreBatchRequest: + type: object + properties: + dataset_id: + type: string + description: The ID of the dataset to score. + scoring_functions: + type: object + additionalProperties: + oneOf: + - $ref: '#/components/schemas/ScoringFnParams' + - type: 'null' + description: >- + The scoring functions to use for the scoring. + save_results_dataset: + type: boolean + description: >- + Whether to save the results to a dataset. + additionalProperties: false + required: + - dataset_id + - scoring_functions + - save_results_dataset + title: ScoreBatchRequest + ScoreBatchResponse: + type: object + properties: + dataset_id: + type: string + description: >- + (Optional) The identifier of the dataset that was scored + results: + type: object + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + description: >- + A map of scoring function name to ScoringResult + additionalProperties: false + required: + - results + title: ScoreBatchResponse + description: >- + Response from batch scoring operations on datasets. 
+ SetDefaultVersionRequest: + type: object + properties: + version: + type: integer + description: The version to set as default. + additionalProperties: false + required: + - version + title: SetDefaultVersionRequest + AlgorithmConfig: + oneOf: + - $ref: '#/components/schemas/LoraFinetuningConfig' + - $ref: '#/components/schemas/QATFinetuningConfig' + discriminator: + propertyName: type + mapping: + LoRA: '#/components/schemas/LoraFinetuningConfig' + QAT: '#/components/schemas/QATFinetuningConfig' + LoraFinetuningConfig: + type: object + properties: + type: + type: string + const: LoRA + default: LoRA + description: Algorithm type identifier, always "LoRA" + lora_attn_modules: + type: array + items: + type: string + description: >- + List of attention module names to apply LoRA to + apply_lora_to_mlp: + type: boolean + description: Whether to apply LoRA to MLP layers + apply_lora_to_output: + type: boolean + description: >- + Whether to apply LoRA to output projection layers + rank: + type: integer + description: >- + Rank of the LoRA adaptation (lower rank = fewer parameters) + alpha: + type: integer + description: >- + LoRA scaling parameter that controls adaptation strength + use_dora: + type: boolean + default: false + description: >- + (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation) + quantize_base: + type: boolean + default: false + description: >- + (Optional) Whether to quantize the base model weights + additionalProperties: false + required: + - type + - lora_attn_modules + - apply_lora_to_mlp + - apply_lora_to_output + - rank + - alpha + title: LoraFinetuningConfig + description: >- + Configuration for Low-Rank Adaptation (LoRA) fine-tuning. + QATFinetuningConfig: + type: object + properties: + type: + type: string + const: QAT + default: QAT + description: Algorithm type identifier, always "QAT" + quantizer_name: + type: string + description: >- + Name of the quantization algorithm to use + group_size: + type: integer + description: Size of groups for grouped quantization + additionalProperties: false + required: + - type + - quantizer_name + - group_size + title: QATFinetuningConfig + description: >- + Configuration for Quantization-Aware Training (QAT) fine-tuning. + SupervisedFineTuneRequest: + type: object + properties: + job_uuid: + type: string + description: The UUID of the job to create. + training_config: + $ref: '#/components/schemas/TrainingConfig' + description: The training configuration. + hyperparam_search_config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The hyperparam search configuration. + logger_config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The logger configuration. + model: + type: string + description: The model to fine-tune. + checkpoint_dir: + type: string + description: The directory to save checkpoint(s) to. + algorithm_config: + $ref: '#/components/schemas/AlgorithmConfig' + description: The algorithm configuration. 
+      additionalProperties: false
+      required:
+        - job_uuid
+        - training_config
+        - hyperparam_search_config
+        - logger_config
+      title: SupervisedFineTuneRequest
+    SyntheticDataGenerateRequest:
+      type: object
+      properties:
+        dialogs:
+          type: array
+          items:
+            $ref: '#/components/schemas/Message'
+          description: >-
+            List of conversation messages to use as input for synthetic data generation
+        filtering_function:
+          type: string
+          enum:
+            - none
+            - random
+            - top_k
+            - top_p
+            - top_k_top_p
+            - sigmoid
+          description: >-
+            Type of filtering to apply to generated synthetic data samples
+        model:
+          type: string
+          description: >-
+            (Optional) The identifier of the model to use. The model must be registered
+            with Llama Stack and available via the /models endpoint
+      additionalProperties: false
+      required:
+        - dialogs
+        - filtering_function
+      title: SyntheticDataGenerateRequest
+    SyntheticDataGenerationResponse:
+      type: object
+      properties:
+        synthetic_data:
+          type: array
+          items:
+            type: object
+            additionalProperties:
+              oneOf:
+                - type: 'null'
+                - type: boolean
+                - type: number
+                - type: string
+                - type: array
+                - type: object
+          description: >-
+            List of generated synthetic data samples that passed the filtering criteria
+        statistics:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: >-
+            (Optional) Statistical information about the generation process and filtering
+            results
+      additionalProperties: false
+      required:
+        - synthetic_data
+      title: SyntheticDataGenerationResponse
+      description: >-
+        Response from the synthetic data generation. Batch of (prompt, response, score)
+        tuples that pass the threshold.
+    UpdatePromptRequest:
+      type: object
+      properties:
+        prompt:
+          type: string
+          description: The updated prompt text content.
+        version:
+          type: integer
+          description: >-
+            The current version of the prompt being updated.
+        variables:
+          type: array
+          items:
+            type: string
+          description: >-
+            Updated list of variable names that can be used in the prompt template.
+        set_as_default:
+          type: boolean
+          description: >-
+            Set the new version as the default (default=True).
+      additionalProperties: false
+      required:
+        - prompt
+        - version
+        - set_as_default
+      title: UpdatePromptRequest
     VersionInfo:
       type: object
       properties:
diff --git a/example.py b/example.py
new file mode 100644
index 000000000..7e968e24a
--- /dev/null
+++ b/example.py
@@ -0,0 +1,39 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
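+
+# Minimal end-to-end example of the new rerank API, run against an
+# NVIDIA-hosted NIM reranking endpoint via the library client.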
+
+import os
+
+# NOTE: replace the placeholder below with your own NVIDIA API key.
+os.environ["NVIDIA_API_KEY"] = "nvapi-<your-api-key>"
+# Option 1: Use default NIM URL (will auto-switch to ai.api.nvidia.com for rerank)
+# os.environ["NVIDIA_BASE_URL"] = "https://ai.api.nvidia.com"
+# Option 2: Use AI Foundation URL directly for rerank models
+# os.environ["NVIDIA_BASE_URL"] = "https://ai.api.nvidia.com/v1"
+os.environ["NVIDIA_BASE_URL"] = "https://integrate.api.nvidia.com"
+
+from llama_stack.core.library_client import LlamaStackAsLibraryClient
+
+client = LlamaStackAsLibraryClient("nvidia")
+client.initialize()
+
+print(client.models.list())
+
+rerank_response = client.inference.rerank(
+    model="nvidia/llama-3.2-nv-rerankqa-1b-v2",
+    query="query",
+    items=[
+        "item_1",
+        "item_2",
+        "item_3",
+    ],
+)
+
+print(rerank_response)
+for i, result in enumerate(rerank_response.data):
+    print(f"{i + 1}. [Index: {result.index}, Score: {result.relevance_score:.3f}]")
diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py
index e88a16315..e452d8157 100644
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@@ -1016,7 +1016,7 @@ class InferenceProvider(Protocol):
     ) -> RerankResponse:
         """Rerank a list of documents based on their relevance to a query.
 
-        :param model: The identifier of the reranking model to use.
+        :param model: The identifier of the reranking model to use. The model must be a reranking model registered with Llama Stack and available via the /models endpoint.
         :param query: The search query to rank items against. Can be a string, text content part, or image content part. The input must not exceed the model's max input token length.
         :param items: List of items to rerank. Each item can be a string, text content part, or image content part. Each input must not exceed the model's max input token length.
        :param max_num_results: (Optional) Maximum number of results to return. Default: returns all.
diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py
index 210ed9246..359f5bf0c 100644
--- a/llama_stack/apis/models/models.py
+++ b/llama_stack/apis/models/models.py
@@ -27,10 +27,12 @@ class ModelType(StrEnum):
     """Enumeration of supported model types in Llama Stack.
:cvar llm: Large language model for text generation and completion :cvar embedding: Embedding model for converting text to vector representations + :cvar rerank: Reranking model for reordering documents by relevance """ llm = "llm" embedding = "embedding" + rerank = "rerank" @json_schema_type diff --git a/llama_stack/core/routers/inference.py b/llama_stack/core/routers/inference.py index c4338e614..e5826685e 100644 --- a/llama_stack/core/routers/inference.py +++ b/llama_stack/core/routers/inference.py @@ -41,9 +41,14 @@ from llama_stack.apis.inference import ( OpenAIMessageParam, OpenAIResponseFormatParam, Order, + RerankResponse, StopReason, ToolPromptFormat, ) +from llama_stack.apis.inference.inference import ( + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartTextParam, +) from llama_stack.apis.models import Model, ModelType from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry from llama_stack.log import get_logger @@ -179,6 +184,25 @@ class InferenceRouter(Inference): raise ModelTypeError(model_id, model.model_type, expected_model_type) return model + async def rerank( + self, + model: str, + query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam, + items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam], + max_num_results: int | None = None, + ) -> RerankResponse: + """Route rerank requests to the appropriate provider based on the model.""" + logger.debug(f"InferenceRouter.rerank: {model}") + model_obj = await self._get_model(model, ModelType.rerank) + provider = await self.routing_table.get_provider_impl(model_obj.identifier) + return await provider.rerank( + model=model_obj.identifier, + query=query, + items=items, + max_num_results=max_num_results, + ) + + async def openai_completion( self, model: str, diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py new file mode 100644 index 000000000..a79a1c6aa --- /dev/null +++ b/llama_stack/providers/remote/inference/nvidia/models.py @@ -0,0 +1,131 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
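+
+# Model entries advertised by the NVIDIA inference provider. The rerank
+# entries below carry a hosted reranking endpoint in their metadata so the
+# adapter can route rerank calls to model-specific URLs.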
+ +from llama_stack.apis.models import ModelType +from llama_stack.models.llama.sku_types import CoreModelId +from llama_stack.providers.utils.inference.model_registry import ( + ProviderModelEntry, + build_hf_repo_model_entry, +) + +SAFETY_MODELS_ENTRIES = [] + +# https://docs.nvidia.com/nim/large-language-models/latest/supported-llm-agnostic-architectures.html +MODEL_ENTRIES = [ + build_hf_repo_model_entry( + "meta/llama3-8b-instruct", + CoreModelId.llama3_8b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama3-70b-instruct", + CoreModelId.llama3_70b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.1-8b-instruct", + CoreModelId.llama3_1_8b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.1-70b-instruct", + CoreModelId.llama3_1_70b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.1-405b-instruct", + CoreModelId.llama3_1_405b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.2-1b-instruct", + CoreModelId.llama3_2_1b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.2-3b-instruct", + CoreModelId.llama3_2_3b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.2-11b-vision-instruct", + CoreModelId.llama3_2_11b_vision_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.2-90b-vision-instruct", + CoreModelId.llama3_2_90b_vision_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.3-70b-instruct", + CoreModelId.llama3_3_70b_instruct.value, + ), + ProviderModelEntry( + provider_model_id="nvidia/vila", + model_type=ModelType.llm, + ), + # NeMo Retriever Text Embedding models - + # + # https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html + # + # +-----------------------------------+--------+-----------+-----------+------------+ + # | Model ID | Max | Publisher | Embedding | Dynamic | + # | | Tokens | | Dimension | Embeddings | + # +-----------------------------------+--------+-----------+-----------+------------+ + # | nvidia/llama-3.2-nv-embedqa-1b-v2 | 8192 | NVIDIA | 2048 | Yes | + # | nvidia/nv-embedqa-e5-v5 | 512 | NVIDIA | 1024 | No | + # | nvidia/nv-embedqa-mistral-7b-v2 | 512 | NVIDIA | 4096 | No | + # | snowflake/arctic-embed-l | 512 | Snowflake | 1024 | No | + # +-----------------------------------+--------+-----------+-----------+------------+ + ProviderModelEntry( + provider_model_id="nvidia/llama-3.2-nv-embedqa-1b-v2", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 2048, + "context_length": 8192, + }, + ), + ProviderModelEntry( + provider_model_id="nvidia/nv-embedqa-e5-v5", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 1024, + "context_length": 512, + }, + ), + ProviderModelEntry( + provider_model_id="nvidia/nv-embedqa-mistral-7b-v2", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 4096, + "context_length": 512, + }, + ), + ProviderModelEntry( + provider_model_id="snowflake/arctic-embed-l", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 1024, + "context_length": 512, + }, + ), + # NVIDIA Reranking models + ProviderModelEntry( + provider_model_id="nv-rerank-qa-mistral-4b:1", + model_type=ModelType.rerank, + metadata={ + "endpoint": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking", + }, + ), + ProviderModelEntry( + provider_model_id="nvidia/nv-rerankqa-mistral-4b-v3", + model_type=ModelType.rerank, + metadata={ + "endpoint": "https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking", + }, + 
), + ProviderModelEntry( + provider_model_id="nvidia/llama-3.2-nv-rerankqa-1b-v2", + model_type=ModelType.rerank, + metadata={ + "endpoint": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking", + }, + ), + # TODO(mf): how do we handle Nemotron models? + # "Llama3.1-Nemotron-51B-Instruct" -> "meta/llama-3.1-nemotron-51b-instruct", +] + SAFETY_MODELS_ENTRIES diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 2e6c3d769..b2fdec61f 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -12,6 +12,12 @@ from llama_stack.apis.inference import ( OpenAIEmbeddingData, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, + RerankData, + RerankResponse, +) +from llama_stack.apis.inference.inference import ( + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartTextParam, ) from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin @@ -80,6 +86,80 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): """ return f"{self._config.url}/v1" if self._config.append_api_version else self._config.url + async def rerank( + self, + model: str, + query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam, + items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam], + max_num_results: int | None = None, + ) -> RerankResponse: + provider_model_id = await self._get_provider_model_id(model) + + ranking_url = self.get_base_url() + model_obj = await self.model_store.get_model(model) + + if _is_nvidia_hosted(self._config) and "endpoint" in model_obj.metadata: + ranking_url = model_obj.metadata["endpoint"] + + logger.debug(f"Using rerank endpoint: {ranking_url} for model: {provider_model_id}") + + # Convert query to text format + if isinstance(query, str): + query_text = query + elif hasattr(query, "text"): + query_text = query.text + else: + raise ValueError("Query must be a string or text content part") + + # Convert items to text format + passages = [] + for item in items: + if isinstance(item, str): + passages.append({"text": item}) + elif hasattr(item, "text"): + passages.append({"text": item.text}) + else: + raise ValueError("Items must be strings or text content parts") + + payload = { + "model": provider_model_id, + "query": {"text": query_text}, + "passages": passages, + } + + headers = { + "Authorization": f"Bearer {self.get_api_key()}", + "Content-Type": "application/json", + } + + import aiohttp + + try: + async with aiohttp.ClientSession() as session: + async with session.post(ranking_url, headers=headers, json=payload) as response: + if response.status != 200: + response_text = await response.text() + raise ConnectionError( + f"NVIDIA rerank API request failed with status {response.status}: {response_text}" + ) + + result = await response.json() + rankings = result.get("rankings", []) + + # Convert to RerankData format + rerank_data = [] + for ranking in rankings: + rerank_data.append(RerankData(index=ranking["index"], relevance_score=ranking["logit"])) + + # Apply max_num_results limit if specified + if max_num_results is not None: + rerank_data = rerank_data[:max_num_results] + + return RerankResponse(data=rerank_data) + + except aiohttp.ClientError as e: + raise ConnectionError(f"Failed to connect to NVIDIA rerank API at {ranking_url}: {e}") from e + async 
def openai_embeddings( self, model: str, From d7cbeb4b8c5942cfda4f096dd1fd45eeb35d1349 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Thu, 4 Sep 2025 18:08:35 -0700 Subject: [PATCH 02/18] Add tests --- docs/docs/providers/inference/index.mdx | 6 +- .../remote/inference/nvidia/nvidia.py | 2 +- tests/integration/conftest.py | 5 + tests/integration/fixtures/common.py | 10 +- tests/integration/inference/test_rerank.py | 147 ++++++++++++++ .../providers/nvidia/test_rerank_inference.py | 180 ++++++++++++++++++ 6 files changed, 345 insertions(+), 5 deletions(-) create mode 100644 tests/integration/inference/test_rerank.py create mode 100644 tests/unit/providers/nvidia/test_rerank_inference.py diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index e96169cad..d9d30ab78 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -1,9 +1,9 @@ --- description: "Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Two kinds of models are supported: + This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. - - Embedding models: these models generate embeddings to be used for semantic search." + - Embedding models: these models generate embeddings to be used for semantic search. - Rerank models: these models rerank the documents by relevance." sidebar_label: Inference title: Inference @@ -15,7 +15,7 @@ title: Inference Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Two kinds of models are supported: + This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. - Rerank models: these models rerank the documents by relevance. diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index b2fdec61f..8dc5e0a11 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -151,7 +151,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): for ranking in rankings: rerank_data.append(RerankData(index=ranking["index"], relevance_score=ranking["logit"])) - # Apply max_num_results limit if specified + # Apply max_num_results limit if max_num_results is not None: rerank_data = rerank_data[:max_num_results] diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 4735264c3..2ad4f7e4c 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -120,6 +120,10 @@ def pytest_addoption(parser): "--embedding-model", help="comma-separated list of embedding models. Fixture name: embedding_model_id", ) + parser.addoption( + "--rerank-model", + help="comma-separated list of rerank models. Fixture name: rerank_model_id", + ) parser.addoption( "--safety-shield", help="comma-separated list of safety shields. 
Fixture name: shield_id", @@ -198,6 +202,7 @@ def pytest_generate_tests(metafunc): "shield_id": ("--safety-shield", "shield"), "judge_model_id": ("--judge-model", "judge"), "embedding_dimension": ("--embedding-dimension", "dim"), + "rerank_model_id": ("--rerank-model", "rerank"), } # Collect all parameters and their values diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index 68aa2b60b..27283afe7 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -119,6 +119,7 @@ def client_with_models( embedding_model_id, embedding_dimension, judge_model_id, + rerank_model_id, ): client = llama_stack_client @@ -151,6 +152,13 @@ def client_with_models( model_type="embedding", metadata={"embedding_dimension": embedding_dimension or 384}, ) + if rerank_model_id and rerank_model_id not in model_ids: + rerank_provider = providers[0] + client.models.register( + model_id=rerank_model_id, + provider_id=rerank_provider.provider_id, + model_type="rerank", + ) return client @@ -166,7 +174,7 @@ def model_providers(llama_stack_client): @pytest.fixture(autouse=True) def skip_if_no_model(request): - model_fixtures = ["text_model_id", "vision_model_id", "embedding_model_id", "judge_model_id", "shield_id"] + model_fixtures = ["text_model_id", "vision_model_id", "embedding_model_id", "judge_model_id", "shield_id", "rerank_model_id"] test_func = request.node.function actual_params = inspect.signature(test_func).parameters.keys() diff --git a/tests/integration/inference/test_rerank.py b/tests/integration/inference/test_rerank.py new file mode 100644 index 000000000..0c536b539 --- /dev/null +++ b/tests/integration/inference/test_rerank.py @@ -0,0 +1,147 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import pytest +from llama_stack_client import BadRequestError as LlamaStackBadRequestError +from llama_stack_client.types import RerankResponse +from llama_stack_client.types.shared.interleaved_content import ( + ImageContentItem, + ImageContentItemImage, + ImageContentItemImageURL, + TextContentItem, +) + +from llama_stack.core.library_client import LlamaStackAsLibraryClient + +# Test data +DUMMY_STRING = "string_1" +DUMMY_STRING2 = "string_2" +DUMMY_TEXT = TextContentItem(text=DUMMY_STRING, type="text") +DUMMY_TEXT2 = TextContentItem(text=DUMMY_STRING2, type="text") +DUMMY_IMAGE_URL = ImageContentItem( + image=ImageContentItemImage(url=ImageContentItemImageURL(uri="https://example.com/image.jpg")), type="image" +) +DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image") + +SUPPORTED_PROVIDERS = {"remote::nvidia"} +PROVIDERS_SUPPORTING_MEDIA = {} # Providers that support media input for rerank models + + +def _validate_rerank_response(response: RerankResponse, items: list) -> None: + """ + Validate that a rerank response has the correct structure and ordering. 
+ + Args: + response: The RerankResponse to validate + items: The original items list that was ranked + + Raises: + AssertionError: If any validation fails + """ + seen = set() + last_score = float("inf") + for d in response.data: + assert 0 <= d.index < len(items), f"Index {d.index} out of bounds for {len(items)} items" + assert d.index not in seen, f"Duplicate index {d.index} found" + seen.add(d.index) + assert isinstance(d.relevance_score, float), f"Score must be float, got {type(d.relevance_score)}" + assert d.relevance_score <= last_score, f"Scores not in descending order: {d.relevance_score} > {last_score}" + last_score = d.relevance_score + + +@pytest.mark.parametrize( + "query,items", + [ + (DUMMY_STRING, [DUMMY_STRING, DUMMY_STRING2]), + (DUMMY_TEXT, [DUMMY_TEXT, DUMMY_TEXT2]), + (DUMMY_STRING, [DUMMY_STRING2, DUMMY_TEXT]), + (DUMMY_TEXT, [DUMMY_STRING, DUMMY_TEXT2]), + ], + ids=[ + "string-query-string-items", + "text-query-text-items", + "mixed-content-1", + "mixed-content-2", + ], +) +def test_rerank_text(llama_stack_client, rerank_model_id, query, items, inference_provider_type): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet. ") + + response = llama_stack_client.inference.rerank(model=rerank_model_id, query=query, items=items) + assert isinstance(response, RerankResponse) + assert len(response.data) <= len(items) + _validate_rerank_response(response, items) + + +@pytest.mark.parametrize( + "query,items", + [ + (DUMMY_IMAGE_URL, [DUMMY_STRING]), + (DUMMY_IMAGE_BASE64, [DUMMY_TEXT]), + (DUMMY_TEXT, [DUMMY_IMAGE_URL]), + (DUMMY_IMAGE_BASE64, [DUMMY_IMAGE_URL, DUMMY_STRING, DUMMY_IMAGE_BASE64, DUMMY_TEXT]), + (DUMMY_TEXT, [DUMMY_IMAGE_URL, DUMMY_STRING, DUMMY_IMAGE_BASE64, DUMMY_TEXT]), + ], + ids=[ + "image-query-url", + "image-query-base64", + "text-query-image-item", + "mixed-content-1", + "mixed-content-2", + ], +) +def test_rerank_image(llama_stack_client, rerank_model_id, query, items, inference_provider_type): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet. ") + + if rerank_model_id not in PROVIDERS_SUPPORTING_MEDIA: + error_type = ( + ValueError if isinstance(llama_stack_client, LlamaStackAsLibraryClient) else LlamaStackBadRequestError + ) + with pytest.raises(error_type): + llama_stack_client.inference.rerank(model=rerank_model_id, query=query, items=items) + else: + response = llama_stack_client.inference.rerank(model=rerank_model_id, query=query, items=items) + + assert isinstance(response, RerankResponse) + assert len(response.data) <= len(items) + _validate_rerank_response(response, items) + + +def test_rerank_max_results(llama_stack_client, rerank_model_id, inference_provider_type): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet. 
") + + items = [DUMMY_STRING, DUMMY_STRING2, DUMMY_TEXT, DUMMY_TEXT2] + max_num_results = 2 + + response = llama_stack_client.inference.rerank( + model=rerank_model_id, + query=DUMMY_STRING, + items=items, + max_num_results=max_num_results, + ) + + assert isinstance(response, RerankResponse) + assert len(response.data) == max_num_results + _validate_rerank_response(response, items) + + +def test_rerank_max_results_larger_than_items(llama_stack_client, rerank_model_id, inference_provider_type): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support rerank yet") + + items = [DUMMY_STRING, DUMMY_STRING2] + response = llama_stack_client.inference.rerank( + model=rerank_model_id, + query=DUMMY_STRING, + items=items, + max_num_results=10, # Larger than items length + ) + + assert isinstance(response, RerankResponse) + assert len(response.data) <= len(items) # Should return at most len(items) diff --git a/tests/unit/providers/nvidia/test_rerank_inference.py b/tests/unit/providers/nvidia/test_rerank_inference.py new file mode 100644 index 000000000..03c54a732 --- /dev/null +++ b/tests/unit/providers/nvidia/test_rerank_inference.py @@ -0,0 +1,180 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from unittest.mock import AsyncMock, patch + +import aiohttp +import pytest + +from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig +from llama_stack.providers.remote.inference.nvidia.nvidia import NVIDIAInferenceAdapter + + +class MockResponse: + def __init__(self, status=200, json_data=None, text_data="OK"): + self.status = status + self._json_data = json_data or {"rankings": []} + self._text_data = text_data + + async def json(self): + return self._json_data + + async def text(self): + return self._text_data + + +class MockSession: + def __init__(self, response): + self.response = response + self.post_calls = [] + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + return False + + def post(self, url, **kwargs): + self.post_calls.append((url, kwargs)) + + class PostContext: + def __init__(self, response): + self.response = response + + async def __aenter__(self): + return self.response + + async def __aexit__(self, exc_type, exc_val, exc_tb): + return False + + return PostContext(self.response) + + +def create_adapter(config=None, model_metadata=None): + if config is None: + config = NVIDIAConfig(api_key="test-key") + + adapter = NVIDIAInferenceAdapter(config) + + class MockModel: + provider_resource_id = "test-model" + metadata = model_metadata or {} + + adapter.model_store = AsyncMock() + adapter.model_store.get_model = AsyncMock(return_value=MockModel()) + + return adapter + + +@pytest.mark.asyncio +async def test_rerank_basic_functionality(): + adapter = create_adapter() + mock_response = MockResponse(json_data={"rankings": [{"index": 0, "logit": 0.5}]}) + mock_session = MockSession(mock_response) + + with patch("aiohttp.ClientSession", return_value=mock_session): + result = await adapter.rerank(model="test-model", query="test query", items=["item1", "item2"]) + + assert len(result.data) == 1 + assert result.data[0].index == 0 + assert result.data[0].relevance_score == 0.5 + + url, kwargs = mock_session.post_calls[0] + payload = kwargs["json"] + assert payload["model"] == "test-model" + assert 
payload["query"] == {"text": "test query"} + assert payload["passages"] == [{"text": "item1"}, {"text": "item2"}] + + +@pytest.mark.asyncio +async def test_missing_rankings_key(): + adapter = create_adapter() + mock_session = MockSession(MockResponse(json_data={})) + + with patch("aiohttp.ClientSession", return_value=mock_session): + result = await adapter.rerank(model="test-model", query="q", items=["a"]) + + assert len(result.data) == 0 + + +@pytest.mark.asyncio +async def test_hosted_with_endpoint(): + adapter = create_adapter( + config=NVIDIAConfig(api_key="key"), model_metadata={"endpoint": "https://model.endpoint/rerank"} + ) + mock_session = MockSession(MockResponse()) + + with patch("aiohttp.ClientSession", return_value=mock_session): + await adapter.rerank(model="test-model", query="q", items=["a"]) + + url, _ = mock_session.post_calls[0] + assert url == "https://model.endpoint/rerank" + + +@pytest.mark.asyncio +async def test_hosted_without_endpoint(): + adapter = create_adapter( + config=NVIDIAConfig(api_key="key"), # This creates hosted config (integrate.api.nvidia.com). + model_metadata={}, # No "endpoint" key + ) + mock_session = MockSession(MockResponse()) + + with patch("aiohttp.ClientSession", return_value=mock_session): + await adapter.rerank(model="test-model", query="q", items=["a"]) + + url, _ = mock_session.post_calls[0] + assert "https://integrate.api.nvidia.com" in url + + +@pytest.mark.asyncio +async def test_self_hosted_ignores_endpoint(): + adapter = create_adapter( + config=NVIDIAConfig(url="http://localhost:8000", api_key=None), + model_metadata={"endpoint": "https://model.endpoint/rerank"}, # This should be ignored. + ) + mock_session = MockSession(MockResponse()) + + with patch("aiohttp.ClientSession", return_value=mock_session): + await adapter.rerank(model="test-model", query="q", items=["a"]) + + url, _ = mock_session.post_calls[0] + assert "http://localhost:8000" in url + assert "model.endpoint/rerank" not in url + + +@pytest.mark.asyncio +async def test_max_num_results(): + adapter = create_adapter() + rankings = [{"index": 0, "logit": 0.8}, {"index": 1, "logit": 0.6}] + mock_session = MockSession(MockResponse(json_data={"rankings": rankings})) + + with patch("aiohttp.ClientSession", return_value=mock_session): + result = await adapter.rerank(model="test-model", query="q", items=["a", "b"], max_num_results=1) + + assert len(result.data) == 1 + assert result.data[0].index == 0 + assert result.data[0].relevance_score == 0.8 + + +@pytest.mark.asyncio +async def test_http_error(): + adapter = create_adapter() + mock_session = MockSession(MockResponse(status=500, text_data="Server Error")) + + with patch("aiohttp.ClientSession", return_value=mock_session): + with pytest.raises(ConnectionError, match="status 500.*Server Error"): + await adapter.rerank(model="test-model", query="q", items=["a"]) + + +@pytest.mark.asyncio +async def test_client_error(): + adapter = create_adapter() + mock_session = AsyncMock() + mock_session.__aenter__.side_effect = aiohttp.ClientError("Network error") + + with patch("aiohttp.ClientSession", return_value=mock_session): + with pytest.raises(ConnectionError, match="Failed to connect.*Network error"): + await adapter.rerank(model="test-model", query="q", items=["a"]) From 8c9b7aa764a936b9617d5f8a82d74beb302ed835 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Fri, 5 Sep 2025 16:52:42 -0700 Subject: [PATCH 03/18] Add example documentation --- .../remote/inference/nvidia/NVIDIA.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) 
diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md index 625be6088..c683c7a68 100644 --- a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md +++ b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md @@ -188,3 +188,22 @@ vlm_response = client.chat.completions.create( print(f"VLM Response: {vlm_response.choices[0].message.content}") ``` + +### Rerank Example + +The following example shows how to rerank documents using an NVIDIA NIM. + +```python +rerank_response = client.inference.rerank( + model="nvidia/llama-3.2-nv-rerankqa-1b-v2", + query="query", + items=[ + "item_1", + "item_2", + "item_3", + ], +) + +for i, result in enumerate(rerank_response.data): + print(f"{i+1}. [Index: {result.index}, Score: {result.relevance_score:.3f}]") +``` \ No newline at end of file From 3f6bbbb5609b6da2c5d46cc541fe414e859adccf Mon Sep 17 00:00:00 2001 From: Jiayi Date: Tue, 9 Sep 2025 14:15:24 -0700 Subject: [PATCH 04/18] Remove pre-commit auto formatted changes --- tests/unit/providers/nvidia/test_rerank_inference.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/unit/providers/nvidia/test_rerank_inference.py b/tests/unit/providers/nvidia/test_rerank_inference.py index 03c54a732..687ffd502 100644 --- a/tests/unit/providers/nvidia/test_rerank_inference.py +++ b/tests/unit/providers/nvidia/test_rerank_inference.py @@ -69,7 +69,6 @@ def create_adapter(config=None, model_metadata=None): return adapter -@pytest.mark.asyncio async def test_rerank_basic_functionality(): adapter = create_adapter() mock_response = MockResponse(json_data={"rankings": [{"index": 0, "logit": 0.5}]}) @@ -89,7 +88,6 @@ async def test_rerank_basic_functionality(): assert payload["passages"] == [{"text": "item1"}, {"text": "item2"}] -@pytest.mark.asyncio async def test_missing_rankings_key(): adapter = create_adapter() mock_session = MockSession(MockResponse(json_data={})) @@ -100,7 +98,6 @@ async def test_missing_rankings_key(): assert len(result.data) == 0 -@pytest.mark.asyncio async def test_hosted_with_endpoint(): adapter = create_adapter( config=NVIDIAConfig(api_key="key"), model_metadata={"endpoint": "https://model.endpoint/rerank"} @@ -114,7 +111,6 @@ async def test_hosted_with_endpoint(): assert url == "https://model.endpoint/rerank" -@pytest.mark.asyncio async def test_hosted_without_endpoint(): adapter = create_adapter( config=NVIDIAConfig(api_key="key"), # This creates hosted config (integrate.api.nvidia.com). 
@@ -129,7 +125,6 @@ async def test_hosted_without_endpoint(): assert "https://integrate.api.nvidia.com" in url -@pytest.mark.asyncio async def test_self_hosted_ignores_endpoint(): adapter = create_adapter( config=NVIDIAConfig(url="http://localhost:8000", api_key=None), @@ -145,7 +140,6 @@ async def test_self_hosted_ignores_endpoint(): assert "model.endpoint/rerank" not in url -@pytest.mark.asyncio async def test_max_num_results(): adapter = create_adapter() rankings = [{"index": 0, "logit": 0.8}, {"index": 1, "logit": 0.6}] @@ -159,7 +153,6 @@ async def test_max_num_results(): assert result.data[0].relevance_score == 0.8 -@pytest.mark.asyncio async def test_http_error(): adapter = create_adapter() mock_session = MockSession(MockResponse(status=500, text_data="Server Error")) @@ -169,7 +162,6 @@ async def test_http_error(): await adapter.rerank(model="test-model", query="q", items=["a"]) -@pytest.mark.asyncio async def test_client_error(): adapter = create_adapter() mock_session = AsyncMock() From d78e30fe8b95a30a92f86e6b1ee9d69ceca2ae00 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Tue, 9 Sep 2025 15:01:49 -0700 Subject: [PATCH 05/18] Fix aiohttp location --- llama_stack/providers/remote/inference/nvidia/nvidia.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 8dc5e0a11..f629d8c19 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -5,6 +5,7 @@ # the root directory of this source tree. +import aiohttp from openai import NOT_GIVEN from llama_stack.apis.inference import ( @@ -132,8 +133,6 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): "Content-Type": "application/json", } - import aiohttp - try: async with aiohttp.ClientSession() as session: async with session.post(ranking_url, headers=headers, json=payload) as response: From f66718be8033b685abbe829753df539fc652e27d Mon Sep 17 00:00:00 2001 From: Jiayi Date: Tue, 9 Sep 2025 16:21:00 -0700 Subject: [PATCH 06/18] Update tests --- tests/integration/fixtures/common.py | 11 +++++++++-- tests/integration/inference/test_rerank.py | 20 ++++++++++---------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index 27283afe7..8f4c564c8 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -153,10 +153,17 @@ def client_with_models( metadata={"embedding_dimension": embedding_dimension or 384}, ) if rerank_model_id and rerank_model_id not in model_ids: - rerank_provider = providers[0] + selected_provider = None + for p in providers: + # Currently only NVIDIA inference provider supports reranking + if p.provider_type == "remote::nvidia": + selected_provider = p + break + + selected_provider = selected_provider or providers[0] client.models.register( model_id=rerank_model_id, - provider_id=rerank_provider.provider_id, + provider_id=selected_provider.provider_id, model_type="rerank", ) return client diff --git a/tests/integration/inference/test_rerank.py b/tests/integration/inference/test_rerank.py index 0c536b539..27f3074ad 100644 --- a/tests/integration/inference/test_rerank.py +++ b/tests/integration/inference/test_rerank.py @@ -67,11 +67,11 @@ def _validate_rerank_response(response: RerankResponse, items: list) -> None: "mixed-content-2", ], ) -def test_rerank_text(llama_stack_client, 
rerank_model_id, query, items, inference_provider_type): +def test_rerank_text(client_with_models, rerank_model_id, query, items, inference_provider_type): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet. ") - response = llama_stack_client.inference.rerank(model=rerank_model_id, query=query, items=items) + response = client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items) assert isinstance(response, RerankResponse) assert len(response.data) <= len(items) _validate_rerank_response(response, items) @@ -94,32 +94,32 @@ def test_rerank_text(llama_stack_client, rerank_model_id, query, items, inferenc "mixed-content-2", ], ) -def test_rerank_image(llama_stack_client, rerank_model_id, query, items, inference_provider_type): +def test_rerank_image(client_with_models, rerank_model_id, query, items, inference_provider_type): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet. ") if rerank_model_id not in PROVIDERS_SUPPORTING_MEDIA: error_type = ( - ValueError if isinstance(llama_stack_client, LlamaStackAsLibraryClient) else LlamaStackBadRequestError + ValueError if isinstance(client_with_models, LlamaStackAsLibraryClient) else LlamaStackBadRequestError ) with pytest.raises(error_type): - llama_stack_client.inference.rerank(model=rerank_model_id, query=query, items=items) + client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items) else: - response = llama_stack_client.inference.rerank(model=rerank_model_id, query=query, items=items) + response = client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items) assert isinstance(response, RerankResponse) assert len(response.data) <= len(items) _validate_rerank_response(response, items) -def test_rerank_max_results(llama_stack_client, rerank_model_id, inference_provider_type): +def test_rerank_max_results(client_with_models, rerank_model_id, inference_provider_type): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet. 
") items = [DUMMY_STRING, DUMMY_STRING2, DUMMY_TEXT, DUMMY_TEXT2] max_num_results = 2 - response = llama_stack_client.inference.rerank( + response = client_with_models.inference.rerank( model=rerank_model_id, query=DUMMY_STRING, items=items, @@ -131,12 +131,12 @@ def test_rerank_max_results(llama_stack_client, rerank_model_id, inference_provi _validate_rerank_response(response, items) -def test_rerank_max_results_larger_than_items(llama_stack_client, rerank_model_id, inference_provider_type): +def test_rerank_max_results_larger_than_items(client_with_models, rerank_model_id, inference_provider_type): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support rerank yet") items = [DUMMY_STRING, DUMMY_STRING2] - response = llama_stack_client.inference.rerank( + response = client_with_models.inference.rerank( model=rerank_model_id, query=DUMMY_STRING, items=items, From 78375889ec474f9f1916a0e1163b7ef9b2b5eba5 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Wed, 10 Sep 2025 11:39:39 -0700 Subject: [PATCH 07/18] Update index.md --- docs/docs/providers/inference/index.mdx | 2 +- llama_stack/apis/inference/inference.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index d9d30ab78..1cbeb12f0 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -18,6 +18,6 @@ Llama Stack Inference API for generating completions, chat completions, and embe This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. - - Rerank models: these models rerank the documents by relevance. + - Rerank models: these models reorder the documents by relevance. This section contains documentation for all available providers for the **inference** API. diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index e452d8157..7bd9f5918 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -1159,9 +1159,10 @@ class InferenceProvider(Protocol): class Inference(InferenceProvider): """Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Two kinds of models are supported: + This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. + - Rerank models: these models reorder the documents by relevance. 
""" @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) From d1b4e090effd61a29c7102ba9042b749e0727971 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Wed, 10 Sep 2025 11:50:55 -0700 Subject: [PATCH 08/18] Update docs to include rerank models --- docs/static/llama-stack-spec.html | 2 +- docs/static/llama-stack-spec.yaml | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index b260f01a7..8192a9cf6 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -17875,7 +17875,7 @@ }, { "name": "Inference", - "description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.", + "description": "This API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents by relevance.", "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings." }, { diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index ebe142557..895b939ab 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -13452,13 +13452,15 @@ tags: description: '' - name: Inference description: >- - This API provides the raw interface to the underlying models. Two kinds of models - are supported: + This API provides the raw interface to the underlying models. Three kinds of + models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. + + - Rerank models: these models reorder the documents by relevance. x-displayName: >- Llama Stack Inference API for generating completions, chat completions, and embeddings. From a0e6e82c1e68b95934985e98e95f7d4ab7b7d53e Mon Sep 17 00:00:00 2001 From: Jiayi Date: Wed, 10 Sep 2025 17:18:59 -0700 Subject: [PATCH 09/18] Add rerank semantic validation tests --- tests/integration/inference/test_rerank.py | 66 ++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/tests/integration/inference/test_rerank.py b/tests/integration/inference/test_rerank.py index 27f3074ad..f1b9311a4 100644 --- a/tests/integration/inference/test_rerank.py +++ b/tests/integration/inference/test_rerank.py @@ -52,6 +52,28 @@ def _validate_rerank_response(response: RerankResponse, items: list) -> None: last_score = d.relevance_score +def _validate_semantic_ranking(response: RerankResponse, items: list, expected_first_item: str) -> None: + """ + Validate that the expected most relevant item ranks first. 
+ + Args: + response: The RerankResponse to validate + items: The original items list that was ranked + expected_first_item: The expected first item in the ranking + + Raises: + AssertionError: If any validation fails + """ + if not response.data: + raise AssertionError("No ranking data returned in response") + + actual_first_index = response.data[0].index + actual_first_item = items[actual_first_index] + assert actual_first_item == expected_first_item, ( + f"Expected '{expected_first_item}' to rank first, but '{actual_first_item}' ranked first instead." + ) + + @pytest.mark.parametrize( "query,items", [ @@ -145,3 +167,47 @@ def test_rerank_max_results_larger_than_items(client_with_models, rerank_model_i assert isinstance(response, RerankResponse) assert len(response.data) <= len(items) # Should return at most len(items) + + +@pytest.mark.parametrize( + "query,items,expected_first_item", + [ + ( + "What is a reranking model? ", + [ + "A reranking model reranks a list of items based on the query. ", + "Machine learning algorithms learn patterns from data. ", + "Python is a programming language. ", + ], + "A reranking model reranks a list of items based on the query. ", + ), + ( + "What is C++?", + [ + "Learning new things is interesting. ", + "C++ is a programming language. ", + "Books provide knowledge and entertainment. ", + ], + "C++ is a programming language. ", + ), + ( + "What are good learning habits? ", + [ + "Cooking pasta is a fun activity. ", + "Plants need water and sunlight. ", + "Good learning habits include reading daily and taking notes. ", + ], + "Good learning habits include reading daily and taking notes. ", + ), + ], +) +def test_rerank_semantic_correctness( + client_with_models, rerank_model_id, query, items, expected_first_item, inference_provider_type +): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet.") + + response = client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items) + + _validate_rerank_response(response, items) + _validate_semantic_ranking(response, items, expected_first_item) From 35384770705f81702b1cbe3913bdece9191c53f0 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Fri, 12 Sep 2025 19:55:04 -0700 Subject: [PATCH 10/18] Update docs --- docs/docs/providers/inference/index.mdx | 2 +- docs/static/llama-stack-spec.html | 2 +- docs/static/llama-stack-spec.yaml | 3 +- llama_stack/apis/inference/inference.py | 2 +- llama_stack/apis/models/models.py | 2 +- llama_stack/core/routers/inference.py | 1 - .../remote/inference/nvidia/models.py | 131 ------------------ 7 files changed, 6 insertions(+), 137 deletions(-) delete mode 100644 llama_stack/providers/remote/inference/nvidia/models.py diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index 1cbeb12f0..98ba10cc7 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -18,6 +18,6 @@ Llama Stack Inference API for generating completions, chat completions, and embe This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. - - Rerank models: these models reorder the documents by relevance. + - Rerank models: these models reorder the documents based on their relevance to a query. 
This section contains documentation for all available providers for the **inference** API. diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 8192a9cf6..0fdf3f415 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -17875,7 +17875,7 @@ }, { "name": "Inference", - "description": "This API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents by relevance.", + "description": "This API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents based on their relevance to a query.", "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings." }, { diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 895b939ab..ec0409849 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -13460,7 +13460,8 @@ tags: - Embedding models: these models generate embeddings to be used for semantic search. - - Rerank models: these models reorder the documents by relevance. + - Rerank models: these models reorder the documents based on their relevance + to a query. x-displayName: >- Llama Stack Inference API for generating completions, chat completions, and embeddings. diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index 7bd9f5918..6260ba552 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -1162,7 +1162,7 @@ class Inference(InferenceProvider): This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. - - Rerank models: these models reorder the documents by relevance. + - Rerank models: these models reorder the documents based on their relevance to a query. """ @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py index 359f5bf0c..1275e90e3 100644 --- a/llama_stack/apis/models/models.py +++ b/llama_stack/apis/models/models.py @@ -27,7 +27,7 @@ class ModelType(StrEnum): """Enumeration of supported model types in Llama Stack. 
:cvar llm: Large language model for text generation and completion :cvar embedding: Embedding model for converting text to vector representations - :cvar rerank: Reranking model for reordering documents by relevance + :cvar rerank: Reranking model for reordering documents based on their relevance to a query """ llm = "llm" diff --git a/llama_stack/core/routers/inference.py b/llama_stack/core/routers/inference.py index e5826685e..c1d4203c2 100644 --- a/llama_stack/core/routers/inference.py +++ b/llama_stack/core/routers/inference.py @@ -191,7 +191,6 @@ class InferenceRouter(Inference): items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam], max_num_results: int | None = None, ) -> RerankResponse: - """Route rerank requests to the appropriate provider based on the model.""" logger.debug(f"InferenceRouter.rerank: {model}") model_obj = await self._get_model(model, ModelType.rerank) provider = await self.routing_table.get_provider_impl(model_obj.identifier) diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py deleted file mode 100644 index a79a1c6aa..000000000 --- a/llama_stack/providers/remote/inference/nvidia/models.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from llama_stack.apis.models import ModelType -from llama_stack.models.llama.sku_types import CoreModelId -from llama_stack.providers.utils.inference.model_registry import ( - ProviderModelEntry, - build_hf_repo_model_entry, -) - -SAFETY_MODELS_ENTRIES = [] - -# https://docs.nvidia.com/nim/large-language-models/latest/supported-llm-agnostic-architectures.html -MODEL_ENTRIES = [ - build_hf_repo_model_entry( - "meta/llama3-8b-instruct", - CoreModelId.llama3_8b_instruct.value, - ), - build_hf_repo_model_entry( - "meta/llama3-70b-instruct", - CoreModelId.llama3_70b_instruct.value, - ), - build_hf_repo_model_entry( - "meta/llama-3.1-8b-instruct", - CoreModelId.llama3_1_8b_instruct.value, - ), - build_hf_repo_model_entry( - "meta/llama-3.1-70b-instruct", - CoreModelId.llama3_1_70b_instruct.value, - ), - build_hf_repo_model_entry( - "meta/llama-3.1-405b-instruct", - CoreModelId.llama3_1_405b_instruct.value, - ), - build_hf_repo_model_entry( - "meta/llama-3.2-1b-instruct", - CoreModelId.llama3_2_1b_instruct.value, - ), - build_hf_repo_model_entry( - "meta/llama-3.2-3b-instruct", - CoreModelId.llama3_2_3b_instruct.value, - ), - build_hf_repo_model_entry( - "meta/llama-3.2-11b-vision-instruct", - CoreModelId.llama3_2_11b_vision_instruct.value, - ), - build_hf_repo_model_entry( - "meta/llama-3.2-90b-vision-instruct", - CoreModelId.llama3_2_90b_vision_instruct.value, - ), - build_hf_repo_model_entry( - "meta/llama-3.3-70b-instruct", - CoreModelId.llama3_3_70b_instruct.value, - ), - ProviderModelEntry( - provider_model_id="nvidia/vila", - model_type=ModelType.llm, - ), - # NeMo Retriever Text Embedding models - - # - # https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html - # - # +-----------------------------------+--------+-----------+-----------+------------+ - # | Model ID | Max | Publisher | Embedding | Dynamic | - # | | Tokens | | Dimension | Embeddings | - # +-----------------------------------+--------+-----------+-----------+------------+ - # | nvidia/llama-3.2-nv-embedqa-1b-v2 | 8192 | NVIDIA | 
2048 | Yes | - # | nvidia/nv-embedqa-e5-v5 | 512 | NVIDIA | 1024 | No | - # | nvidia/nv-embedqa-mistral-7b-v2 | 512 | NVIDIA | 4096 | No | - # | snowflake/arctic-embed-l | 512 | Snowflake | 1024 | No | - # +-----------------------------------+--------+-----------+-----------+------------+ - ProviderModelEntry( - provider_model_id="nvidia/llama-3.2-nv-embedqa-1b-v2", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 2048, - "context_length": 8192, - }, - ), - ProviderModelEntry( - provider_model_id="nvidia/nv-embedqa-e5-v5", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 1024, - "context_length": 512, - }, - ), - ProviderModelEntry( - provider_model_id="nvidia/nv-embedqa-mistral-7b-v2", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 4096, - "context_length": 512, - }, - ), - ProviderModelEntry( - provider_model_id="snowflake/arctic-embed-l", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 1024, - "context_length": 512, - }, - ), - # NVIDIA Reranking models - ProviderModelEntry( - provider_model_id="nv-rerank-qa-mistral-4b:1", - model_type=ModelType.rerank, - metadata={ - "endpoint": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking", - }, - ), - ProviderModelEntry( - provider_model_id="nvidia/nv-rerankqa-mistral-4b-v3", - model_type=ModelType.rerank, - metadata={ - "endpoint": "https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking", - }, - ), - ProviderModelEntry( - provider_model_id="nvidia/llama-3.2-nv-rerankqa-1b-v2", - model_type=ModelType.rerank, - metadata={ - "endpoint": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking", - }, - ), - # TODO(mf): how do we handle Nemotron models? - # "Llama3.1-Nemotron-51B-Instruct" -> "meta/llama-3.1-nemotron-51b-instruct", -] + SAFETY_MODELS_ENTRIES From 816b68fdc7cc83288a4548f3c73c6285fe5c86d9 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Sun, 28 Sep 2025 14:45:16 -0700 Subject: [PATCH 11/18] Add rerank models to the dynamic model list; Fix integration tests --- docs/docs/providers/batches/index.mdx | 12 +- docs/docs/providers/inference/index.mdx | 1 + .../remote/inference/nvidia/NVIDIA.md | 4 +- .../remote/inference/nvidia/nvidia.py | 39 +++++ .../providers/utils/inference/openai_mixin.py | 12 ++ tests/integration/inference/test_rerank.py | 33 ++--- .../providers/nvidia/test_rerank_inference.py | 35 ++++- .../utils/inference/test_openai_mixin.py | 136 ++++++++++++++++++ 8 files changed, 247 insertions(+), 25 deletions(-) diff --git a/docs/docs/providers/batches/index.mdx b/docs/docs/providers/batches/index.mdx index 2c64b277f..85213ab17 100644 --- a/docs/docs/providers/batches/index.mdx +++ b/docs/docs/providers/batches/index.mdx @@ -18,14 +18,14 @@ title: Batches ## Overview The Batches API enables efficient processing of multiple requests in a single operation, - particularly useful for processing large datasets, batch evaluation workflows, and - cost-effective inference at scale. +particularly useful for processing large datasets, batch evaluation workflows, and +cost-effective inference at scale. - The API is designed to allow use of openai client libraries for seamless integration. +The API is designed to allow use of openai client libraries for seamless integration. 
- This API provides the following extensions: - - idempotent batch creation +This API provides the following extensions: + - idempotent batch creation - Note: This API is currently under active development and may undergo changes. +Note: This API is currently under active development and may undergo changes. This section contains documentation for all available providers for the **batches** API. diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index 98ba10cc7..065f620df 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -5,6 +5,7 @@ description: "Llama Stack Inference API for generating completions, chat complet - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. - Rerank models: these models rerank the documents by relevance." + sidebar_label: Inference title: Inference --- diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md index c683c7a68..dcc9d3909 100644 --- a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md +++ b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md @@ -204,6 +204,6 @@ rerank_response = client.inference.rerank( ], ) -for i, result in enumerate(rerank_response.data): - print(f"{i+1}. [Index: {result.index}, Score: {result.relevance_score:.3f}]") +for i, result in enumerate(rerank_response): + print(f"{i+1}. [Index: {result.index}, " f"Score: {(result.relevance_score):.3f}]") ``` \ No newline at end of file diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index f629d8c19..ae9245bfe 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -20,6 +20,7 @@ from llama_stack.apis.inference.inference import ( OpenAIChatCompletionContentPartImageParam, OpenAIChatCompletionContentPartTextParam, ) +from llama_stack.apis.models import Model, ModelType from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin @@ -51,6 +52,18 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): "snowflake/arctic-embed-l": {"embedding_dimension": 512, "context_length": 1024}, } + rerank_model_list = [ + "nv-rerank-qa-mistral-4b:1", + "nvidia/nv-rerankqa-mistral-4b-v3", + "nvidia/llama-3.2-nv-rerankqa-1b-v2", + ] + + _rerank_model_endpoints = { + "nv-rerank-qa-mistral-4b:1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking", + "nvidia/nv-rerankqa-mistral-4b-v3": "https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking", + "nvidia/llama-3.2-nv-rerankqa-1b-v2": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking", + } + def __init__(self, config: NVIDIAConfig) -> None: logger.info(f"Initializing NVIDIAInferenceAdapter({config.url})...") @@ -69,6 +82,8 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): # "Consider removing the api_key from the configuration." # ) + super().__init__() + self._config = config def get_api_key(self) -> str: @@ -87,6 +102,30 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): """ return f"{self._config.url}/v1" if self._config.append_api_version else self._config.url + async def list_models(self) -> list[Model] | None: + """ + List available NVIDIA models by combining: + 1. 
Dynamic models from https://integrate.api.nvidia.com/v1/models + 2. Static rerank models (which use different API endpoints) + """ + models = await super().list_models() or [] + + existing_ids = {m.identifier for m in models} + for model_id, _ in self._rerank_model_endpoints.items(): + if self.allowed_models and model_id not in self.allowed_models: + continue + if model_id not in existing_ids: + model = Model( + provider_id=self.__provider_id__, # type: ignore[attr-defined] + provider_resource_id=model_id, + identifier=model_id, + model_type=ModelType.rerank, + ) + models.append(model) + self._model_cache[model_id] = model + + return models + async def rerank( self, model: str, diff --git a/llama_stack/providers/utils/inference/openai_mixin.py b/llama_stack/providers/utils/inference/openai_mixin.py index 4354b067e..da56374c5 100644 --- a/llama_stack/providers/utils/inference/openai_mixin.py +++ b/llama_stack/providers/utils/inference/openai_mixin.py @@ -63,6 +63,10 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC): # Format: {"model_id": {"embedding_dimension": 1536, "context_length": 8192}} embedding_model_metadata: dict[str, dict[str, int]] = {} + # List of rerank model IDs for this provider + # Can be set by subclasses or instances to provide rerank models + rerank_model_list: list[str] = [] + # Cache of available models keyed by model ID # This is set in list_models() and used in check_model_availability() _model_cache: dict[str, Model] = {} @@ -400,6 +404,14 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC): model_type=ModelType.embedding, metadata=metadata, ) + elif m.id in self.rerank_model_list: + # This is a rerank model + model = Model( + provider_id=self.__provider_id__, # type: ignore[attr-defined] + provider_resource_id=m.id, + identifier=m.id, + model_type=ModelType.rerank, + ) else: # This is an LLM model = Model( diff --git a/tests/integration/inference/test_rerank.py b/tests/integration/inference/test_rerank.py index f1b9311a4..ea17a54cb 100644 --- a/tests/integration/inference/test_rerank.py +++ b/tests/integration/inference/test_rerank.py @@ -6,7 +6,7 @@ import pytest from llama_stack_client import BadRequestError as LlamaStackBadRequestError -from llama_stack_client.types import RerankResponse +from llama_stack_client.types import InferenceRerankResponse from llama_stack_client.types.shared.interleaved_content import ( ImageContentItem, ImageContentItemImage, @@ -30,12 +30,12 @@ SUPPORTED_PROVIDERS = {"remote::nvidia"} PROVIDERS_SUPPORTING_MEDIA = {} # Providers that support media input for rerank models -def _validate_rerank_response(response: RerankResponse, items: list) -> None: +def _validate_rerank_response(response: InferenceRerankResponse, items: list) -> None: """ Validate that a rerank response has the correct structure and ordering. 
Args: - response: The RerankResponse to validate + response: The InferenceRerankResponse to validate items: The original items list that was ranked Raises: @@ -43,7 +43,7 @@ def _validate_rerank_response(response: RerankResponse, items: list) -> None: """ seen = set() last_score = float("inf") - for d in response.data: + for d in response: assert 0 <= d.index < len(items), f"Index {d.index} out of bounds for {len(items)} items" assert d.index not in seen, f"Duplicate index {d.index} found" seen.add(d.index) @@ -52,22 +52,22 @@ def _validate_rerank_response(response: RerankResponse, items: list) -> None: last_score = d.relevance_score -def _validate_semantic_ranking(response: RerankResponse, items: list, expected_first_item: str) -> None: +def _validate_semantic_ranking(response: InferenceRerankResponse, items: list, expected_first_item: str) -> None: """ Validate that the expected most relevant item ranks first. Args: - response: The RerankResponse to validate + response: The InferenceRerankResponse to validate items: The original items list that was ranked expected_first_item: The expected first item in the ranking Raises: AssertionError: If any validation fails """ - if not response.data: + if not response: raise AssertionError("No ranking data returned in response") - actual_first_index = response.data[0].index + actual_first_index = response[0].index actual_first_item = items[actual_first_index] assert actual_first_item == expected_first_item, ( f"Expected '{expected_first_item}' to rank first, but '{actual_first_item}' ranked first instead." @@ -94,8 +94,9 @@ def test_rerank_text(client_with_models, rerank_model_id, query, items, inferenc pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet. ") response = client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items) - assert isinstance(response, RerankResponse) - assert len(response.data) <= len(items) + assert isinstance(response, list) + # TODO: Add type validation for response items once InferenceRerankResponseItem is exported from llama stack client. 
+ assert len(response) <= len(items) _validate_rerank_response(response, items) @@ -129,8 +130,8 @@ def test_rerank_image(client_with_models, rerank_model_id, query, items, inferen else: response = client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items) - assert isinstance(response, RerankResponse) - assert len(response.data) <= len(items) + assert isinstance(response, list) + assert len(response) <= len(items) _validate_rerank_response(response, items) @@ -148,8 +149,8 @@ def test_rerank_max_results(client_with_models, rerank_model_id, inference_provi max_num_results=max_num_results, ) - assert isinstance(response, RerankResponse) - assert len(response.data) == max_num_results + assert isinstance(response, list) + assert len(response) == max_num_results _validate_rerank_response(response, items) @@ -165,8 +166,8 @@ def test_rerank_max_results_larger_than_items(client_with_models, rerank_model_i max_num_results=10, # Larger than items length ) - assert isinstance(response, RerankResponse) - assert len(response.data) <= len(items) # Should return at most len(items) + assert isinstance(response, list) + assert len(response) <= len(items) # Should return at most len(items) @pytest.mark.parametrize( diff --git a/tests/unit/providers/nvidia/test_rerank_inference.py b/tests/unit/providers/nvidia/test_rerank_inference.py index 687ffd502..f34518609 100644 --- a/tests/unit/providers/nvidia/test_rerank_inference.py +++ b/tests/unit/providers/nvidia/test_rerank_inference.py @@ -4,11 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from unittest.mock import AsyncMock, patch +from unittest.mock import AsyncMock, MagicMock, patch import aiohttp import pytest +from llama_stack.apis.models import ModelType from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig from llama_stack.providers.remote.inference.nvidia.nvidia import NVIDIAInferenceAdapter @@ -170,3 +171,35 @@ async def test_client_error(): with patch("aiohttp.ClientSession", return_value=mock_session): with pytest.raises(ConnectionError, match="Failed to connect.*Network error"): await adapter.rerank(model="test-model", query="q", items=["a"]) + + +async def test_list_models_adds_rerank_models(): + """Test that list_models adds rerank models to the dynamic model list.""" + adapter = create_adapter() + adapter.__provider_id__ = "nvidia" + + # Mock the list_models from the superclass to return some dynamic models + base_models = [ + MagicMock(identifier="llm-1", model_type=ModelType.llm), + MagicMock(identifier="embedding-1", model_type=ModelType.embedding), + ] + + with patch.object(NVIDIAInferenceAdapter.__bases__[0], "list_models", return_value=base_models): + result = await adapter.list_models() + + assert result is not None + + # Check that the rerank models are added + model_ids = [m.identifier for m in result] + assert "nv-rerank-qa-mistral-4b:1" in model_ids + assert "nvidia/nv-rerankqa-mistral-4b-v3" in model_ids + assert "nvidia/llama-3.2-nv-rerankqa-1b-v2" in model_ids + + rerank_models = [m for m in result if m.model_type == ModelType.rerank] + + assert len(rerank_models) == 3 + + for rerank_model in rerank_models: + assert rerank_model.provider_id == "nvidia" + assert rerank_model.metadata == {} + assert rerank_model.identifier in adapter._model_cache diff --git a/tests/unit/providers/utils/inference/test_openai_mixin.py b/tests/unit/providers/utils/inference/test_openai_mixin.py index 
4856f510b..ae723dcc2 100644 --- a/tests/unit/providers/utils/inference/test_openai_mixin.py +++ b/tests/unit/providers/utils/inference/test_openai_mixin.py @@ -35,6 +35,40 @@ class OpenAIMixinWithEmbeddingsImpl(OpenAIMixinImpl): } +class OpenAIMixinWithRerankImpl(OpenAIMixin): + """Test implementation with rerank model list""" + + rerank_model_list = ["rerank-model-1", "rerank-model-2"] + + def __init__(self): + self.__provider_id__ = "test-provider" + + def get_api_key(self) -> str: + raise NotImplementedError("This method should be mocked in tests") + + def get_base_url(self) -> str: + raise NotImplementedError("This method should be mocked in tests") + + +class OpenAIMixinWithEmbeddingsAndRerankImpl(OpenAIMixin): + """Test implementation with both embedding model metadata and rerank model list""" + + embedding_model_metadata = { + "text-embedding-3-small": {"embedding_dimension": 1536, "context_length": 8192}, + "text-embedding-ada-002": {"embedding_dimension": 1536, "context_length": 8192}, + } + + rerank_model_list = ["rerank-model-1", "rerank-model-2"] + + __provider_id__ = "test-provider" + + def get_api_key(self) -> str: + raise NotImplementedError("This method should be mocked in tests") + + def get_base_url(self) -> str: + raise NotImplementedError("This method should be mocked in tests") + + @pytest.fixture def mixin(): """Create a test instance of OpenAIMixin with mocked model_store""" @@ -56,6 +90,18 @@ def mixin_with_embeddings(): return OpenAIMixinWithEmbeddingsImpl() +@pytest.fixture +def mixin_with_rerank(): + """Create a test instance of OpenAIMixin with rerank model list""" + return OpenAIMixinWithRerankImpl() + + +@pytest.fixture +def mixin_with_embeddings_and_rerank(): + """Create a test instance of OpenAIMixin with both embedding model metadata and rerank model list""" + return OpenAIMixinWithEmbeddingsAndRerankImpl() + + @pytest.fixture def mock_models(): """Create multiple mock OpenAI model objects""" @@ -317,6 +363,96 @@ class TestOpenAIMixinEmbeddingModelMetadata: assert llm_model.provider_resource_id == "gpt-4" +class TestOpenAIMixinRerankModelList: + """Test cases for rerank_model_list attribute functionality""" + + async def test_rerank_model_identified(self, mixin_with_rerank, mock_client_context): + """Test that models in rerank_model_list are correctly identified as rerank models""" + # Create mock models: 1 rerank model and 1 LLM + mock_rerank_model = MagicMock(id="rerank-model-1") + mock_llm_model = MagicMock(id="gpt-4") + mock_models = [mock_rerank_model, mock_llm_model] + + mock_client = MagicMock() + + async def mock_models_list(): + for model in mock_models: + yield model + + mock_client.models.list.return_value = mock_models_list() + + with mock_client_context(mixin_with_rerank, mock_client): + result = await mixin_with_rerank.list_models() + + assert result is not None + assert len(result) == 2 + + # Find the models in the result + rerank_model = next(m for m in result if m.identifier == "rerank-model-1") + llm_model = next(m for m in result if m.identifier == "gpt-4") + + # Check rerank model + assert rerank_model.model_type == ModelType.rerank + assert rerank_model.metadata == {} # No metadata for rerank models + assert rerank_model.provider_id == "test-provider" + assert rerank_model.provider_resource_id == "rerank-model-1" + + # Check LLM model + assert llm_model.model_type == ModelType.llm + assert llm_model.metadata == {} # No metadata for LLMs + assert llm_model.provider_id == "test-provider" + assert llm_model.provider_resource_id == "gpt-4" 
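Outside the unit tests, the practical effect of `rerank_model_list` is that rerank models show up in the unified model listing next to LLM and embedding models. A minimal client-side sketch, assuming a local server at the default port:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local deployment

# Models a provider declares in rerank_model_list are tagged with
# model_type "rerank" when list_models() runs.
rerank_models = [m for m in client.models.list() if m.model_type == "rerank"]
for m in rerank_models:
    print(m.identifier, m.provider_id)
```
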
+ + +class TestOpenAIMixinMixedModelTypes: + """Test cases for mixed model types (LLM, embedding, rerank)""" + + async def test_mixed_model_types_identification(self, mixin_with_embeddings_and_rerank, mock_client_context): + """Test that LLM, embedding, and rerank models are correctly identified with proper types and metadata""" + # Create mock models: 1 embedding, 1 rerank, 1 LLM + mock_embedding_model = MagicMock(id="text-embedding-3-small") + mock_rerank_model = MagicMock(id="rerank-model-1") + mock_llm_model = MagicMock(id="gpt-4") + mock_models = [mock_embedding_model, mock_rerank_model, mock_llm_model] + + mock_client = MagicMock() + + async def mock_models_list(): + for model in mock_models: + yield model + + mock_client.models.list.return_value = mock_models_list() + + with mock_client_context(mixin_with_embeddings_and_rerank, mock_client): + result = await mixin_with_embeddings_and_rerank.list_models() + + assert result is not None + assert len(result) == 3 + + # Find the models in the result + embedding_model = next(m for m in result if m.identifier == "text-embedding-3-small") + rerank_model = next(m for m in result if m.identifier == "rerank-model-1") + llm_model = next(m for m in result if m.identifier == "gpt-4") + + # Check embedding model + assert embedding_model.model_type == ModelType.embedding + assert embedding_model.metadata == {"embedding_dimension": 1536, "context_length": 8192} + assert embedding_model.provider_id == "test-provider" + assert embedding_model.provider_resource_id == "text-embedding-3-small" + + # Check rerank model + assert rerank_model.model_type == ModelType.rerank + assert rerank_model.metadata == {} # No metadata for rerank models + assert rerank_model.provider_id == "test-provider" + assert rerank_model.provider_resource_id == "rerank-model-1" + + # Check LLM model + assert llm_model.model_type == ModelType.llm + assert llm_model.metadata == {} # No metadata for LLMs + assert llm_model.provider_id == "test-provider" + assert llm_model.provider_resource_id == "gpt-4" + + class TestOpenAIMixinAllowedModels: """Test cases for allowed_models filtering functionality""" From cf386ad8f8072dbb0609b4792a002689d769fa49 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Sun, 28 Sep 2025 15:11:23 -0700 Subject: [PATCH 12/18] Address comments --- docs/docs/providers/batches/index.mdx | 12 ++++++------ .../providers/remote/inference/nvidia/nvidia.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/docs/providers/batches/index.mdx b/docs/docs/providers/batches/index.mdx index 85213ab17..2c64b277f 100644 --- a/docs/docs/providers/batches/index.mdx +++ b/docs/docs/providers/batches/index.mdx @@ -18,14 +18,14 @@ title: Batches ## Overview The Batches API enables efficient processing of multiple requests in a single operation, -particularly useful for processing large datasets, batch evaluation workflows, and -cost-effective inference at scale. + particularly useful for processing large datasets, batch evaluation workflows, and + cost-effective inference at scale. -The API is designed to allow use of openai client libraries for seamless integration. + The API is designed to allow use of openai client libraries for seamless integration. -This API provides the following extensions: - - idempotent batch creation + This API provides the following extensions: + - idempotent batch creation -Note: This API is currently under active development and may undergo changes. + Note: This API is currently under active development and may undergo changes. 
This section contains documentation for all available providers for the **batches** API. diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index ae9245bfe..1fc6a23b1 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -146,7 +146,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): # Convert query to text format if isinstance(query, str): query_text = query - elif hasattr(query, "text"): + elif isinstance(query, OpenAIChatCompletionContentPartTextParam): query_text = query.text else: raise ValueError("Query must be a string or text content part") @@ -156,7 +156,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): for item in items: if isinstance(item, str): passages.append({"text": item}) - elif hasattr(item, "text"): + elif isinstance(item, OpenAIChatCompletionContentPartTextParam): passages.append({"text": item.text}) else: raise ValueError("Items must be strings or text content parts") From f85743dcca1e8d594d7c54c05d52224128db3682 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Sun, 28 Sep 2025 15:48:29 -0700 Subject: [PATCH 13/18] Add nvidia model cache --- llama_stack/providers/remote/inference/nvidia/nvidia.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 1fc6a23b1..f6fca4014 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -108,8 +108,10 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): 1. Dynamic models from https://integrate.api.nvidia.com/v1/models 2. 
Static rerank models (which use different API endpoints) """ - models = await super().list_models() or [] + self._model_cache = {} + models = await super().list_models() + # Add rerank models existing_ids = {m.identifier for m in models} for model_id, _ in self._rerank_model_endpoints.items(): if self.allowed_models and model_id not in self.allowed_models: From 2fb8756fe2e1f4d157d2bcae0363bd74345ca9dc Mon Sep 17 00:00:00 2001 From: Jiayi Date: Sun, 28 Sep 2025 17:45:54 -0700 Subject: [PATCH 14/18] Fix rerank model endpoint issue --- .../remote/inference/nvidia/nvidia.py | 5 ++-- .../providers/nvidia/test_rerank_inference.py | 27 +++++++++++++++---- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index f6fca4014..15e50ff97 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -138,10 +138,9 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): provider_model_id = await self._get_provider_model_id(model) ranking_url = self.get_base_url() - model_obj = await self.model_store.get_model(model) - if _is_nvidia_hosted(self._config) and "endpoint" in model_obj.metadata: - ranking_url = model_obj.metadata["endpoint"] + if _is_nvidia_hosted(self._config) and provider_model_id in self._rerank_model_endpoints: + ranking_url = self._rerank_model_endpoints[provider_model_id] logger.debug(f"Using rerank endpoint: {ranking_url} for model: {provider_model_id}") diff --git a/tests/unit/providers/nvidia/test_rerank_inference.py b/tests/unit/providers/nvidia/test_rerank_inference.py index f34518609..60891e496 100644 --- a/tests/unit/providers/nvidia/test_rerank_inference.py +++ b/tests/unit/providers/nvidia/test_rerank_inference.py @@ -54,7 +54,7 @@ class MockSession: return PostContext(self.response) -def create_adapter(config=None, model_metadata=None): +def create_adapter(config=None, rerank_endpoints=None): if config is None: config = NVIDIAConfig(api_key="test-key") @@ -62,11 +62,14 @@ def create_adapter(config=None, model_metadata=None): class MockModel: provider_resource_id = "test-model" - metadata = model_metadata or {} + metadata = {} adapter.model_store = AsyncMock() adapter.model_store.get_model = AsyncMock(return_value=MockModel()) + if rerank_endpoints is not None: + adapter._rerank_model_endpoints = rerank_endpoints + return adapter @@ -101,7 +104,7 @@ async def test_missing_rankings_key(): async def test_hosted_with_endpoint(): adapter = create_adapter( - config=NVIDIAConfig(api_key="key"), model_metadata={"endpoint": "https://model.endpoint/rerank"} + config=NVIDIAConfig(api_key="key"), rerank_endpoints={"test-model": "https://model.endpoint/rerank"} ) mock_session = MockSession(MockResponse()) @@ -115,7 +118,7 @@ async def test_hosted_with_endpoint(): async def test_hosted_without_endpoint(): adapter = create_adapter( config=NVIDIAConfig(api_key="key"), # This creates hosted config (integrate.api.nvidia.com). 
- model_metadata={}, # No "endpoint" key + rerank_endpoints={}, # No endpoint mapping for test-model ) mock_session = MockSession(MockResponse()) @@ -126,10 +129,24 @@ async def test_hosted_without_endpoint(): assert "https://integrate.api.nvidia.com" in url +async def test_hosted_model_not_in_endpoint_mapping(): + adapter = create_adapter( + config=NVIDIAConfig(api_key="key"), rerank_endpoints={"other-model": "https://other.endpoint/rerank"} + ) + mock_session = MockSession(MockResponse()) + + with patch("aiohttp.ClientSession", return_value=mock_session): + await adapter.rerank(model="test-model", query="q", items=["a"]) + + url, _ = mock_session.post_calls[0] + assert "https://integrate.api.nvidia.com" in url + assert url != "https://other.endpoint/rerank" + + async def test_self_hosted_ignores_endpoint(): adapter = create_adapter( config=NVIDIAConfig(url="http://localhost:8000", api_key=None), - model_metadata={"endpoint": "https://model.endpoint/rerank"}, # This should be ignored. + rerank_endpoints={"test-model": "https://model.endpoint/rerank"}, # This should be ignored for self-hosted. ) mock_session = MockSession(MockResponse()) From f2a398dcba567290f1d290d660dff4bab0c2e699 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Mon, 29 Sep 2025 11:30:46 -0700 Subject: [PATCH 15/18] Add skip_if_provider_doesnt_support_rerank --- tests/integration/inference/test_rerank.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/integration/inference/test_rerank.py b/tests/integration/inference/test_rerank.py index ea17a54cb..4931c3d6c 100644 --- a/tests/integration/inference/test_rerank.py +++ b/tests/integration/inference/test_rerank.py @@ -26,10 +26,15 @@ DUMMY_IMAGE_URL = ImageContentItem( ) DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image") -SUPPORTED_PROVIDERS = {"remote::nvidia"} PROVIDERS_SUPPORTING_MEDIA = {} # Providers that support media input for rerank models +def skip_if_provider_doesnt_support_rerank(inference_provider_type): + supported_providers = {"remote::nvidia"} + if inference_provider_type not in supported_providers: + pytest.skip(f"{inference_provider_type} doesn't support rerank models") + + def _validate_rerank_response(response: InferenceRerankResponse, items: list) -> None: """ Validate that a rerank response has the correct structure and ordering. @@ -90,8 +95,7 @@ def _validate_semantic_ranking(response: InferenceRerankResponse, items: list, e ], ) def test_rerank_text(client_with_models, rerank_model_id, query, items, inference_provider_type): - if inference_provider_type not in SUPPORTED_PROVIDERS: - pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet. ") + skip_if_provider_doesnt_support_rerank(inference_provider_type) response = client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items) assert isinstance(response, list) @@ -118,8 +122,7 @@ def test_rerank_text(client_with_models, rerank_model_id, query, items, inferenc ], ) def test_rerank_image(client_with_models, rerank_model_id, query, items, inference_provider_type): - if inference_provider_type not in SUPPORTED_PROVIDERS: - pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet. 
") + skip_if_provider_doesnt_support_rerank(inference_provider_type) if rerank_model_id not in PROVIDERS_SUPPORTING_MEDIA: error_type = ( @@ -136,8 +139,7 @@ def test_rerank_image(client_with_models, rerank_model_id, query, items, inferen def test_rerank_max_results(client_with_models, rerank_model_id, inference_provider_type): - if inference_provider_type not in SUPPORTED_PROVIDERS: - pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet. ") + skip_if_provider_doesnt_support_rerank(inference_provider_type) items = [DUMMY_STRING, DUMMY_STRING2, DUMMY_TEXT, DUMMY_TEXT2] max_num_results = 2 @@ -155,8 +157,7 @@ def test_rerank_max_results(client_with_models, rerank_model_id, inference_provi def test_rerank_max_results_larger_than_items(client_with_models, rerank_model_id, inference_provider_type): - if inference_provider_type not in SUPPORTED_PROVIDERS: - pytest.xfail(f"{inference_provider_type} doesn't support rerank yet") + skip_if_provider_doesnt_support_rerank(inference_provider_type) items = [DUMMY_STRING, DUMMY_STRING2] response = client_with_models.inference.rerank( @@ -205,8 +206,7 @@ def test_rerank_max_results_larger_than_items(client_with_models, rerank_model_i def test_rerank_semantic_correctness( client_with_models, rerank_model_id, query, items, expected_first_item, inference_provider_type ): - if inference_provider_type not in SUPPORTED_PROVIDERS: - pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet.") + skip_if_provider_doesnt_support_rerank(inference_provider_type) response = client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items) From 96bd6c1836f7476fef280af8bbaa045c1d31d5db Mon Sep 17 00:00:00 2001 From: Jiayi Date: Mon, 29 Sep 2025 11:43:49 -0700 Subject: [PATCH 16/18] Refactor openai mixin tests with model checking loop --- .../utils/inference/test_openai_mixin.py | 113 ++++++++++-------- 1 file changed, 64 insertions(+), 49 deletions(-) diff --git a/tests/unit/providers/utils/inference/test_openai_mixin.py b/tests/unit/providers/utils/inference/test_openai_mixin.py index ae723dcc2..937caa1c0 100644 --- a/tests/unit/providers/utils/inference/test_openai_mixin.py +++ b/tests/unit/providers/utils/inference/test_openai_mixin.py @@ -153,6 +153,19 @@ def mock_client_context(): return _mock_client_context +def _assert_models_match_expected(actual_models, expected_models): + """Verify the models match expected attributes. 
+ + Args: + actual_models: List of models to verify + expected_models: Mapping of model identifier to expected attribute values + """ + for identifier, expected_attrs in expected_models.items(): + model = next(m for m in actual_models if m.identifier == identifier) + for attr_name, expected_value in expected_attrs.items(): + assert getattr(model, attr_name) == expected_value + + class TestOpenAIMixinListModels: """Test cases for the list_models method""" @@ -346,21 +359,22 @@ class TestOpenAIMixinEmbeddingModelMetadata: assert result is not None assert len(result) == 2 - # Find the models in the result - embedding_model = next(m for m in result if m.identifier == "text-embedding-3-small") - llm_model = next(m for m in result if m.identifier == "gpt-4") + expected_models = { + "text-embedding-3-small": { + "model_type": ModelType.embedding, + "metadata": {"embedding_dimension": 1536, "context_length": 8192}, + "provider_id": "test-provider", + "provider_resource_id": "text-embedding-3-small", + }, + "gpt-4": { + "model_type": ModelType.llm, + "metadata": {}, + "provider_id": "test-provider", + "provider_resource_id": "gpt-4", + }, + } - # Check embedding model - assert embedding_model.model_type == ModelType.embedding - assert embedding_model.metadata == {"embedding_dimension": 1536, "context_length": 8192} - assert embedding_model.provider_id == "test-provider" - assert embedding_model.provider_resource_id == "text-embedding-3-small" - - # Check LLM model - assert llm_model.model_type == ModelType.llm - assert llm_model.metadata == {} # No metadata for LLMs - assert llm_model.provider_id == "test-provider" - assert llm_model.provider_resource_id == "gpt-4" + _assert_models_match_expected(result, expected_models) class TestOpenAIMixinRerankModelList: @@ -387,21 +401,22 @@ class TestOpenAIMixinRerankModelList: assert result is not None assert len(result) == 2 - # Find the models in the result - rerank_model = next(m for m in result if m.identifier == "rerank-model-1") - llm_model = next(m for m in result if m.identifier == "gpt-4") + expected_models = { + "rerank-model-1": { + "model_type": ModelType.rerank, + "metadata": {}, + "provider_id": "test-provider", + "provider_resource_id": "rerank-model-1", + }, + "gpt-4": { + "model_type": ModelType.llm, + "metadata": {}, + "provider_id": "test-provider", + "provider_resource_id": "gpt-4", + }, + } - # Check rerank model - assert rerank_model.model_type == ModelType.rerank - assert rerank_model.metadata == {} # No metadata for rerank models - assert rerank_model.provider_id == "test-provider" - assert rerank_model.provider_resource_id == "rerank-model-1" - - # Check LLM model - assert llm_model.model_type == ModelType.llm - assert llm_model.metadata == {} # No metadata for LLMs - assert llm_model.provider_id == "test-provider" - assert llm_model.provider_resource_id == "gpt-4" + _assert_models_match_expected(result, expected_models) class TestOpenAIMixinMixedModelTypes: @@ -429,28 +444,28 @@ class TestOpenAIMixinMixedModelTypes: assert result is not None assert len(result) == 3 - # Find the models in the result - embedding_model = next(m for m in result if m.identifier == "text-embedding-3-small") - rerank_model = next(m for m in result if m.identifier == "rerank-model-1") - llm_model = next(m for m in result if m.identifier == "gpt-4") + expected_models = { + "text-embedding-3-small": { + "model_type": ModelType.embedding, + "metadata": {"embedding_dimension": 1536, "context_length": 8192}, + "provider_id": "test-provider", + 
"provider_resource_id": "text-embedding-3-small", + }, + "rerank-model-1": { + "model_type": ModelType.rerank, + "metadata": {}, + "provider_id": "test-provider", + "provider_resource_id": "rerank-model-1", + }, + "gpt-4": { + "model_type": ModelType.llm, + "metadata": {}, + "provider_id": "test-provider", + "provider_resource_id": "gpt-4", + }, + } - # Check embedding model - assert embedding_model.model_type == ModelType.embedding - assert embedding_model.metadata == {"embedding_dimension": 1536, "context_length": 8192} - assert embedding_model.provider_id == "test-provider" - assert embedding_model.provider_resource_id == "text-embedding-3-small" - - # Check rerank model - assert rerank_model.model_type == ModelType.rerank - assert rerank_model.metadata == {} # No metadata for rerank models - assert rerank_model.provider_id == "test-provider" - assert rerank_model.provider_resource_id == "rerank-model-1" - - # Check LLM model - assert llm_model.model_type == ModelType.llm - assert llm_model.metadata == {} # No metadata for LLMs - assert llm_model.provider_id == "test-provider" - assert llm_model.provider_resource_id == "gpt-4" + _assert_models_match_expected(result, expected_models) class TestOpenAIMixinAllowedModels: From bb2eb33fc3509028f932a8e32f9cf66e383ba53b Mon Sep 17 00:00:00 2001 From: Jiayi Date: Tue, 30 Sep 2025 12:09:09 -0700 Subject: [PATCH 17/18] Fix pre-commit after rebasing --- docs/docs/providers/agents/index.mdx | 2 +- .../static/experimental-llama-stack-spec.html | 2 +- .../static/experimental-llama-stack-spec.yaml | 3 +- docs/static/llama-stack-spec.html | 4995 +---------------- docs/static/llama-stack-spec.yaml | 3725 +----------- llama_stack/core/routers/inference.py | 1 - tests/integration/fixtures/common.py | 9 +- 7 files changed, 15 insertions(+), 8722 deletions(-) diff --git a/docs/docs/providers/agents/index.mdx b/docs/docs/providers/agents/index.mdx index 06eb104af..200d0119f 100644 --- a/docs/docs/providers/agents/index.mdx +++ b/docs/docs/providers/agents/index.mdx @@ -14,4 +14,4 @@ Agents APIs for creating and interacting with agentic systems. -This section contains documentation for all available providers for the **agents** API. +This section contains documentation for all available providers for the **agents** API. \ No newline at end of file diff --git a/docs/static/experimental-llama-stack-spec.html b/docs/static/experimental-llama-stack-spec.html index a84226c05..574107a6d 100644 --- a/docs/static/experimental-llama-stack-spec.html +++ b/docs/static/experimental-llama-stack-spec.html @@ -4992,7 +4992,7 @@ "properties": { "model": { "type": "string", - "description": "The identifier of the reranking model to use." + "description": "The identifier of the reranking model to use. The model must be a reranking model registered with Llama Stack and available via the /models endpoint." }, "query": { "oneOf": [ diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index a08c0cc87..aae356d6d 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -3657,7 +3657,8 @@ components: model: type: string description: >- - The identifier of the reranking model to use. + The identifier of the reranking model to use. The model must be a reranking + model registered with Llama Stack and available via the /models endpoint. 
query: oneOf: - type: string diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 0fdf3f415..2ee665123 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -4819,2834 +4819,6 @@ "title": "OpenAIUserMessageParam", "description": "A message from the user in an OpenAI-compatible chat completion request." }, - "OpenAICompletionWithInputMessages": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The ID of the chat completion" - }, - "choices": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OpenAIChoice" - }, - "description": "List of choices" - }, - "object": { - "type": "string", - "const": "chat.completion", - "default": "chat.completion", - "description": "The object type, which will be \"chat.completion\"" - }, - "created": { - "type": "integer", - "description": "The Unix timestamp in seconds when the chat completion was created" - }, - "model": { - "type": "string", - "description": "The model that was used to generate the chat completion" - }, - "input_messages": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OpenAIMessageParam" - } - } - }, - "additionalProperties": false, - "required": [ - "id", - "choices", - "object", - "created", - "model", - "input_messages" - ], - "title": "OpenAICompletionWithInputMessages" - }, - "DataSource": { - "oneOf": [ - { - "$ref": "#/components/schemas/URIDataSource" - }, - { - "$ref": "#/components/schemas/RowsDataSource" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "uri": "#/components/schemas/URIDataSource", - "rows": "#/components/schemas/RowsDataSource" - } - } - }, - "Dataset": { - "type": "object", - "properties": { - "identifier": { - "type": "string" - }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "model", - "shield", - "vector_db", - "dataset", - "scoring_function", - "benchmark", - "tool", - "tool_group", - "prompt" - ], - "const": "dataset", - "default": "dataset", - "description": "Type of resource, always 'dataset' for datasets" - }, - "purpose": { - "type": "string", - "enum": [ - "post-training/messages", - "eval/question-answer", - "eval/messages-answer" - ], - "description": "Purpose of the dataset indicating its intended use" - }, - "source": { - "$ref": "#/components/schemas/DataSource", - "description": "Data source configuration for the dataset" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Additional metadata for the dataset" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_id", - "type", - "purpose", - "source", - "metadata" - ], - "title": "Dataset", - "description": "Dataset resource for storing and accessing training or evaluation data." 
- }, - "RowsDataSource": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "rows", - "default": "rows" - }, - "rows": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "description": "The dataset is stored in rows. E.g. - [ {\"messages\": [{\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}]} ]" - } - }, - "additionalProperties": false, - "required": [ - "type", - "rows" - ], - "title": "RowsDataSource", - "description": "A dataset stored in rows." - }, - "URIDataSource": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "uri", - "default": "uri" - }, - "uri": { - "type": "string", - "description": "The dataset can be obtained from a URI. E.g. - \"https://mywebsite.com/mydata.jsonl\" - \"lsfs://mydata.jsonl\" - \"data:csv;base64,{base64_content}\"" - } - }, - "additionalProperties": false, - "required": [ - "type", - "uri" - ], - "title": "URIDataSource", - "description": "A dataset that can be obtained from a URI." - }, - "Model": { - "type": "object", - "properties": { - "identifier": { - "type": "string", - "description": "Unique identifier for this resource in llama stack" - }, - "provider_resource_id": { - "type": "string", - "description": "Unique identifier for this resource in the provider" - }, - "provider_id": { - "type": "string", - "description": "ID of the provider that owns this resource" - }, - "type": { - "type": "string", - "enum": [ - "model", - "shield", - "vector_db", - "dataset", - "scoring_function", - "benchmark", - "tool", - "tool_group", - "prompt" - ], - "const": "model", - "default": "model", - "description": "The resource type, always 'model' for model resources" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Any additional metadata for this model" - }, - "model_type": { - "$ref": "#/components/schemas/ModelType", - "default": "llm", - "description": "The type of model (LLM or embedding model)" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_id", - "type", - "metadata", - "model_type" - ], - "title": "Model", - "description": "A model resource representing an AI model registered in Llama Stack." - }, - "ModelType": { - "type": "string", - "enum": [ - "llm", - "embedding", - "rerank" - ], - "title": "ModelType", - "description": "Enumeration of supported model types in Llama Stack." - }, - "AgentTurnInputType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "agent_turn_input", - "default": "agent_turn_input", - "description": "Discriminator type. Always \"agent_turn_input\"" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "AgentTurnInputType", - "description": "Parameter type for agent turn input." - }, - "ArrayType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "array", - "default": "array", - "description": "Discriminator type. 
Always \"array\"" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "ArrayType", - "description": "Parameter type for array values." - }, - "BooleanType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "boolean", - "default": "boolean", - "description": "Discriminator type. Always \"boolean\"" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "BooleanType", - "description": "Parameter type for boolean values." - }, - "ChatCompletionInputType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "chat_completion_input", - "default": "chat_completion_input", - "description": "Discriminator type. Always \"chat_completion_input\"" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "ChatCompletionInputType", - "description": "Parameter type for chat completion input." - }, - "CompletionInputType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "completion_input", - "default": "completion_input", - "description": "Discriminator type. Always \"completion_input\"" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "CompletionInputType", - "description": "Parameter type for completion input." - }, - "JsonType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "json", - "default": "json", - "description": "Discriminator type. Always \"json\"" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "JsonType", - "description": "Parameter type for JSON values." - }, - "NumberType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "number", - "default": "number", - "description": "Discriminator type. Always \"number\"" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "NumberType", - "description": "Parameter type for numeric values." - }, - "ObjectType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "object", - "default": "object", - "description": "Discriminator type. Always \"object\"" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "ObjectType", - "description": "Parameter type for object values." 
- }, - "ParamType": { - "oneOf": [ - { - "$ref": "#/components/schemas/StringType" - }, - { - "$ref": "#/components/schemas/NumberType" - }, - { - "$ref": "#/components/schemas/BooleanType" - }, - { - "$ref": "#/components/schemas/ArrayType" - }, - { - "$ref": "#/components/schemas/ObjectType" - }, - { - "$ref": "#/components/schemas/JsonType" - }, - { - "$ref": "#/components/schemas/UnionType" - }, - { - "$ref": "#/components/schemas/ChatCompletionInputType" - }, - { - "$ref": "#/components/schemas/CompletionInputType" - }, - { - "$ref": "#/components/schemas/AgentTurnInputType" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "string": "#/components/schemas/StringType", - "number": "#/components/schemas/NumberType", - "boolean": "#/components/schemas/BooleanType", - "array": "#/components/schemas/ArrayType", - "object": "#/components/schemas/ObjectType", - "json": "#/components/schemas/JsonType", - "union": "#/components/schemas/UnionType", - "chat_completion_input": "#/components/schemas/ChatCompletionInputType", - "completion_input": "#/components/schemas/CompletionInputType", - "agent_turn_input": "#/components/schemas/AgentTurnInputType" - } - } - }, - "ScoringFn": { - "type": "object", - "properties": { - "identifier": { - "type": "string" - }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "model", - "shield", - "vector_db", - "dataset", - "scoring_function", - "benchmark", - "tool", - "tool_group", - "prompt" - ], - "const": "scoring_function", - "default": "scoring_function", - "description": "The resource type, always scoring_function" - }, - "description": { - "type": "string" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "return_type": { - "$ref": "#/components/schemas/ParamType" - }, - "params": { - "$ref": "#/components/schemas/ScoringFnParams" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_id", - "type", - "metadata", - "return_type" - ], - "title": "ScoringFn", - "description": "A scoring function resource for evaluating model outputs." - }, - "StringType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "string", - "default": "string", - "description": "Discriminator type. Always \"string\"" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "StringType", - "description": "Parameter type for string values." - }, - "UnionType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "union", - "default": "union", - "description": "Discriminator type. Always \"union\"" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "UnionType", - "description": "Parameter type for union values." 
- }, - "Shield": { - "type": "object", - "properties": { - "identifier": { - "type": "string" - }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "model", - "shield", - "vector_db", - "dataset", - "scoring_function", - "benchmark", - "tool", - "tool_group", - "prompt" - ], - "const": "shield", - "default": "shield", - "description": "The resource type, always shield" - }, - "params": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Configuration parameters for the shield" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_id", - "type" - ], - "title": "Shield", - "description": "A safety shield resource that can be used to check content." - }, - "Span": { - "type": "object", - "properties": { - "span_id": { - "type": "string", - "description": "Unique identifier for the span" - }, - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this span belongs to" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the operation began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the operation finished, if completed" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the span" - } - }, - "additionalProperties": false, - "required": [ - "span_id", - "trace_id", - "name", - "start_time" - ], - "title": "Span", - "description": "A span representing a single operation within a trace." - }, - "GetSpanTreeRequest": { - "type": "object", - "properties": { - "attributes_to_return": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to return in the tree." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "title": "GetSpanTreeRequest" - }, - "SpanStatus": { - "type": "string", - "enum": [ - "ok", - "error" - ], - "title": "SpanStatus", - "description": "The status of a span indicating whether it completed successfully or with an error." 
- }, - "SpanWithStatus": { - "type": "object", - "properties": { - "span_id": { - "type": "string", - "description": "Unique identifier for the span" - }, - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this span belongs to" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the operation began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the operation finished, if completed" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the span" - }, - "status": { - "$ref": "#/components/schemas/SpanStatus", - "description": "(Optional) The current status of the span" - } - }, - "additionalProperties": false, - "required": [ - "span_id", - "trace_id", - "name", - "start_time" - ], - "title": "SpanWithStatus", - "description": "A span that includes status information." - }, - "QuerySpanTreeResponse": { - "type": "object", - "properties": { - "data": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/SpanWithStatus" - }, - "description": "Dictionary mapping span IDs to spans with status information" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QuerySpanTreeResponse", - "description": "Response containing a tree structure of spans." - }, - "Tool": { - "type": "object", - "properties": { - "identifier": { - "type": "string" - }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "model", - "shield", - "vector_db", - "dataset", - "scoring_function", - "benchmark", - "tool", - "tool_group", - "prompt" - ], - "const": "tool", - "default": "tool", - "description": "Type of resource, always 'tool'" - }, - "toolgroup_id": { - "type": "string", - "description": "ID of the tool group this tool belongs to" - }, - "description": { - "type": "string", - "description": "Human-readable description of what the tool does" - }, - "parameters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ToolParameter" - }, - "description": "List of parameters this tool accepts" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Additional metadata about the tool" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_id", - "type", - "toolgroup_id", - "description", - "parameters" - ], - "title": "Tool", - "description": "A tool that can be invoked by agents." 
- }, - "ToolGroup": { - "type": "object", - "properties": { - "identifier": { - "type": "string" - }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "model", - "shield", - "vector_db", - "dataset", - "scoring_function", - "benchmark", - "tool", - "tool_group", - "prompt" - ], - "const": "tool_group", - "default": "tool_group", - "description": "Type of resource, always 'tool_group'" - }, - "mcp_endpoint": { - "$ref": "#/components/schemas/URL", - "description": "(Optional) Model Context Protocol endpoint for remote tools" - }, - "args": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Additional arguments for the tool group" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_id", - "type" - ], - "title": "ToolGroup", - "description": "A group of related tools managed together." - }, - "Trace": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace" - }, - "root_span_id": { - "type": "string", - "description": "Unique identifier for the root span that started this trace" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the trace began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the trace finished, if completed" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "root_span_id", - "start_time" - ], - "title": "Trace", - "description": "A trace representing the complete execution path of a request across multiple operations." - }, - "Checkpoint": { - "type": "object", - "properties": { - "identifier": { - "type": "string", - "description": "Unique identifier for the checkpoint" - }, - "created_at": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the checkpoint was created" - }, - "epoch": { - "type": "integer", - "description": "Training epoch when the checkpoint was saved" - }, - "post_training_job_id": { - "type": "string", - "description": "Identifier of the training job that created this checkpoint" - }, - "path": { - "type": "string", - "description": "File system path where the checkpoint is stored" - }, - "training_metrics": { - "$ref": "#/components/schemas/PostTrainingMetric", - "description": "(Optional) Training metrics associated with this checkpoint" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "created_at", - "epoch", - "post_training_job_id", - "path" - ], - "title": "Checkpoint", - "description": "Checkpoint created during training runs." - }, - "PostTrainingJobArtifactsResponse": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string", - "description": "Unique identifier for the training job" - }, - "checkpoints": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Checkpoint" - }, - "description": "List of model checkpoints created during training" - } - }, - "additionalProperties": false, - "required": [ - "job_uuid", - "checkpoints" - ], - "title": "PostTrainingJobArtifactsResponse", - "description": "Artifacts of a finetuning job." 
- }, - "PostTrainingMetric": { - "type": "object", - "properties": { - "epoch": { - "type": "integer", - "description": "Training epoch number" - }, - "train_loss": { - "type": "number", - "description": "Loss value on the training dataset" - }, - "validation_loss": { - "type": "number", - "description": "Loss value on the validation dataset" - }, - "perplexity": { - "type": "number", - "description": "Perplexity metric indicating model confidence" - } - }, - "additionalProperties": false, - "required": [ - "epoch", - "train_loss", - "validation_loss", - "perplexity" - ], - "title": "PostTrainingMetric", - "description": "Training metrics captured during post-training jobs." - }, - "PostTrainingJobStatusResponse": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string", - "description": "Unique identifier for the training job" - }, - "status": { - "type": "string", - "enum": [ - "completed", - "in_progress", - "failed", - "scheduled", - "cancelled" - ], - "description": "Current status of the training job" - }, - "scheduled_at": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the job was scheduled" - }, - "started_at": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the job execution began" - }, - "completed_at": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the job finished, if completed" - }, - "resources_allocated": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Information about computational resources allocated to the job" - }, - "checkpoints": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Checkpoint" - }, - "description": "List of model checkpoints created during training" - } - }, - "additionalProperties": false, - "required": [ - "job_uuid", - "status", - "checkpoints" - ], - "title": "PostTrainingJobStatusResponse", - "description": "Status of a finetuning job." 
- }, - "ListPostTrainingJobsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "job_uuid" - ], - "title": "PostTrainingJob" - } - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListPostTrainingJobsResponse" - }, - "VectorDB": { - "type": "object", - "properties": { - "identifier": { - "type": "string" - }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "model", - "shield", - "vector_db", - "dataset", - "scoring_function", - "benchmark", - "tool", - "tool_group", - "prompt" - ], - "const": "vector_db", - "default": "vector_db", - "description": "Type of resource, always 'vector_db' for vector databases" - }, - "embedding_model": { - "type": "string", - "description": "Name of the embedding model to use for vector generation" - }, - "embedding_dimension": { - "type": "integer", - "description": "Dimension of the embedding vectors" - }, - "vector_db_name": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_id", - "type", - "embedding_model", - "embedding_dimension" - ], - "title": "VectorDB", - "description": "Vector database resource for storing and querying vector embeddings." - }, - "HealthInfo": { - "type": "object", - "properties": { - "status": { - "type": "string", - "enum": [ - "OK", - "Error", - "Not Implemented" - ], - "description": "Current health status of the service" - } - }, - "additionalProperties": false, - "required": [ - "status" - ], - "title": "HealthInfo", - "description": "Health status information for the service." - }, - "RAGDocument": { - "type": "object", - "properties": { - "document_id": { - "type": "string", - "description": "The unique identifier for the document." - }, - "content": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/InterleavedContentItem" - }, - { - "type": "array", - "items": { - "$ref": "#/components/schemas/InterleavedContentItem" - } - }, - { - "$ref": "#/components/schemas/URL" - } - ], - "description": "The content of the document." - }, - "mime_type": { - "type": "string", - "description": "The MIME type of the document." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Additional metadata for the document." - } - }, - "additionalProperties": false, - "required": [ - "document_id", - "content", - "metadata" - ], - "title": "RAGDocument", - "description": "A document to be used for document ingestion in the RAG Tool." 
- }, - "InsertRequest": { - "type": "object", - "properties": { - "documents": { - "type": "array", - "items": { - "$ref": "#/components/schemas/RAGDocument" - }, - "description": "List of documents to index in the RAG system" - }, - "vector_db_id": { - "type": "string", - "description": "ID of the vector database to store the document embeddings" - }, - "chunk_size_in_tokens": { - "type": "integer", - "description": "(Optional) Size in tokens for document chunking during indexing" - } - }, - "additionalProperties": false, - "required": [ - "documents", - "vector_db_id", - "chunk_size_in_tokens" - ], - "title": "InsertRequest" - }, - "Chunk": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The content of the chunk, which can be interleaved text, images, or other types." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Metadata associated with the chunk that will be used in the model context during inference." - }, - "embedding": { - "type": "array", - "items": { - "type": "number" - }, - "description": "Optional embedding for the chunk. If not provided, it will be computed later." - }, - "stored_chunk_id": { - "type": "string", - "description": "The chunk ID that is stored in the vector database. Used for backend functionality." - }, - "chunk_metadata": { - "$ref": "#/components/schemas/ChunkMetadata", - "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality." - } - }, - "additionalProperties": false, - "required": [ - "content", - "metadata" - ], - "title": "Chunk", - "description": "A chunk of content that can be inserted into a vector database." - }, - "ChunkMetadata": { - "type": "object", - "properties": { - "chunk_id": { - "type": "string", - "description": "The ID of the chunk. If not set, it will be generated based on the document ID and content." - }, - "document_id": { - "type": "string", - "description": "The ID of the document this chunk belongs to." - }, - "source": { - "type": "string", - "description": "The source of the content, such as a URL, file path, or other identifier." - }, - "created_timestamp": { - "type": "integer", - "description": "An optional timestamp indicating when the chunk was created." - }, - "updated_timestamp": { - "type": "integer", - "description": "An optional timestamp indicating when the chunk was last updated." - }, - "chunk_window": { - "type": "string", - "description": "The window of the chunk, which can be used to group related chunks together." - }, - "chunk_tokenizer": { - "type": "string", - "description": "The tokenizer used to create the chunk. Default is Tiktoken." - }, - "chunk_embedding_model": { - "type": "string", - "description": "The embedding model used to create the chunk's embedding." - }, - "chunk_embedding_dimension": { - "type": "integer", - "description": "The dimension of the embedding vector for the chunk." - }, - "content_token_count": { - "type": "integer", - "description": "The number of tokens in the content of the chunk." - }, - "metadata_token_count": { - "type": "integer", - "description": "The number of tokens in the metadata of the chunk." 
- } - }, - "additionalProperties": false, - "title": "ChunkMetadata", - "description": "`ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata` is set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not expected to change after. Use `Chunk.metadata` for metadata that will be used in the context during inference." - }, - "InsertChunksRequest": { - "type": "object", - "properties": { - "vector_db_id": { - "type": "string", - "description": "The identifier of the vector database to insert the chunks into." - }, - "chunks": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Chunk" - }, - "description": "The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types. `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional. If `metadata` is provided, you configure how Llama Stack formats the chunk during generation. If `embedding` is not provided, it will be computed later." - }, - "ttl_seconds": { - "type": "integer", - "description": "The time to live of the chunks." - } - }, - "additionalProperties": false, - "required": [ - "vector_db_id", - "chunks" - ], - "title": "InsertChunksRequest" - }, - "ProviderInfo": { - "type": "object", - "properties": { - "api": { - "type": "string", - "description": "The API name this provider implements" - }, - "provider_id": { - "type": "string", - "description": "Unique identifier for the provider" - }, - "provider_type": { - "type": "string", - "description": "The type of provider implementation" - }, - "config": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Configuration parameters for the provider" - }, - "health": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Current health status of the provider" - } - }, - "additionalProperties": false, - "required": [ - "api", - "provider_id", - "provider_type", - "config", - "health" - ], - "title": "ProviderInfo", - "description": "Information about a registered provider including its configuration and health status." - }, - "InvokeToolRequest": { - "type": "object", - "properties": { - "tool_name": { - "type": "string", - "description": "The name of the tool to invoke." - }, - "kwargs": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "A dictionary of arguments to pass to the tool." 
- } - }, - "additionalProperties": false, - "required": [ - "tool_name", - "kwargs" - ], - "title": "InvokeToolRequest" - }, - "ToolInvocationResult": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "(Optional) The output content from the tool execution" - }, - "error_message": { - "type": "string", - "description": "(Optional) Error message if the tool execution failed" - }, - "error_code": { - "type": "integer", - "description": "(Optional) Numeric error code if the tool execution failed" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Additional metadata about the tool execution" - } - }, - "additionalProperties": false, - "title": "ToolInvocationResult", - "description": "Result of a tool invocation." - }, - "PaginatedResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "description": "The list of items for the current page" - }, - "has_more": { - "type": "boolean", - "description": "Whether there are more items available after this set" - }, - "url": { - "type": "string", - "description": "The URL for accessing this list" - } - }, - "additionalProperties": false, - "required": [ - "data", - "has_more" - ], - "title": "PaginatedResponse", - "description": "A generic paginated response that follows a simple format." - }, - "Job": { - "type": "object", - "properties": { - "job_id": { - "type": "string", - "description": "Unique identifier for the job" - }, - "status": { - "type": "string", - "enum": [ - "completed", - "in_progress", - "failed", - "scheduled", - "cancelled" - ], - "description": "Current execution status of the job" - } - }, - "additionalProperties": false, - "required": [ - "job_id", - "status" - ], - "title": "Job", - "description": "A job execution instance with status tracking." - }, - "ListBenchmarksResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Benchmark" - } - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListBenchmarksResponse" - }, - "Order": { - "type": "string", - "enum": [ - "asc", - "desc" - ], - "title": "Order", - "description": "Sort order for paginated responses." 
- }, - "ListOpenAIChatCompletionResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The ID of the chat completion" - }, - "choices": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OpenAIChoice" - }, - "description": "List of choices" - }, - "object": { - "type": "string", - "const": "chat.completion", - "default": "chat.completion", - "description": "The object type, which will be \"chat.completion\"" - }, - "created": { - "type": "integer", - "description": "The Unix timestamp in seconds when the chat completion was created" - }, - "model": { - "type": "string", - "description": "The model that was used to generate the chat completion" - }, - "input_messages": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OpenAIMessageParam" - } - } - }, - "additionalProperties": false, - "required": [ - "id", - "choices", - "object", - "created", - "model", - "input_messages" - ], - "title": "OpenAICompletionWithInputMessages" - }, - "description": "List of chat completion objects with their input messages" - }, - "has_more": { - "type": "boolean", - "description": "Whether there are more completions available beyond this list" - }, - "first_id": { - "type": "string", - "description": "ID of the first completion in this list" - }, - "last_id": { - "type": "string", - "description": "ID of the last completion in this list" - }, - "object": { - "type": "string", - "const": "list", - "default": "list", - "description": "Must be \"list\" to identify this as a list response" - } - }, - "additionalProperties": false, - "required": [ - "data", - "has_more", - "first_id", - "last_id", - "object" - ], - "title": "ListOpenAIChatCompletionResponse", - "description": "Response from listing OpenAI-compatible chat completions." - }, - "ListDatasetsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Dataset" - }, - "description": "List of datasets" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListDatasetsResponse", - "description": "Response from listing datasets." - }, - "ListModelsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Model" - } - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListModelsResponse" - }, - "ListOpenAIResponseInputItem": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OpenAIResponseInput" - }, - "description": "List of input items" - }, - "object": { - "type": "string", - "const": "list", - "default": "list", - "description": "Object type identifier, always \"list\"" - } - }, - "additionalProperties": false, - "required": [ - "data", - "object" - ], - "title": "ListOpenAIResponseInputItem", - "description": "List container for OpenAI response input items." 
- }, - "ListOpenAIResponseObject": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OpenAIResponseObjectWithInput" - }, - "description": "List of response objects with their input context" - }, - "has_more": { - "type": "boolean", - "description": "Whether there are more results available beyond this page" - }, - "first_id": { - "type": "string", - "description": "Identifier of the first item in this page" - }, - "last_id": { - "type": "string", - "description": "Identifier of the last item in this page" - }, - "object": { - "type": "string", - "const": "list", - "default": "list", - "description": "Object type identifier, always \"list\"" - } - }, - "additionalProperties": false, - "required": [ - "data", - "has_more", - "first_id", - "last_id", - "object" - ], - "title": "ListOpenAIResponseObject", - "description": "Paginated list of OpenAI response objects with navigation metadata." - }, - "OpenAIResponseObjectWithInput": { - "type": "object", - "properties": { - "created_at": { - "type": "integer", - "description": "Unix timestamp when the response was created" - }, - "error": { - "$ref": "#/components/schemas/OpenAIResponseError", - "description": "(Optional) Error details if the response generation failed" - }, - "id": { - "type": "string", - "description": "Unique identifier for this response" - }, - "model": { - "type": "string", - "description": "Model identifier used for generation" - }, - "object": { - "type": "string", - "const": "response", - "default": "response", - "description": "Object type identifier, always \"response\"" - }, - "output": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OpenAIResponseOutput" - }, - "description": "List of generated output items (messages, tool calls, etc.)" - }, - "parallel_tool_calls": { - "type": "boolean", - "default": false, - "description": "Whether tool calls can be executed in parallel" - }, - "previous_response_id": { - "type": "string", - "description": "(Optional) ID of the previous response in a conversation" - }, - "status": { - "type": "string", - "description": "Current status of the response generation" - }, - "temperature": { - "type": "number", - "description": "(Optional) Sampling temperature used for generation" - }, - "text": { - "$ref": "#/components/schemas/OpenAIResponseText", - "description": "Text formatting configuration for the response" - }, - "top_p": { - "type": "number", - "description": "(Optional) Nucleus sampling parameter used for generation" - }, - "truncation": { - "type": "string", - "description": "(Optional) Truncation strategy applied to the response" - }, - "user": { - "type": "string", - "description": "(Optional) User identifier associated with the request" - }, - "input": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OpenAIResponseInput" - }, - "description": "List of input items that led to this response" - } - }, - "additionalProperties": false, - "required": [ - "created_at", - "id", - "model", - "object", - "output", - "parallel_tool_calls", - "status", - "text", - "input" - ], - "title": "OpenAIResponseObjectWithInput", - "description": "OpenAI response object extended with input context information." 
- }, - "ListPromptsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Prompt" - } - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListPromptsResponse", - "description": "Response model to list prompts." - }, - "ListProvidersResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ProviderInfo" - }, - "description": "List of provider information objects" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListProvidersResponse", - "description": "Response containing a list of all available providers." - }, - "RouteInfo": { - "type": "object", - "properties": { - "route": { - "type": "string", - "description": "The API endpoint path" - }, - "method": { - "type": "string", - "description": "HTTP method for the route" - }, - "provider_types": { - "type": "array", - "items": { - "type": "string" - }, - "description": "List of provider types that implement this route" - } - }, - "additionalProperties": false, - "required": [ - "route", - "method", - "provider_types" - ], - "title": "RouteInfo", - "description": "Information about an API route including its path, method, and implementing providers." - }, - "ListRoutesResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/RouteInfo" - }, - "description": "List of available route information objects" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListRoutesResponse", - "description": "Response containing a list of all available API routes." - }, - "ListToolDefsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ToolDef" - }, - "description": "List of tool definitions" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListToolDefsResponse", - "description": "Response containing a list of tool definitions." - }, - "ListScoringFunctionsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ScoringFn" - } - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListScoringFunctionsResponse" - }, - "ListShieldsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Shield" - } - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListShieldsResponse" - }, - "ListToolGroupsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ToolGroup" - }, - "description": "List of tool groups" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListToolGroupsResponse", - "description": "Response containing a list of tool groups." - }, - "ListToolsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Tool" - }, - "description": "List of tools" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListToolsResponse", - "description": "Response containing a list of tools." 
- }, - "ListVectorDBsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/VectorDB" - }, - "description": "List of vector databases" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListVectorDBsResponse", - "description": "Response from listing vector databases." - }, - "Event": { - "oneOf": [ - { - "$ref": "#/components/schemas/UnstructuredLogEvent" - }, - { - "$ref": "#/components/schemas/MetricEvent" - }, - { - "$ref": "#/components/schemas/StructuredLogEvent" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "unstructured_log": "#/components/schemas/UnstructuredLogEvent", - "metric": "#/components/schemas/MetricEvent", - "structured_log": "#/components/schemas/StructuredLogEvent" - } - } - }, - "EventType": { - "type": "string", - "enum": [ - "unstructured_log", - "structured_log", - "metric" - ], - "title": "EventType", - "description": "The type of telemetry event being logged." - }, - "LogSeverity": { - "type": "string", - "enum": [ - "verbose", - "debug", - "info", - "warn", - "error", - "critical" - ], - "title": "LogSeverity", - "description": "The severity level of a log message." - }, - "MetricEvent": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this event belongs to" - }, - "span_id": { - "type": "string", - "description": "Unique identifier for the span this event belongs to" - }, - "timestamp": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the event occurred" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the event" - }, - "type": { - "$ref": "#/components/schemas/EventType", - "const": "metric", - "default": "metric", - "description": "Event type identifier set to METRIC" - }, - "metric": { - "type": "string", - "description": "The name of the metric being measured" - }, - "value": { - "oneOf": [ - { - "type": "integer" - }, - { - "type": "number" - } - ], - "description": "The numeric value of the metric measurement" - }, - "unit": { - "type": "string", - "description": "The unit of measurement for the metric value" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "span_id", - "timestamp", - "type", - "metric", - "value", - "unit" - ], - "title": "MetricEvent", - "description": "A metric event containing a measured value." - }, - "SpanEndPayload": { - "type": "object", - "properties": { - "type": { - "$ref": "#/components/schemas/StructuredLogType", - "const": "span_end", - "default": "span_end", - "description": "Payload type identifier set to SPAN_END" - }, - "status": { - "$ref": "#/components/schemas/SpanStatus", - "description": "The final status of the span indicating success or failure" - } - }, - "additionalProperties": false, - "required": [ - "type", - "status" - ], - "title": "SpanEndPayload", - "description": "Payload for a span end event." 
- }, - "SpanStartPayload": { - "type": "object", - "properties": { - "type": { - "$ref": "#/components/schemas/StructuredLogType", - "const": "span_start", - "default": "span_start", - "description": "Payload type identifier set to SPAN_START" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - } - }, - "additionalProperties": false, - "required": [ - "type", - "name" - ], - "title": "SpanStartPayload", - "description": "Payload for a span start event." - }, - "StructuredLogEvent": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this event belongs to" - }, - "span_id": { - "type": "string", - "description": "Unique identifier for the span this event belongs to" - }, - "timestamp": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the event occurred" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the event" - }, - "type": { - "$ref": "#/components/schemas/EventType", - "const": "structured_log", - "default": "structured_log", - "description": "Event type identifier set to STRUCTURED_LOG" - }, - "payload": { - "$ref": "#/components/schemas/StructuredLogPayload", - "description": "The structured payload data for the log event" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "span_id", - "timestamp", - "type", - "payload" - ], - "title": "StructuredLogEvent", - "description": "A structured log event containing typed payload data." - }, - "StructuredLogPayload": { - "oneOf": [ - { - "$ref": "#/components/schemas/SpanStartPayload" - }, - { - "$ref": "#/components/schemas/SpanEndPayload" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "span_start": "#/components/schemas/SpanStartPayload", - "span_end": "#/components/schemas/SpanEndPayload" - } - } - }, - "StructuredLogType": { - "type": "string", - "enum": [ - "span_start", - "span_end" - ], - "title": "StructuredLogType", - "description": "The type of structured log event payload." 
- }, - "UnstructuredLogEvent": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this event belongs to" - }, - "span_id": { - "type": "string", - "description": "Unique identifier for the span this event belongs to" - }, - "timestamp": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the event occurred" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the event" - }, - "type": { - "$ref": "#/components/schemas/EventType", - "const": "unstructured_log", - "default": "unstructured_log", - "description": "Event type identifier set to UNSTRUCTURED_LOG" - }, - "message": { - "type": "string", - "description": "The log message text" - }, - "severity": { - "$ref": "#/components/schemas/LogSeverity", - "description": "The severity level of the log message" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "span_id", - "timestamp", - "type", - "message", - "severity" - ], - "title": "UnstructuredLogEvent", - "description": "An unstructured log event containing a simple text message." - }, - "LogEventRequest": { - "type": "object", - "properties": { - "event": { - "$ref": "#/components/schemas/Event", - "description": "The event to log." - }, - "ttl_seconds": { - "type": "integer", - "description": "The time to live of the event." - } - }, - "additionalProperties": false, - "required": [ - "event", - "ttl_seconds" - ], - "title": "LogEventRequest" - }, - "VectorStoreChunkingStrategy": { - "oneOf": [ - { - "$ref": "#/components/schemas/VectorStoreChunkingStrategyAuto" - }, - { - "$ref": "#/components/schemas/VectorStoreChunkingStrategyStatic" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "auto": "#/components/schemas/VectorStoreChunkingStrategyAuto", - "static": "#/components/schemas/VectorStoreChunkingStrategyStatic" - } - } - }, - "VectorStoreChunkingStrategyAuto": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "auto", - "default": "auto", - "description": "Strategy type, always \"auto\" for automatic chunking" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "VectorStoreChunkingStrategyAuto", - "description": "Automatic chunking strategy for vector store files." - }, - "VectorStoreChunkingStrategyStatic": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "static", - "default": "static", - "description": "Strategy type, always \"static\" for static chunking" - }, - "static": { - "$ref": "#/components/schemas/VectorStoreChunkingStrategyStaticConfig", - "description": "Configuration parameters for the static chunking strategy" - } - }, - "additionalProperties": false, - "required": [ - "type", - "static" - ], - "title": "VectorStoreChunkingStrategyStatic", - "description": "Static chunking strategy with configurable parameters." 
- }, - "VectorStoreChunkingStrategyStaticConfig": { - "type": "object", - "properties": { - "chunk_overlap_tokens": { - "type": "integer", - "default": 400, - "description": "Number of tokens to overlap between adjacent chunks" - }, - "max_chunk_size_tokens": { - "type": "integer", - "default": 800, - "description": "Maximum number of tokens per chunk, must be between 100 and 4096" - } - }, - "additionalProperties": false, - "required": [ - "chunk_overlap_tokens", - "max_chunk_size_tokens" - ], - "title": "VectorStoreChunkingStrategyStaticConfig", - "description": "Configuration for static chunking strategy." - }, - "OpenaiAttachFileToVectorStoreRequest": { - "type": "object", - "properties": { - "file_id": { - "type": "string", - "description": "The ID of the file to attach to the vector store." - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The key-value attributes stored with the file, which can be used for filtering." - }, - "chunking_strategy": { - "$ref": "#/components/schemas/VectorStoreChunkingStrategy", - "description": "The chunking strategy to use for the file." - } - }, - "additionalProperties": false, - "required": [ - "file_id" - ], - "title": "OpenaiAttachFileToVectorStoreRequest" - }, - "VectorStoreFileLastError": { - "type": "object", - "properties": { - "code": { - "oneOf": [ - { - "type": "string", - "const": "server_error" - }, - { - "type": "string", - "const": "rate_limit_exceeded" - } - ], - "description": "Error code indicating the type of failure" - }, - "message": { - "type": "string", - "description": "Human-readable error message describing the failure" - } - }, - "additionalProperties": false, - "required": [ - "code", - "message" - ], - "title": "VectorStoreFileLastError", - "description": "Error information for failed vector store file processing." 
- }, - "VectorStoreFileObject": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "Unique identifier for the file" - }, - "object": { - "type": "string", - "default": "vector_store.file", - "description": "Object type identifier, always \"vector_store.file\"" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Key-value attributes associated with the file" - }, - "chunking_strategy": { - "$ref": "#/components/schemas/VectorStoreChunkingStrategy", - "description": "Strategy used for splitting the file into chunks" - }, - "created_at": { - "type": "integer", - "description": "Timestamp when the file was added to the vector store" - }, - "last_error": { - "$ref": "#/components/schemas/VectorStoreFileLastError", - "description": "(Optional) Error information if file processing failed" - }, - "status": { - "$ref": "#/components/schemas/VectorStoreFileStatus", - "description": "Current processing status of the file" - }, - "usage_bytes": { - "type": "integer", - "default": 0, - "description": "Storage space used by this file in bytes" - }, - "vector_store_id": { - "type": "string", - "description": "ID of the vector store containing this file" - } - }, - "additionalProperties": false, - "required": [ - "id", - "object", - "attributes", - "chunking_strategy", - "created_at", - "status", - "usage_bytes", - "vector_store_id" - ], - "title": "VectorStoreFileObject", - "description": "OpenAI Vector Store File object." - }, - "VectorStoreFileStatus": { - "oneOf": [ - { - "type": "string", - "const": "completed" - }, - { - "type": "string", - "const": "in_progress" - }, - { - "type": "string", - "const": "cancelled" - }, - { - "type": "string", - "const": "failed" - } - ] - }, "OpenAIJSONSchema": { "type": "object", "properties": { @@ -9657,7 +6829,8 @@ "type": "string", "enum": [ "llm", - "embedding" + "embedding", + "rerank" ], "title": "ModelType", "description": "Enumeration of supported model types in Llama Stack." @@ -15610,2170 +12783,6 @@ "title": "VectorStoreSearchResponsePage", "description": "Paginated response from searching a vector store." }, -<<<<<<< HEAD -======= - "OpenaiUpdateVectorStoreRequest": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The name of the vector store." - }, - "expires_after": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The expiration policy for a vector store." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Set of 16 key-value pairs that can be attached to an object." 
- } - }, - "additionalProperties": false, - "title": "OpenaiUpdateVectorStoreRequest" - }, - "OpenaiUpdateVectorStoreFileRequest": { - "type": "object", - "properties": { - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The updated key-value attributes to store with the file." - } - }, - "additionalProperties": false, - "required": [ - "attributes" - ], - "title": "OpenaiUpdateVectorStoreFileRequest" - }, - "DPOAlignmentConfig": { - "type": "object", - "properties": { - "beta": { - "type": "number", - "description": "Temperature parameter for the DPO loss" - }, - "loss_type": { - "$ref": "#/components/schemas/DPOLossType", - "default": "sigmoid", - "description": "The type of loss function to use for DPO" - } - }, - "additionalProperties": false, - "required": [ - "beta", - "loss_type" - ], - "title": "DPOAlignmentConfig", - "description": "Configuration for Direct Preference Optimization (DPO) alignment." - }, - "DPOLossType": { - "type": "string", - "enum": [ - "sigmoid", - "hinge", - "ipo", - "kto_pair" - ], - "title": "DPOLossType" - }, - "DataConfig": { - "type": "object", - "properties": { - "dataset_id": { - "type": "string", - "description": "Unique identifier for the training dataset" - }, - "batch_size": { - "type": "integer", - "description": "Number of samples per training batch" - }, - "shuffle": { - "type": "boolean", - "description": "Whether to shuffle the dataset during training" - }, - "data_format": { - "$ref": "#/components/schemas/DatasetFormat", - "description": "Format of the dataset (instruct or dialog)" - }, - "validation_dataset_id": { - "type": "string", - "description": "(Optional) Unique identifier for the validation dataset" - }, - "packed": { - "type": "boolean", - "default": false, - "description": "(Optional) Whether to pack multiple samples into a single sequence for efficiency" - }, - "train_on_input": { - "type": "boolean", - "default": false, - "description": "(Optional) Whether to compute loss on input tokens as well as output tokens" - } - }, - "additionalProperties": false, - "required": [ - "dataset_id", - "batch_size", - "shuffle", - "data_format" - ], - "title": "DataConfig", - "description": "Configuration for training data and data loading." - }, - "DatasetFormat": { - "type": "string", - "enum": [ - "instruct", - "dialog" - ], - "title": "DatasetFormat", - "description": "Format of the training dataset." - }, - "EfficiencyConfig": { - "type": "object", - "properties": { - "enable_activation_checkpointing": { - "type": "boolean", - "default": false, - "description": "(Optional) Whether to use activation checkpointing to reduce memory usage" - }, - "enable_activation_offloading": { - "type": "boolean", - "default": false, - "description": "(Optional) Whether to offload activations to CPU to save GPU memory" - }, - "memory_efficient_fsdp_wrap": { - "type": "boolean", - "default": false, - "description": "(Optional) Whether to use memory-efficient FSDP wrapping" - }, - "fsdp_cpu_offload": { - "type": "boolean", - "default": false, - "description": "(Optional) Whether to offload FSDP parameters to CPU" - } - }, - "additionalProperties": false, - "title": "EfficiencyConfig", - "description": "Configuration for memory and compute efficiency optimizations." 
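A sketch of a `DPOAlignmentConfig` and the `DataConfig` it would train against, using only fields defined in the schemas above; the dataset ID and numeric values are illustrative placeholders.

```python
# DPOAlignmentConfig: both fields are required by the schema.
dpo_config = {
    "beta": 0.1,             # temperature parameter for the DPO loss
    "loss_type": "sigmoid",  # DPOLossType: sigmoid, hinge, ipo, or kto_pair
}

# DataConfig: dataset_id, batch_size, shuffle, and data_format are required.
data_config = {
    "dataset_id": "my-preference-dataset",  # hypothetical dataset ID
    "batch_size": 8,
    "shuffle": True,
    "data_format": "dialog",   # DatasetFormat: instruct or dialog
    "packed": False,           # optional: pack samples into one sequence
    "train_on_input": False,   # optional: compute loss on input tokens too
}
```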
- }, - "OptimizerConfig": { - "type": "object", - "properties": { - "optimizer_type": { - "$ref": "#/components/schemas/OptimizerType", - "description": "Type of optimizer to use (adam, adamw, or sgd)" - }, - "lr": { - "type": "number", - "description": "Learning rate for the optimizer" - }, - "weight_decay": { - "type": "number", - "description": "Weight decay coefficient for regularization" - }, - "num_warmup_steps": { - "type": "integer", - "description": "Number of steps for learning rate warmup" - } - }, - "additionalProperties": false, - "required": [ - "optimizer_type", - "lr", - "weight_decay", - "num_warmup_steps" - ], - "title": "OptimizerConfig", - "description": "Configuration parameters for the optimization algorithm." - }, - "OptimizerType": { - "type": "string", - "enum": [ - "adam", - "adamw", - "sgd" - ], - "title": "OptimizerType", - "description": "Available optimizer algorithms for training." - }, - "TrainingConfig": { - "type": "object", - "properties": { - "n_epochs": { - "type": "integer", - "description": "Number of training epochs to run" - }, - "max_steps_per_epoch": { - "type": "integer", - "default": 1, - "description": "Maximum number of steps to run per epoch" - }, - "gradient_accumulation_steps": { - "type": "integer", - "default": 1, - "description": "Number of steps to accumulate gradients before updating" - }, - "max_validation_steps": { - "type": "integer", - "default": 1, - "description": "(Optional) Maximum number of validation steps per epoch" - }, - "data_config": { - "$ref": "#/components/schemas/DataConfig", - "description": "(Optional) Configuration for data loading and formatting" - }, - "optimizer_config": { - "$ref": "#/components/schemas/OptimizerConfig", - "description": "(Optional) Configuration for the optimization algorithm" - }, - "efficiency_config": { - "$ref": "#/components/schemas/EfficiencyConfig", - "description": "(Optional) Configuration for memory and compute optimizations" - }, - "dtype": { - "type": "string", - "default": "bf16", - "description": "(Optional) Data type for model parameters (bf16, fp16, fp32)" - } - }, - "additionalProperties": false, - "required": [ - "n_epochs", - "max_steps_per_epoch", - "gradient_accumulation_steps" - ], - "title": "TrainingConfig", - "description": "Comprehensive configuration for the training process." - }, - "PreferenceOptimizeRequest": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string", - "description": "The UUID of the job to create." - }, - "finetuned_model": { - "type": "string", - "description": "The model to fine-tune." - }, - "algorithm_config": { - "$ref": "#/components/schemas/DPOAlignmentConfig", - "description": "The algorithm configuration." - }, - "training_config": { - "$ref": "#/components/schemas/TrainingConfig", - "description": "The training configuration." - }, - "hyperparam_search_config": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The hyperparam search configuration." - }, - "logger_config": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The logger configuration." 
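A sketch of an `OptimizerConfig` nested inside a `TrainingConfig`, mirroring the required/optional split in the schemas above; the hyperparameter values are placeholders, not recommendations.

```python
# OptimizerConfig: all four fields are required by the schema.
optimizer_config = {
    "optimizer_type": "adamw",  # OptimizerType: adam, adamw, or sgd
    "lr": 1e-5,
    "weight_decay": 0.01,
    "num_warmup_steps": 100,
}

# TrainingConfig: n_epochs, max_steps_per_epoch, and
# gradient_accumulation_steps are required; the rest are optional.
training_config = {
    "n_epochs": 1,
    "max_steps_per_epoch": 100,
    "gradient_accumulation_steps": 4,
    "optimizer_config": optimizer_config,
    "dtype": "bf16",  # optional: bf16, fp16, or fp32
}
```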
- } - }, - "additionalProperties": false, - "required": [ - "job_uuid", - "finetuned_model", - "algorithm_config", - "training_config", - "hyperparam_search_config", - "logger_config" - ], - "title": "PreferenceOptimizeRequest" - }, - "PostTrainingJob": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "job_uuid" - ], - "title": "PostTrainingJob" - }, - "DefaultRAGQueryGeneratorConfig": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "default", - "default": "default", - "description": "Type of query generator, always 'default'" - }, - "separator": { - "type": "string", - "default": " ", - "description": "String separator used to join query terms" - } - }, - "additionalProperties": false, - "required": [ - "type", - "separator" - ], - "title": "DefaultRAGQueryGeneratorConfig", - "description": "Configuration for the default RAG query generator." - }, - "LLMRAGQueryGeneratorConfig": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "llm", - "default": "llm", - "description": "Type of query generator, always 'llm'" - }, - "model": { - "type": "string", - "description": "Name of the language model to use for query generation" - }, - "template": { - "type": "string", - "description": "Template string for formatting the query generation prompt" - } - }, - "additionalProperties": false, - "required": [ - "type", - "model", - "template" - ], - "title": "LLMRAGQueryGeneratorConfig", - "description": "Configuration for the LLM-based RAG query generator." - }, - "RAGQueryConfig": { - "type": "object", - "properties": { - "query_generator_config": { - "$ref": "#/components/schemas/RAGQueryGeneratorConfig", - "description": "Configuration for the query generator." - }, - "max_tokens_in_context": { - "type": "integer", - "default": 4096, - "description": "Maximum number of tokens in the context." - }, - "max_chunks": { - "type": "integer", - "default": 5, - "description": "Maximum number of chunks to retrieve." - }, - "chunk_template": { - "type": "string", - "default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n", - "description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\"" - }, - "mode": { - "$ref": "#/components/schemas/RAGSearchMode", - "default": "vector", - "description": "Search mode for retrieval—either \"vector\", \"keyword\", or \"hybrid\". Default \"vector\"." - }, - "ranker": { - "$ref": "#/components/schemas/Ranker", - "description": "Configuration for the ranker to use in hybrid search. Defaults to RRF ranker." - } - }, - "additionalProperties": false, - "required": [ - "query_generator_config", - "max_tokens_in_context", - "max_chunks", - "chunk_template" - ], - "title": "RAGQueryConfig", - "description": "Configuration for the RAG query generation." 
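A sketch of how the `chunk_template` from `RAGQueryConfig` above expands for each retrieved chunk. `Chunk` here is a stand-in dataclass rather than the actual llama_stack type; the template string and its placeholders are taken verbatim from the schema.

```python
from dataclasses import dataclass, field

@dataclass
class Chunk:
    # Stand-in for the real Chunk type: only the fields the template needs.
    content: str
    metadata: dict = field(default_factory=dict)

# Default chunk_template from the RAGQueryConfig schema.
template = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"

chunks = [Chunk("Paris is the capital of France.", {"doc": "geo.txt"})]
context = "".join(
    template.format(index=i + 1, chunk=c, metadata=c.metadata)  # {index} is 1-based
    for i, c in enumerate(chunks)
)
print(context)
```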
- }, - "RAGQueryGeneratorConfig": { - "oneOf": [ - { - "$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig" - }, - { - "$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "default": "#/components/schemas/DefaultRAGQueryGeneratorConfig", - "llm": "#/components/schemas/LLMRAGQueryGeneratorConfig" - } - } - }, - "RAGSearchMode": { - "type": "string", - "enum": [ - "vector", - "keyword", - "hybrid" - ], - "title": "RAGSearchMode", - "description": "Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search for semantic matching - KEYWORD: Uses keyword-based search for exact matching - HYBRID: Combines both vector and keyword search for better results" - }, - "RRFRanker": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "rrf", - "default": "rrf", - "description": "The type of ranker, always \"rrf\"" - }, - "impact_factor": { - "type": "number", - "default": 60.0, - "description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0" - } - }, - "additionalProperties": false, - "required": [ - "type", - "impact_factor" - ], - "title": "RRFRanker", - "description": "Reciprocal Rank Fusion (RRF) ranker configuration." - }, - "Ranker": { - "oneOf": [ - { - "$ref": "#/components/schemas/RRFRanker" - }, - { - "$ref": "#/components/schemas/WeightedRanker" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "rrf": "#/components/schemas/RRFRanker", - "weighted": "#/components/schemas/WeightedRanker" - } - } - }, - "WeightedRanker": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "weighted", - "default": "weighted", - "description": "The type of ranker, always \"weighted\"" - }, - "alpha": { - "type": "number", - "default": 0.5, - "description": "Weight factor between 0 and 1. 0 means only use keyword scores, 1 means only use vector scores, values in between blend both scores." - } - }, - "additionalProperties": false, - "required": [ - "type", - "alpha" - ], - "title": "WeightedRanker", - "description": "Weighted ranker configuration that combines vector and keyword scores." 
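A sketch of the two hybrid-search rankers described above. For RRF this assumes the standard reciprocal-rank-fusion form, `1 / (impact_factor + rank)`, with `impact_factor` playing the usual smoothing-constant role; the weighted ranker blends scores as `alpha * vector + (1 - alpha) * keyword`, matching the `alpha` semantics in the schema. This mirrors the schema fields, not the provider's internal implementation.

```python
def rrf_score(ranks: list[int], impact_factor: float = 60.0) -> float:
    # ranks are the 1-based positions of the same document in each result list
    # (e.g. one rank from vector search, one from keyword search).
    return sum(1.0 / (impact_factor + r) for r in ranks)

def weighted_score(vector: float, keyword: float, alpha: float = 0.5) -> float:
    # alpha=0 -> keyword scores only, alpha=1 -> vector scores only.
    return alpha * vector + (1 - alpha) * keyword

print(rrf_score([1, 3]))         # document ranked 1st by one search, 3rd by the other
print(weighted_score(0.9, 0.4))  # blend of vector and keyword scores
```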
- }, - "QueryRequest": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The query content to search for in the indexed documents" - }, - "vector_db_ids": { - "type": "array", - "items": { - "type": "string" - }, - "description": "List of vector database IDs to search within" - }, - "query_config": { - "$ref": "#/components/schemas/RAGQueryConfig", - "description": "(Optional) Configuration parameters for the query operation" - } - }, - "additionalProperties": false, - "required": [ - "content", - "vector_db_ids" - ], - "title": "QueryRequest" - }, - "RAGQueryResult": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "(Optional) The retrieved content from the query" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Additional metadata about the query result" - } - }, - "additionalProperties": false, - "required": [ - "metadata" - ], - "title": "RAGQueryResult", - "description": "Result of a RAG query containing retrieved content and metadata." - }, - "QueryChunksRequest": { - "type": "object", - "properties": { - "vector_db_id": { - "type": "string", - "description": "The identifier of the vector database to query." - }, - "query": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The query to search for." - }, - "params": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The parameters of the query." - } - }, - "additionalProperties": false, - "required": [ - "vector_db_id", - "query" - ], - "title": "QueryChunksRequest" - }, - "QueryChunksResponse": { - "type": "object", - "properties": { - "chunks": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Chunk" - }, - "description": "List of content chunks returned from the query" - }, - "scores": { - "type": "array", - "items": { - "type": "number" - }, - "description": "Relevance scores corresponding to each returned chunk" - } - }, - "additionalProperties": false, - "required": [ - "chunks", - "scores" - ], - "title": "QueryChunksResponse", - "description": "Response from querying chunks in a vector database." - }, - "QueryMetricsRequest": { - "type": "object", - "properties": { - "start_time": { - "type": "integer", - "description": "The start time of the metric to query." - }, - "end_time": { - "type": "integer", - "description": "The end time of the metric to query." - }, - "granularity": { - "type": "string", - "description": "The granularity of the metric to query." - }, - "query_type": { - "type": "string", - "enum": [ - "range", - "instant" - ], - "description": "The type of query to perform." 
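A sketch of a `QueryChunksRequest` body and of how the parallel `chunks`/`scores` arrays in `QueryChunksResponse` line up. The top-level field names come from the schemas above; the `params` keys are an assumption, since the schema leaves them as a free-form mapping, and the IDs and scores are hypothetical.

```python
request = {
    "vector_db_id": "my-vector-db",                  # hypothetical DB ID
    "query": "What is the capital of France?",
    "params": {"max_chunks": 5, "mode": "vector"},   # free-form per the schema
}

# QueryChunksResponse pairs chunks with relevance scores by position.
response = {
    "chunks": [{"content": "Paris is the capital of France."}],
    "scores": [0.87],
}

for chunk, score in zip(response["chunks"], response["scores"]):
    print(f"{score:.2f}  {chunk['content']}")
```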
- }, - "label_matchers": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The name of the label to match" - }, - "value": { - "type": "string", - "description": "The value to match against" - }, - "operator": { - "type": "string", - "enum": [ - "=", - "!=", - "=~", - "!~" - ], - "description": "The comparison operator to use for matching", - "default": "=" - } - }, - "additionalProperties": false, - "required": [ - "name", - "value", - "operator" - ], - "title": "MetricLabelMatcher", - "description": "A matcher for filtering metrics by label values." - }, - "description": "The label matchers to apply to the metric." - } - }, - "additionalProperties": false, - "required": [ - "start_time", - "query_type" - ], - "title": "QueryMetricsRequest" - }, - "MetricDataPoint": { - "type": "object", - "properties": { - "timestamp": { - "type": "integer", - "description": "Unix timestamp when the metric value was recorded" - }, - "value": { - "type": "number", - "description": "The numeric value of the metric at this timestamp" - }, - "unit": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "timestamp", - "value", - "unit" - ], - "title": "MetricDataPoint", - "description": "A single data point in a metric time series." - }, - "MetricLabel": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The name of the label" - }, - "value": { - "type": "string", - "description": "The value of the label" - } - }, - "additionalProperties": false, - "required": [ - "name", - "value" - ], - "title": "MetricLabel", - "description": "A label associated with a metric." - }, - "MetricSeries": { - "type": "object", - "properties": { - "metric": { - "type": "string", - "description": "The name of the metric" - }, - "labels": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricLabel" - }, - "description": "List of labels associated with this metric series" - }, - "values": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricDataPoint" - }, - "description": "List of data points in chronological order" - } - }, - "additionalProperties": false, - "required": [ - "metric", - "labels", - "values" - ], - "title": "MetricSeries", - "description": "A time series of metric data points." - }, - "QueryMetricsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricSeries" - }, - "description": "List of metric series matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QueryMetricsResponse", - "description": "Response containing metric time series data." - }, - "QueryCondition": { - "type": "object", - "properties": { - "key": { - "type": "string", - "description": "The attribute key to filter on" - }, - "op": { - "$ref": "#/components/schemas/QueryConditionOp", - "description": "The comparison operator to apply" - }, - "value": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ], - "description": "The value to compare against" - } - }, - "additionalProperties": false, - "required": [ - "key", - "op", - "value" - ], - "title": "QueryCondition", - "description": "A condition for filtering query results." 
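A sketch of a `QueryMetricsRequest` with one label matcher, using the enum values defined above (`query_type`: range or instant; `operator`: `=`, `!=`, `=~`, `!~`). The label name, the regex value, and the `"1m"` granularity string are assumptions, since the schema leaves those free-form.

```python
import time

now = int(time.time())
metrics_request = {
    "start_time": now - 3600,  # required: one hour ago
    "end_time": now,           # optional
    "granularity": "1m",       # optional free-form string; "1m" is a guess
    "query_type": "range",     # required: "range" or "instant"
    "label_matchers": [
        # MetricLabelMatcher: "=~" is the regex-match operator from the enum.
        {"name": "model_id", "value": "llama-3.*", "operator": "=~"},
    ],
}
```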
- }, - "QueryConditionOp": { - "type": "string", - "enum": [ - "eq", - "ne", - "gt", - "lt" - ], - "title": "QueryConditionOp", - "description": "Comparison operators for query conditions." - }, - "QuerySpansRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the spans." - }, - "attributes_to_return": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to return in the spans." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "required": [ - "attribute_filters", - "attributes_to_return" - ], - "title": "QuerySpansRequest" - }, - "QuerySpansResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Span" - }, - "description": "List of spans matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QuerySpansResponse", - "description": "Response containing a list of spans." - }, - "QueryTracesRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the traces." - }, - "limit": { - "type": "integer", - "description": "The limit of traces to return." - }, - "offset": { - "type": "integer", - "description": "The offset of the traces to return." - }, - "order_by": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The order by of the traces to return." - } - }, - "additionalProperties": false, - "title": "QueryTracesRequest" - }, - "QueryTracesResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Trace" - }, - "description": "List of traces matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QueryTracesResponse", - "description": "Response containing a list of traces." - }, - "RegisterBenchmarkRequest": { - "type": "object", - "properties": { - "benchmark_id": { - "type": "string", - "description": "The ID of the benchmark to register." - }, - "dataset_id": { - "type": "string", - "description": "The ID of the dataset to use for the benchmark." - }, - "scoring_functions": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The scoring functions to use for the benchmark." - }, - "provider_benchmark_id": { - "type": "string", - "description": "The ID of the provider benchmark to use for the benchmark." - }, - "provider_id": { - "type": "string", - "description": "The ID of the provider to use for the benchmark." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The metadata to use for the benchmark." 
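A sketch of a `QueryTracesRequest` filtering on a `QueryCondition`, built only from fields in the schemas above; the attribute key, its value, and the ordering column are hypothetical, and the `op` values come from `QueryConditionOp` (eq, ne, gt, lt).

```python
traces_request = {
    "attribute_filters": [
        # QueryCondition: key, op, and value are all required.
        {"key": "session_id", "op": "eq", "value": "abc-123"},
    ],
    "limit": 20,                # optional: max traces to return
    "offset": 0,                # optional: pagination offset
    "order_by": ["start_time"], # optional: hypothetical ordering column
}
```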
- } - }, - "additionalProperties": false, - "required": [ - "benchmark_id", - "dataset_id", - "scoring_functions" - ], - "title": "RegisterBenchmarkRequest" - }, - "RegisterDatasetRequest": { - "type": "object", - "properties": { - "purpose": { - "type": "string", - "enum": [ - "post-training/messages", - "eval/question-answer", - "eval/messages-answer" - ], - "description": "The purpose of the dataset. One of: - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } - \"eval/question-answer\": The dataset contains a question column and an answer column for evaluation. { \"question\": \"What is the capital of France?\", \"answer\": \"Paris\" } - \"eval/messages-answer\": The dataset contains a messages column with list of messages and an answer column for evaluation. { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, my name is John Doe.\"}, {\"role\": \"assistant\", \"content\": \"Hello, John Doe. How can I help you today?\"}, {\"role\": \"user\", \"content\": \"What's my name?\"}, ], \"answer\": \"John Doe\" }" - }, - "source": { - "$ref": "#/components/schemas/DataSource", - "description": "The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"data:csv;base64,{base64_content}\" } - { \"type\": \"uri\", \"uri\": \"huggingface://llamastack/simpleqa?split=train\" } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The metadata for the dataset. - E.g. {\"description\": \"My dataset\"}." - }, - "dataset_id": { - "type": "string", - "description": "The ID of the dataset. If not provided, an ID will be generated." - } - }, - "additionalProperties": false, - "required": [ - "purpose", - "source" - ], - "title": "RegisterDatasetRequest" - }, - "RegisterModelRequest": { - "type": "object", - "properties": { - "model_id": { - "type": "string", - "description": "The identifier of the model to register." - }, - "provider_model_id": { - "type": "string", - "description": "The identifier of the model in the provider." - }, - "provider_id": { - "type": "string", - "description": "The identifier of the provider." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Any additional metadata for this model." - }, - "model_type": { - "$ref": "#/components/schemas/ModelType", - "description": "The type of model to register." 
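A sketch of a `RegisterModelRequest` body for a reranking model, exercising the new `rerank` member of `ModelType` that this patch adds; only `model_id` is required by the schema. The model and provider identifiers are illustrative, not a claim about the exact names the NVIDIA provider registers.

```python
register_model_request = {
    "model_id": "nvidia/llama-3.2-nv-rerankqa-1b-v2",  # illustrative model ID
    "provider_id": "nvidia",                           # illustrative provider ID
    "model_type": "rerank",  # ModelType was previously limited to llm | embedding
    "metadata": {},
}
```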
- } - }, - "additionalProperties": false, - "required": [ - "model_id" - ], - "title": "RegisterModelRequest" - }, - "RegisterScoringFunctionRequest": { - "type": "object", - "properties": { - "scoring_fn_id": { - "type": "string", - "description": "The ID of the scoring function to register." - }, - "description": { - "type": "string", - "description": "The description of the scoring function." - }, - "return_type": { - "$ref": "#/components/schemas/ParamType", - "description": "The return type of the scoring function." - }, - "provider_scoring_fn_id": { - "type": "string", - "description": "The ID of the provider scoring function to use for the scoring function." - }, - "provider_id": { - "type": "string", - "description": "The ID of the provider to use for the scoring function." - }, - "params": { - "$ref": "#/components/schemas/ScoringFnParams", - "description": "The parameters for the scoring function for benchmark eval, these can be overridden for app eval." - } - }, - "additionalProperties": false, - "required": [ - "scoring_fn_id", - "description", - "return_type" - ], - "title": "RegisterScoringFunctionRequest" - }, - "RegisterShieldRequest": { - "type": "object", - "properties": { - "shield_id": { - "type": "string", - "description": "The identifier of the shield to register." - }, - "provider_shield_id": { - "type": "string", - "description": "The identifier of the shield in the provider." - }, - "provider_id": { - "type": "string", - "description": "The identifier of the provider." - }, - "params": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The parameters of the shield." - } - }, - "additionalProperties": false, - "required": [ - "shield_id" - ], - "title": "RegisterShieldRequest" - }, - "RegisterToolGroupRequest": { - "type": "object", - "properties": { - "toolgroup_id": { - "type": "string", - "description": "The ID of the tool group to register." - }, - "provider_id": { - "type": "string", - "description": "The ID of the provider to use for the tool group." - }, - "mcp_endpoint": { - "$ref": "#/components/schemas/URL", - "description": "The MCP endpoint to use for the tool group." - }, - "args": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "A dictionary of arguments to pass to the tool group." - } - }, - "additionalProperties": false, - "required": [ - "toolgroup_id", - "provider_id" - ], - "title": "RegisterToolGroupRequest" - }, - "RegisterVectorDbRequest": { - "type": "object", - "properties": { - "vector_db_id": { - "type": "string", - "description": "The identifier of the vector database to register." - }, - "embedding_model": { - "type": "string", - "description": "The embedding model to use." - }, - "embedding_dimension": { - "type": "integer", - "description": "The dimension of the embedding model." - }, - "provider_id": { - "type": "string", - "description": "The identifier of the provider." - }, - "vector_db_name": { - "type": "string", - "description": "The name of the vector database." - }, - "provider_vector_db_id": { - "type": "string", - "description": "The identifier of the vector database in the provider." 
- } - }, - "additionalProperties": false, - "required": [ - "vector_db_id", - "embedding_model" - ], - "title": "RegisterVectorDbRequest" - }, - "RerankRequest": { - "type": "object", - "properties": { - "model": { - "type": "string", - "description": "The identifier of the reranking model to use. The model must be a reranking model registered with Llama Stack and available via the /models endpoint." - }, - "query": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam" - }, - { - "$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam" - } - ], - "description": "The search query to rank items against. Can be a string, text content part, or image content part. The input must not exceed the model's max input token length." - }, - "items": { - "type": "array", - "items": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam" - }, - { - "$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam" - } - ] - }, - "description": "List of items to rerank. Each item can be a string, text content part, or image content part. Each input must not exceed the model's max input token length." - }, - "max_num_results": { - "type": "integer", - "description": "(Optional) Maximum number of results to return. Default: returns all." - } - }, - "additionalProperties": false, - "required": [ - "model", - "query", - "items" - ], - "title": "RerankRequest" - }, - "RerankData": { - "type": "object", - "properties": { - "index": { - "type": "integer", - "description": "The original index of the document in the input list" - }, - "relevance_score": { - "type": "number", - "description": "The relevance score from the model output. Values are inverted when applicable so that higher scores indicate greater relevance." - } - }, - "additionalProperties": false, - "required": [ - "index", - "relevance_score" - ], - "title": "RerankData", - "description": "A single rerank result from a reranking response." - }, - "RerankResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/RerankData" - }, - "description": "List of rerank result objects, sorted by relevance score (descending)" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "RerankResponse", - "description": "Response from a reranking request." - }, - "ResumeAgentTurnRequest": { - "type": "object", - "properties": { - "tool_responses": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ToolResponse" - }, - "description": "The tool call responses to resume the turn with." - }, - "stream": { - "type": "boolean", - "description": "Whether to stream the response." - } - }, - "additionalProperties": false, - "required": [ - "tool_responses" - ], - "title": "ResumeAgentTurnRequest" - }, - "RunEvalRequest": { - "type": "object", - "properties": { - "benchmark_config": { - "$ref": "#/components/schemas/BenchmarkConfig", - "description": "The configuration for the benchmark." - } - }, - "additionalProperties": false, - "required": [ - "benchmark_config" - ], - "title": "RunEvalRequest" - }, - "RunModerationRequest": { - "type": "object", - "properties": { - "input": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ], - "description": "Input (or inputs) to classify. 
Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models." - }, - "model": { - "type": "string", - "description": "The content moderation model you would like to use." - } - }, - "additionalProperties": false, - "required": [ - "input", - "model" - ], - "title": "RunModerationRequest" - }, - "ModerationObject": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The unique identifier for the moderation request." - }, - "model": { - "type": "string", - "description": "The model used to generate the moderation results." - }, - "results": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ModerationObjectResults" - }, - "description": "A list of moderation objects" - } - }, - "additionalProperties": false, - "required": [ - "id", - "model", - "results" - ], - "title": "ModerationObject", - "description": "A moderation object." - }, - "ModerationObjectResults": { - "type": "object", - "properties": { - "flagged": { - "type": "boolean", - "description": "Whether any of the below categories are flagged." - }, - "categories": { - "type": "object", - "additionalProperties": { - "type": "boolean" - }, - "description": "A list of the categories, and whether they are flagged or not." - }, - "category_applied_input_types": { - "type": "object", - "additionalProperties": { - "type": "array", - "items": { - "type": "string" - } - }, - "description": "A list of the categories along with the input type(s) that the score applies to." - }, - "category_scores": { - "type": "object", - "additionalProperties": { - "type": "number" - }, - "description": "A list of the categories along with their scores as predicted by model." - }, - "user_message": { - "type": "string" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "flagged", - "metadata" - ], - "title": "ModerationObjectResults", - "description": "A moderation object." - }, - "RunShieldRequest": { - "type": "object", - "properties": { - "shield_id": { - "type": "string", - "description": "The identifier of the shield to run." - }, - "messages": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Message" - }, - "description": "The messages to run the shield on." - }, - "params": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The parameters of the shield." - } - }, - "additionalProperties": false, - "required": [ - "shield_id", - "messages", - "params" - ], - "title": "RunShieldRequest" - }, - "RunShieldResponse": { - "type": "object", - "properties": { - "violation": { - "$ref": "#/components/schemas/SafetyViolation", - "description": "(Optional) Safety violation detected by the shield, if any" - } - }, - "additionalProperties": false, - "title": "RunShieldResponse", - "description": "Response from running a safety shield." - }, - "SaveSpansToDatasetRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the spans." 
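A sketch of the rerank API this patch introduces, using only fields from the `RerankRequest`/`RerankData`/`RerankResponse` schemas above. The endpoint path and transport are not shown in this hunk, so this sticks to payload shape; the model ID, documents, and scores are illustrative.

```python
rerank_request = {
    "model": "nvidia/llama-3.2-nv-rerankqa-1b-v2",  # a registered rerank model
    "query": "What is the capital of France?",      # string, text, or image part
    "items": [
        "Paris is the capital of France.",
        "The Eiffel Tower is in Paris.",
        "Berlin is the capital of Germany.",
    ],
    "max_num_results": 2,  # optional; omitting it returns all items
}

# RerankResponse.data is sorted by relevance_score (descending); each
# RerankData.index points back into the original `items` list.
rerank_response = {
    "data": [
        {"index": 0, "relevance_score": 0.98},
        {"index": 2, "relevance_score": 0.31},
    ]
}

for result in rerank_response["data"]:
    print(result["relevance_score"], rerank_request["items"][result["index"]])
```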
- }, - "attributes_to_save": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to save to the dataset." - }, - "dataset_id": { - "type": "string", - "description": "The ID of the dataset to save the spans to." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "required": [ - "attribute_filters", - "attributes_to_save", - "dataset_id" - ], - "title": "SaveSpansToDatasetRequest" - }, - "ScoreRequest": { - "type": "object", - "properties": { - "input_rows": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "description": "The rows to score." - }, - "scoring_functions": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "$ref": "#/components/schemas/ScoringFnParams" - }, - { - "type": "null" - } - ] - }, - "description": "The scoring functions to use for the scoring." - } - }, - "additionalProperties": false, - "required": [ - "input_rows", - "scoring_functions" - ], - "title": "ScoreRequest" - }, - "ScoreResponse": { - "type": "object", - "properties": { - "results": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/ScoringResult" - }, - "description": "A map of scoring function name to ScoringResult." - } - }, - "additionalProperties": false, - "required": [ - "results" - ], - "title": "ScoreResponse", - "description": "The response from scoring." - }, - "ScoreBatchRequest": { - "type": "object", - "properties": { - "dataset_id": { - "type": "string", - "description": "The ID of the dataset to score." - }, - "scoring_functions": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "$ref": "#/components/schemas/ScoringFnParams" - }, - { - "type": "null" - } - ] - }, - "description": "The scoring functions to use for the scoring." - }, - "save_results_dataset": { - "type": "boolean", - "description": "Whether to save the results to a dataset." - } - }, - "additionalProperties": false, - "required": [ - "dataset_id", - "scoring_functions", - "save_results_dataset" - ], - "title": "ScoreBatchRequest" - }, - "ScoreBatchResponse": { - "type": "object", - "properties": { - "dataset_id": { - "type": "string", - "description": "(Optional) The identifier of the dataset that was scored" - }, - "results": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/ScoringResult" - }, - "description": "A map of scoring function name to ScoringResult" - } - }, - "additionalProperties": false, - "required": [ - "results" - ], - "title": "ScoreBatchResponse", - "description": "Response from batch scoring operations on datasets." - }, - "SetDefaultVersionRequest": { - "type": "object", - "properties": { - "version": { - "type": "integer", - "description": "The version to set as default." 
- } - }, - "additionalProperties": false, - "required": [ - "version" - ], - "title": "SetDefaultVersionRequest" - }, - "AlgorithmConfig": { - "oneOf": [ - { - "$ref": "#/components/schemas/LoraFinetuningConfig" - }, - { - "$ref": "#/components/schemas/QATFinetuningConfig" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "LoRA": "#/components/schemas/LoraFinetuningConfig", - "QAT": "#/components/schemas/QATFinetuningConfig" - } - } - }, - "LoraFinetuningConfig": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "LoRA", - "default": "LoRA", - "description": "Algorithm type identifier, always \"LoRA\"" - }, - "lora_attn_modules": { - "type": "array", - "items": { - "type": "string" - }, - "description": "List of attention module names to apply LoRA to" - }, - "apply_lora_to_mlp": { - "type": "boolean", - "description": "Whether to apply LoRA to MLP layers" - }, - "apply_lora_to_output": { - "type": "boolean", - "description": "Whether to apply LoRA to output projection layers" - }, - "rank": { - "type": "integer", - "description": "Rank of the LoRA adaptation (lower rank = fewer parameters)" - }, - "alpha": { - "type": "integer", - "description": "LoRA scaling parameter that controls adaptation strength" - }, - "use_dora": { - "type": "boolean", - "default": false, - "description": "(Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)" - }, - "quantize_base": { - "type": "boolean", - "default": false, - "description": "(Optional) Whether to quantize the base model weights" - } - }, - "additionalProperties": false, - "required": [ - "type", - "lora_attn_modules", - "apply_lora_to_mlp", - "apply_lora_to_output", - "rank", - "alpha" - ], - "title": "LoraFinetuningConfig", - "description": "Configuration for Low-Rank Adaptation (LoRA) fine-tuning." - }, - "QATFinetuningConfig": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "QAT", - "default": "QAT", - "description": "Algorithm type identifier, always \"QAT\"" - }, - "quantizer_name": { - "type": "string", - "description": "Name of the quantization algorithm to use" - }, - "group_size": { - "type": "integer", - "description": "Size of groups for grouped quantization" - } - }, - "additionalProperties": false, - "required": [ - "type", - "quantizer_name", - "group_size" - ], - "title": "QATFinetuningConfig", - "description": "Configuration for Quantization-Aware Training (QAT) fine-tuning." - }, - "SupervisedFineTuneRequest": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string", - "description": "The UUID of the job to create." - }, - "training_config": { - "$ref": "#/components/schemas/TrainingConfig", - "description": "The training configuration." - }, - "hyperparam_search_config": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The hyperparam search configuration." - }, - "logger_config": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The logger configuration." - }, - "model": { - "type": "string", - "description": "The model to fine-tune." 
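A sketch of a `LoraFinetuningConfig` with the `type` discriminator and required fields from the schema above. The attention-module names follow common Llama projection naming but are assumptions here, and the rank/alpha values are placeholders.

```python
lora_config = {
    "type": "LoRA",  # discriminator value selecting LoraFinetuningConfig
    "lora_attn_modules": ["q_proj", "k_proj", "v_proj", "o_proj"],  # assumed names
    "apply_lora_to_mlp": True,
    "apply_lora_to_output": False,
    "rank": 8,    # lower rank = fewer trainable parameters
    "alpha": 16,  # scaling parameter controlling adaptation strength
    "use_dora": False,       # optional: weight-decomposed LoRA
    "quantize_base": False,  # optional: quantize base model weights
}
```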
- }, - "checkpoint_dir": { - "type": "string", - "description": "The directory to save checkpoint(s) to." - }, - "algorithm_config": { - "$ref": "#/components/schemas/AlgorithmConfig", - "description": "The algorithm configuration." - } - }, - "additionalProperties": false, - "required": [ - "job_uuid", - "training_config", - "hyperparam_search_config", - "logger_config" - ], - "title": "SupervisedFineTuneRequest" - }, - "SyntheticDataGenerateRequest": { - "type": "object", - "properties": { - "dialogs": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Message" - }, - "description": "List of conversation messages to use as input for synthetic data generation" - }, - "filtering_function": { - "type": "string", - "enum": [ - "none", - "random", - "top_k", - "top_p", - "top_k_top_p", - "sigmoid" - ], - "description": "Type of filtering to apply to generated synthetic data samples" - }, - "model": { - "type": "string", - "description": "(Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint" - } - }, - "additionalProperties": false, - "required": [ - "dialogs", - "filtering_function" - ], - "title": "SyntheticDataGenerateRequest" - }, - "SyntheticDataGenerationResponse": { - "type": "object", - "properties": { - "synthetic_data": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "description": "List of generated synthetic data samples that passed the filtering criteria" - }, - "statistics": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Statistical information about the generation process and filtering results" - } - }, - "additionalProperties": false, - "required": [ - "synthetic_data" - ], - "title": "SyntheticDataGenerationResponse", - "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold." - }, - "UpdatePromptRequest": { - "type": "object", - "properties": { - "prompt": { - "type": "string", - "description": "The updated prompt text content." - }, - "version": { - "type": "integer", - "description": "The current version of the prompt being updated." - }, - "variables": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Updated list of variable names that can be used in the prompt template." - }, - "set_as_default": { - "type": "boolean", - "description": "Set the new version as the default (default=True)." - } - }, - "additionalProperties": false, - "required": [ - "prompt", - "version", - "set_as_default" - ], - "title": "UpdatePromptRequest" - }, ->>>>>>> f7acfa0f (Add rerank API for NVIDIA Inference Provider) "VersionInfo": { "type": "object", "properties": { diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index ec0409849..566ac7de9 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -3634,2130 +3634,6 @@ components: title: OpenAIUserMessageParam description: >- A message from the user in an OpenAI-compatible chat completion request. 
-<<<<<<< HEAD -======= - OpenAICompletionWithInputMessages: - type: object - properties: - id: - type: string - description: The ID of the chat completion - choices: - type: array - items: - $ref: '#/components/schemas/OpenAIChoice' - description: List of choices - object: - type: string - const: chat.completion - default: chat.completion - description: >- - The object type, which will be "chat.completion" - created: - type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created - model: - type: string - description: >- - The model that was used to generate the chat completion - input_messages: - type: array - items: - $ref: '#/components/schemas/OpenAIMessageParam' - additionalProperties: false - required: - - id - - choices - - object - - created - - model - - input_messages - title: OpenAICompletionWithInputMessages - DataSource: - oneOf: - - $ref: '#/components/schemas/URIDataSource' - - $ref: '#/components/schemas/RowsDataSource' - discriminator: - propertyName: type - mapping: - uri: '#/components/schemas/URIDataSource' - rows: '#/components/schemas/RowsDataSource' - Dataset: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - enum: - - model - - shield - - vector_db - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: dataset - default: dataset - description: >- - Type of resource, always 'dataset' for datasets - purpose: - type: string - enum: - - post-training/messages - - eval/question-answer - - eval/messages-answer - description: >- - Purpose of the dataset indicating its intended use - source: - $ref: '#/components/schemas/DataSource' - description: >- - Data source configuration for the dataset - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the dataset - additionalProperties: false - required: - - identifier - - provider_id - - type - - purpose - - source - - metadata - title: Dataset - description: >- - Dataset resource for storing and accessing training or evaluation data. - RowsDataSource: - type: object - properties: - type: - type: string - const: rows - default: rows - rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user", - "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, - world!"}]} ] - additionalProperties: false - required: - - type - - rows - title: RowsDataSource - description: A dataset stored in rows. - URIDataSource: - type: object - properties: - type: - type: string - const: uri - default: uri - uri: - type: string - description: >- - The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl" - - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}" - additionalProperties: false - required: - - type - - uri - title: URIDataSource - description: >- - A dataset that can be obtained from a URI. 
- Model: - type: object - properties: - identifier: - type: string - description: >- - Unique identifier for this resource in llama stack - provider_resource_id: - type: string - description: >- - Unique identifier for this resource in the provider - provider_id: - type: string - description: >- - ID of the provider that owns this resource - type: - type: string - enum: - - model - - shield - - vector_db - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: model - default: model - description: >- - The resource type, always 'model' for model resources - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Any additional metadata for this model - model_type: - $ref: '#/components/schemas/ModelType' - default: llm - description: >- - The type of model (LLM or embedding model) - additionalProperties: false - required: - - identifier - - provider_id - - type - - metadata - - model_type - title: Model - description: >- - A model resource representing an AI model registered in Llama Stack. - ModelType: - type: string - enum: - - llm - - embedding - - rerank - title: ModelType - description: >- - Enumeration of supported model types in Llama Stack. - AgentTurnInputType: - type: object - properties: - type: - type: string - const: agent_turn_input - default: agent_turn_input - description: >- - Discriminator type. Always "agent_turn_input" - additionalProperties: false - required: - - type - title: AgentTurnInputType - description: Parameter type for agent turn input. - ArrayType: - type: object - properties: - type: - type: string - const: array - default: array - description: Discriminator type. Always "array" - additionalProperties: false - required: - - type - title: ArrayType - description: Parameter type for array values. - BooleanType: - type: object - properties: - type: - type: string - const: boolean - default: boolean - description: Discriminator type. Always "boolean" - additionalProperties: false - required: - - type - title: BooleanType - description: Parameter type for boolean values. - ChatCompletionInputType: - type: object - properties: - type: - type: string - const: chat_completion_input - default: chat_completion_input - description: >- - Discriminator type. Always "chat_completion_input" - additionalProperties: false - required: - - type - title: ChatCompletionInputType - description: >- - Parameter type for chat completion input. - CompletionInputType: - type: object - properties: - type: - type: string - const: completion_input - default: completion_input - description: >- - Discriminator type. Always "completion_input" - additionalProperties: false - required: - - type - title: CompletionInputType - description: Parameter type for completion input. - JsonType: - type: object - properties: - type: - type: string - const: json - default: json - description: Discriminator type. Always "json" - additionalProperties: false - required: - - type - title: JsonType - description: Parameter type for JSON values. - NumberType: - type: object - properties: - type: - type: string - const: number - default: number - description: Discriminator type. Always "number" - additionalProperties: false - required: - - type - title: NumberType - description: Parameter type for numeric values. - ObjectType: - type: object - properties: - type: - type: string - const: object - default: object - description: Discriminator type. 
Always "object" - additionalProperties: false - required: - - type - title: ObjectType - description: Parameter type for object values. - ParamType: - oneOf: - - $ref: '#/components/schemas/StringType' - - $ref: '#/components/schemas/NumberType' - - $ref: '#/components/schemas/BooleanType' - - $ref: '#/components/schemas/ArrayType' - - $ref: '#/components/schemas/ObjectType' - - $ref: '#/components/schemas/JsonType' - - $ref: '#/components/schemas/UnionType' - - $ref: '#/components/schemas/ChatCompletionInputType' - - $ref: '#/components/schemas/CompletionInputType' - - $ref: '#/components/schemas/AgentTurnInputType' - discriminator: - propertyName: type - mapping: - string: '#/components/schemas/StringType' - number: '#/components/schemas/NumberType' - boolean: '#/components/schemas/BooleanType' - array: '#/components/schemas/ArrayType' - object: '#/components/schemas/ObjectType' - json: '#/components/schemas/JsonType' - union: '#/components/schemas/UnionType' - chat_completion_input: '#/components/schemas/ChatCompletionInputType' - completion_input: '#/components/schemas/CompletionInputType' - agent_turn_input: '#/components/schemas/AgentTurnInputType' - ScoringFn: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - enum: - - model - - shield - - vector_db - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: scoring_function - default: scoring_function - description: >- - The resource type, always scoring_function - description: - type: string - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - return_type: - $ref: '#/components/schemas/ParamType' - params: - $ref: '#/components/schemas/ScoringFnParams' - additionalProperties: false - required: - - identifier - - provider_id - - type - - metadata - - return_type - title: ScoringFn - description: >- - A scoring function resource for evaluating model outputs. - StringType: - type: object - properties: - type: - type: string - const: string - default: string - description: Discriminator type. Always "string" - additionalProperties: false - required: - - type - title: StringType - description: Parameter type for string values. - UnionType: - type: object - properties: - type: - type: string - const: union - default: union - description: Discriminator type. Always "union" - additionalProperties: false - required: - - type - title: UnionType - description: Parameter type for union values. - Shield: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - enum: - - model - - shield - - vector_db - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: shield - default: shield - description: The resource type, always shield - params: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Configuration parameters for the shield - additionalProperties: false - required: - - identifier - - provider_id - - type - title: Shield - description: >- - A safety shield resource that can be used to check content. 
- Span: - type: object - properties: - span_id: - type: string - description: Unique identifier for the span - trace_id: - type: string - description: >- - Unique identifier for the trace this span belongs to - parent_span_id: - type: string - description: >- - (Optional) Unique identifier for the parent span, if this is a child span - name: - type: string - description: >- - Human-readable name describing the operation this span represents - start_time: - type: string - format: date-time - description: Timestamp when the operation began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the operation finished, if completed - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value pairs containing additional metadata about the span - additionalProperties: false - required: - - span_id - - trace_id - - name - - start_time - title: Span - description: >- - A span representing a single operation within a trace. - GetSpanTreeRequest: - type: object - properties: - attributes_to_return: - type: array - items: - type: string - description: The attributes to return in the tree. - max_depth: - type: integer - description: The maximum depth of the tree. - additionalProperties: false - title: GetSpanTreeRequest - SpanStatus: - type: string - enum: - - ok - - error - title: SpanStatus - description: >- - The status of a span indicating whether it completed successfully or with - an error. - SpanWithStatus: - type: object - properties: - span_id: - type: string - description: Unique identifier for the span - trace_id: - type: string - description: >- - Unique identifier for the trace this span belongs to - parent_span_id: - type: string - description: >- - (Optional) Unique identifier for the parent span, if this is a child span - name: - type: string - description: >- - Human-readable name describing the operation this span represents - start_time: - type: string - format: date-time - description: Timestamp when the operation began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the operation finished, if completed - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value pairs containing additional metadata about the span - status: - $ref: '#/components/schemas/SpanStatus' - description: >- - (Optional) The current status of the span - additionalProperties: false - required: - - span_id - - trace_id - - name - - start_time - title: SpanWithStatus - description: A span that includes status information. - QuerySpanTreeResponse: - type: object - properties: - data: - type: object - additionalProperties: - $ref: '#/components/schemas/SpanWithStatus' - description: >- - Dictionary mapping span IDs to spans with status information - additionalProperties: false - required: - - data - title: QuerySpanTreeResponse - description: >- - Response containing a tree structure of spans. 
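A sketch of a minimal `Span` as defined in the YAML schema above, plus a `GetSpanTreeRequest` limiting traversal depth. Timestamps are RFC 3339 strings because the schema declares `format: date-time`; the IDs, span name, and attribute key are hypothetical.

```python
# Span: span_id, trace_id, name, and start_time are required.
span = {
    "span_id": "span-001",
    "trace_id": "trace-abc",
    "name": "inference.chat_completion",      # hypothetical operation name
    "start_time": "2025-09-03T17:34:05Z",
    "end_time": "2025-09-03T17:34:06Z",       # optional: set once completed
    "attributes": {"model_id": "llama-3-8b"}, # optional metadata
}

# GetSpanTreeRequest: both fields are optional per the schema.
span_tree_request = {
    "attributes_to_return": ["model_id"],
    "max_depth": 2,
}
```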
- Tool: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - enum: - - model - - shield - - vector_db - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: tool - default: tool - description: Type of resource, always 'tool' - toolgroup_id: - type: string - description: >- - ID of the tool group this tool belongs to - description: - type: string - description: >- - Human-readable description of what the tool does - parameters: - type: array - items: - $ref: '#/components/schemas/ToolParameter' - description: List of parameters this tool accepts - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional metadata about the tool - additionalProperties: false - required: - - identifier - - provider_id - - type - - toolgroup_id - - description - - parameters - title: Tool - description: A tool that can be invoked by agents. - ToolGroup: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - enum: - - model - - shield - - vector_db - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: tool_group - default: tool_group - description: Type of resource, always 'tool_group' - mcp_endpoint: - $ref: '#/components/schemas/URL' - description: >- - (Optional) Model Context Protocol endpoint for remote tools - args: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional arguments for the tool group - additionalProperties: false - required: - - identifier - - provider_id - - type - title: ToolGroup - description: >- - A group of related tools managed together. - Trace: - type: object - properties: - trace_id: - type: string - description: Unique identifier for the trace - root_span_id: - type: string - description: >- - Unique identifier for the root span that started this trace - start_time: - type: string - format: date-time - description: Timestamp when the trace began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the trace finished, if completed - additionalProperties: false - required: - - trace_id - - root_span_id - - start_time - title: Trace - description: >- - A trace representing the complete execution path of a request across multiple - operations. - Checkpoint: - type: object - properties: - identifier: - type: string - description: Unique identifier for the checkpoint - created_at: - type: string - format: date-time - description: >- - Timestamp when the checkpoint was created - epoch: - type: integer - description: >- - Training epoch when the checkpoint was saved - post_training_job_id: - type: string - description: >- - Identifier of the training job that created this checkpoint - path: - type: string - description: >- - File system path where the checkpoint is stored - training_metrics: - $ref: '#/components/schemas/PostTrainingMetric' - description: >- - (Optional) Training metrics associated with this checkpoint - additionalProperties: false - required: - - identifier - - created_at - - epoch - - post_training_job_id - - path - title: Checkpoint - description: Checkpoint created during training runs. 
- PostTrainingJobArtifactsResponse: - type: object - properties: - job_uuid: - type: string - description: Unique identifier for the training job - checkpoints: - type: array - items: - $ref: '#/components/schemas/Checkpoint' - description: >- - List of model checkpoints created during training - additionalProperties: false - required: - - job_uuid - - checkpoints - title: PostTrainingJobArtifactsResponse - description: Artifacts of a finetuning job. - PostTrainingMetric: - type: object - properties: - epoch: - type: integer - description: Training epoch number - train_loss: - type: number - description: Loss value on the training dataset - validation_loss: - type: number - description: Loss value on the validation dataset - perplexity: - type: number - description: >- - Perplexity metric indicating model confidence - additionalProperties: false - required: - - epoch - - train_loss - - validation_loss - - perplexity - title: PostTrainingMetric - description: >- - Training metrics captured during post-training jobs. - PostTrainingJobStatusResponse: - type: object - properties: - job_uuid: - type: string - description: Unique identifier for the training job - status: - type: string - enum: - - completed - - in_progress - - failed - - scheduled - - cancelled - description: Current status of the training job - scheduled_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job was scheduled - started_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job execution began - completed_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job finished, if completed - resources_allocated: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Information about computational resources allocated to the - job - checkpoints: - type: array - items: - $ref: '#/components/schemas/Checkpoint' - description: >- - List of model checkpoints created during training - additionalProperties: false - required: - - job_uuid - - status - - checkpoints - title: PostTrainingJobStatusResponse - description: Status of a finetuning job. - ListPostTrainingJobsResponse: - type: object - properties: - data: - type: array - items: - type: object - properties: - job_uuid: - type: string - additionalProperties: false - required: - - job_uuid - title: PostTrainingJob - additionalProperties: false - required: - - data - title: ListPostTrainingJobsResponse - VectorDB: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - enum: - - model - - shield - - vector_db - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: vector_db - default: vector_db - description: >- - Type of resource, always 'vector_db' for vector databases - embedding_model: - type: string - description: >- - Name of the embedding model to use for vector generation - embedding_dimension: - type: integer - description: Dimension of the embedding vectors - vector_db_name: - type: string - additionalProperties: false - required: - - identifier - - provider_id - - type - - embedding_model - - embedding_dimension - title: VectorDB - description: >- - Vector database resource for storing and querying vector embeddings. 
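For reference, the VectorDB resource above is created through the register call whose body is the RegisterVectorDbRequest schema later in this file. A minimal sketch, assuming a stack at localhost:8321 and a /v1/vector-dbs route (both assumptions):

```python
import requests

BASE_URL = "http://localhost:8321"  # hypothetical deployment

# Body follows RegisterVectorDbRequest: vector_db_id and embedding_model are
# required; the remaining fields are optional.
body = {
    "vector_db_id": "my-docs",
    "embedding_model": "all-MiniLM-L6-v2",
    "embedding_dimension": 384,
    "provider_id": "faiss",
}

resp = requests.post(f"{BASE_URL}/v1/vector-dbs", json=body)  # assumed route
resp.raise_for_status()
print(resp.json())  # VectorDB resource with type "vector_db"
```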
- HealthInfo: - type: object - properties: - status: - type: string - enum: - - OK - - Error - - Not Implemented - description: Current health status of the service - additionalProperties: false - required: - - status - title: HealthInfo - description: >- - Health status information for the service. - RAGDocument: - type: object - properties: - document_id: - type: string - description: The unique identifier for the document. - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - - $ref: '#/components/schemas/URL' - description: The content of the document. - mime_type: - type: string - description: The MIME type of the document. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the document. - additionalProperties: false - required: - - document_id - - content - - metadata - title: RAGDocument - description: >- - A document to be used for document ingestion in the RAG Tool. - InsertRequest: - type: object - properties: - documents: - type: array - items: - $ref: '#/components/schemas/RAGDocument' - description: >- - List of documents to index in the RAG system - vector_db_id: - type: string - description: >- - ID of the vector database to store the document embeddings - chunk_size_in_tokens: - type: integer - description: >- - (Optional) Size in tokens for document chunking during indexing - additionalProperties: false - required: - - documents - - vector_db_id - - chunk_size_in_tokens - title: InsertRequest - Chunk: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the chunk, which can be interleaved text, images, or other - types. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Metadata associated with the chunk that will be used in the model context - during inference. - embedding: - type: array - items: - type: number - description: >- - Optional embedding for the chunk. If not provided, it will be computed - later. - stored_chunk_id: - type: string - description: >- - The chunk ID that is stored in the vector database. Used for backend functionality. - chunk_metadata: - $ref: '#/components/schemas/ChunkMetadata' - description: >- - Metadata for the chunk that will NOT be used in the context during inference. - The `chunk_metadata` is required backend functionality. - additionalProperties: false - required: - - content - - metadata - title: Chunk - description: >- - A chunk of content that can be inserted into a vector database. - ChunkMetadata: - type: object - properties: - chunk_id: - type: string - description: >- - The ID of the chunk. If not set, it will be generated based on the document - ID and content. - document_id: - type: string - description: >- - The ID of the document this chunk belongs to. - source: - type: string - description: >- - The source of the content, such as a URL, file path, or other identifier. - created_timestamp: - type: integer - description: >- - An optional timestamp indicating when the chunk was created. - updated_timestamp: - type: integer - description: >- - An optional timestamp indicating when the chunk was last updated. 
- chunk_window: - type: string - description: >- - The window of the chunk, which can be used to group related chunks together. - chunk_tokenizer: - type: string - description: >- - The tokenizer used to create the chunk. Default is Tiktoken. - chunk_embedding_model: - type: string - description: >- - The embedding model used to create the chunk's embedding. - chunk_embedding_dimension: - type: integer - description: >- - The dimension of the embedding vector for the chunk. - content_token_count: - type: integer - description: >- - The number of tokens in the content of the chunk. - metadata_token_count: - type: integer - description: >- - The number of tokens in the metadata of the chunk. - additionalProperties: false - title: ChunkMetadata - description: >- - `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional - information about the chunk that will not be used in the context during - inference, but is required for backend functionality. The `ChunkMetadata` is - set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not - expected to change after. Use `Chunk.metadata` for metadata that will - be used in the context during inference. - InsertChunksRequest: - type: object - properties: - vector_db_id: - type: string - description: >- - The identifier of the vector database to insert the chunks into. - chunks: - type: array - items: - $ref: '#/components/schemas/Chunk' - description: >- - The chunks to insert. Each `Chunk` should contain content which can be - interleaved text, images, or other types. `metadata`: `dict[str, Any]` - and `embedding`: `List[float]` are optional. If `metadata` is provided, - you configure how Llama Stack formats the chunk during generation. If - `embedding` is not provided, it will be computed later. - ttl_seconds: - type: integer - description: The time to live of the chunks. - additionalProperties: false - required: - - vector_db_id - - chunks - title: InsertChunksRequest - ProviderInfo: - type: object - properties: - api: - type: string - description: The API name this provider implements - provider_id: - type: string - description: Unique identifier for the provider - provider_type: - type: string - description: The type of provider implementation - config: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Configuration parameters for the provider - health: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Current health status of the provider - additionalProperties: false - required: - - api - - provider_id - - provider_type - - config - - health - title: ProviderInfo - description: >- - Information about a registered provider including its configuration and health - status. - InvokeToolRequest: - type: object - properties: - tool_name: - type: string - description: The name of the tool to invoke. - kwargs: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - A dictionary of arguments to pass to the tool. 
- additionalProperties: false - required: - - tool_name - - kwargs - title: InvokeToolRequest - ToolInvocationResult: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The output content from the tool execution - error_message: - type: string - description: >- - (Optional) Error message if the tool execution failed - error_code: - type: integer - description: >- - (Optional) Numeric error code if the tool execution failed - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional metadata about the tool execution - additionalProperties: false - title: ToolInvocationResult - description: Result of a tool invocation. - PaginatedResponse: - type: object - properties: - data: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The list of items for the current page - has_more: - type: boolean - description: >- - Whether there are more items available after this set - url: - type: string - description: The URL for accessing this list - additionalProperties: false - required: - - data - - has_more - title: PaginatedResponse - description: >- - A generic paginated response that follows a simple format. - Job: - type: object - properties: - job_id: - type: string - description: Unique identifier for the job - status: - type: string - enum: - - completed - - in_progress - - failed - - scheduled - - cancelled - description: Current execution status of the job - additionalProperties: false - required: - - job_id - - status - title: Job - description: >- - A job execution instance with status tracking. - ListBenchmarksResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Benchmark' - additionalProperties: false - required: - - data - title: ListBenchmarksResponse - Order: - type: string - enum: - - asc - - desc - title: Order - description: Sort order for paginated responses. 
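The Chunk/ChunkMetadata and InsertChunksRequest schemas above translate into a fairly small request. A sketch, assuming the same local stack and a /v1/vector-io/insert route (assumed):

```python
import requests

BASE_URL = "http://localhost:8321"  # hypothetical deployment

chunk = {
    # InterleavedContent: a plain string is the simplest form.
    "content": "Llama Stack now exposes a rerank API.",
    # `metadata` is required and is surfaced in the model context at inference.
    "metadata": {"document_id": "doc-1", "source": "release-notes"},
    # `embedding` is omitted: per the Chunk schema it is computed later.
}

resp = requests.post(
    f"{BASE_URL}/v1/vector-io/insert",  # assumed route for InsertChunksRequest
    json={"vector_db_id": "my-docs", "chunks": [chunk], "ttl_seconds": 3600},
)
resp.raise_for_status()
```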
- ListOpenAIChatCompletionResponse: - type: object - properties: - data: - type: array - items: - type: object - properties: - id: - type: string - description: The ID of the chat completion - choices: - type: array - items: - $ref: '#/components/schemas/OpenAIChoice' - description: List of choices - object: - type: string - const: chat.completion - default: chat.completion - description: >- - The object type, which will be "chat.completion" - created: - type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created - model: - type: string - description: >- - The model that was used to generate the chat completion - input_messages: - type: array - items: - $ref: '#/components/schemas/OpenAIMessageParam' - additionalProperties: false - required: - - id - - choices - - object - - created - - model - - input_messages - title: OpenAICompletionWithInputMessages - description: >- - List of chat completion objects with their input messages - has_more: - type: boolean - description: >- - Whether there are more completions available beyond this list - first_id: - type: string - description: ID of the first completion in this list - last_id: - type: string - description: ID of the last completion in this list - object: - type: string - const: list - default: list - description: >- - Must be "list" to identify this as a list response - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIChatCompletionResponse - description: >- - Response from listing OpenAI-compatible chat completions. - ListDatasetsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Dataset' - description: List of datasets - additionalProperties: false - required: - - data - title: ListDatasetsResponse - description: Response from listing datasets. - ListModelsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Model' - additionalProperties: false - required: - - data - title: ListModelsResponse - ListOpenAIResponseInputItem: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseInput' - description: List of input items - object: - type: string - const: list - default: list - description: Object type identifier, always "list" - additionalProperties: false - required: - - data - - object - title: ListOpenAIResponseInputItem - description: >- - List container for OpenAI response input items. - ListOpenAIResponseObject: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseObjectWithInput' - description: >- - List of response objects with their input context - has_more: - type: boolean - description: >- - Whether there are more results available beyond this page - first_id: - type: string - description: >- - Identifier of the first item in this page - last_id: - type: string - description: Identifier of the last item in this page - object: - type: string - const: list - default: list - description: Object type identifier, always "list" - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIResponseObject - description: >- - Paginated list of OpenAI response objects with navigation metadata. 
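ListOpenAIChatCompletionResponse is cursor-paginated via has_more/first_id/last_id. A sketch of walking all pages; the listing route and the `after` cursor parameter are assumptions, not documented by this schema:

```python
import requests

BASE_URL = "http://localhost:8321"  # hypothetical deployment
after = None

while True:
    params = {"after": after} if after else {}
    page = requests.get(f"{BASE_URL}/v1/chat/completions", params=params).json()
    for completion in page["data"]:
        print(completion["id"], completion["model"])
    if not page["has_more"]:
        break
    after = page["last_id"]  # resume after the last item of this page
```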
- OpenAIResponseObjectWithInput: - type: object - properties: - created_at: - type: integer - description: >- - Unix timestamp when the response was created - error: - $ref: '#/components/schemas/OpenAIResponseError' - description: >- - (Optional) Error details if the response generation failed - id: - type: string - description: Unique identifier for this response - model: - type: string - description: Model identifier used for generation - object: - type: string - const: response - default: response - description: >- - Object type identifier, always "response" - output: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseOutput' - description: >- - List of generated output items (messages, tool calls, etc.) - parallel_tool_calls: - type: boolean - default: false - description: >- - Whether tool calls can be executed in parallel - previous_response_id: - type: string - description: >- - (Optional) ID of the previous response in a conversation - status: - type: string - description: >- - Current status of the response generation - temperature: - type: number - description: >- - (Optional) Sampling temperature used for generation - text: - $ref: '#/components/schemas/OpenAIResponseText' - description: >- - Text formatting configuration for the response - top_p: - type: number - description: >- - (Optional) Nucleus sampling parameter used for generation - truncation: - type: string - description: >- - (Optional) Truncation strategy applied to the response - user: - type: string - description: >- - (Optional) User identifier associated with the request - input: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseInput' - description: >- - List of input items that led to this response - additionalProperties: false - required: - - created_at - - id - - model - - object - - output - - parallel_tool_calls - - status - - text - - input - title: OpenAIResponseObjectWithInput - description: >- - OpenAI response object extended with input context information. - ListPromptsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Prompt' - additionalProperties: false - required: - - data - title: ListPromptsResponse - description: Response model to list prompts. - ListProvidersResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/ProviderInfo' - description: List of provider information objects - additionalProperties: false - required: - - data - title: ListProvidersResponse - description: >- - Response containing a list of all available providers. - RouteInfo: - type: object - properties: - route: - type: string - description: The API endpoint path - method: - type: string - description: HTTP method for the route - provider_types: - type: array - items: - type: string - description: >- - List of provider types that implement this route - additionalProperties: false - required: - - route - - method - - provider_types - title: RouteInfo - description: >- - Information about an API route including its path, method, and implementing - providers. - ListRoutesResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/RouteInfo' - description: >- - List of available route information objects - additionalProperties: false - required: - - data - title: ListRoutesResponse - description: >- - Response containing a list of all available API routes. 
- ListToolDefsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/ToolDef' - description: List of tool definitions - additionalProperties: false - required: - - data - title: ListToolDefsResponse - description: >- - Response containing a list of tool definitions. - ListScoringFunctionsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/ScoringFn' - additionalProperties: false - required: - - data - title: ListScoringFunctionsResponse - ListShieldsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Shield' - additionalProperties: false - required: - - data - title: ListShieldsResponse - ListToolGroupsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/ToolGroup' - description: List of tool groups - additionalProperties: false - required: - - data - title: ListToolGroupsResponse - description: >- - Response containing a list of tool groups. - ListToolsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Tool' - description: List of tools - additionalProperties: false - required: - - data - title: ListToolsResponse - description: Response containing a list of tools. - ListVectorDBsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/VectorDB' - description: List of vector databases - additionalProperties: false - required: - - data - title: ListVectorDBsResponse - description: Response from listing vector databases. - Event: - oneOf: - - $ref: '#/components/schemas/UnstructuredLogEvent' - - $ref: '#/components/schemas/MetricEvent' - - $ref: '#/components/schemas/StructuredLogEvent' - discriminator: - propertyName: type - mapping: - unstructured_log: '#/components/schemas/UnstructuredLogEvent' - metric: '#/components/schemas/MetricEvent' - structured_log: '#/components/schemas/StructuredLogEvent' - EventType: - type: string - enum: - - unstructured_log - - structured_log - - metric - title: EventType - description: >- - The type of telemetry event being logged. - LogSeverity: - type: string - enum: - - verbose - - debug - - info - - warn - - error - - critical - title: LogSeverity - description: The severity level of a log message. 
- MetricEvent: - type: object - properties: - trace_id: - type: string - description: >- - Unique identifier for the trace this event belongs to - span_id: - type: string - description: >- - Unique identifier for the span this event belongs to - timestamp: - type: string - format: date-time - description: Timestamp when the event occurred - attributes: - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - description: >- - (Optional) Key-value pairs containing additional metadata about the event - type: - $ref: '#/components/schemas/EventType' - const: metric - default: metric - description: Event type identifier set to METRIC - metric: - type: string - description: The name of the metric being measured - value: - oneOf: - - type: integer - - type: number - description: >- - The numeric value of the metric measurement - unit: - type: string - description: >- - The unit of measurement for the metric value - additionalProperties: false - required: - - trace_id - - span_id - - timestamp - - type - - metric - - value - - unit - title: MetricEvent - description: >- - A metric event containing a measured value. - SpanEndPayload: - type: object - properties: - type: - $ref: '#/components/schemas/StructuredLogType' - const: span_end - default: span_end - description: Payload type identifier set to SPAN_END - status: - $ref: '#/components/schemas/SpanStatus' - description: >- - The final status of the span indicating success or failure - additionalProperties: false - required: - - type - - status - title: SpanEndPayload - description: Payload for a span end event. - SpanStartPayload: - type: object - properties: - type: - $ref: '#/components/schemas/StructuredLogType' - const: span_start - default: span_start - description: >- - Payload type identifier set to SPAN_START - name: - type: string - description: >- - Human-readable name describing the operation this span represents - parent_span_id: - type: string - description: >- - (Optional) Unique identifier for the parent span, if this is a child span - additionalProperties: false - required: - - type - - name - title: SpanStartPayload - description: Payload for a span start event. - StructuredLogEvent: - type: object - properties: - trace_id: - type: string - description: >- - Unique identifier for the trace this event belongs to - span_id: - type: string - description: >- - Unique identifier for the span this event belongs to - timestamp: - type: string - format: date-time - description: Timestamp when the event occurred - attributes: - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - description: >- - (Optional) Key-value pairs containing additional metadata about the event - type: - $ref: '#/components/schemas/EventType' - const: structured_log - default: structured_log - description: >- - Event type identifier set to STRUCTURED_LOG - payload: - $ref: '#/components/schemas/StructuredLogPayload' - description: >- - The structured payload data for the log event - additionalProperties: false - required: - - trace_id - - span_id - - timestamp - - type - - payload - title: StructuredLogEvent - description: >- - A structured log event containing typed payload data. 
- StructuredLogPayload: - oneOf: - - $ref: '#/components/schemas/SpanStartPayload' - - $ref: '#/components/schemas/SpanEndPayload' - discriminator: - propertyName: type - mapping: - span_start: '#/components/schemas/SpanStartPayload' - span_end: '#/components/schemas/SpanEndPayload' - StructuredLogType: - type: string - enum: - - span_start - - span_end - title: StructuredLogType - description: >- - The type of structured log event payload. - UnstructuredLogEvent: - type: object - properties: - trace_id: - type: string - description: >- - Unique identifier for the trace this event belongs to - span_id: - type: string - description: >- - Unique identifier for the span this event belongs to - timestamp: - type: string - format: date-time - description: Timestamp when the event occurred - attributes: - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - description: >- - (Optional) Key-value pairs containing additional metadata about the event - type: - $ref: '#/components/schemas/EventType' - const: unstructured_log - default: unstructured_log - description: >- - Event type identifier set to UNSTRUCTURED_LOG - message: - type: string - description: The log message text - severity: - $ref: '#/components/schemas/LogSeverity' - description: The severity level of the log message - additionalProperties: false - required: - - trace_id - - span_id - - timestamp - - type - - message - - severity - title: UnstructuredLogEvent - description: >- - An unstructured log event containing a simple text message. - LogEventRequest: - type: object - properties: - event: - $ref: '#/components/schemas/Event' - description: The event to log. - ttl_seconds: - type: integer - description: The time to live of the event. - additionalProperties: false - required: - - event - - ttl_seconds - title: LogEventRequest - VectorStoreChunkingStrategy: - oneOf: - - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' - - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' - discriminator: - propertyName: type - mapping: - auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' - static: '#/components/schemas/VectorStoreChunkingStrategyStatic' - VectorStoreChunkingStrategyAuto: - type: object - properties: - type: - type: string - const: auto - default: auto - description: >- - Strategy type, always "auto" for automatic chunking - additionalProperties: false - required: - - type - title: VectorStoreChunkingStrategyAuto - description: >- - Automatic chunking strategy for vector store files. - VectorStoreChunkingStrategyStatic: - type: object - properties: - type: - type: string - const: static - default: static - description: >- - Strategy type, always "static" for static chunking - static: - $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' - description: >- - Configuration parameters for the static chunking strategy - additionalProperties: false - required: - - type - - static - title: VectorStoreChunkingStrategyStatic - description: >- - Static chunking strategy with configurable parameters. 
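The Event union above is the payload of LogEventRequest. A sketch of logging an UnstructuredLogEvent, assuming a /v1/telemetry/events route and ISO-8601 strings for the date-time fields:

```python
import requests
from datetime import datetime, timezone

BASE_URL = "http://localhost:8321"  # hypothetical deployment

event = {
    "type": "unstructured_log",  # discriminator value from the Event union
    "trace_id": "trace-123",
    "span_id": "span-456",
    "timestamp": datetime.now(timezone.utc).isoformat(),
    "message": "rerank request received",
    "severity": "info",
}

resp = requests.post(
    f"{BASE_URL}/v1/telemetry/events",  # assumed route for LogEventRequest
    json={"event": event, "ttl_seconds": 86400},
)
resp.raise_for_status()
```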
- VectorStoreChunkingStrategyStaticConfig: - type: object - properties: - chunk_overlap_tokens: - type: integer - default: 400 - description: >- - Number of tokens to overlap between adjacent chunks - max_chunk_size_tokens: - type: integer - default: 800 - description: >- - Maximum number of tokens per chunk, must be between 100 and 4096 - additionalProperties: false - required: - - chunk_overlap_tokens - - max_chunk_size_tokens - title: VectorStoreChunkingStrategyStaticConfig - description: >- - Configuration for static chunking strategy. - OpenaiAttachFileToVectorStoreRequest: - type: object - properties: - file_id: - type: string - description: >- - The ID of the file to attach to the vector store. - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The key-value attributes stored with the file, which can be used for filtering. - chunking_strategy: - $ref: '#/components/schemas/VectorStoreChunkingStrategy' - description: >- - The chunking strategy to use for the file. - additionalProperties: false - required: - - file_id - title: OpenaiAttachFileToVectorStoreRequest - VectorStoreFileLastError: - type: object - properties: - code: - oneOf: - - type: string - const: server_error - - type: string - const: rate_limit_exceeded - description: >- - Error code indicating the type of failure - message: - type: string - description: >- - Human-readable error message describing the failure - additionalProperties: false - required: - - code - - message - title: VectorStoreFileLastError - description: >- - Error information for failed vector store file processing. - VectorStoreFileObject: - type: object - properties: - id: - type: string - description: Unique identifier for the file - object: - type: string - default: vector_store.file - description: >- - Object type identifier, always "vector_store.file" - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Key-value attributes associated with the file - chunking_strategy: - $ref: '#/components/schemas/VectorStoreChunkingStrategy' - description: >- - Strategy used for splitting the file into chunks - created_at: - type: integer - description: >- - Timestamp when the file was added to the vector store - last_error: - $ref: '#/components/schemas/VectorStoreFileLastError' - description: >- - (Optional) Error information if file processing failed - status: - $ref: '#/components/schemas/VectorStoreFileStatus' - description: Current processing status of the file - usage_bytes: - type: integer - default: 0 - description: Storage space used by this file in bytes - vector_store_id: - type: string - description: >- - ID of the vector store containing this file - additionalProperties: false - required: - - id - - object - - attributes - - chunking_strategy - - created_at - - status - - usage_bytes - - vector_store_id - title: VectorStoreFileObject - description: OpenAI Vector Store File object. 
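To make the chunking options concrete, here is a sketch of an OpenaiAttachFileToVectorStoreRequest body that opts into the static strategy (the file ID and attributes are hypothetical):

```python
# Sketch of an OpenaiAttachFileToVectorStoreRequest body using the static
# chunking strategy, with the defaults from the schema above.
body = {
    "file_id": "file-abc123",
    "attributes": {"team": "search"},  # arbitrary key-value filter attributes
    "chunking_strategy": {
        "type": "static",
        "static": {
            "chunk_overlap_tokens": 400,   # schema default
            "max_chunk_size_tokens": 800,  # schema default; must be 100-4096
        },
    },
}
```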
-    VectorStoreFileStatus:
-      oneOf:
-        - type: string
-          const: completed
-        - type: string
-          const: in_progress
-        - type: string
-          const: cancelled
-        - type: string
-          const: failed
->>>>>>> f7acfa0f (Add rerank API for NVIDIA Inference Provider)
     OpenAIJSONSchema:
       type: object
       properties:
@@ -7282,6 +5158,7 @@ components:
       enum:
         - llm
         - embedding
+        - rerank
       title: ModelType
       description: >-
         Enumeration of supported model types in Llama Stack.
@@ -11706,1606 +9583,6 @@ components:
       title: VectorStoreSearchResponsePage
       description: >-
         Paginated response from searching a vector store.
-<<<<<<< HEAD
-=======
-    OpenaiUpdateVectorStoreRequest:
-      type: object
-      properties:
-        name:
-          type: string
-          description: The name of the vector store.
-        expires_after:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            The expiration policy for a vector store.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Set of 16 key-value pairs that can be attached to an object.
-      additionalProperties: false
-      title: OpenaiUpdateVectorStoreRequest
-    OpenaiUpdateVectorStoreFileRequest:
-      type: object
-      properties:
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            The updated key-value attributes to store with the file.
-      additionalProperties: false
-      required:
-        - attributes
-      title: OpenaiUpdateVectorStoreFileRequest
-    DPOAlignmentConfig:
-      type: object
-      properties:
-        beta:
-          type: number
-          description: Temperature parameter for the DPO loss
-        loss_type:
-          $ref: '#/components/schemas/DPOLossType'
-          default: sigmoid
-          description: The type of loss function to use for DPO
-      additionalProperties: false
-      required:
-        - beta
-        - loss_type
-      title: DPOAlignmentConfig
-      description: >-
-        Configuration for Direct Preference Optimization (DPO) alignment.
-    DPOLossType:
-      type: string
-      enum:
-        - sigmoid
-        - hinge
-        - ipo
-        - kto_pair
-      title: DPOLossType
-    DataConfig:
-      type: object
-      properties:
-        dataset_id:
-          type: string
-          description: >-
-            Unique identifier for the training dataset
-        batch_size:
-          type: integer
-          description: Number of samples per training batch
-        shuffle:
-          type: boolean
-          description: >-
-            Whether to shuffle the dataset during training
-        data_format:
-          $ref: '#/components/schemas/DatasetFormat'
-          description: >-
-            Format of the dataset (instruct or dialog)
-        validation_dataset_id:
-          type: string
-          description: >-
-            (Optional) Unique identifier for the validation dataset
-        packed:
-          type: boolean
-          default: false
-          description: >-
-            (Optional) Whether to pack multiple samples into a single sequence for
-            efficiency
-        train_on_input:
-          type: boolean
-          default: false
-          description: >-
-            (Optional) Whether to compute loss on input tokens as well as output tokens
-      additionalProperties: false
-      required:
-        - dataset_id
-        - batch_size
-        - shuffle
-        - data_format
-      title: DataConfig
-      description: >-
-        Configuration for training data and data loading.
-    DatasetFormat:
-      type: string
-      enum:
-        - instruct
-        - dialog
-      title: DatasetFormat
-      description: Format of the training dataset.
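The `rerank` enum value added above is what lets a rerank model be registered. A sketch using RegisterModelRequest (defined later in this file); the model identifier is an example NVIDIA reranker and the route is assumed:

```python
import requests

BASE_URL = "http://localhost:8321"  # hypothetical deployment

body = {
    "model_id": "nvidia/llama-3.2-nv-rerankqa-1b-v2",  # example model identifier
    "provider_id": "nvidia",
    "model_type": "rerank",  # newly added ModelType value
}

resp = requests.post(f"{BASE_URL}/v1/models", json=body)  # assumed route
resp.raise_for_status()
```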
- EfficiencyConfig: - type: object - properties: - enable_activation_checkpointing: - type: boolean - default: false - description: >- - (Optional) Whether to use activation checkpointing to reduce memory usage - enable_activation_offloading: - type: boolean - default: false - description: >- - (Optional) Whether to offload activations to CPU to save GPU memory - memory_efficient_fsdp_wrap: - type: boolean - default: false - description: >- - (Optional) Whether to use memory-efficient FSDP wrapping - fsdp_cpu_offload: - type: boolean - default: false - description: >- - (Optional) Whether to offload FSDP parameters to CPU - additionalProperties: false - title: EfficiencyConfig - description: >- - Configuration for memory and compute efficiency optimizations. - OptimizerConfig: - type: object - properties: - optimizer_type: - $ref: '#/components/schemas/OptimizerType' - description: >- - Type of optimizer to use (adam, adamw, or sgd) - lr: - type: number - description: Learning rate for the optimizer - weight_decay: - type: number - description: >- - Weight decay coefficient for regularization - num_warmup_steps: - type: integer - description: Number of steps for learning rate warmup - additionalProperties: false - required: - - optimizer_type - - lr - - weight_decay - - num_warmup_steps - title: OptimizerConfig - description: >- - Configuration parameters for the optimization algorithm. - OptimizerType: - type: string - enum: - - adam - - adamw - - sgd - title: OptimizerType - description: >- - Available optimizer algorithms for training. - TrainingConfig: - type: object - properties: - n_epochs: - type: integer - description: Number of training epochs to run - max_steps_per_epoch: - type: integer - default: 1 - description: Maximum number of steps to run per epoch - gradient_accumulation_steps: - type: integer - default: 1 - description: >- - Number of steps to accumulate gradients before updating - max_validation_steps: - type: integer - default: 1 - description: >- - (Optional) Maximum number of validation steps per epoch - data_config: - $ref: '#/components/schemas/DataConfig' - description: >- - (Optional) Configuration for data loading and formatting - optimizer_config: - $ref: '#/components/schemas/OptimizerConfig' - description: >- - (Optional) Configuration for the optimization algorithm - efficiency_config: - $ref: '#/components/schemas/EfficiencyConfig' - description: >- - (Optional) Configuration for memory and compute optimizations - dtype: - type: string - default: bf16 - description: >- - (Optional) Data type for model parameters (bf16, fp16, fp32) - additionalProperties: false - required: - - n_epochs - - max_steps_per_epoch - - gradient_accumulation_steps - title: TrainingConfig - description: >- - Comprehensive configuration for the training process. - PreferenceOptimizeRequest: - type: object - properties: - job_uuid: - type: string - description: The UUID of the job to create. - finetuned_model: - type: string - description: The model to fine-tune. - algorithm_config: - $ref: '#/components/schemas/DPOAlignmentConfig' - description: The algorithm configuration. - training_config: - $ref: '#/components/schemas/TrainingConfig' - description: The training configuration. - hyperparam_search_config: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The hyperparam search configuration. 
- logger_config: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The logger configuration. - additionalProperties: false - required: - - job_uuid - - finetuned_model - - algorithm_config - - training_config - - hyperparam_search_config - - logger_config - title: PreferenceOptimizeRequest - PostTrainingJob: - type: object - properties: - job_uuid: - type: string - additionalProperties: false - required: - - job_uuid - title: PostTrainingJob - DefaultRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: default - default: default - description: >- - Type of query generator, always 'default' - separator: - type: string - default: ' ' - description: >- - String separator used to join query terms - additionalProperties: false - required: - - type - - separator - title: DefaultRAGQueryGeneratorConfig - description: >- - Configuration for the default RAG query generator. - LLMRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: llm - default: llm - description: Type of query generator, always 'llm' - model: - type: string - description: >- - Name of the language model to use for query generation - template: - type: string - description: >- - Template string for formatting the query generation prompt - additionalProperties: false - required: - - type - - model - - template - title: LLMRAGQueryGeneratorConfig - description: >- - Configuration for the LLM-based RAG query generator. - RAGQueryConfig: - type: object - properties: - query_generator_config: - $ref: '#/components/schemas/RAGQueryGeneratorConfig' - description: Configuration for the query generator. - max_tokens_in_context: - type: integer - default: 4096 - description: Maximum number of tokens in the context. - max_chunks: - type: integer - default: 5 - description: Maximum number of chunks to retrieve. - chunk_template: - type: string - default: > - Result {index} - - Content: {chunk.content} - - Metadata: {metadata} - description: >- - Template for formatting each retrieved chunk in the context. Available - placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk - content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: - {chunk.content}\nMetadata: {metadata}\n" - mode: - $ref: '#/components/schemas/RAGSearchMode' - default: vector - description: >- - Search mode for retrieval—either "vector", "keyword", or "hybrid". Default - "vector". - ranker: - $ref: '#/components/schemas/Ranker' - description: >- - Configuration for the ranker to use in hybrid search. Defaults to RRF - ranker. - additionalProperties: false - required: - - query_generator_config - - max_tokens_in_context - - max_chunks - - chunk_template - title: RAGQueryConfig - description: >- - Configuration for the RAG query generation. 
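The post-training schemas above (DPOAlignmentConfig, DataConfig, OptimizerConfig, TrainingConfig) compose into a PreferenceOptimizeRequest. A minimal sketch, with hypothetical dataset and model IDs:

```python
# Minimal PreferenceOptimizeRequest body; all IDs are hypothetical.
body = {
    "job_uuid": "dpo-job-001",
    "finetuned_model": "meta-llama/Llama-3.2-1B-Instruct",
    "algorithm_config": {"beta": 0.1, "loss_type": "sigmoid"},  # DPOAlignmentConfig
    "training_config": {                                        # TrainingConfig
        "n_epochs": 1,
        "max_steps_per_epoch": 100,
        "gradient_accumulation_steps": 1,
        "data_config": {          # DataConfig
            "dataset_id": "pairs-ds",
            "batch_size": 8,
            "shuffle": True,
            "data_format": "instruct",
        },
        "optimizer_config": {     # OptimizerConfig
            "optimizer_type": "adamw",
            "lr": 1e-5,
            "weight_decay": 0.01,
            "num_warmup_steps": 10,
        },
    },
    "hyperparam_search_config": {},
    "logger_config": {},
}
```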
- RAGQueryGeneratorConfig: - oneOf: - - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' - discriminator: - propertyName: type - mapping: - default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' - RAGSearchMode: - type: string - enum: - - vector - - keyword - - hybrid - title: RAGSearchMode - description: >- - Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search - for semantic matching - KEYWORD: Uses keyword-based search for exact matching - - HYBRID: Combines both vector and keyword search for better results - RRFRanker: - type: object - properties: - type: - type: string - const: rrf - default: rrf - description: The type of ranker, always "rrf" - impact_factor: - type: number - default: 60.0 - description: >- - The impact factor for RRF scoring. Higher values give more weight to higher-ranked - results. Must be greater than 0 - additionalProperties: false - required: - - type - - impact_factor - title: RRFRanker - description: >- - Reciprocal Rank Fusion (RRF) ranker configuration. - Ranker: - oneOf: - - $ref: '#/components/schemas/RRFRanker' - - $ref: '#/components/schemas/WeightedRanker' - discriminator: - propertyName: type - mapping: - rrf: '#/components/schemas/RRFRanker' - weighted: '#/components/schemas/WeightedRanker' - WeightedRanker: - type: object - properties: - type: - type: string - const: weighted - default: weighted - description: The type of ranker, always "weighted" - alpha: - type: number - default: 0.5 - description: >- - Weight factor between 0 and 1. 0 means only use keyword scores, 1 means - only use vector scores, values in between blend both scores. - additionalProperties: false - required: - - type - - alpha - title: WeightedRanker - description: >- - Weighted ranker configuration that combines vector and keyword scores. - QueryRequest: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The query content to search for in the indexed documents - vector_db_ids: - type: array - items: - type: string - description: >- - List of vector database IDs to search within - query_config: - $ref: '#/components/schemas/RAGQueryConfig' - description: >- - (Optional) Configuration parameters for the query operation - additionalProperties: false - required: - - content - - vector_db_ids - title: QueryRequest - RAGQueryResult: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The retrieved content from the query - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Additional metadata about the query result - additionalProperties: false - required: - - metadata - title: RAGQueryResult - description: >- - Result of a RAG query containing retrieved content and metadata. - QueryChunksRequest: - type: object - properties: - vector_db_id: - type: string - description: >- - The identifier of the vector database to query. - query: - $ref: '#/components/schemas/InterleavedContent' - description: The query to search for. - params: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The parameters of the query. 
- additionalProperties: false - required: - - vector_db_id - - query - title: QueryChunksRequest - QueryChunksResponse: - type: object - properties: - chunks: - type: array - items: - $ref: '#/components/schemas/Chunk' - description: >- - List of content chunks returned from the query - scores: - type: array - items: - type: number - description: >- - Relevance scores corresponding to each returned chunk - additionalProperties: false - required: - - chunks - - scores - title: QueryChunksResponse - description: >- - Response from querying chunks in a vector database. - QueryMetricsRequest: - type: object - properties: - start_time: - type: integer - description: The start time of the metric to query. - end_time: - type: integer - description: The end time of the metric to query. - granularity: - type: string - description: The granularity of the metric to query. - query_type: - type: string - enum: - - range - - instant - description: The type of query to perform. - label_matchers: - type: array - items: - type: object - properties: - name: - type: string - description: The name of the label to match - value: - type: string - description: The value to match against - operator: - type: string - enum: - - '=' - - '!=' - - =~ - - '!~' - description: >- - The comparison operator to use for matching - default: '=' - additionalProperties: false - required: - - name - - value - - operator - title: MetricLabelMatcher - description: >- - A matcher for filtering metrics by label values. - description: >- - The label matchers to apply to the metric. - additionalProperties: false - required: - - start_time - - query_type - title: QueryMetricsRequest - MetricDataPoint: - type: object - properties: - timestamp: - type: integer - description: >- - Unix timestamp when the metric value was recorded - value: - type: number - description: >- - The numeric value of the metric at this timestamp - unit: - type: string - additionalProperties: false - required: - - timestamp - - value - - unit - title: MetricDataPoint - description: >- - A single data point in a metric time series. - MetricLabel: - type: object - properties: - name: - type: string - description: The name of the label - value: - type: string - description: The value of the label - additionalProperties: false - required: - - name - - value - title: MetricLabel - description: A label associated with a metric. - MetricSeries: - type: object - properties: - metric: - type: string - description: The name of the metric - labels: - type: array - items: - $ref: '#/components/schemas/MetricLabel' - description: >- - List of labels associated with this metric series - values: - type: array - items: - $ref: '#/components/schemas/MetricDataPoint' - description: >- - List of data points in chronological order - additionalProperties: false - required: - - metric - - labels - - values - title: MetricSeries - description: A time series of metric data points. - QueryMetricsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/MetricSeries' - description: >- - List of metric series matching the query criteria - additionalProperties: false - required: - - data - title: QueryMetricsResponse - description: >- - Response containing metric time series data. 
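The RRFRanker and WeightedRanker schemas above (used by RAGQueryConfig's `ranker` field) correspond to two standard fusion formulas. A self-contained sketch using the schema defaults; the server-side implementation may differ in detail:

```python
def rrf_score(rank: int, impact_factor: float = 60.0) -> float:
    """Reciprocal Rank Fusion: higher-ranked results (rank 1, 2, ...) score higher."""
    return 1.0 / (impact_factor + rank)

def weighted_score(keyword: float, vector: float, alpha: float = 0.5) -> float:
    """alpha=0 keeps only the keyword score, alpha=1 only the vector score."""
    return (1 - alpha) * keyword + alpha * vector

# A document ranked 1st by keyword search and 3rd by vector search under RRF:
fused = rrf_score(1) + rrf_score(3)
print(round(fused, 4))  # ~0.0323
```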
-    QueryCondition:
-      type: object
-      properties:
-        key:
-          type: string
-          description: The attribute key to filter on
-        op:
-          $ref: '#/components/schemas/QueryConditionOp'
-          description: The comparison operator to apply
-        value:
-          oneOf:
-            - type: 'null'
-            - type: boolean
-            - type: number
-            - type: string
-            - type: array
-            - type: object
-          description: The value to compare against
-      additionalProperties: false
-      required:
-        - key
-        - op
-        - value
-      title: QueryCondition
-      description: A condition for filtering query results.
-    QueryConditionOp:
-      type: string
-      enum:
-        - eq
-        - ne
-        - gt
-        - lt
-      title: QueryConditionOp
-      description: >-
-        Comparison operators for query conditions.
-    QuerySpansRequest:
-      type: object
-      properties:
-        attribute_filters:
-          type: array
-          items:
-            $ref: '#/components/schemas/QueryCondition'
-          description: >-
-            The attribute filters to apply to the spans.
-        attributes_to_return:
-          type: array
-          items:
-            type: string
-          description: The attributes to return in the spans.
-        max_depth:
-          type: integer
-          description: The maximum depth of the tree.
-      additionalProperties: false
-      required:
-        - attribute_filters
-        - attributes_to_return
-      title: QuerySpansRequest
-    QuerySpansResponse:
-      type: object
-      properties:
-        data:
-          type: array
-          items:
-            $ref: '#/components/schemas/Span'
-          description: >-
-            List of spans matching the query criteria
-      additionalProperties: false
-      required:
-        - data
-      title: QuerySpansResponse
-      description: Response containing a list of spans.
-    QueryTracesRequest:
-      type: object
-      properties:
-        attribute_filters:
-          type: array
-          items:
-            $ref: '#/components/schemas/QueryCondition'
-          description: >-
-            The attribute filters to apply to the traces.
-        limit:
-          type: integer
-          description: The limit of traces to return.
-        offset:
-          type: integer
-          description: The offset of the traces to return.
-        order_by:
-          type: array
-          items:
-            type: string
-          description: The order by of the traces to return.
-      additionalProperties: false
-      title: QueryTracesRequest
-    QueryTracesResponse:
-      type: object
-      properties:
-        data:
-          type: array
-          items:
-            $ref: '#/components/schemas/Trace'
-          description: >-
-            List of traces matching the query criteria
-      additionalProperties: false
-      required:
-        - data
-      title: QueryTracesResponse
-      description: Response containing a list of traces.
-    RegisterBenchmarkRequest:
-      type: object
-      properties:
-        benchmark_id:
-          type: string
-          description: The ID of the benchmark to register.
-        dataset_id:
-          type: string
-          description: >-
-            The ID of the dataset to use for the benchmark.
-        scoring_functions:
-          type: array
-          items:
-            type: string
-          description: >-
-            The scoring functions to use for the benchmark.
-        provider_benchmark_id:
-          type: string
-          description: >-
-            The ID of the provider benchmark to use for the benchmark.
-        provider_id:
-          type: string
-          description: >-
-            The ID of the provider to use for the benchmark.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The metadata to use for the benchmark.
-      additionalProperties: false
-      required:
-        - benchmark_id
-        - dataset_id
-        - scoring_functions
-      title: RegisterBenchmarkRequest
-    RegisterDatasetRequest:
-      type: object
-      properties:
-        purpose:
-          type: string
-          enum:
-            - post-training/messages
-            - eval/question-answer
-            - eval/messages-answer
-          description: >-
-            The purpose of the dataset.
-            One of: - "post-training/messages": The dataset
-            contains a messages column with list of messages for post-training. {
-            "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
-            "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
-            contains a question column and an answer column for evaluation. { "question":
-            "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
-            The dataset contains a messages column with list of messages and an answer
-            column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
-            my name is John Doe."}, {"role": "assistant", "content": "Hello, John
-            Doe. How can I help you today?"}, {"role": "user", "content": "What's
-            my name?"}, ], "answer": "John Doe" }
-        source:
-          $ref: '#/components/schemas/DataSource'
-          description: >-
-            The data source of the dataset. Ensure that the data source schema is
-            compatible with the purpose of the dataset. Examples: - { "type": "uri",
-            "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
-            "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
-            } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
-            } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
-            "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
-            } ] }
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            The metadata for the dataset. - E.g. {"description": "My dataset"}.
-        dataset_id:
-          type: string
-          description: >-
-            The ID of the dataset. If not provided, an ID will be generated.
-      additionalProperties: false
-      required:
-        - purpose
-        - source
-      title: RegisterDatasetRequest
-    RegisterModelRequest:
-      type: object
-      properties:
-        model_id:
-          type: string
-          description: The identifier of the model to register.
-        provider_model_id:
-          type: string
-          description: >-
-            The identifier of the model in the provider.
-        provider_id:
-          type: string
-          description: The identifier of the provider.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: Any additional metadata for this model.
-        model_type:
-          $ref: '#/components/schemas/ModelType'
-          description: The type of model to register.
-      additionalProperties: false
-      required:
-        - model_id
-      title: RegisterModelRequest
-    RegisterScoringFunctionRequest:
-      type: object
-      properties:
-        scoring_fn_id:
-          type: string
-          description: >-
-            The ID of the scoring function to register.
-        description:
-          type: string
-          description: The description of the scoring function.
-        return_type:
-          $ref: '#/components/schemas/ParamType'
-          description: The return type of the scoring function.
-        provider_scoring_fn_id:
-          type: string
-          description: >-
-            The ID of the provider scoring function to use for the scoring function.
-        provider_id:
-          type: string
-          description: >-
-            The ID of the provider to use for the scoring function.
-        params:
-          $ref: '#/components/schemas/ScoringFnParams'
-          description: >-
-            The parameters for the scoring function for benchmark eval, these can
-            be overridden for app eval.
-      additionalProperties: false
-      required:
-        - scoring_fn_id
-        - description
-        - return_type
-      title: RegisterScoringFunctionRequest
-    RegisterShieldRequest:
-      type: object
-      properties:
-        shield_id:
-          type: string
-          description: >-
-            The identifier of the shield to register.
-        provider_shield_id:
-          type: string
-          description: >-
-            The identifier of the shield in the provider.
-        provider_id:
-          type: string
-          description: The identifier of the provider.
-        params:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The parameters of the shield.
-      additionalProperties: false
-      required:
-        - shield_id
-      title: RegisterShieldRequest
-    RegisterToolGroupRequest:
-      type: object
-      properties:
-        toolgroup_id:
-          type: string
-          description: The ID of the tool group to register.
-        provider_id:
-          type: string
-          description: >-
-            The ID of the provider to use for the tool group.
-        mcp_endpoint:
-          $ref: '#/components/schemas/URL'
-          description: >-
-            The MCP endpoint to use for the tool group.
-        args:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            A dictionary of arguments to pass to the tool group.
-      additionalProperties: false
-      required:
-        - toolgroup_id
-        - provider_id
-      title: RegisterToolGroupRequest
-    RegisterVectorDbRequest:
-      type: object
-      properties:
-        vector_db_id:
-          type: string
-          description: >-
-            The identifier of the vector database to register.
-        embedding_model:
-          type: string
-          description: The embedding model to use.
-        embedding_dimension:
-          type: integer
-          description: The dimension of the embedding model.
-        provider_id:
-          type: string
-          description: The identifier of the provider.
-        vector_db_name:
-          type: string
-          description: The name of the vector database.
-        provider_vector_db_id:
-          type: string
-          description: >-
-            The identifier of the vector database in the provider.
-      additionalProperties: false
-      required:
-        - vector_db_id
-        - embedding_model
-      title: RegisterVectorDbRequest
-    RerankRequest:
-      type: object
-      properties:
-        model:
-          type: string
-          description: >-
-            The identifier of the reranking model to use. The model must be a reranking
-            model registered with Llama Stack and available via the /models endpoint.
-        query:
-          oneOf:
-            - type: string
-            - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
-            - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
-          description: >-
-            The search query to rank items against. Can be a string, text content
-            part, or image content part. The input must not exceed the model's max
-            input token length.
-        items:
-          type: array
-          items:
-            oneOf:
-              - type: string
-              - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
-              - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
-          description: >-
-            List of items to rerank. Each item can be a string, text content part,
-            or image content part. Each input must not exceed the model's max input
-            token length.
-        max_num_results:
-          type: integer
-          description: >-
-            (Optional) Maximum number of results to return. Default: returns all.
-      additionalProperties: false
-      required:
-        - model
-        - query
-        - items
-      title: RerankRequest
-    RerankData:
-      type: object
-      properties:
-        index:
-          type: integer
-          description: >-
-            The original index of the document in the input list
-        relevance_score:
-          type: number
-          description: >-
-            The relevance score from the model output. Values are inverted when applicable
-            so that higher scores indicate greater relevance.
-      additionalProperties: false
-      required:
-        - index
-        - relevance_score
-      title: RerankData
-      description: >-
-        A single rerank result from a reranking response.
-    RerankResponse:
-      type: object
-      properties:
-        data:
-          type: array
-          items:
-            $ref: '#/components/schemas/RerankData'
-          description: >-
-            List of rerank result objects, sorted by relevance score (descending)
-      additionalProperties: false
-      required:
-        - data
-      title: RerankResponse
-      description: Response from a reranking request.
-    ResumeAgentTurnRequest:
-      type: object
-      properties:
-        tool_responses:
-          type: array
-          items:
-            $ref: '#/components/schemas/ToolResponse'
-          description: >-
-            The tool call responses to resume the turn with.
-        stream:
-          type: boolean
-          description: Whether to stream the response.
-      additionalProperties: false
-      required:
-        - tool_responses
-      title: ResumeAgentTurnRequest
-    RunEvalRequest:
-      type: object
-      properties:
-        benchmark_config:
-          $ref: '#/components/schemas/BenchmarkConfig'
-          description: The configuration for the benchmark.
-      additionalProperties: false
-      required:
-        - benchmark_config
-      title: RunEvalRequest
-    RunModerationRequest:
-      type: object
-      properties:
-        input:
-          oneOf:
-            - type: string
-            - type: array
-              items:
-                type: string
-          description: >-
-            Input (or inputs) to classify. Can be a single string, an array of strings,
-            or an array of multi-modal input objects similar to other models.
-        model:
-          type: string
-          description: >-
-            The content moderation model you would like to use.
-      additionalProperties: false
-      required:
-        - input
-        - model
-      title: RunModerationRequest
-    ModerationObject:
-      type: object
-      properties:
-        id:
-          type: string
-          description: >-
-            The unique identifier for the moderation request.
-        model:
-          type: string
-          description: >-
-            The model used to generate the moderation results.
-        results:
-          type: array
-          items:
-            $ref: '#/components/schemas/ModerationObjectResults'
-          description: A list of moderation objects
-      additionalProperties: false
-      required:
-        - id
-        - model
-        - results
-      title: ModerationObject
-      description: A moderation object.
-    ModerationObjectResults:
-      type: object
-      properties:
-        flagged:
-          type: boolean
-          description: >-
-            Whether any of the below categories are flagged.
-        categories:
-          type: object
-          additionalProperties:
-            type: boolean
-          description: >-
-            A list of the categories, and whether they are flagged or not.
-        category_applied_input_types:
-          type: object
-          additionalProperties:
-            type: array
-            items:
-              type: string
-          description: >-
-            A list of the categories along with the input type(s) that the score applies
-            to.
-        category_scores:
-          type: object
-          additionalProperties:
-            type: number
-          description: >-
-            A list of the categories along with their scores as predicted by model.
-        user_message:
-          type: string
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-      additionalProperties: false
-      required:
-        - flagged
-        - metadata
-      title: ModerationObjectResults
-      description: A moderation object.
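The RerankRequest, RerankData, and RerankResponse schemas above describe the wire format of the new rerank API. A minimal usage sketch in library-client mode, mirroring the call shape used elsewhere in this series (the model identifier is illustrative and must belong to a registered rerank model):

from llama_stack.core.library_client import LlamaStackAsLibraryClient

# Build an in-process client for a distribution that ships a rerank provider.
client = LlamaStackAsLibraryClient("nvidia")
client.initialize()

# RerankRequest fields: model, query, items, plus the optional max_num_results.
response = client.inference.rerank(
    model="nvidia/llama-3.2-nv-rerankqa-1b-v2",  # illustrative rerank model id
    query="What is the capital of France?",
    items=[
        "Paris is the capital of France.",
        "Tokyo is the capital of Japan.",
        "The Eiffel Tower is in Paris.",
    ],
    max_num_results=2,  # optional; by default all items are returned
)

# Results arrive sorted by relevance_score (descending); index points back
# into the original items list, so documents can be recovered after sorting.
for result in response:
    print(result.index, result.relevance_score)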
-    RunShieldRequest:
-      type: object
-      properties:
-        shield_id:
-          type: string
-          description: The identifier of the shield to run.
-        messages:
-          type: array
-          items:
-            $ref: '#/components/schemas/Message'
-          description: The messages to run the shield on.
-        params:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The parameters of the shield.
-      additionalProperties: false
-      required:
-        - shield_id
-        - messages
-        - params
-      title: RunShieldRequest
-    RunShieldResponse:
-      type: object
-      properties:
-        violation:
-          $ref: '#/components/schemas/SafetyViolation'
-          description: >-
-            (Optional) Safety violation detected by the shield, if any
-      additionalProperties: false
-      title: RunShieldResponse
-      description: Response from running a safety shield.
-    SaveSpansToDatasetRequest:
-      type: object
-      properties:
-        attribute_filters:
-          type: array
-          items:
-            $ref: '#/components/schemas/QueryCondition'
-          description: >-
-            The attribute filters to apply to the spans.
-        attributes_to_save:
-          type: array
-          items:
-            type: string
-          description: The attributes to save to the dataset.
-        dataset_id:
-          type: string
-          description: >-
-            The ID of the dataset to save the spans to.
-        max_depth:
-          type: integer
-          description: The maximum depth of the tree.
-      additionalProperties: false
-      required:
-        - attribute_filters
-        - attributes_to_save
-        - dataset_id
-      title: SaveSpansToDatasetRequest
-    ScoreRequest:
-      type: object
-      properties:
-        input_rows:
-          type: array
-          items:
-            type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: The rows to score.
-        scoring_functions:
-          type: object
-          additionalProperties:
-            oneOf:
-              - $ref: '#/components/schemas/ScoringFnParams'
-              - type: 'null'
-          description: >-
-            The scoring functions to use for the scoring.
-      additionalProperties: false
-      required:
-        - input_rows
-        - scoring_functions
-      title: ScoreRequest
-    ScoreResponse:
-      type: object
-      properties:
-        results:
-          type: object
-          additionalProperties:
-            $ref: '#/components/schemas/ScoringResult'
-          description: >-
-            A map of scoring function name to ScoringResult.
-      additionalProperties: false
-      required:
-        - results
-      title: ScoreResponse
-      description: The response from scoring.
-    ScoreBatchRequest:
-      type: object
-      properties:
-        dataset_id:
-          type: string
-          description: The ID of the dataset to score.
-        scoring_functions:
-          type: object
-          additionalProperties:
-            oneOf:
-              - $ref: '#/components/schemas/ScoringFnParams'
-              - type: 'null'
-          description: >-
-            The scoring functions to use for the scoring.
-        save_results_dataset:
-          type: boolean
-          description: >-
-            Whether to save the results to a dataset.
-      additionalProperties: false
-      required:
-        - dataset_id
-        - scoring_functions
-        - save_results_dataset
-      title: ScoreBatchRequest
-    ScoreBatchResponse:
-      type: object
-      properties:
-        dataset_id:
-          type: string
-          description: >-
-            (Optional) The identifier of the dataset that was scored
-        results:
-          type: object
-          additionalProperties:
-            $ref: '#/components/schemas/ScoringResult'
-          description: >-
-            A map of scoring function name to ScoringResult
-      additionalProperties: false
-      required:
-        - results
-      title: ScoreBatchResponse
-      description: >-
-        Response from batch scoring operations on datasets.
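RunShieldRequest and RunShieldResponse above follow the same request/response pattern as the rerank schemas. A hedged sketch of the corresponding client call, reusing the client built earlier (the shield identifier is hypothetical, and the empty params dict simply satisfies the schema's required field):

# Sketch only: assumes a shield was previously registered as "content_safety".
result = client.safety.run_shield(
    shield_id="content_safety",  # hypothetical shield id
    messages=[{"role": "user", "content": "How do I bake a cake?"}],
    params={},  # required by RunShieldRequest; may be empty
)

# RunShieldResponse.violation is optional: None means nothing was flagged.
if result.violation is not None:
    print(result.violation)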
-    SetDefaultVersionRequest:
-      type: object
-      properties:
-        version:
-          type: integer
-          description: The version to set as default.
-      additionalProperties: false
-      required:
-        - version
-      title: SetDefaultVersionRequest
-    AlgorithmConfig:
-      oneOf:
-        - $ref: '#/components/schemas/LoraFinetuningConfig'
-        - $ref: '#/components/schemas/QATFinetuningConfig'
-      discriminator:
-        propertyName: type
-        mapping:
-          LoRA: '#/components/schemas/LoraFinetuningConfig'
-          QAT: '#/components/schemas/QATFinetuningConfig'
-    LoraFinetuningConfig:
-      type: object
-      properties:
-        type:
-          type: string
-          const: LoRA
-          default: LoRA
-          description: Algorithm type identifier, always "LoRA"
-        lora_attn_modules:
-          type: array
-          items:
-            type: string
-          description: >-
-            List of attention module names to apply LoRA to
-        apply_lora_to_mlp:
-          type: boolean
-          description: Whether to apply LoRA to MLP layers
-        apply_lora_to_output:
-          type: boolean
-          description: >-
-            Whether to apply LoRA to output projection layers
-        rank:
-          type: integer
-          description: >-
-            Rank of the LoRA adaptation (lower rank = fewer parameters)
-        alpha:
-          type: integer
-          description: >-
-            LoRA scaling parameter that controls adaptation strength
-        use_dora:
-          type: boolean
-          default: false
-          description: >-
-            (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)
-        quantize_base:
-          type: boolean
-          default: false
-          description: >-
-            (Optional) Whether to quantize the base model weights
-      additionalProperties: false
-      required:
-        - type
-        - lora_attn_modules
-        - apply_lora_to_mlp
-        - apply_lora_to_output
-        - rank
-        - alpha
-      title: LoraFinetuningConfig
-      description: >-
-        Configuration for Low-Rank Adaptation (LoRA) fine-tuning.
-    QATFinetuningConfig:
-      type: object
-      properties:
-        type:
-          type: string
-          const: QAT
-          default: QAT
-          description: Algorithm type identifier, always "QAT"
-        quantizer_name:
-          type: string
-          description: >-
-            Name of the quantization algorithm to use
-        group_size:
-          type: integer
-          description: Size of groups for grouped quantization
-      additionalProperties: false
-      required:
-        - type
-        - quantizer_name
-        - group_size
-      title: QATFinetuningConfig
-      description: >-
-        Configuration for Quantization-Aware Training (QAT) fine-tuning.
-    SupervisedFineTuneRequest:
-      type: object
-      properties:
-        job_uuid:
-          type: string
-          description: The UUID of the job to create.
-        training_config:
-          $ref: '#/components/schemas/TrainingConfig'
-          description: The training configuration.
-        hyperparam_search_config:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The hyperparam search configuration.
-        logger_config:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The logger configuration.
-        model:
-          type: string
-          description: The model to fine-tune.
-        checkpoint_dir:
-          type: string
-          description: The directory to save checkpoint(s) to.
-        algorithm_config:
-          $ref: '#/components/schemas/AlgorithmConfig'
-          description: The algorithm configuration.
-      additionalProperties: false
-      required:
-        - job_uuid
-        - training_config
-        - hyperparam_search_config
-        - logger_config
-      title: SupervisedFineTuneRequest
-    SyntheticDataGenerateRequest:
-      type: object
-      properties:
-        dialogs:
-          type: array
-          items:
-            $ref: '#/components/schemas/Message'
-          description: >-
-            List of conversation messages to use as input for synthetic data generation
-        filtering_function:
-          type: string
-          enum:
-            - none
-            - random
-            - top_k
-            - top_p
-            - top_k_top_p
-            - sigmoid
-          description: >-
-            Type of filtering to apply to generated synthetic data samples
-        model:
-          type: string
-          description: >-
-            (Optional) The identifier of the model to use. The model must be registered
-            with Llama Stack and available via the /models endpoint
-      additionalProperties: false
-      required:
-        - dialogs
-        - filtering_function
-      title: SyntheticDataGenerateRequest
-    SyntheticDataGenerationResponse:
-      type: object
-      properties:
-        synthetic_data:
-          type: array
-          items:
-            type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: >-
-            List of generated synthetic data samples that passed the filtering criteria
-        statistics:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Statistical information about the generation process and filtering
-            results
-      additionalProperties: false
-      required:
-        - synthetic_data
-      title: SyntheticDataGenerationResponse
-      description: >-
-        Response from the synthetic data generation. Batch of (prompt, response, score)
-        tuples that pass the threshold.
-    UpdatePromptRequest:
-      type: object
-      properties:
-        prompt:
-          type: string
-          description: The updated prompt text content.
-        version:
-          type: integer
-          description: >-
-            The current version of the prompt being updated.
-        variables:
-          type: array
-          items:
-            type: string
-          description: >-
-            Updated list of variable names that can be used in the prompt template.
-        set_as_default:
-          type: boolean
-          description: >-
-            Set the new version as the default (default=True).
-      additionalProperties: false
-      required:
-        - prompt
-        - version
-        - set_as_default
-      title: UpdatePromptRequest
->>>>>>> f7acfa0f (Add rerank API for NVIDIA Inference Provider)
     VersionInfo:
       type: object
       properties:
diff --git a/llama_stack/core/routers/inference.py b/llama_stack/core/routers/inference.py
index c1d4203c2..fcc16332f 100644
--- a/llama_stack/core/routers/inference.py
+++ b/llama_stack/core/routers/inference.py
@@ -201,7 +201,6 @@ class InferenceRouter(Inference):
             max_num_results=max_num_results,
         )
 
-
     async def openai_completion(
         self,
         model: str,
diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py
index 8f4c564c8..dfbcf476d 100644
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@@ -181,7 +181,14 @@ def model_providers(llama_stack_client):
 
 @pytest.fixture(autouse=True)
 def skip_if_no_model(request):
-    model_fixtures = ["text_model_id", "vision_model_id", "embedding_model_id", "judge_model_id", "shield_id", "rerank_model_id"]
+    model_fixtures = [
+        "text_model_id",
+        "vision_model_id",
+        "embedding_model_id",
+        "judge_model_id",
+        "shield_id",
+        "rerank_model_id",
+    ]
     test_func = request.node.function
     actual_params = inspect.signature(test_func).parameters.keys()

From 6b4940806f2aa6b411f14e28f9c1414df9d059e5 Mon Sep 17 00:00:00 2001
From: Jiayi
Date: Wed, 1 Oct 2025 10:37:58 -0700
Subject: [PATCH 18/18] Fix rerank integration test based on client side
 changes

---
 docs/docs/providers/agents/index.mdx         |   2 +-
 docs/docs/providers/inference/index.mdx      |   3 +-
 docs/static/deprecated-llama-stack-spec.html |   2 +-
 docs/static/deprecated-llama-stack-spec.yaml |   7 +-
 docs/static/stainless-llama-stack-spec.html  |   7 +-
 docs/static/stainless-llama-stack-spec.yaml  |  11 +-
 example.py                                   | 257 -------------------
 tests/integration/inference/test_rerank.py   |  14 +-
 8 files changed, 27 insertions(+), 276 deletions(-)
 delete mode 100644 example.py

diff --git a/docs/docs/providers/agents/index.mdx b/docs/docs/providers/agents/index.mdx
index 200d0119f..06eb104af 100644
--- a/docs/docs/providers/agents/index.mdx
+++ b/docs/docs/providers/agents/index.mdx
@@ -14,4 +14,4 @@ Agents APIs for creating and interacting with agentic systems.
 
 
 
-This section contains documentation for all available providers for the **agents** API.
\ No newline at end of file
+This section contains documentation for all available providers for the **agents** API.
diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx
index 065f620df..63741f202 100644
--- a/docs/docs/providers/inference/index.mdx
+++ b/docs/docs/providers/inference/index.mdx
@@ -4,8 +4,7 @@ description: "Llama Stack Inference API for generating completions, chat complet
 This API provides the raw interface to the underlying models. Three kinds of models are supported:
 - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.
 - Embedding models: these models generate embeddings to be used for semantic search.
-  - Rerank models: these models rerank the documents by relevance."
-
+  - Rerank models: these models reorder the documents based on their relevance to a query."
 sidebar_label: Inference
 title: Inference
 ---
diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html
index 7edfe3f5d..f0dd903a6 100644
--- a/docs/static/deprecated-llama-stack-spec.html
+++ b/docs/static/deprecated-llama-stack-spec.html
@@ -13335,7 +13335,7 @@
     },
     {
       "name": "Inference",
-      "description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
+      "description": "This API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents based on their relevance to a query.",
       "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
     },
     {
diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml
index ca832d46b..48863025f 100644
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
@@ -9990,13 +9990,16 @@ tags:
     description: ''
   - name: Inference
     description: >-
-      This API provides the raw interface to the underlying models. Two kinds of models
-      are supported:
+      This API provides the raw interface to the underlying models. Three kinds of
+      models are supported:
 
       - LLM models: these models generate "raw" and "chat" (conversational) completions.
 
       - Embedding models: these models generate embeddings to be used for semantic
      search.
+
+      - Rerank models: these models reorder the documents based on their relevance
+      to a query.
     x-displayName: >-
       Llama Stack Inference API for generating completions, chat completions, and
      embeddings.
diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html
index 7ec48ef74..6bc67536d 100644
--- a/docs/static/stainless-llama-stack-spec.html
+++ b/docs/static/stainless-llama-stack-spec.html
@@ -8838,7 +8838,8 @@
         "type": "string",
         "enum": [
           "llm",
-          "embedding"
+          "embedding",
+          "rerank"
         ],
         "title": "ModelType",
         "description": "Enumeration of supported model types in Llama Stack."
@@ -17033,7 +17034,7 @@
       "properties": {
         "model": {
           "type": "string",
-          "description": "The identifier of the reranking model to use."
+          "description": "The identifier of the reranking model to use. The model must be a reranking model registered with Llama Stack and available via the /models endpoint."
         },
         "query": {
           "oneOf": [
@@ -18456,7 +18457,7 @@
     },
     {
       "name": "Inference",
-      "description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
+      "description": "This API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents based on their relevance to a query.",
       "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
     },
     {
diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml
index 3bede159b..8fc70a5cd 100644
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@@ -6603,6 +6603,7 @@ components:
       enum:
         - llm
         - embedding
+        - rerank
       title: ModelType
       description: >-
         Enumeration of supported model types in Llama Stack.
@@ -12693,7 +12694,8 @@ components:
         model:
           type: string
           description: >-
-            The identifier of the reranking model to use.
+            The identifier of the reranking model to use. The model must be a reranking
+            model registered with Llama Stack and available via the /models endpoint.
         query:
           oneOf:
             - type: string
@@ -13774,13 +13776,16 @@ tags:
     description: ''
   - name: Inference
     description: >-
-      This API provides the raw interface to the underlying models. Two kinds of models
-      are supported:
+      This API provides the raw interface to the underlying models. Three kinds of
+      models are supported:
 
       - LLM models: these models generate "raw" and "chat" (conversational) completions.
 
       - Embedding models: these models generate embeddings to be used for semantic
      search.
+
+      - Rerank models: these models reorder the documents based on their relevance
+      to a query.
     x-displayName: >-
       Llama Stack Inference API for generating completions, chat completions, and
      embeddings.
diff --git a/example.py b/example.py
deleted file mode 100644
index 7e968e24a..000000000
--- a/example.py
+++ /dev/null
@@ -1,257 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import os
-
-os.environ["NVIDIA_API_KEY"] = "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"
-# Option 1: Use default NIM URL (will auto-switch to ai.api.nvidia.com for rerank)
-# os.environ["NVIDIA_BASE_URL"] = "https://ai.api.nvidia.com"
-# Option 2: Use AI Foundation URL directly for rerank models
-# os.environ["NVIDIA_BASE_URL"] = "https://ai.api.nvidia.com/v1"
-os.environ["NVIDIA_BASE_URL"] = "https://integrate.api.nvidia.com"
-
-import base64
-import io
-from PIL import Image
-
-from llama_stack.core.library_client import LlamaStackAsLibraryClient
-
-client = LlamaStackAsLibraryClient("nvidia")
-client.initialize()
-
-# # response = client.inference.completion(
-# #     model_id="meta/llama-3.1-8b-instruct",
-# #     content="Complete the sentence using one word: Roses are red, violets are :",
-# #     stream=False,
-# #     sampling_params={
-# #         "max_tokens": 50,
-# #     },
-# # )
-# # print(f"Response: {response.content}")
-
-
-# response = client.inference.chat_completion(
-#     model_id="nvidia/nvidia-nemotron-nano-9b-v2",
-#     messages=[
-#         {
-#             "role": "system",
-#             "content": "/think",
-#         },
-#         {
-#             "role": "user",
-#             "content": "How are you?",
-#         },
-#     ],
-#     stream=False,
-#     sampling_params={
-#         "max_tokens": 1024,
-#     },
-# )
-# print(f"Response: {response}")
-
-
-print(client.models.list())
-rerank_response = client.inference.rerank(
-    model="nvidia/llama-3.2-nv-rerankqa-1b-v2",
-    query="query",
-    items=[
-        "item_1",
-        "item_2",
-        "item_3",
-    ]
-)
-
-print(rerank_response)
-for i, result in enumerate(rerank_response):
-    print(f"{i+1}. [Index: {result.index}, "
-          f"Score: {(result.relevance_score):.3f}]")
-
-# # from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
-
-# # tool_definition = ToolDefinition(
-# #     tool_name="get_weather",
-# #     description="Get current weather information for a location",
-# #     parameters={
-# #         "location": ToolParamDefinition(
-# #             param_type="string",
-# #             description="The city and state, e.g. San Francisco, CA",
-# #             required=True
-# #         ),
-# #         "unit": ToolParamDefinition(
-# #             param_type="string",
-# #             description="Temperature unit (celsius or fahrenheit)",
-# #             required=False,
-# #             default="celsius"
-# #         )
-# #     }
-# # )
-
-# # # tool_response = client.inference.chat_completion(
-# # #     model_id="meta-llama/Llama-3.1-8B-Instruct",
-# # #     messages=[
-# # #         {"role": "user", "content": "What's the weather like in San Francisco?"}
-# # #     ],
-# # #     tools=[tool_definition],
-# # # )
-
-# # # print(f"Tool Response: {tool_response.completion_message.content}")
-# # # if tool_response.completion_message.tool_calls:
-# # #     for tool_call in tool_response.completion_message.tool_calls:
-# # #         print(f"Tool Called: {tool_call.tool_name}")
-# # #         print(f"Arguments: {tool_call.arguments}")
-
-
-# # # from llama_stack.apis.inference import JsonSchemaResponseFormat, ResponseFormatType
-
-# # # person_schema = {
-# # #     "type": "object",
-# # #     "properties": {
-# # #         "name": {"type": "string"},
-# # #         "age": {"type": "integer"},
-# # #         "occupation": {"type": "string"},
-# # #     },
-# # #     "required": ["name", "age", "occupation"]
-# # # }
-
-# # # response_format = JsonSchemaResponseFormat(
-# # #     type=ResponseFormatType.json_schema,
-# # #     json_schema=person_schema
-# # # )
-
-# # # structured_response = client.inference.chat_completion(
-# # #     model_id="meta-llama/Llama-3.1-8B-Instruct",
-# # #     messages=[
-# # #         {
-# # #             "role": "user",
-# # #             "content": "Create a profile for a fictional person named Alice who is 30 years old and is a software engineer. "
-# # #         }
-# # #     ],
-# # #     response_format=response_format,
-# # # )
-
-# # # print(f"Structured Response: {structured_response.completion_message.content}")
-
-# # # print("\n" + "="*50)
-# # # print("VISION LANGUAGE MODEL (VLM) EXAMPLE")
-# # # print("="*50)
-
-# # def load_image_as_base64(image_path):
-# #     with open(image_path, "rb") as image_file:
-# #         img_bytes = image_file.read()
-# #         return base64.b64encode(img_bytes).decode("utf-8")
-
-# # image_path = "/home/jiayin/llama-stack/docs/dog.jpg"
-# # demo_image_b64 = load_image_as_base64(image_path)
-
-# # vlm_response = client.inference.chat_completion(
-# #     model_id="nvidia/vila",
-# #     messages=[
-# #         {
-# #             "role": "user",
-# #             "content": [
-# #                 {
-# #                     "type": "image",
-# #                     "image": {
-# #                         "data": demo_image_b64,
-# #                     },
-# #                 },
-# #                 {
-# #                     "type": "text",
-# #                     "text": "Please describe what you see in this image in detail.",
-# #                 },
-# #             ],
-# #         }
-# #     ],
-# # )
-
-# # print(f"VLM Response: {vlm_response.completion_message.content}")
-
-# # # print("\n" + "="*50)
-# # # print("EMBEDDING EXAMPLE")
-# # # print("="*50)
-
-# # # # Embedding example
-# # # embedding_response = client.inference.embeddings(
-# # #     model_id="nvidia/llama-3.2-nv-embedqa-1b-v2",
-# # #     contents=["Hello world", "How are you today?"],
-# # #     task_type="query"
-# # # )
-
-# # # print(f"Number of embeddings: {len(embedding_response.embeddings)}")
-# # # print(f"Embedding dimension: {len(embedding_response.embeddings[0])}")
-# # # print(f"First few values: {embedding_response.embeddings[0][:5]}")
-
-# # # # from openai import OpenAI
-
-# # # # client = OpenAI(
-# # # #     base_url = "http://10.176.230.61:8000/v1",
-# # # #     api_key = "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"
-# # # # )
-
-# # # # # completion = client.completions.create(
-# # # # #     model="meta/llama-3.1-405b-instruct",
-# # # # #     prompt="How are you?",
-# # # # #     temperature=0.2,
-# # # # #     top_p=0.7,
-# # # # #     max_tokens=1024,
-# # # # #     stream=False
-# # # # # )
-
-# # # # # # completion = client.chat.completions.create(
-# # # # # #     model="meta/llama-3.1-8b-instruct",
-# # # # # #     messages=[{"role":"user","content":"hi"}],
-# # # # # #     temperature=0.2,
-# # # # # #     top_p=0.7,
-# # # # # #     max_tokens=1024,
-# # # # # #     stream=True
-# # # # # # )
-
-# # # # # for chunk in completion:
-# # # # #     if chunk.choices[0].delta.content is not None:
-# # # # #         print(chunk.choices[0].delta.content, end="")
-
-
-# # # # # response = client.inference.completion(
-# # # # #     model_id="meta/llama-3.1-8b-instruct",
-# # # # #     content="Complete the sentence using one word: Roses are red, violets are :",
-# # # # #     stream=False,
-# # # # #     sampling_params={
-# # # # #         "max_tokens": 50,
-# # # # #     },
-# # # # # )
-# # # # # print(f"Response: {response.content}")
-
-
-
-
-# from openai import OpenAI
-
-# client = OpenAI(
-#     base_url = "https://integrate.api.nvidia.com/v1",
-#     api_key = "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"
-# )
-
-# completion = client.chat.completions.create(
-#     model="nvidia/nvidia-nemotron-nano-9b-v2",
-#     messages=[{"role":"system","content":"/think"}],
-#     temperature=0.6,
-#     top_p=0.95,
-#     max_tokens=2048,
-#     frequency_penalty=0,
-#     presence_penalty=0,
-#     stream=True,
-#     extra_body={
-#         "min_thinking_tokens": 1024,
-#         "max_thinking_tokens": 2048
-#     }
-# )
-
-# for chunk in completion:
-#     reasoning = getattr(chunk.choices[0].delta, "reasoning_content", None)
-#     if reasoning:
-#         print(reasoning, end="")
-#     if chunk.choices[0].delta.content is not None:
-#         print(chunk.choices[0].delta.content, end="")
diff --git a/tests/integration/inference/test_rerank.py b/tests/integration/inference/test_rerank.py
index 4931c3d6c..82f35cd27 100644
--- a/tests/integration/inference/test_rerank.py
+++ b/tests/integration/inference/test_rerank.py
@@ -6,7 +6,7 @@
 import pytest
 from llama_stack_client import BadRequestError as LlamaStackBadRequestError
-from llama_stack_client.types import InferenceRerankResponse
+from llama_stack_client.types.alpha import InferenceRerankResponse
 from llama_stack_client.types.shared.interleaved_content import (
     ImageContentItem,
     ImageContentItemImage,
@@ -97,7 +97,7 @@ def _validate_semantic_ranking(response: InferenceRerankResponse, items: list, e
 def test_rerank_text(client_with_models, rerank_model_id, query, items, inference_provider_type):
     skip_if_provider_doesnt_support_rerank(inference_provider_type)
-    response = client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items)
+    response = client_with_models.alpha.inference.rerank(model=rerank_model_id, query=query, items=items)
     assert isinstance(response, list)
     # TODO: Add type validation for response items once InferenceRerankResponseItem is exported from llama stack client.
     assert len(response) <= len(items)
@@ -129,9 +129,9 @@ def test_rerank_image(client_with_models, rerank_model_id, query, items, inferen
             ValueError if isinstance(client_with_models, LlamaStackAsLibraryClient) else LlamaStackBadRequestError
         )
         with pytest.raises(error_type):
-            client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items)
+            client_with_models.alpha.inference.rerank(model=rerank_model_id, query=query, items=items)
     else:
-        response = client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items)
+        response = client_with_models.alpha.inference.rerank(model=rerank_model_id, query=query, items=items)
         assert isinstance(response, list)
         assert len(response) <= len(items)
@@ -144,7 +144,7 @@ def test_rerank_max_results(client_with_models, rerank_model_id, inference_provi
     items = [DUMMY_STRING, DUMMY_STRING2, DUMMY_TEXT, DUMMY_TEXT2]
     max_num_results = 2
 
-    response = client_with_models.inference.rerank(
+    response = client_with_models.alpha.inference.rerank(
         model=rerank_model_id,
         query=DUMMY_STRING,
         items=items,
@@ -160,7 +160,7 @@ def test_rerank_max_results_larger_than_items(client_with_models, rerank_model_i
     skip_if_provider_doesnt_support_rerank(inference_provider_type)
     items = [DUMMY_STRING, DUMMY_STRING2]
 
-    response = client_with_models.inference.rerank(
+    response = client_with_models.alpha.inference.rerank(
         model=rerank_model_id,
        query=DUMMY_STRING,
         items=items,
@@ -208,7 +208,7 @@ def test_rerank_semantic_correctness(
 ):
     skip_if_provider_doesnt_support_rerank(inference_provider_type)
 
-    response = client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items)
+    response = client_with_models.alpha.inference.rerank(model=rerank_model_id, query=query, items=items)
 
     _validate_rerank_response(response, items)
     _validate_semantic_ranking(response, items, expected_first_item)
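The hunks above capture the client-side change this final commit reacts to: rerank now lives under the alpha namespace (llama_stack_client.types.alpha, client.alpha.inference.rerank), and the new "rerank" ModelType makes rerank models discoverable like any other model. A short end-to-end sketch against a running stack; the server URL is a placeholder, and the model_type/identifier attribute names are assumptions based on the Model resource shown in this series:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

# Rerank models are listed alongside llm and embedding models via /models.
rerank_models = [m for m in client.models.list() if m.model_type == "rerank"]

# Same call shape the integration tests now exercise.
response = client.alpha.inference.rerank(
    model=rerank_models[0].identifier,
    query="best pizza in town",
    items=["a pizzeria review", "a laundromat flyer", "a pasta recipe"],
    max_num_results=1,
)
assert len(response) <= 1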