From bab9d7aaea9ab3fd7d68cabed6e22345f1d7f739 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Wed, 3 Sep 2025 17:34:05 -0700 Subject: [PATCH] Add rerank API for NVIDIA Inference Provider --- docs/docs/providers/inference/index.mdx | 8 +++++--- docs/static/llama-stack-spec.html | 4992 +++++++++++++++++ docs/static/llama-stack-spec.yaml | 3724 ++++++++++++ example.py | 257 + llama_stack/apis/inference/inference.py | 2 +- llama_stack/apis/models/models.py | 2 + llama_stack/core/routers/inference.py | 24 + .../remote/inference/nvidia/models.py | 131 + .../remote/inference/nvidia/nvidia.py | 80 + 9 files changed, 9216 insertions(+), 4 deletions(-) create mode 100644 example.py create mode 100644 llama_stack/providers/remote/inference/nvidia/models.py diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index ebbaf1be1..e96169cad 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -4,6 +4,7 @@ description: "Llama Stack Inference API for generating completions, chat complet -This API provides the raw interface to the underlying models. Two kinds of models are supported: +This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. -- Embedding models: these models generate embeddings to be used for semantic search." +- Embedding models: these models generate embeddings to be used for semantic search. +- Rerank models: these models rerank the documents by relevance." sidebar_label: Inference title: Inference --- @@ -17,5 +18,6 @@ Llama Stack Inference API for generating completions, chat completions, and embe -This API provides the raw interface to the underlying models. Two kinds of models are supported: +This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. +- Rerank models: these models rerank the documents by relevance. This section contains documentation for all available providers for the **inference** API.
diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 96e97035f..b260f01a7 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -4819,6 +4819,2834 @@ "title": "OpenAIUserMessageParam", "description": "A message from the user in an OpenAI-compatible chat completion request." }, + "OpenAICompletionWithInputMessages": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The ID of the chat completion" + }, + "choices": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChoice" + }, + "description": "List of choices" + }, + "object": { + "type": "string", + "const": "chat.completion", + "default": "chat.completion", + "description": "The object type, which will be \"chat.completion\"" + }, + "created": { + "type": "integer", + "description": "The Unix timestamp in seconds when the chat completion was created" + }, + "model": { + "type": "string", + "description": "The model that was used to generate the chat completion" + }, + "input_messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIMessageParam" + } + } + }, + "additionalProperties": false, + "required": [ + "id", + "choices", + "object", + "created", + "model", + "input_messages" + ], + "title": "OpenAICompletionWithInputMessages" + }, + "DataSource": { + "oneOf": [ + { + "$ref": "#/components/schemas/URIDataSource" + }, + { + "$ref": "#/components/schemas/RowsDataSource" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "uri": "#/components/schemas/URIDataSource", + "rows": "#/components/schemas/RowsDataSource" + } + } + }, + "Dataset": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + 
"benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "dataset", + "default": "dataset", + "description": "Type of resource, always 'dataset' for datasets" + }, + "purpose": { + "type": "string", + "enum": [ + "post-training/messages", + "eval/question-answer", + "eval/messages-answer" + ], + "description": "Purpose of the dataset indicating its intended use" + }, + "source": { + "$ref": "#/components/schemas/DataSource", + "description": "Data source configuration for the dataset" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Additional metadata for the dataset" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type", + "purpose", + "source", + "metadata" + ], + "title": "Dataset", + "description": "Dataset resource for storing and accessing training or evaluation data." + }, + "RowsDataSource": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "rows", + "default": "rows" + }, + "rows": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "The dataset is stored in rows. E.g. - [ {\"messages\": [{\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}]} ]" + } + }, + "additionalProperties": false, + "required": [ + "type", + "rows" + ], + "title": "RowsDataSource", + "description": "A dataset stored in rows." 
+ }, + "URIDataSource": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "uri", + "default": "uri" + }, + "uri": { + "type": "string", + "description": "The dataset can be obtained from a URI. E.g. - \"https://mywebsite.com/mydata.jsonl\" - \"lsfs://mydata.jsonl\" - \"data:csv;base64,{base64_content}\"" + } + }, + "additionalProperties": false, + "required": [ + "type", + "uri" + ], + "title": "URIDataSource", + "description": "A dataset that can be obtained from a URI." + }, + "Model": { + "type": "object", + "properties": { + "identifier": { + "type": "string", + "description": "Unique identifier for this resource in llama stack" + }, + "provider_resource_id": { + "type": "string", + "description": "Unique identifier for this resource in the provider" + }, + "provider_id": { + "type": "string", + "description": "ID of the provider that owns this resource" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "model", + "default": "model", + "description": "The resource type, always 'model' for model resources" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Any additional metadata for this model" + }, + "model_type": { + "$ref": "#/components/schemas/ModelType", + "default": "llm", + "description": "The type of model (LLM or embedding model)" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type", + "metadata", + "model_type" + ], + "title": "Model", + "description": "A model resource representing an AI model registered in Llama Stack." 
+ }, + "ModelType": { + "type": "string", + "enum": [ + "llm", + "embedding", + "rerank" + ], + "title": "ModelType", + "description": "Enumeration of supported model types in Llama Stack." + }, + "AgentTurnInputType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "agent_turn_input", + "default": "agent_turn_input", + "description": "Discriminator type. Always \"agent_turn_input\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "AgentTurnInputType", + "description": "Parameter type for agent turn input." + }, + "ArrayType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "array", + "default": "array", + "description": "Discriminator type. Always \"array\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "ArrayType", + "description": "Parameter type for array values." + }, + "BooleanType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "boolean", + "default": "boolean", + "description": "Discriminator type. Always \"boolean\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "BooleanType", + "description": "Parameter type for boolean values." + }, + "ChatCompletionInputType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "chat_completion_input", + "default": "chat_completion_input", + "description": "Discriminator type. Always \"chat_completion_input\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "ChatCompletionInputType", + "description": "Parameter type for chat completion input." + }, + "CompletionInputType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "completion_input", + "default": "completion_input", + "description": "Discriminator type. 
Always \"completion_input\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "CompletionInputType", + "description": "Parameter type for completion input." + }, + "JsonType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "json", + "default": "json", + "description": "Discriminator type. Always \"json\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "JsonType", + "description": "Parameter type for JSON values." + }, + "NumberType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "number", + "default": "number", + "description": "Discriminator type. Always \"number\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "NumberType", + "description": "Parameter type for numeric values." + }, + "ObjectType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "object", + "default": "object", + "description": "Discriminator type. Always \"object\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "ObjectType", + "description": "Parameter type for object values." 
+ }, + "ParamType": { + "oneOf": [ + { + "$ref": "#/components/schemas/StringType" + }, + { + "$ref": "#/components/schemas/NumberType" + }, + { + "$ref": "#/components/schemas/BooleanType" + }, + { + "$ref": "#/components/schemas/ArrayType" + }, + { + "$ref": "#/components/schemas/ObjectType" + }, + { + "$ref": "#/components/schemas/JsonType" + }, + { + "$ref": "#/components/schemas/UnionType" + }, + { + "$ref": "#/components/schemas/ChatCompletionInputType" + }, + { + "$ref": "#/components/schemas/CompletionInputType" + }, + { + "$ref": "#/components/schemas/AgentTurnInputType" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "string": "#/components/schemas/StringType", + "number": "#/components/schemas/NumberType", + "boolean": "#/components/schemas/BooleanType", + "array": "#/components/schemas/ArrayType", + "object": "#/components/schemas/ObjectType", + "json": "#/components/schemas/JsonType", + "union": "#/components/schemas/UnionType", + "chat_completion_input": "#/components/schemas/ChatCompletionInputType", + "completion_input": "#/components/schemas/CompletionInputType", + "agent_turn_input": "#/components/schemas/AgentTurnInputType" + } + } + }, + "ScoringFn": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "scoring_function", + "default": "scoring_function", + "description": "The resource type, always scoring_function" + }, + "description": { + "type": "string" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + 
"return_type": { + "$ref": "#/components/schemas/ParamType" + }, + "params": { + "$ref": "#/components/schemas/ScoringFnParams" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type", + "metadata", + "return_type" + ], + "title": "ScoringFn", + "description": "A scoring function resource for evaluating model outputs." + }, + "StringType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "string", + "default": "string", + "description": "Discriminator type. Always \"string\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "StringType", + "description": "Parameter type for string values." + }, + "UnionType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "union", + "default": "union", + "description": "Discriminator type. Always \"union\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "UnionType", + "description": "Parameter type for union values." 
+ }, + "Shield": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "shield", + "default": "shield", + "description": "The resource type, always shield" + }, + "params": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Configuration parameters for the shield" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type" + ], + "title": "Shield", + "description": "A safety shield resource that can be used to check content." + }, + "Span": { + "type": "object", + "properties": { + "span_id": { + "type": "string", + "description": "Unique identifier for the span" + }, + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace this span belongs to" + }, + "parent_span_id": { + "type": "string", + "description": "(Optional) Unique identifier for the parent span, if this is a child span" + }, + "name": { + "type": "string", + "description": "Human-readable name describing the operation this span represents" + }, + "start_time": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the operation began" + }, + "end_time": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the operation finished, if completed" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, 
+ { + "type": "object" + } + ] + }, + "description": "(Optional) Key-value pairs containing additional metadata about the span" + } + }, + "additionalProperties": false, + "required": [ + "span_id", + "trace_id", + "name", + "start_time" + ], + "title": "Span", + "description": "A span representing a single operation within a trace." + }, + "GetSpanTreeRequest": { + "type": "object", + "properties": { + "attributes_to_return": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The attributes to return in the tree." + }, + "max_depth": { + "type": "integer", + "description": "The maximum depth of the tree." + } + }, + "additionalProperties": false, + "title": "GetSpanTreeRequest" + }, + "SpanStatus": { + "type": "string", + "enum": [ + "ok", + "error" + ], + "title": "SpanStatus", + "description": "The status of a span indicating whether it completed successfully or with an error." + }, + "SpanWithStatus": { + "type": "object", + "properties": { + "span_id": { + "type": "string", + "description": "Unique identifier for the span" + }, + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace this span belongs to" + }, + "parent_span_id": { + "type": "string", + "description": "(Optional) Unique identifier for the parent span, if this is a child span" + }, + "name": { + "type": "string", + "description": "Human-readable name describing the operation this span represents" + }, + "start_time": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the operation began" + }, + "end_time": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the operation finished, if completed" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": 
"(Optional) Key-value pairs containing additional metadata about the span" + }, + "status": { + "$ref": "#/components/schemas/SpanStatus", + "description": "(Optional) The current status of the span" + } + }, + "additionalProperties": false, + "required": [ + "span_id", + "trace_id", + "name", + "start_time" + ], + "title": "SpanWithStatus", + "description": "A span that includes status information." + }, + "QuerySpanTreeResponse": { + "type": "object", + "properties": { + "data": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/SpanWithStatus" + }, + "description": "Dictionary mapping span IDs to spans with status information" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "QuerySpanTreeResponse", + "description": "Response containing a tree structure of spans." + }, + "Tool": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "tool", + "default": "tool", + "description": "Type of resource, always 'tool'" + }, + "toolgroup_id": { + "type": "string", + "description": "ID of the tool group this tool belongs to" + }, + "description": { + "type": "string", + "description": "Human-readable description of what the tool does" + }, + "parameters": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolParameter" + }, + "description": "List of parameters this tool accepts" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Additional metadata about the 
tool" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type", + "toolgroup_id", + "description", + "parameters" + ], + "title": "Tool", + "description": "A tool that can be invoked by agents." + }, + "ToolGroup": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "tool_group", + "default": "tool_group", + "description": "Type of resource, always 'tool_group'" + }, + "mcp_endpoint": { + "$ref": "#/components/schemas/URL", + "description": "(Optional) Model Context Protocol endpoint for remote tools" + }, + "args": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Additional arguments for the tool group" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type" + ], + "title": "ToolGroup", + "description": "A group of related tools managed together." 
+ }, + "Trace": { + "type": "object", + "properties": { + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace" + }, + "root_span_id": { + "type": "string", + "description": "Unique identifier for the root span that started this trace" + }, + "start_time": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the trace began" + }, + "end_time": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the trace finished, if completed" + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "root_span_id", + "start_time" + ], + "title": "Trace", + "description": "A trace representing the complete execution path of a request across multiple operations." + }, + "Checkpoint": { + "type": "object", + "properties": { + "identifier": { + "type": "string", + "description": "Unique identifier for the checkpoint" + }, + "created_at": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the checkpoint was created" + }, + "epoch": { + "type": "integer", + "description": "Training epoch when the checkpoint was saved" + }, + "post_training_job_id": { + "type": "string", + "description": "Identifier of the training job that created this checkpoint" + }, + "path": { + "type": "string", + "description": "File system path where the checkpoint is stored" + }, + "training_metrics": { + "$ref": "#/components/schemas/PostTrainingMetric", + "description": "(Optional) Training metrics associated with this checkpoint" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "created_at", + "epoch", + "post_training_job_id", + "path" + ], + "title": "Checkpoint", + "description": "Checkpoint created during training runs." 
+ }, + "PostTrainingJobArtifactsResponse": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string", + "description": "Unique identifier for the training job" + }, + "checkpoints": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Checkpoint" + }, + "description": "List of model checkpoints created during training" + } + }, + "additionalProperties": false, + "required": [ + "job_uuid", + "checkpoints" + ], + "title": "PostTrainingJobArtifactsResponse", + "description": "Artifacts of a finetuning job." + }, + "PostTrainingMetric": { + "type": "object", + "properties": { + "epoch": { + "type": "integer", + "description": "Training epoch number" + }, + "train_loss": { + "type": "number", + "description": "Loss value on the training dataset" + }, + "validation_loss": { + "type": "number", + "description": "Loss value on the validation dataset" + }, + "perplexity": { + "type": "number", + "description": "Perplexity metric indicating model confidence" + } + }, + "additionalProperties": false, + "required": [ + "epoch", + "train_loss", + "validation_loss", + "perplexity" + ], + "title": "PostTrainingMetric", + "description": "Training metrics captured during post-training jobs." 
+ }, + "PostTrainingJobStatusResponse": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string", + "description": "Unique identifier for the training job" + }, + "status": { + "type": "string", + "enum": [ + "completed", + "in_progress", + "failed", + "scheduled", + "cancelled" + ], + "description": "Current status of the training job" + }, + "scheduled_at": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the job was scheduled" + }, + "started_at": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the job execution began" + }, + "completed_at": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the job finished, if completed" + }, + "resources_allocated": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Information about computational resources allocated to the job" + }, + "checkpoints": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Checkpoint" + }, + "description": "List of model checkpoints created during training" + } + }, + "additionalProperties": false, + "required": [ + "job_uuid", + "status", + "checkpoints" + ], + "title": "PostTrainingJobStatusResponse", + "description": "Status of a finetuning job." 
+ }, + "ListPostTrainingJobsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "job_uuid" + ], + "title": "PostTrainingJob" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListPostTrainingJobsResponse" + }, + "VectorDB": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "vector_db", + "default": "vector_db", + "description": "Type of resource, always 'vector_db' for vector databases" + }, + "embedding_model": { + "type": "string", + "description": "Name of the embedding model to use for vector generation" + }, + "embedding_dimension": { + "type": "integer", + "description": "Dimension of the embedding vectors" + }, + "vector_db_name": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type", + "embedding_model", + "embedding_dimension" + ], + "title": "VectorDB", + "description": "Vector database resource for storing and querying vector embeddings." + }, + "HealthInfo": { + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": [ + "OK", + "Error", + "Not Implemented" + ], + "description": "Current health status of the service" + } + }, + "additionalProperties": false, + "required": [ + "status" + ], + "title": "HealthInfo", + "description": "Health status information for the service." + }, + "RAGDocument": { + "type": "object", + "properties": { + "document_id": { + "type": "string", + "description": "The unique identifier for the document." 
+ }, + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/InterleavedContentItem" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/InterleavedContentItem" + } + }, + { + "$ref": "#/components/schemas/URL" + } + ], + "description": "The content of the document." + }, + "mime_type": { + "type": "string", + "description": "The MIME type of the document." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Additional metadata for the document." + } + }, + "additionalProperties": false, + "required": [ + "document_id", + "content", + "metadata" + ], + "title": "RAGDocument", + "description": "A document to be used for document ingestion in the RAG Tool." + }, + "InsertRequest": { + "type": "object", + "properties": { + "documents": { + "type": "array", + "items": { + "$ref": "#/components/schemas/RAGDocument" + }, + "description": "List of documents to index in the RAG system" + }, + "vector_db_id": { + "type": "string", + "description": "ID of the vector database to store the document embeddings" + }, + "chunk_size_in_tokens": { + "type": "integer", + "description": "(Optional) Size in tokens for document chunking during indexing" + } + }, + "additionalProperties": false, + "required": [ + "documents", + "vector_db_id", + "chunk_size_in_tokens" + ], + "title": "InsertRequest" + }, + "Chunk": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "The content of the chunk, which can be interleaved text, images, or other types." 
+ }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Metadata associated with the chunk that will be used in the model context during inference." + }, + "embedding": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Optional embedding for the chunk. If not provided, it will be computed later." + }, + "stored_chunk_id": { + "type": "string", + "description": "The chunk ID that is stored in the vector database. Used for backend functionality." + }, + "chunk_metadata": { + "$ref": "#/components/schemas/ChunkMetadata", + "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality." + } + }, + "additionalProperties": false, + "required": [ + "content", + "metadata" + ], + "title": "Chunk", + "description": "A chunk of content that can be inserted into a vector database." + }, + "ChunkMetadata": { + "type": "object", + "properties": { + "chunk_id": { + "type": "string", + "description": "The ID of the chunk. If not set, it will be generated based on the document ID and content." + }, + "document_id": { + "type": "string", + "description": "The ID of the document this chunk belongs to." + }, + "source": { + "type": "string", + "description": "The source of the content, such as a URL, file path, or other identifier." + }, + "created_timestamp": { + "type": "integer", + "description": "An optional timestamp indicating when the chunk was created." + }, + "updated_timestamp": { + "type": "integer", + "description": "An optional timestamp indicating when the chunk was last updated." + }, + "chunk_window": { + "type": "string", + "description": "The window of the chunk, which can be used to group related chunks together." 
+ }, + "chunk_tokenizer": { + "type": "string", + "description": "The tokenizer used to create the chunk. Default is Tiktoken." + }, + "chunk_embedding_model": { + "type": "string", + "description": "The embedding model used to create the chunk's embedding." + }, + "chunk_embedding_dimension": { + "type": "integer", + "description": "The dimension of the embedding vector for the chunk." + }, + "content_token_count": { + "type": "integer", + "description": "The number of tokens in the content of the chunk." + }, + "metadata_token_count": { + "type": "integer", + "description": "The number of tokens in the metadata of the chunk." + } + }, + "additionalProperties": false, + "title": "ChunkMetadata", + "description": "`ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata` is set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not expected to change after. Use `Chunk.metadata` for metadata that will be used in the context during inference." + }, + "InsertChunksRequest": { + "type": "object", + "properties": { + "vector_db_id": { + "type": "string", + "description": "The identifier of the vector database to insert the chunks into." + }, + "chunks": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Chunk" + }, + "description": "The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types. `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional. If `metadata` is provided, you configure how Llama Stack formats the chunk during generation. If `embedding` is not provided, it will be computed later." + }, + "ttl_seconds": { + "type": "integer", + "description": "The time to live of the chunks." 
+ } + }, + "additionalProperties": false, + "required": [ + "vector_db_id", + "chunks" + ], + "title": "InsertChunksRequest" + }, + "ProviderInfo": { + "type": "object", + "properties": { + "api": { + "type": "string", + "description": "The API name this provider implements" + }, + "provider_id": { + "type": "string", + "description": "Unique identifier for the provider" + }, + "provider_type": { + "type": "string", + "description": "The type of provider implementation" + }, + "config": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Configuration parameters for the provider" + }, + "health": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Current health status of the provider" + } + }, + "additionalProperties": false, + "required": [ + "api", + "provider_id", + "provider_type", + "config", + "health" + ], + "title": "ProviderInfo", + "description": "Information about a registered provider including its configuration and health status." + }, + "InvokeToolRequest": { + "type": "object", + "properties": { + "tool_name": { + "type": "string", + "description": "The name of the tool to invoke." + }, + "kwargs": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "A dictionary of arguments to pass to the tool." 
+ } + }, + "additionalProperties": false, + "required": [ + "tool_name", + "kwargs" + ], + "title": "InvokeToolRequest" + }, + "ToolInvocationResult": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "(Optional) The output content from the tool execution" + }, + "error_message": { + "type": "string", + "description": "(Optional) Error message if the tool execution failed" + }, + "error_code": { + "type": "integer", + "description": "(Optional) Numeric error code if the tool execution failed" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Additional metadata about the tool execution" + } + }, + "additionalProperties": false, + "title": "ToolInvocationResult", + "description": "Result of a tool invocation." + }, + "PaginatedResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "The list of items for the current page" + }, + "has_more": { + "type": "boolean", + "description": "Whether there are more items available after this set" + }, + "url": { + "type": "string", + "description": "The URL for accessing this list" + } + }, + "additionalProperties": false, + "required": [ + "data", + "has_more" + ], + "title": "PaginatedResponse", + "description": "A generic paginated response that follows a simple format." 
+ }, + "Job": { + "type": "object", + "properties": { + "job_id": { + "type": "string", + "description": "Unique identifier for the job" + }, + "status": { + "type": "string", + "enum": [ + "completed", + "in_progress", + "failed", + "scheduled", + "cancelled" + ], + "description": "Current execution status of the job" + } + }, + "additionalProperties": false, + "required": [ + "job_id", + "status" + ], + "title": "Job", + "description": "A job execution instance with status tracking." + }, + "ListBenchmarksResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Benchmark" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListBenchmarksResponse" + }, + "Order": { + "type": "string", + "enum": [ + "asc", + "desc" + ], + "title": "Order", + "description": "Sort order for paginated responses." + }, + "ListOpenAIChatCompletionResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The ID of the chat completion" + }, + "choices": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChoice" + }, + "description": "List of choices" + }, + "object": { + "type": "string", + "const": "chat.completion", + "default": "chat.completion", + "description": "The object type, which will be \"chat.completion\"" + }, + "created": { + "type": "integer", + "description": "The Unix timestamp in seconds when the chat completion was created" + }, + "model": { + "type": "string", + "description": "The model that was used to generate the chat completion" + }, + "input_messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIMessageParam" + } + } + }, + "additionalProperties": false, + "required": [ + "id", + "choices", + "object", + "created", + "model", + "input_messages" + ], + "title": 
"OpenAICompletionWithInputMessages" + }, + "description": "List of chat completion objects with their input messages" + }, + "has_more": { + "type": "boolean", + "description": "Whether there are more completions available beyond this list" + }, + "first_id": { + "type": "string", + "description": "ID of the first completion in this list" + }, + "last_id": { + "type": "string", + "description": "ID of the last completion in this list" + }, + "object": { + "type": "string", + "const": "list", + "default": "list", + "description": "Must be \"list\" to identify this as a list response" + } + }, + "additionalProperties": false, + "required": [ + "data", + "has_more", + "first_id", + "last_id", + "object" + ], + "title": "ListOpenAIChatCompletionResponse", + "description": "Response from listing OpenAI-compatible chat completions." + }, + "ListDatasetsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Dataset" + }, + "description": "List of datasets" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListDatasetsResponse", + "description": "Response from listing datasets." 
+ }, + "ListModelsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Model" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListModelsResponse" + }, + "ListOpenAIResponseInputItem": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInput" + }, + "description": "List of input items" + }, + "object": { + "type": "string", + "const": "list", + "default": "list", + "description": "Object type identifier, always \"list\"" + } + }, + "additionalProperties": false, + "required": [ + "data", + "object" + ], + "title": "ListOpenAIResponseInputItem", + "description": "List container for OpenAI response input items." + }, + "ListOpenAIResponseObject": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseObjectWithInput" + }, + "description": "List of response objects with their input context" + }, + "has_more": { + "type": "boolean", + "description": "Whether there are more results available beyond this page" + }, + "first_id": { + "type": "string", + "description": "Identifier of the first item in this page" + }, + "last_id": { + "type": "string", + "description": "Identifier of the last item in this page" + }, + "object": { + "type": "string", + "const": "list", + "default": "list", + "description": "Object type identifier, always \"list\"" + } + }, + "additionalProperties": false, + "required": [ + "data", + "has_more", + "first_id", + "last_id", + "object" + ], + "title": "ListOpenAIResponseObject", + "description": "Paginated list of OpenAI response objects with navigation metadata." 
+ }, + "OpenAIResponseObjectWithInput": { + "type": "object", + "properties": { + "created_at": { + "type": "integer", + "description": "Unix timestamp when the response was created" + }, + "error": { + "$ref": "#/components/schemas/OpenAIResponseError", + "description": "(Optional) Error details if the response generation failed" + }, + "id": { + "type": "string", + "description": "Unique identifier for this response" + }, + "model": { + "type": "string", + "description": "Model identifier used for generation" + }, + "object": { + "type": "string", + "const": "response", + "default": "response", + "description": "Object type identifier, always \"response\"" + }, + "output": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseOutput" + }, + "description": "List of generated output items (messages, tool calls, etc.)" + }, + "parallel_tool_calls": { + "type": "boolean", + "default": false, + "description": "Whether tool calls can be executed in parallel" + }, + "previous_response_id": { + "type": "string", + "description": "(Optional) ID of the previous response in a conversation" + }, + "status": { + "type": "string", + "description": "Current status of the response generation" + }, + "temperature": { + "type": "number", + "description": "(Optional) Sampling temperature used for generation" + }, + "text": { + "$ref": "#/components/schemas/OpenAIResponseText", + "description": "Text formatting configuration for the response" + }, + "top_p": { + "type": "number", + "description": "(Optional) Nucleus sampling parameter used for generation" + }, + "truncation": { + "type": "string", + "description": "(Optional) Truncation strategy applied to the response" + }, + "user": { + "type": "string", + "description": "(Optional) User identifier associated with the request" + }, + "input": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInput" + }, + "description": "List of input items that led to this response" + } + }, 
+ "additionalProperties": false, + "required": [ + "created_at", + "id", + "model", + "object", + "output", + "parallel_tool_calls", + "status", + "text", + "input" + ], + "title": "OpenAIResponseObjectWithInput", + "description": "OpenAI response object extended with input context information." + }, + "ListPromptsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Prompt" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListPromptsResponse", + "description": "Response model to list prompts." + }, + "ListProvidersResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ProviderInfo" + }, + "description": "List of provider information objects" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListProvidersResponse", + "description": "Response containing a list of all available providers." + }, + "RouteInfo": { + "type": "object", + "properties": { + "route": { + "type": "string", + "description": "The API endpoint path" + }, + "method": { + "type": "string", + "description": "HTTP method for the route" + }, + "provider_types": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of provider types that implement this route" + } + }, + "additionalProperties": false, + "required": [ + "route", + "method", + "provider_types" + ], + "title": "RouteInfo", + "description": "Information about an API route including its path, method, and implementing providers." 
+ }, + "ListRoutesResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/RouteInfo" + }, + "description": "List of available route information objects" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListRoutesResponse", + "description": "Response containing a list of all available API routes." + }, + "ListToolDefsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolDef" + }, + "description": "List of tool definitions" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListToolDefsResponse", + "description": "Response containing a list of tool definitions." + }, + "ListScoringFunctionsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ScoringFn" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListScoringFunctionsResponse" + }, + "ListShieldsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Shield" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListShieldsResponse" + }, + "ListToolGroupsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolGroup" + }, + "description": "List of tool groups" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListToolGroupsResponse", + "description": "Response containing a list of tool groups." 
+ }, + "ListToolsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Tool" + }, + "description": "List of tools" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListToolsResponse", + "description": "Response containing a list of tools." + }, + "ListVectorDBsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/VectorDB" + }, + "description": "List of vector databases" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListVectorDBsResponse", + "description": "Response from listing vector databases." + }, + "Event": { + "oneOf": [ + { + "$ref": "#/components/schemas/UnstructuredLogEvent" + }, + { + "$ref": "#/components/schemas/MetricEvent" + }, + { + "$ref": "#/components/schemas/StructuredLogEvent" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "unstructured_log": "#/components/schemas/UnstructuredLogEvent", + "metric": "#/components/schemas/MetricEvent", + "structured_log": "#/components/schemas/StructuredLogEvent" + } + } + }, + "EventType": { + "type": "string", + "enum": [ + "unstructured_log", + "structured_log", + "metric" + ], + "title": "EventType", + "description": "The type of telemetry event being logged." + }, + "LogSeverity": { + "type": "string", + "enum": [ + "verbose", + "debug", + "info", + "warn", + "error", + "critical" + ], + "title": "LogSeverity", + "description": "The severity level of a log message." 
+ }, + "MetricEvent": { + "type": "object", + "properties": { + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace this event belongs to" + }, + "span_id": { + "type": "string", + "description": "Unique identifier for the span this event belongs to" + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the event occurred" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ] + }, + "description": "(Optional) Key-value pairs containing additional metadata about the event" + }, + "type": { + "$ref": "#/components/schemas/EventType", + "const": "metric", + "default": "metric", + "description": "Event type identifier set to METRIC" + }, + "metric": { + "type": "string", + "description": "The name of the metric being measured" + }, + "value": { + "oneOf": [ + { + "type": "integer" + }, + { + "type": "number" + } + ], + "description": "The numeric value of the metric measurement" + }, + "unit": { + "type": "string", + "description": "The unit of measurement for the metric value" + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "span_id", + "timestamp", + "type", + "metric", + "value", + "unit" + ], + "title": "MetricEvent", + "description": "A metric event containing a measured value." 
+ }, + "SpanEndPayload": { + "type": "object", + "properties": { + "type": { + "$ref": "#/components/schemas/StructuredLogType", + "const": "span_end", + "default": "span_end", + "description": "Payload type identifier set to SPAN_END" + }, + "status": { + "$ref": "#/components/schemas/SpanStatus", + "description": "The final status of the span indicating success or failure" + } + }, + "additionalProperties": false, + "required": [ + "type", + "status" + ], + "title": "SpanEndPayload", + "description": "Payload for a span end event." + }, + "SpanStartPayload": { + "type": "object", + "properties": { + "type": { + "$ref": "#/components/schemas/StructuredLogType", + "const": "span_start", + "default": "span_start", + "description": "Payload type identifier set to SPAN_START" + }, + "name": { + "type": "string", + "description": "Human-readable name describing the operation this span represents" + }, + "parent_span_id": { + "type": "string", + "description": "(Optional) Unique identifier for the parent span, if this is a child span" + } + }, + "additionalProperties": false, + "required": [ + "type", + "name" + ], + "title": "SpanStartPayload", + "description": "Payload for a span start event." 
+ }, + "StructuredLogEvent": { + "type": "object", + "properties": { + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace this event belongs to" + }, + "span_id": { + "type": "string", + "description": "Unique identifier for the span this event belongs to" + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the event occurred" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ] + }, + "description": "(Optional) Key-value pairs containing additional metadata about the event" + }, + "type": { + "$ref": "#/components/schemas/EventType", + "const": "structured_log", + "default": "structured_log", + "description": "Event type identifier set to STRUCTURED_LOG" + }, + "payload": { + "$ref": "#/components/schemas/StructuredLogPayload", + "description": "The structured payload data for the log event" + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "span_id", + "timestamp", + "type", + "payload" + ], + "title": "StructuredLogEvent", + "description": "A structured log event containing typed payload data." + }, + "StructuredLogPayload": { + "oneOf": [ + { + "$ref": "#/components/schemas/SpanStartPayload" + }, + { + "$ref": "#/components/schemas/SpanEndPayload" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "span_start": "#/components/schemas/SpanStartPayload", + "span_end": "#/components/schemas/SpanEndPayload" + } + } + }, + "StructuredLogType": { + "type": "string", + "enum": [ + "span_start", + "span_end" + ], + "title": "StructuredLogType", + "description": "The type of structured log event payload." 
+ }, + "UnstructuredLogEvent": { + "type": "object", + "properties": { + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace this event belongs to" + }, + "span_id": { + "type": "string", + "description": "Unique identifier for the span this event belongs to" + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the event occurred" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ] + }, + "description": "(Optional) Key-value pairs containing additional metadata about the event" + }, + "type": { + "$ref": "#/components/schemas/EventType", + "const": "unstructured_log", + "default": "unstructured_log", + "description": "Event type identifier set to UNSTRUCTURED_LOG" + }, + "message": { + "type": "string", + "description": "The log message text" + }, + "severity": { + "$ref": "#/components/schemas/LogSeverity", + "description": "The severity level of the log message" + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "span_id", + "timestamp", + "type", + "message", + "severity" + ], + "title": "UnstructuredLogEvent", + "description": "An unstructured log event containing a simple text message." + }, + "LogEventRequest": { + "type": "object", + "properties": { + "event": { + "$ref": "#/components/schemas/Event", + "description": "The event to log." + }, + "ttl_seconds": { + "type": "integer", + "description": "The time to live of the event." 
+ } + }, + "additionalProperties": false, + "required": [ + "event", + "ttl_seconds" + ], + "title": "LogEventRequest" + }, + "VectorStoreChunkingStrategy": { + "oneOf": [ + { + "$ref": "#/components/schemas/VectorStoreChunkingStrategyAuto" + }, + { + "$ref": "#/components/schemas/VectorStoreChunkingStrategyStatic" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "auto": "#/components/schemas/VectorStoreChunkingStrategyAuto", + "static": "#/components/schemas/VectorStoreChunkingStrategyStatic" + } + } + }, + "VectorStoreChunkingStrategyAuto": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "auto", + "default": "auto", + "description": "Strategy type, always \"auto\" for automatic chunking" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "VectorStoreChunkingStrategyAuto", + "description": "Automatic chunking strategy for vector store files." + }, + "VectorStoreChunkingStrategyStatic": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "static", + "default": "static", + "description": "Strategy type, always \"static\" for static chunking" + }, + "static": { + "$ref": "#/components/schemas/VectorStoreChunkingStrategyStaticConfig", + "description": "Configuration parameters for the static chunking strategy" + } + }, + "additionalProperties": false, + "required": [ + "type", + "static" + ], + "title": "VectorStoreChunkingStrategyStatic", + "description": "Static chunking strategy with configurable parameters." 
+ }, + "VectorStoreChunkingStrategyStaticConfig": { + "type": "object", + "properties": { + "chunk_overlap_tokens": { + "type": "integer", + "default": 400, + "description": "Number of tokens to overlap between adjacent chunks" + }, + "max_chunk_size_tokens": { + "type": "integer", + "default": 800, + "description": "Maximum number of tokens per chunk, must be between 100 and 4096" + } + }, + "additionalProperties": false, + "required": [ + "chunk_overlap_tokens", + "max_chunk_size_tokens" + ], + "title": "VectorStoreChunkingStrategyStaticConfig", + "description": "Configuration for static chunking strategy." + }, + "OpenaiAttachFileToVectorStoreRequest": { + "type": "object", + "properties": { + "file_id": { + "type": "string", + "description": "The ID of the file to attach to the vector store." + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The key-value attributes stored with the file, which can be used for filtering." + }, + "chunking_strategy": { + "$ref": "#/components/schemas/VectorStoreChunkingStrategy", + "description": "The chunking strategy to use for the file." 
+ } + }, + "additionalProperties": false, + "required": [ + "file_id" + ], + "title": "OpenaiAttachFileToVectorStoreRequest" + }, + "VectorStoreFileLastError": { + "type": "object", + "properties": { + "code": { + "oneOf": [ + { + "type": "string", + "const": "server_error" + }, + { + "type": "string", + "const": "rate_limit_exceeded" + } + ], + "description": "Error code indicating the type of failure" + }, + "message": { + "type": "string", + "description": "Human-readable error message describing the failure" + } + }, + "additionalProperties": false, + "required": [ + "code", + "message" + ], + "title": "VectorStoreFileLastError", + "description": "Error information for failed vector store file processing." + }, + "VectorStoreFileObject": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the file" + }, + "object": { + "type": "string", + "default": "vector_store.file", + "description": "Object type identifier, always \"vector_store.file\"" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Key-value attributes associated with the file" + }, + "chunking_strategy": { + "$ref": "#/components/schemas/VectorStoreChunkingStrategy", + "description": "Strategy used for splitting the file into chunks" + }, + "created_at": { + "type": "integer", + "description": "Timestamp when the file was added to the vector store" + }, + "last_error": { + "$ref": "#/components/schemas/VectorStoreFileLastError", + "description": "(Optional) Error information if file processing failed" + }, + "status": { + "$ref": "#/components/schemas/VectorStoreFileStatus", + "description": "Current processing status of the file" + }, + "usage_bytes": { + "type": "integer", + "default": 0, + "description": "Storage space 
used by this file in bytes" + }, + "vector_store_id": { + "type": "string", + "description": "ID of the vector store containing this file" + } + }, + "additionalProperties": false, + "required": [ + "id", + "object", + "attributes", + "chunking_strategy", + "created_at", + "status", + "usage_bytes", + "vector_store_id" + ], + "title": "VectorStoreFileObject", + "description": "OpenAI Vector Store File object." + }, + "VectorStoreFileStatus": { + "oneOf": [ + { + "type": "string", + "const": "completed" + }, + { + "type": "string", + "const": "in_progress" + }, + { + "type": "string", + "const": "cancelled" + }, + { + "type": "string", + "const": "failed" + } + ] + }, "OpenAIJSONSchema": { "type": "object", "properties": { @@ -12782,6 +15610,2170 @@ "title": "VectorStoreSearchResponsePage", "description": "Paginated response from searching a vector store." }, + "OpenaiUpdateVectorStoreRequest": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the vector store." + }, + "expires_after": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The expiration policy for a vector store." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Set of 16 key-value pairs that can be attached to an object." 
+ } + }, + "additionalProperties": false, + "title": "OpenaiUpdateVectorStoreRequest" + }, + "OpenaiUpdateVectorStoreFileRequest": { + "type": "object", + "properties": { + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The updated key-value attributes to store with the file." + } + }, + "additionalProperties": false, + "required": [ + "attributes" + ], + "title": "OpenaiUpdateVectorStoreFileRequest" + }, + "DPOAlignmentConfig": { + "type": "object", + "properties": { + "beta": { + "type": "number", + "description": "Temperature parameter for the DPO loss" + }, + "loss_type": { + "$ref": "#/components/schemas/DPOLossType", + "default": "sigmoid", + "description": "The type of loss function to use for DPO" + } + }, + "additionalProperties": false, + "required": [ + "beta", + "loss_type" + ], + "title": "DPOAlignmentConfig", + "description": "Configuration for Direct Preference Optimization (DPO) alignment." 
+ }, + "DPOLossType": { + "type": "string", + "enum": [ + "sigmoid", + "hinge", + "ipo", + "kto_pair" + ], + "title": "DPOLossType" + }, + "DataConfig": { + "type": "object", + "properties": { + "dataset_id": { + "type": "string", + "description": "Unique identifier for the training dataset" + }, + "batch_size": { + "type": "integer", + "description": "Number of samples per training batch" + }, + "shuffle": { + "type": "boolean", + "description": "Whether to shuffle the dataset during training" + }, + "data_format": { + "$ref": "#/components/schemas/DatasetFormat", + "description": "Format of the dataset (instruct or dialog)" + }, + "validation_dataset_id": { + "type": "string", + "description": "(Optional) Unique identifier for the validation dataset" + }, + "packed": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to pack multiple samples into a single sequence for efficiency" + }, + "train_on_input": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to compute loss on input tokens as well as output tokens" + } + }, + "additionalProperties": false, + "required": [ + "dataset_id", + "batch_size", + "shuffle", + "data_format" + ], + "title": "DataConfig", + "description": "Configuration for training data and data loading." + }, + "DatasetFormat": { + "type": "string", + "enum": [ + "instruct", + "dialog" + ], + "title": "DatasetFormat", + "description": "Format of the training dataset." 
+ }, + "EfficiencyConfig": { + "type": "object", + "properties": { + "enable_activation_checkpointing": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to use activation checkpointing to reduce memory usage" + }, + "enable_activation_offloading": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to offload activations to CPU to save GPU memory" + }, + "memory_efficient_fsdp_wrap": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to use memory-efficient FSDP wrapping" + }, + "fsdp_cpu_offload": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to offload FSDP parameters to CPU" + } + }, + "additionalProperties": false, + "title": "EfficiencyConfig", + "description": "Configuration for memory and compute efficiency optimizations." + }, + "OptimizerConfig": { + "type": "object", + "properties": { + "optimizer_type": { + "$ref": "#/components/schemas/OptimizerType", + "description": "Type of optimizer to use (adam, adamw, or sgd)" + }, + "lr": { + "type": "number", + "description": "Learning rate for the optimizer" + }, + "weight_decay": { + "type": "number", + "description": "Weight decay coefficient for regularization" + }, + "num_warmup_steps": { + "type": "integer", + "description": "Number of steps for learning rate warmup" + } + }, + "additionalProperties": false, + "required": [ + "optimizer_type", + "lr", + "weight_decay", + "num_warmup_steps" + ], + "title": "OptimizerConfig", + "description": "Configuration parameters for the optimization algorithm." + }, + "OptimizerType": { + "type": "string", + "enum": [ + "adam", + "adamw", + "sgd" + ], + "title": "OptimizerType", + "description": "Available optimizer algorithms for training." 
+ }, + "TrainingConfig": { + "type": "object", + "properties": { + "n_epochs": { + "type": "integer", + "description": "Number of training epochs to run" + }, + "max_steps_per_epoch": { + "type": "integer", + "default": 1, + "description": "Maximum number of steps to run per epoch" + }, + "gradient_accumulation_steps": { + "type": "integer", + "default": 1, + "description": "Number of steps to accumulate gradients before updating" + }, + "max_validation_steps": { + "type": "integer", + "default": 1, + "description": "(Optional) Maximum number of validation steps per epoch" + }, + "data_config": { + "$ref": "#/components/schemas/DataConfig", + "description": "(Optional) Configuration for data loading and formatting" + }, + "optimizer_config": { + "$ref": "#/components/schemas/OptimizerConfig", + "description": "(Optional) Configuration for the optimization algorithm" + }, + "efficiency_config": { + "$ref": "#/components/schemas/EfficiencyConfig", + "description": "(Optional) Configuration for memory and compute optimizations" + }, + "dtype": { + "type": "string", + "default": "bf16", + "description": "(Optional) Data type for model parameters (bf16, fp16, fp32)" + } + }, + "additionalProperties": false, + "required": [ + "n_epochs", + "max_steps_per_epoch", + "gradient_accumulation_steps" + ], + "title": "TrainingConfig", + "description": "Comprehensive configuration for the training process." + }, + "PreferenceOptimizeRequest": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string", + "description": "The UUID of the job to create." + }, + "finetuned_model": { + "type": "string", + "description": "The model to fine-tune." + }, + "algorithm_config": { + "$ref": "#/components/schemas/DPOAlignmentConfig", + "description": "The algorithm configuration." + }, + "training_config": { + "$ref": "#/components/schemas/TrainingConfig", + "description": "The training configuration." 
+ }, + "hyperparam_search_config": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The hyperparam search configuration." + }, + "logger_config": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The logger configuration." + } + }, + "additionalProperties": false, + "required": [ + "job_uuid", + "finetuned_model", + "algorithm_config", + "training_config", + "hyperparam_search_config", + "logger_config" + ], + "title": "PreferenceOptimizeRequest" + }, + "PostTrainingJob": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "job_uuid" + ], + "title": "PostTrainingJob" + }, + "DefaultRAGQueryGeneratorConfig": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "default", + "default": "default", + "description": "Type of query generator, always 'default'" + }, + "separator": { + "type": "string", + "default": " ", + "description": "String separator used to join query terms" + } + }, + "additionalProperties": false, + "required": [ + "type", + "separator" + ], + "title": "DefaultRAGQueryGeneratorConfig", + "description": "Configuration for the default RAG query generator." 
+ }, + "LLMRAGQueryGeneratorConfig": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "llm", + "default": "llm", + "description": "Type of query generator, always 'llm'" + }, + "model": { + "type": "string", + "description": "Name of the language model to use for query generation" + }, + "template": { + "type": "string", + "description": "Template string for formatting the query generation prompt" + } + }, + "additionalProperties": false, + "required": [ + "type", + "model", + "template" + ], + "title": "LLMRAGQueryGeneratorConfig", + "description": "Configuration for the LLM-based RAG query generator." + }, + "RAGQueryConfig": { + "type": "object", + "properties": { + "query_generator_config": { + "$ref": "#/components/schemas/RAGQueryGeneratorConfig", + "description": "Configuration for the query generator." + }, + "max_tokens_in_context": { + "type": "integer", + "default": 4096, + "description": "Maximum number of tokens in the context." + }, + "max_chunks": { + "type": "integer", + "default": 5, + "description": "Maximum number of chunks to retrieve." + }, + "chunk_template": { + "type": "string", + "default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n", + "description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\"" + }, + "mode": { + "$ref": "#/components/schemas/RAGSearchMode", + "default": "vector", + "description": "Search mode for retrieval—either \"vector\", \"keyword\", or \"hybrid\". Default \"vector\"." + }, + "ranker": { + "$ref": "#/components/schemas/Ranker", + "description": "Configuration for the ranker to use in hybrid search. Defaults to RRF ranker." 
+ } + }, + "additionalProperties": false, + "required": [ + "query_generator_config", + "max_tokens_in_context", + "max_chunks", + "chunk_template" + ], + "title": "RAGQueryConfig", + "description": "Configuration for the RAG query generation." + }, + "RAGQueryGeneratorConfig": { + "oneOf": [ + { + "$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig" + }, + { + "$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "default": "#/components/schemas/DefaultRAGQueryGeneratorConfig", + "llm": "#/components/schemas/LLMRAGQueryGeneratorConfig" + } + } + }, + "RAGSearchMode": { + "type": "string", + "enum": [ + "vector", + "keyword", + "hybrid" + ], + "title": "RAGSearchMode", + "description": "Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search for semantic matching - KEYWORD: Uses keyword-based search for exact matching - HYBRID: Combines both vector and keyword search for better results" + }, + "RRFRanker": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "rrf", + "default": "rrf", + "description": "The type of ranker, always \"rrf\"" + }, + "impact_factor": { + "type": "number", + "default": 60.0, + "description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0" + } + }, + "additionalProperties": false, + "required": [ + "type", + "impact_factor" + ], + "title": "RRFRanker", + "description": "Reciprocal Rank Fusion (RRF) ranker configuration." 
+ }, + "Ranker": { + "oneOf": [ + { + "$ref": "#/components/schemas/RRFRanker" + }, + { + "$ref": "#/components/schemas/WeightedRanker" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "rrf": "#/components/schemas/RRFRanker", + "weighted": "#/components/schemas/WeightedRanker" + } + } + }, + "WeightedRanker": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "weighted", + "default": "weighted", + "description": "The type of ranker, always \"weighted\"" + }, + "alpha": { + "type": "number", + "default": 0.5, + "description": "Weight factor between 0 and 1. 0 means only use keyword scores, 1 means only use vector scores, values in between blend both scores." + } + }, + "additionalProperties": false, + "required": [ + "type", + "alpha" + ], + "title": "WeightedRanker", + "description": "Weighted ranker configuration that combines vector and keyword scores." + }, + "QueryRequest": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "The query content to search for in the indexed documents" + }, + "vector_db_ids": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of vector database IDs to search within" + }, + "query_config": { + "$ref": "#/components/schemas/RAGQueryConfig", + "description": "(Optional) Configuration parameters for the query operation" + } + }, + "additionalProperties": false, + "required": [ + "content", + "vector_db_ids" + ], + "title": "QueryRequest" + }, + "RAGQueryResult": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "(Optional) The retrieved content from the query" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + 
] + }, + "description": "Additional metadata about the query result" + } + }, + "additionalProperties": false, + "required": [ + "metadata" + ], + "title": "RAGQueryResult", + "description": "Result of a RAG query containing retrieved content and metadata." + }, + "QueryChunksRequest": { + "type": "object", + "properties": { + "vector_db_id": { + "type": "string", + "description": "The identifier of the vector database to query." + }, + "query": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "The query to search for." + }, + "params": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The parameters of the query." + } + }, + "additionalProperties": false, + "required": [ + "vector_db_id", + "query" + ], + "title": "QueryChunksRequest" + }, + "QueryChunksResponse": { + "type": "object", + "properties": { + "chunks": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Chunk" + }, + "description": "List of content chunks returned from the query" + }, + "scores": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Relevance scores corresponding to each returned chunk" + } + }, + "additionalProperties": false, + "required": [ + "chunks", + "scores" + ], + "title": "QueryChunksResponse", + "description": "Response from querying chunks in a vector database." + }, + "QueryMetricsRequest": { + "type": "object", + "properties": { + "start_time": { + "type": "integer", + "description": "The start time of the metric to query." + }, + "end_time": { + "type": "integer", + "description": "The end time of the metric to query." + }, + "granularity": { + "type": "string", + "description": "The granularity of the metric to query." 
+ }, + "query_type": { + "type": "string", + "enum": [ + "range", + "instant" + ], + "description": "The type of query to perform." + }, + "label_matchers": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the label to match" + }, + "value": { + "type": "string", + "description": "The value to match against" + }, + "operator": { + "type": "string", + "enum": [ + "=", + "!=", + "=~", + "!~" + ], + "description": "The comparison operator to use for matching", + "default": "=" + } + }, + "additionalProperties": false, + "required": [ + "name", + "value", + "operator" + ], + "title": "MetricLabelMatcher", + "description": "A matcher for filtering metrics by label values." + }, + "description": "The label matchers to apply to the metric." + } + }, + "additionalProperties": false, + "required": [ + "start_time", + "query_type" + ], + "title": "QueryMetricsRequest" + }, + "MetricDataPoint": { + "type": "object", + "properties": { + "timestamp": { + "type": "integer", + "description": "Unix timestamp when the metric value was recorded" + }, + "value": { + "type": "number", + "description": "The numeric value of the metric at this timestamp" + }, + "unit": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "timestamp", + "value", + "unit" + ], + "title": "MetricDataPoint", + "description": "A single data point in a metric time series." + }, + "MetricLabel": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the label" + }, + "value": { + "type": "string", + "description": "The value of the label" + } + }, + "additionalProperties": false, + "required": [ + "name", + "value" + ], + "title": "MetricLabel", + "description": "A label associated with a metric." 
+ }, + "MetricSeries": { + "type": "object", + "properties": { + "metric": { + "type": "string", + "description": "The name of the metric" + }, + "labels": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MetricLabel" + }, + "description": "List of labels associated with this metric series" + }, + "values": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MetricDataPoint" + }, + "description": "List of data points in chronological order" + } + }, + "additionalProperties": false, + "required": [ + "metric", + "labels", + "values" + ], + "title": "MetricSeries", + "description": "A time series of metric data points." + }, + "QueryMetricsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MetricSeries" + }, + "description": "List of metric series matching the query criteria" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "QueryMetricsResponse", + "description": "Response containing metric time series data." + }, + "QueryCondition": { + "type": "object", + "properties": { + "key": { + "type": "string", + "description": "The attribute key to filter on" + }, + "op": { + "$ref": "#/components/schemas/QueryConditionOp", + "description": "The comparison operator to apply" + }, + "value": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ], + "description": "The value to compare against" + } + }, + "additionalProperties": false, + "required": [ + "key", + "op", + "value" + ], + "title": "QueryCondition", + "description": "A condition for filtering query results." + }, + "QueryConditionOp": { + "type": "string", + "enum": [ + "eq", + "ne", + "gt", + "lt" + ], + "title": "QueryConditionOp", + "description": "Comparison operators for query conditions." 
+ }, + "QuerySpansRequest": { + "type": "object", + "properties": { + "attribute_filters": { + "type": "array", + "items": { + "$ref": "#/components/schemas/QueryCondition" + }, + "description": "The attribute filters to apply to the spans." + }, + "attributes_to_return": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The attributes to return in the spans." + }, + "max_depth": { + "type": "integer", + "description": "The maximum depth of the tree." + } + }, + "additionalProperties": false, + "required": [ + "attribute_filters", + "attributes_to_return" + ], + "title": "QuerySpansRequest" + }, + "QuerySpansResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Span" + }, + "description": "List of spans matching the query criteria" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "QuerySpansResponse", + "description": "Response containing a list of spans." + }, + "QueryTracesRequest": { + "type": "object", + "properties": { + "attribute_filters": { + "type": "array", + "items": { + "$ref": "#/components/schemas/QueryCondition" + }, + "description": "The attribute filters to apply to the traces." + }, + "limit": { + "type": "integer", + "description": "The limit of traces to return." + }, + "offset": { + "type": "integer", + "description": "The offset of the traces to return." + }, + "order_by": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The order by of the traces to return." 
+ } + }, + "additionalProperties": false, + "title": "QueryTracesRequest" + }, + "QueryTracesResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Trace" + }, + "description": "List of traces matching the query criteria" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "QueryTracesResponse", + "description": "Response containing a list of traces." + }, + "RegisterBenchmarkRequest": { + "type": "object", + "properties": { + "benchmark_id": { + "type": "string", + "description": "The ID of the benchmark to register." + }, + "dataset_id": { + "type": "string", + "description": "The ID of the dataset to use for the benchmark." + }, + "scoring_functions": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The scoring functions to use for the benchmark." + }, + "provider_benchmark_id": { + "type": "string", + "description": "The ID of the provider benchmark to use for the benchmark." + }, + "provider_id": { + "type": "string", + "description": "The ID of the provider to use for the benchmark." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The metadata to use for the benchmark." + } + }, + "additionalProperties": false, + "required": [ + "benchmark_id", + "dataset_id", + "scoring_functions" + ], + "title": "RegisterBenchmarkRequest" + }, + "RegisterDatasetRequest": { + "type": "object", + "properties": { + "purpose": { + "type": "string", + "enum": [ + "post-training/messages", + "eval/question-answer", + "eval/messages-answer" + ], + "description": "The purpose of the dataset. One of: - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. 
{ \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } - \"eval/question-answer\": The dataset contains a question column and an answer column for evaluation. { \"question\": \"What is the capital of France?\", \"answer\": \"Paris\" } - \"eval/messages-answer\": The dataset contains a messages column with list of messages and an answer column for evaluation. { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, my name is John Doe.\"}, {\"role\": \"assistant\", \"content\": \"Hello, John Doe. How can I help you today?\"}, {\"role\": \"user\", \"content\": \"What's my name?\"}, ], \"answer\": \"John Doe\" }" + }, + "source": { + "$ref": "#/components/schemas/DataSource", + "description": "The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"data:csv;base64,{base64_content}\" } - { \"type\": \"uri\", \"uri\": \"huggingface://llamastack/simpleqa?split=train\" } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The metadata for the dataset. - E.g. {\"description\": \"My dataset\"}." + }, + "dataset_id": { + "type": "string", + "description": "The ID of the dataset. If not provided, an ID will be generated." 
+ } + }, + "additionalProperties": false, + "required": [ + "purpose", + "source" + ], + "title": "RegisterDatasetRequest" + }, + "RegisterModelRequest": { + "type": "object", + "properties": { + "model_id": { + "type": "string", + "description": "The identifier of the model to register." + }, + "provider_model_id": { + "type": "string", + "description": "The identifier of the model in the provider." + }, + "provider_id": { + "type": "string", + "description": "The identifier of the provider." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Any additional metadata for this model." + }, + "model_type": { + "$ref": "#/components/schemas/ModelType", + "description": "The type of model to register." + } + }, + "additionalProperties": false, + "required": [ + "model_id" + ], + "title": "RegisterModelRequest" + }, + "RegisterScoringFunctionRequest": { + "type": "object", + "properties": { + "scoring_fn_id": { + "type": "string", + "description": "The ID of the scoring function to register." + }, + "description": { + "type": "string", + "description": "The description of the scoring function." + }, + "return_type": { + "$ref": "#/components/schemas/ParamType", + "description": "The return type of the scoring function." + }, + "provider_scoring_fn_id": { + "type": "string", + "description": "The ID of the provider scoring function to use for the scoring function." + }, + "provider_id": { + "type": "string", + "description": "The ID of the provider to use for the scoring function." + }, + "params": { + "$ref": "#/components/schemas/ScoringFnParams", + "description": "The parameters for the scoring function for benchmark eval, these can be overridden for app eval." 
+ } + }, + "additionalProperties": false, + "required": [ + "scoring_fn_id", + "description", + "return_type" + ], + "title": "RegisterScoringFunctionRequest" + }, + "RegisterShieldRequest": { + "type": "object", + "properties": { + "shield_id": { + "type": "string", + "description": "The identifier of the shield to register." + }, + "provider_shield_id": { + "type": "string", + "description": "The identifier of the shield in the provider." + }, + "provider_id": { + "type": "string", + "description": "The identifier of the provider." + }, + "params": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The parameters of the shield." + } + }, + "additionalProperties": false, + "required": [ + "shield_id" + ], + "title": "RegisterShieldRequest" + }, + "RegisterToolGroupRequest": { + "type": "object", + "properties": { + "toolgroup_id": { + "type": "string", + "description": "The ID of the tool group to register." + }, + "provider_id": { + "type": "string", + "description": "The ID of the provider to use for the tool group." + }, + "mcp_endpoint": { + "$ref": "#/components/schemas/URL", + "description": "The MCP endpoint to use for the tool group." + }, + "args": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "A dictionary of arguments to pass to the tool group." 
+ } + }, + "additionalProperties": false, + "required": [ + "toolgroup_id", + "provider_id" + ], + "title": "RegisterToolGroupRequest" + }, + "RegisterVectorDbRequest": { + "type": "object", + "properties": { + "vector_db_id": { + "type": "string", + "description": "The identifier of the vector database to register." + }, + "embedding_model": { + "type": "string", + "description": "The embedding model to use." + }, + "embedding_dimension": { + "type": "integer", + "description": "The dimension of the embedding model." + }, + "provider_id": { + "type": "string", + "description": "The identifier of the provider." + }, + "vector_db_name": { + "type": "string", + "description": "The name of the vector database." + }, + "provider_vector_db_id": { + "type": "string", + "description": "The identifier of the vector database in the provider." + } + }, + "additionalProperties": false, + "required": [ + "vector_db_id", + "embedding_model" + ], + "title": "RegisterVectorDbRequest" + }, + "RerankRequest": { + "type": "object", + "properties": { + "model": { + "type": "string", + "description": "The identifier of the reranking model to use. The model must be a reranking model registered with Llama Stack and available via the /models endpoint." + }, + "query": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam" + }, + { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam" + } + ], + "description": "The search query to rank items against. Can be a string, text content part, or image content part. The input must not exceed the model's max input token length." + }, + "items": { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam" + }, + { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam" + } + ] + }, + "description": "List of items to rerank. 
Each item can be a string, text content part, or image content part. Each input must not exceed the model's max input token length." + }, + "max_num_results": { + "type": "integer", + "description": "(Optional) Maximum number of results to return. Default: returns all." + } + }, + "additionalProperties": false, + "required": [ + "model", + "query", + "items" + ], + "title": "RerankRequest" + }, + "RerankData": { + "type": "object", + "properties": { + "index": { + "type": "integer", + "description": "The original index of the document in the input list" + }, + "relevance_score": { + "type": "number", + "description": "The relevance score from the model output. Values are inverted when applicable so that higher scores indicate greater relevance." + } + }, + "additionalProperties": false, + "required": [ + "index", + "relevance_score" + ], + "title": "RerankData", + "description": "A single rerank result from a reranking response." + }, + "RerankResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/RerankData" + }, + "description": "List of rerank result objects, sorted by relevance score (descending)" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "RerankResponse", + "description": "Response from a reranking request." + }, + "ResumeAgentTurnRequest": { + "type": "object", + "properties": { + "tool_responses": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolResponse" + }, + "description": "The tool call responses to resume the turn with." + }, + "stream": { + "type": "boolean", + "description": "Whether to stream the response." + } + }, + "additionalProperties": false, + "required": [ + "tool_responses" + ], + "title": "ResumeAgentTurnRequest" + }, + "RunEvalRequest": { + "type": "object", + "properties": { + "benchmark_config": { + "$ref": "#/components/schemas/BenchmarkConfig", + "description": "The configuration for the benchmark." 
+ } + }, + "additionalProperties": false, + "required": [ + "benchmark_config" + ], + "title": "RunEvalRequest" + }, + "RunModerationRequest": { + "type": "object", + "properties": { + "input": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ], + "description": "Input (or inputs) to classify. Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models." + }, + "model": { + "type": "string", + "description": "The content moderation model you would like to use." + } + }, + "additionalProperties": false, + "required": [ + "input", + "model" + ], + "title": "RunModerationRequest" + }, + "ModerationObject": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The unique identifier for the moderation request." + }, + "model": { + "type": "string", + "description": "The model used to generate the moderation results." + }, + "results": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ModerationObjectResults" + }, + "description": "A list of moderation objects" + } + }, + "additionalProperties": false, + "required": [ + "id", + "model", + "results" + ], + "title": "ModerationObject", + "description": "A moderation object." + }, + "ModerationObjectResults": { + "type": "object", + "properties": { + "flagged": { + "type": "boolean", + "description": "Whether any of the below categories are flagged." + }, + "categories": { + "type": "object", + "additionalProperties": { + "type": "boolean" + }, + "description": "A list of the categories, and whether they are flagged or not." + }, + "category_applied_input_types": { + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "type": "string" + } + }, + "description": "A list of the categories along with the input type(s) that the score applies to." 
+ }, + "category_scores": { + "type": "object", + "additionalProperties": { + "type": "number" + }, + "description": "A list of the categories along with their scores as predicted by model." + }, + "user_message": { + "type": "string" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "flagged", + "metadata" + ], + "title": "ModerationObjectResults", + "description": "A moderation object." + }, + "RunShieldRequest": { + "type": "object", + "properties": { + "shield_id": { + "type": "string", + "description": "The identifier of the shield to run." + }, + "messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + }, + "description": "The messages to run the shield on." + }, + "params": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The parameters of the shield." + } + }, + "additionalProperties": false, + "required": [ + "shield_id", + "messages", + "params" + ], + "title": "RunShieldRequest" + }, + "RunShieldResponse": { + "type": "object", + "properties": { + "violation": { + "$ref": "#/components/schemas/SafetyViolation", + "description": "(Optional) Safety violation detected by the shield, if any" + } + }, + "additionalProperties": false, + "title": "RunShieldResponse", + "description": "Response from running a safety shield." 
+ }, + "SaveSpansToDatasetRequest": { + "type": "object", + "properties": { + "attribute_filters": { + "type": "array", + "items": { + "$ref": "#/components/schemas/QueryCondition" + }, + "description": "The attribute filters to apply to the spans." + }, + "attributes_to_save": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The attributes to save to the dataset." + }, + "dataset_id": { + "type": "string", + "description": "The ID of the dataset to save the spans to." + }, + "max_depth": { + "type": "integer", + "description": "The maximum depth of the tree." + } + }, + "additionalProperties": false, + "required": [ + "attribute_filters", + "attributes_to_save", + "dataset_id" + ], + "title": "SaveSpansToDatasetRequest" + }, + "ScoreRequest": { + "type": "object", + "properties": { + "input_rows": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "The rows to score." + }, + "scoring_functions": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "$ref": "#/components/schemas/ScoringFnParams" + }, + { + "type": "null" + } + ] + }, + "description": "The scoring functions to use for the scoring." + } + }, + "additionalProperties": false, + "required": [ + "input_rows", + "scoring_functions" + ], + "title": "ScoreRequest" + }, + "ScoreResponse": { + "type": "object", + "properties": { + "results": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ScoringResult" + }, + "description": "A map of scoring function name to ScoringResult." + } + }, + "additionalProperties": false, + "required": [ + "results" + ], + "title": "ScoreResponse", + "description": "The response from scoring." 
+ }, + "ScoreBatchRequest": { + "type": "object", + "properties": { + "dataset_id": { + "type": "string", + "description": "The ID of the dataset to score." + }, + "scoring_functions": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "$ref": "#/components/schemas/ScoringFnParams" + }, + { + "type": "null" + } + ] + }, + "description": "The scoring functions to use for the scoring." + }, + "save_results_dataset": { + "type": "boolean", + "description": "Whether to save the results to a dataset." + } + }, + "additionalProperties": false, + "required": [ + "dataset_id", + "scoring_functions", + "save_results_dataset" + ], + "title": "ScoreBatchRequest" + }, + "ScoreBatchResponse": { + "type": "object", + "properties": { + "dataset_id": { + "type": "string", + "description": "(Optional) The identifier of the dataset that was scored" + }, + "results": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ScoringResult" + }, + "description": "A map of scoring function name to ScoringResult" + } + }, + "additionalProperties": false, + "required": [ + "results" + ], + "title": "ScoreBatchResponse", + "description": "Response from batch scoring operations on datasets." + }, + "SetDefaultVersionRequest": { + "type": "object", + "properties": { + "version": { + "type": "integer", + "description": "The version to set as default." 
+ } + }, + "additionalProperties": false, + "required": [ + "version" + ], + "title": "SetDefaultVersionRequest" + }, + "AlgorithmConfig": { + "oneOf": [ + { + "$ref": "#/components/schemas/LoraFinetuningConfig" + }, + { + "$ref": "#/components/schemas/QATFinetuningConfig" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "LoRA": "#/components/schemas/LoraFinetuningConfig", + "QAT": "#/components/schemas/QATFinetuningConfig" + } + } + }, + "LoraFinetuningConfig": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "LoRA", + "default": "LoRA", + "description": "Algorithm type identifier, always \"LoRA\"" + }, + "lora_attn_modules": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of attention module names to apply LoRA to" + }, + "apply_lora_to_mlp": { + "type": "boolean", + "description": "Whether to apply LoRA to MLP layers" + }, + "apply_lora_to_output": { + "type": "boolean", + "description": "Whether to apply LoRA to output projection layers" + }, + "rank": { + "type": "integer", + "description": "Rank of the LoRA adaptation (lower rank = fewer parameters)" + }, + "alpha": { + "type": "integer", + "description": "LoRA scaling parameter that controls adaptation strength" + }, + "use_dora": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)" + }, + "quantize_base": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to quantize the base model weights" + } + }, + "additionalProperties": false, + "required": [ + "type", + "lora_attn_modules", + "apply_lora_to_mlp", + "apply_lora_to_output", + "rank", + "alpha" + ], + "title": "LoraFinetuningConfig", + "description": "Configuration for Low-Rank Adaptation (LoRA) fine-tuning." 
+ }, + "QATFinetuningConfig": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "QAT", + "default": "QAT", + "description": "Algorithm type identifier, always \"QAT\"" + }, + "quantizer_name": { + "type": "string", + "description": "Name of the quantization algorithm to use" + }, + "group_size": { + "type": "integer", + "description": "Size of groups for grouped quantization" + } + }, + "additionalProperties": false, + "required": [ + "type", + "quantizer_name", + "group_size" + ], + "title": "QATFinetuningConfig", + "description": "Configuration for Quantization-Aware Training (QAT) fine-tuning." + }, + "SupervisedFineTuneRequest": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string", + "description": "The UUID of the job to create." + }, + "training_config": { + "$ref": "#/components/schemas/TrainingConfig", + "description": "The training configuration." + }, + "hyperparam_search_config": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The hyperparam search configuration." + }, + "logger_config": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The logger configuration." + }, + "model": { + "type": "string", + "description": "The model to fine-tune." + }, + "checkpoint_dir": { + "type": "string", + "description": "The directory to save checkpoint(s) to." + }, + "algorithm_config": { + "$ref": "#/components/schemas/AlgorithmConfig", + "description": "The algorithm configuration." 
+ } + }, + "additionalProperties": false, + "required": [ + "job_uuid", + "training_config", + "hyperparam_search_config", + "logger_config" + ], + "title": "SupervisedFineTuneRequest" + }, + "SyntheticDataGenerateRequest": { + "type": "object", + "properties": { + "dialogs": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + }, + "description": "List of conversation messages to use as input for synthetic data generation" + }, + "filtering_function": { + "type": "string", + "enum": [ + "none", + "random", + "top_k", + "top_p", + "top_k_top_p", + "sigmoid" + ], + "description": "Type of filtering to apply to generated synthetic data samples" + }, + "model": { + "type": "string", + "description": "(Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint" + } + }, + "additionalProperties": false, + "required": [ + "dialogs", + "filtering_function" + ], + "title": "SyntheticDataGenerateRequest" + }, + "SyntheticDataGenerationResponse": { + "type": "object", + "properties": { + "synthetic_data": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "List of generated synthetic data samples that passed the filtering criteria" + }, + "statistics": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Statistical information about the generation process and filtering results" + } + }, + "additionalProperties": false, + "required": [ + "synthetic_data" + ], + "title": "SyntheticDataGenerationResponse", + "description": "Response from 
the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
+      },
+      "UpdatePromptRequest": {
+        "type": "object",
+        "properties": {
+          "prompt": {
+            "type": "string",
+            "description": "The updated prompt text content."
+          },
+          "version": {
+            "type": "integer",
+            "description": "The current version of the prompt being updated."
+          },
+          "variables": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "description": "Updated list of variable names that can be used in the prompt template."
+          },
+          "set_as_default": {
+            "type": "boolean",
+            "description": "Set the new version as the default (default=True)."
+          }
+        },
+        "additionalProperties": false,
+        "required": [
+          "prompt",
+          "version",
+          "set_as_default"
+        ],
+        "title": "UpdatePromptRequest"
+      },
       "VersionInfo": {
         "type": "object",
         "properties": {
diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml
index b9e03d614..ebe142557 100644
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@@ -3634,6 +3634,2130 @@ components:
     title: OpenAIUserMessageParam
     description: >-
       A message from the user in an OpenAI-compatible chat completion request.
+  OpenAICompletionWithInputMessages:
+    type: object
+    properties:
+      id:
+        type: string
+        description: The ID of the chat completion
+      choices:
+        type: array
+        items:
+          $ref: '#/components/schemas/OpenAIChoice'
+        description: List of choices
+      object:
+        type: string
+        const: chat.completion
+        default: chat.completion
+        description: >-
+          The object type, which will be "chat.completion"
+      created:
+        type: integer
+        description: >-
+          The Unix timestamp in seconds when the chat completion was created
+      model:
+        type: string
+        description: >-
+          The model that was used to generate the chat completion
+      input_messages:
+        type: array
+        items:
+          $ref: '#/components/schemas/OpenAIMessageParam'
+    additionalProperties: false
+    required:
+      - id
+      - choices
+      - object
+      - created
+      - model
+      - input_messages
+    title: OpenAICompletionWithInputMessages
+  DataSource:
+    oneOf:
+      - $ref: '#/components/schemas/URIDataSource'
+      - $ref: '#/components/schemas/RowsDataSource'
+    discriminator:
+      propertyName: type
+      mapping:
+        uri: '#/components/schemas/URIDataSource'
+        rows: '#/components/schemas/RowsDataSource'
+  Dataset:
+    type: object
+    properties:
+      identifier:
+        type: string
+      provider_resource_id:
+        type: string
+      provider_id:
+        type: string
+      type:
+        type: string
+        enum:
+          - model
+          - shield
+          - vector_db
+          - dataset
+          - scoring_function
+          - benchmark
+          - tool
+          - tool_group
+          - prompt
+        const: dataset
+        default: dataset
+        description: >-
+          Type of resource, always 'dataset' for datasets
+      purpose:
+        type: string
+        enum:
+          - post-training/messages
+          - eval/question-answer
+          - eval/messages-answer
+        description: >-
+          Purpose of the dataset indicating its intended use
+      source:
+        $ref: '#/components/schemas/DataSource'
+        description: >-
+          Data source configuration for the dataset
+      metadata:
+        type: object
+        additionalProperties:
+          oneOf:
+            - type: 'null'
+            - type: boolean
+            - type: number
+            - type: string
+            - type: array
+            - type: object
+        description: 
Additional metadata for the dataset + additionalProperties: false + required: + - identifier + - provider_id + - type + - purpose + - source + - metadata + title: Dataset + description: >- + Dataset resource for storing and accessing training or evaluation data. + RowsDataSource: + type: object + properties: + type: + type: string + const: rows + default: rows + rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user", + "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, + world!"}]} ] + additionalProperties: false + required: + - type + - rows + title: RowsDataSource + description: A dataset stored in rows. + URIDataSource: + type: object + properties: + type: + type: string + const: uri + default: uri + uri: + type: string + description: >- + The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl" + - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}" + additionalProperties: false + required: + - type + - uri + title: URIDataSource + description: >- + A dataset that can be obtained from a URI. 
+ Model: + type: object + properties: + identifier: + type: string + description: >- + Unique identifier for this resource in llama stack + provider_resource_id: + type: string + description: >- + Unique identifier for this resource in the provider + provider_id: + type: string + description: >- + ID of the provider that owns this resource + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: model + default: model + description: >- + The resource type, always 'model' for model resources + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Any additional metadata for this model + model_type: + $ref: '#/components/schemas/ModelType' + default: llm + description: >- + The type of model (LLM or embedding model) + additionalProperties: false + required: + - identifier + - provider_id + - type + - metadata + - model_type + title: Model + description: >- + A model resource representing an AI model registered in Llama Stack. + ModelType: + type: string + enum: + - llm + - embedding + - rerank + title: ModelType + description: >- + Enumeration of supported model types in Llama Stack. + AgentTurnInputType: + type: object + properties: + type: + type: string + const: agent_turn_input + default: agent_turn_input + description: >- + Discriminator type. Always "agent_turn_input" + additionalProperties: false + required: + - type + title: AgentTurnInputType + description: Parameter type for agent turn input. + ArrayType: + type: object + properties: + type: + type: string + const: array + default: array + description: Discriminator type. Always "array" + additionalProperties: false + required: + - type + title: ArrayType + description: Parameter type for array values. 
+ BooleanType: + type: object + properties: + type: + type: string + const: boolean + default: boolean + description: Discriminator type. Always "boolean" + additionalProperties: false + required: + - type + title: BooleanType + description: Parameter type for boolean values. + ChatCompletionInputType: + type: object + properties: + type: + type: string + const: chat_completion_input + default: chat_completion_input + description: >- + Discriminator type. Always "chat_completion_input" + additionalProperties: false + required: + - type + title: ChatCompletionInputType + description: >- + Parameter type for chat completion input. + CompletionInputType: + type: object + properties: + type: + type: string + const: completion_input + default: completion_input + description: >- + Discriminator type. Always "completion_input" + additionalProperties: false + required: + - type + title: CompletionInputType + description: Parameter type for completion input. + JsonType: + type: object + properties: + type: + type: string + const: json + default: json + description: Discriminator type. Always "json" + additionalProperties: false + required: + - type + title: JsonType + description: Parameter type for JSON values. + NumberType: + type: object + properties: + type: + type: string + const: number + default: number + description: Discriminator type. Always "number" + additionalProperties: false + required: + - type + title: NumberType + description: Parameter type for numeric values. + ObjectType: + type: object + properties: + type: + type: string + const: object + default: object + description: Discriminator type. Always "object" + additionalProperties: false + required: + - type + title: ObjectType + description: Parameter type for object values. 
+ ParamType: + oneOf: + - $ref: '#/components/schemas/StringType' + - $ref: '#/components/schemas/NumberType' + - $ref: '#/components/schemas/BooleanType' + - $ref: '#/components/schemas/ArrayType' + - $ref: '#/components/schemas/ObjectType' + - $ref: '#/components/schemas/JsonType' + - $ref: '#/components/schemas/UnionType' + - $ref: '#/components/schemas/ChatCompletionInputType' + - $ref: '#/components/schemas/CompletionInputType' + - $ref: '#/components/schemas/AgentTurnInputType' + discriminator: + propertyName: type + mapping: + string: '#/components/schemas/StringType' + number: '#/components/schemas/NumberType' + boolean: '#/components/schemas/BooleanType' + array: '#/components/schemas/ArrayType' + object: '#/components/schemas/ObjectType' + json: '#/components/schemas/JsonType' + union: '#/components/schemas/UnionType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + agent_turn_input: '#/components/schemas/AgentTurnInputType' + ScoringFn: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: scoring_function + default: scoring_function + description: >- + The resource type, always scoring_function + description: + type: string + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + return_type: + $ref: '#/components/schemas/ParamType' + params: + $ref: '#/components/schemas/ScoringFnParams' + additionalProperties: false + required: + - identifier + - provider_id + - type + - metadata + - return_type + title: ScoringFn + description: >- + A scoring function resource for evaluating model outputs. 
+ StringType: + type: object + properties: + type: + type: string + const: string + default: string + description: Discriminator type. Always "string" + additionalProperties: false + required: + - type + title: StringType + description: Parameter type for string values. + UnionType: + type: object + properties: + type: + type: string + const: union + default: union + description: Discriminator type. Always "union" + additionalProperties: false + required: + - type + title: UnionType + description: Parameter type for union values. + Shield: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: shield + default: shield + description: The resource type, always shield + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Configuration parameters for the shield + additionalProperties: false + required: + - identifier + - provider_id + - type + title: Shield + description: >- + A safety shield resource that can be used to check content. 
+ Span: + type: object + properties: + span_id: + type: string + description: Unique identifier for the span + trace_id: + type: string + description: >- + Unique identifier for the trace this span belongs to + parent_span_id: + type: string + description: >- + (Optional) Unique identifier for the parent span, if this is a child span + name: + type: string + description: >- + Human-readable name describing the operation this span represents + start_time: + type: string + format: date-time + description: Timestamp when the operation began + end_time: + type: string + format: date-time + description: >- + (Optional) Timestamp when the operation finished, if completed + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Key-value pairs containing additional metadata about the span + additionalProperties: false + required: + - span_id + - trace_id + - name + - start_time + title: Span + description: >- + A span representing a single operation within a trace. + GetSpanTreeRequest: + type: object + properties: + attributes_to_return: + type: array + items: + type: string + description: The attributes to return in the tree. + max_depth: + type: integer + description: The maximum depth of the tree. + additionalProperties: false + title: GetSpanTreeRequest + SpanStatus: + type: string + enum: + - ok + - error + title: SpanStatus + description: >- + The status of a span indicating whether it completed successfully or with + an error. 
+ SpanWithStatus: + type: object + properties: + span_id: + type: string + description: Unique identifier for the span + trace_id: + type: string + description: >- + Unique identifier for the trace this span belongs to + parent_span_id: + type: string + description: >- + (Optional) Unique identifier for the parent span, if this is a child span + name: + type: string + description: >- + Human-readable name describing the operation this span represents + start_time: + type: string + format: date-time + description: Timestamp when the operation began + end_time: + type: string + format: date-time + description: >- + (Optional) Timestamp when the operation finished, if completed + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Key-value pairs containing additional metadata about the span + status: + $ref: '#/components/schemas/SpanStatus' + description: >- + (Optional) The current status of the span + additionalProperties: false + required: + - span_id + - trace_id + - name + - start_time + title: SpanWithStatus + description: A span that includes status information. + QuerySpanTreeResponse: + type: object + properties: + data: + type: object + additionalProperties: + $ref: '#/components/schemas/SpanWithStatus' + description: >- + Dictionary mapping span IDs to spans with status information + additionalProperties: false + required: + - data + title: QuerySpanTreeResponse + description: >- + Response containing a tree structure of spans. 
+ Tool: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: tool + default: tool + description: Type of resource, always 'tool' + toolgroup_id: + type: string + description: >- + ID of the tool group this tool belongs to + description: + type: string + description: >- + Human-readable description of what the tool does + parameters: + type: array + items: + $ref: '#/components/schemas/ToolParameter' + description: List of parameters this tool accepts + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional metadata about the tool + additionalProperties: false + required: + - identifier + - provider_id + - type + - toolgroup_id + - description + - parameters + title: Tool + description: A tool that can be invoked by agents. 
+ ToolGroup: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: tool_group + default: tool_group + description: Type of resource, always 'tool_group' + mcp_endpoint: + $ref: '#/components/schemas/URL' + description: >- + (Optional) Model Context Protocol endpoint for remote tools + args: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional arguments for the tool group + additionalProperties: false + required: + - identifier + - provider_id + - type + title: ToolGroup + description: >- + A group of related tools managed together. + Trace: + type: object + properties: + trace_id: + type: string + description: Unique identifier for the trace + root_span_id: + type: string + description: >- + Unique identifier for the root span that started this trace + start_time: + type: string + format: date-time + description: Timestamp when the trace began + end_time: + type: string + format: date-time + description: >- + (Optional) Timestamp when the trace finished, if completed + additionalProperties: false + required: + - trace_id + - root_span_id + - start_time + title: Trace + description: >- + A trace representing the complete execution path of a request across multiple + operations. 
+ Checkpoint: + type: object + properties: + identifier: + type: string + description: Unique identifier for the checkpoint + created_at: + type: string + format: date-time + description: >- + Timestamp when the checkpoint was created + epoch: + type: integer + description: >- + Training epoch when the checkpoint was saved + post_training_job_id: + type: string + description: >- + Identifier of the training job that created this checkpoint + path: + type: string + description: >- + File system path where the checkpoint is stored + training_metrics: + $ref: '#/components/schemas/PostTrainingMetric' + description: >- + (Optional) Training metrics associated with this checkpoint + additionalProperties: false + required: + - identifier + - created_at + - epoch + - post_training_job_id + - path + title: Checkpoint + description: Checkpoint created during training runs. + PostTrainingJobArtifactsResponse: + type: object + properties: + job_uuid: + type: string + description: Unique identifier for the training job + checkpoints: + type: array + items: + $ref: '#/components/schemas/Checkpoint' + description: >- + List of model checkpoints created during training + additionalProperties: false + required: + - job_uuid + - checkpoints + title: PostTrainingJobArtifactsResponse + description: Artifacts of a finetuning job. + PostTrainingMetric: + type: object + properties: + epoch: + type: integer + description: Training epoch number + train_loss: + type: number + description: Loss value on the training dataset + validation_loss: + type: number + description: Loss value on the validation dataset + perplexity: + type: number + description: >- + Perplexity metric indicating model confidence + additionalProperties: false + required: + - epoch + - train_loss + - validation_loss + - perplexity + title: PostTrainingMetric + description: >- + Training metrics captured during post-training jobs. 
+ PostTrainingJobStatusResponse: + type: object + properties: + job_uuid: + type: string + description: Unique identifier for the training job + status: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + description: Current status of the training job + scheduled_at: + type: string + format: date-time + description: >- + (Optional) Timestamp when the job was scheduled + started_at: + type: string + format: date-time + description: >- + (Optional) Timestamp when the job execution began + completed_at: + type: string + format: date-time + description: >- + (Optional) Timestamp when the job finished, if completed + resources_allocated: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Information about computational resources allocated to the + job + checkpoints: + type: array + items: + $ref: '#/components/schemas/Checkpoint' + description: >- + List of model checkpoints created during training + additionalProperties: false + required: + - job_uuid + - status + - checkpoints + title: PostTrainingJobStatusResponse + description: Status of a finetuning job. 
+ ListPostTrainingJobsResponse: + type: object + properties: + data: + type: array + items: + type: object + properties: + job_uuid: + type: string + additionalProperties: false + required: + - job_uuid + title: PostTrainingJob + additionalProperties: false + required: + - data + title: ListPostTrainingJobsResponse + VectorDB: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: vector_db + default: vector_db + description: >- + Type of resource, always 'vector_db' for vector databases + embedding_model: + type: string + description: >- + Name of the embedding model to use for vector generation + embedding_dimension: + type: integer + description: Dimension of the embedding vectors + vector_db_name: + type: string + additionalProperties: false + required: + - identifier + - provider_id + - type + - embedding_model + - embedding_dimension + title: VectorDB + description: >- + Vector database resource for storing and querying vector embeddings. + HealthInfo: + type: object + properties: + status: + type: string + enum: + - OK + - Error + - Not Implemented + description: Current health status of the service + additionalProperties: false + required: + - status + title: HealthInfo + description: >- + Health status information for the service. + RAGDocument: + type: object + properties: + document_id: + type: string + description: The unique identifier for the document. + content: + oneOf: + - type: string + - $ref: '#/components/schemas/InterleavedContentItem' + - type: array + items: + $ref: '#/components/schemas/InterleavedContentItem' + - $ref: '#/components/schemas/URL' + description: The content of the document. + mime_type: + type: string + description: The MIME type of the document. 
+ metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Additional metadata for the document. + additionalProperties: false + required: + - document_id + - content + - metadata + title: RAGDocument + description: >- + A document to be used for document ingestion in the RAG Tool. + InsertRequest: + type: object + properties: + documents: + type: array + items: + $ref: '#/components/schemas/RAGDocument' + description: >- + List of documents to index in the RAG system + vector_db_id: + type: string + description: >- + ID of the vector database to store the document embeddings + chunk_size_in_tokens: + type: integer + description: >- + (Optional) Size in tokens for document chunking during indexing + additionalProperties: false + required: + - documents + - vector_db_id + - chunk_size_in_tokens + title: InsertRequest + Chunk: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + The content of the chunk, which can be interleaved text, images, or other + types. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Metadata associated with the chunk that will be used in the model context + during inference. + embedding: + type: array + items: + type: number + description: >- + Optional embedding for the chunk. If not provided, it will be computed + later. + stored_chunk_id: + type: string + description: >- + The chunk ID that is stored in the vector database. Used for backend functionality. + chunk_metadata: + $ref: '#/components/schemas/ChunkMetadata' + description: >- + Metadata for the chunk that will NOT be used in the context during inference. + The `chunk_metadata` is required backend functionality. 
+ additionalProperties: false + required: + - content + - metadata + title: Chunk + description: >- + A chunk of content that can be inserted into a vector database. + ChunkMetadata: + type: object + properties: + chunk_id: + type: string + description: >- + The ID of the chunk. If not set, it will be generated based on the document + ID and content. + document_id: + type: string + description: >- + The ID of the document this chunk belongs to. + source: + type: string + description: >- + The source of the content, such as a URL, file path, or other identifier. + created_timestamp: + type: integer + description: >- + An optional timestamp indicating when the chunk was created. + updated_timestamp: + type: integer + description: >- + An optional timestamp indicating when the chunk was last updated. + chunk_window: + type: string + description: >- + The window of the chunk, which can be used to group related chunks together. + chunk_tokenizer: + type: string + description: >- + The tokenizer used to create the chunk. Default is Tiktoken. + chunk_embedding_model: + type: string + description: >- + The embedding model used to create the chunk's embedding. + chunk_embedding_dimension: + type: integer + description: >- + The dimension of the embedding vector for the chunk. + content_token_count: + type: integer + description: >- + The number of tokens in the content of the chunk. + metadata_token_count: + type: integer + description: >- + The number of tokens in the metadata of the chunk. + additionalProperties: false + title: ChunkMetadata + description: >- + `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional + information about the chunk that will not be used in the context during + inference, but is required for backend functionality. The `ChunkMetadata` is + set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not + expected to change after. 
Use `Chunk.metadata` for metadata that will + be used in the context during inference. + InsertChunksRequest: + type: object + properties: + vector_db_id: + type: string + description: >- + The identifier of the vector database to insert the chunks into. + chunks: + type: array + items: + $ref: '#/components/schemas/Chunk' + description: >- + The chunks to insert. Each `Chunk` should contain content which can be + interleaved text, images, or other types. `metadata`: `dict[str, Any]` + and `embedding`: `List[float]` are optional. If `metadata` is provided, + you configure how Llama Stack formats the chunk during generation. If + `embedding` is not provided, it will be computed later. + ttl_seconds: + type: integer + description: The time to live of the chunks. + additionalProperties: false + required: + - vector_db_id + - chunks + title: InsertChunksRequest + ProviderInfo: + type: object + properties: + api: + type: string + description: The API name this provider implements + provider_id: + type: string + description: Unique identifier for the provider + provider_type: + type: string + description: The type of provider implementation + config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Configuration parameters for the provider + health: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Current health status of the provider + additionalProperties: false + required: + - api + - provider_id + - provider_type + - config + - health + title: ProviderInfo + description: >- + Information about a registered provider including its configuration and health + status. + InvokeToolRequest: + type: object + properties: + tool_name: + type: string + description: The name of the tool to invoke. 
+ kwargs: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + A dictionary of arguments to pass to the tool. + additionalProperties: false + required: + - tool_name + - kwargs + title: InvokeToolRequest + ToolInvocationResult: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + (Optional) The output content from the tool execution + error_message: + type: string + description: >- + (Optional) Error message if the tool execution failed + error_code: + type: integer + description: >- + (Optional) Numeric error code if the tool execution failed + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional metadata about the tool execution + additionalProperties: false + title: ToolInvocationResult + description: Result of a tool invocation. + PaginatedResponse: + type: object + properties: + data: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The list of items for the current page + has_more: + type: boolean + description: >- + Whether there are more items available after this set + url: + type: string + description: The URL for accessing this list + additionalProperties: false + required: + - data + - has_more + title: PaginatedResponse + description: >- + A generic paginated response that follows a simple format. 
+ Job: + type: object + properties: + job_id: + type: string + description: Unique identifier for the job + status: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + description: Current execution status of the job + additionalProperties: false + required: + - job_id + - status + title: Job + description: >- + A job execution instance with status tracking. + ListBenchmarksResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Benchmark' + additionalProperties: false + required: + - data + title: ListBenchmarksResponse + Order: + type: string + enum: + - asc + - desc + title: Order + description: Sort order for paginated responses. + ListOpenAIChatCompletionResponse: + type: object + properties: + data: + type: array + items: + type: object + properties: + id: + type: string + description: The ID of the chat completion + choices: + type: array + items: + $ref: '#/components/schemas/OpenAIChoice' + description: List of choices + object: + type: string + const: chat.completion + default: chat.completion + description: >- + The object type, which will be "chat.completion" + created: + type: integer + description: >- + The Unix timestamp in seconds when the chat completion was created + model: + type: string + description: >- + The model that was used to generate the chat completion + input_messages: + type: array + items: + $ref: '#/components/schemas/OpenAIMessageParam' + additionalProperties: false + required: + - id + - choices + - object + - created + - model + - input_messages + title: OpenAICompletionWithInputMessages + description: >- + List of chat completion objects with their input messages + has_more: + type: boolean + description: >- + Whether there are more completions available beyond this list + first_id: + type: string + description: ID of the first completion in this list + last_id: + type: string + description: ID of the last completion in this list + object: + 
type: string + const: list + default: list + description: >- + Must be "list" to identify this as a list response + additionalProperties: false + required: + - data + - has_more + - first_id + - last_id + - object + title: ListOpenAIChatCompletionResponse + description: >- + Response from listing OpenAI-compatible chat completions. + ListDatasetsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Dataset' + description: List of datasets + additionalProperties: false + required: + - data + title: ListDatasetsResponse + description: Response from listing datasets. + ListModelsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Model' + additionalProperties: false + required: + - data + title: ListModelsResponse + ListOpenAIResponseInputItem: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: List of input items + object: + type: string + const: list + default: list + description: Object type identifier, always "list" + additionalProperties: false + required: + - data + - object + title: ListOpenAIResponseInputItem + description: >- + List container for OpenAI response input items. 
+ ListOpenAIResponseObject: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseObjectWithInput' + description: >- + List of response objects with their input context + has_more: + type: boolean + description: >- + Whether there are more results available beyond this page + first_id: + type: string + description: >- + Identifier of the first item in this page + last_id: + type: string + description: Identifier of the last item in this page + object: + type: string + const: list + default: list + description: Object type identifier, always "list" + additionalProperties: false + required: + - data + - has_more + - first_id + - last_id + - object + title: ListOpenAIResponseObject + description: >- + Paginated list of OpenAI response objects with navigation metadata. + OpenAIResponseObjectWithInput: + type: object + properties: + created_at: + type: integer + description: >- + Unix timestamp when the response was created + error: + $ref: '#/components/schemas/OpenAIResponseError' + description: >- + (Optional) Error details if the response generation failed + id: + type: string + description: Unique identifier for this response + model: + type: string + description: Model identifier used for generation + object: + type: string + const: response + default: response + description: >- + Object type identifier, always "response" + output: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseOutput' + description: >- + List of generated output items (messages, tool calls, etc.) 
+ parallel_tool_calls: + type: boolean + default: false + description: >- + Whether tool calls can be executed in parallel + previous_response_id: + type: string + description: >- + (Optional) ID of the previous response in a conversation + status: + type: string + description: >- + Current status of the response generation + temperature: + type: number + description: >- + (Optional) Sampling temperature used for generation + text: + $ref: '#/components/schemas/OpenAIResponseText' + description: >- + Text formatting configuration for the response + top_p: + type: number + description: >- + (Optional) Nucleus sampling parameter used for generation + truncation: + type: string + description: >- + (Optional) Truncation strategy applied to the response + user: + type: string + description: >- + (Optional) User identifier associated with the request + input: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: >- + List of input items that led to this response + additionalProperties: false + required: + - created_at + - id + - model + - object + - output + - parallel_tool_calls + - status + - text + - input + title: OpenAIResponseObjectWithInput + description: >- + OpenAI response object extended with input context information. + ListPromptsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Prompt' + additionalProperties: false + required: + - data + title: ListPromptsResponse + description: Response model to list prompts. + ListProvidersResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ProviderInfo' + description: List of provider information objects + additionalProperties: false + required: + - data + title: ListProvidersResponse + description: >- + Response containing a list of all available providers. 
+ RouteInfo: + type: object + properties: + route: + type: string + description: The API endpoint path + method: + type: string + description: HTTP method for the route + provider_types: + type: array + items: + type: string + description: >- + List of provider types that implement this route + additionalProperties: false + required: + - route + - method + - provider_types + title: RouteInfo + description: >- + Information about an API route including its path, method, and implementing + providers. + ListRoutesResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/RouteInfo' + description: >- + List of available route information objects + additionalProperties: false + required: + - data + title: ListRoutesResponse + description: >- + Response containing a list of all available API routes. + ListToolDefsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ToolDef' + description: List of tool definitions + additionalProperties: false + required: + - data + title: ListToolDefsResponse + description: >- + Response containing a list of tool definitions. + ListScoringFunctionsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ScoringFn' + additionalProperties: false + required: + - data + title: ListScoringFunctionsResponse + ListShieldsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Shield' + additionalProperties: false + required: + - data + title: ListShieldsResponse + ListToolGroupsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ToolGroup' + description: List of tool groups + additionalProperties: false + required: + - data + title: ListToolGroupsResponse + description: >- + Response containing a list of tool groups. 
+ ListToolsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Tool' + description: List of tools + additionalProperties: false + required: + - data + title: ListToolsResponse + description: Response containing a list of tools. + ListVectorDBsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/VectorDB' + description: List of vector databases + additionalProperties: false + required: + - data + title: ListVectorDBsResponse + description: Response from listing vector databases. + Event: + oneOf: + - $ref: '#/components/schemas/UnstructuredLogEvent' + - $ref: '#/components/schemas/MetricEvent' + - $ref: '#/components/schemas/StructuredLogEvent' + discriminator: + propertyName: type + mapping: + unstructured_log: '#/components/schemas/UnstructuredLogEvent' + metric: '#/components/schemas/MetricEvent' + structured_log: '#/components/schemas/StructuredLogEvent' + EventType: + type: string + enum: + - unstructured_log + - structured_log + - metric + title: EventType + description: >- + The type of telemetry event being logged. + LogSeverity: + type: string + enum: + - verbose + - debug + - info + - warn + - error + - critical + title: LogSeverity + description: The severity level of a log message. 
+ MetricEvent: + type: object + properties: + trace_id: + type: string + description: >- + Unique identifier for the trace this event belongs to + span_id: + type: string + description: >- + Unique identifier for the span this event belongs to + timestamp: + type: string + format: date-time + description: Timestamp when the event occurred + attributes: + type: object + additionalProperties: + oneOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + description: >- + (Optional) Key-value pairs containing additional metadata about the event + type: + $ref: '#/components/schemas/EventType' + const: metric + default: metric + description: Event type identifier set to METRIC + metric: + type: string + description: The name of the metric being measured + value: + oneOf: + - type: integer + - type: number + description: >- + The numeric value of the metric measurement + unit: + type: string + description: >- + The unit of measurement for the metric value + additionalProperties: false + required: + - trace_id + - span_id + - timestamp + - type + - metric + - value + - unit + title: MetricEvent + description: >- + A metric event containing a measured value. + SpanEndPayload: + type: object + properties: + type: + $ref: '#/components/schemas/StructuredLogType' + const: span_end + default: span_end + description: Payload type identifier set to SPAN_END + status: + $ref: '#/components/schemas/SpanStatus' + description: >- + The final status of the span indicating success or failure + additionalProperties: false + required: + - type + - status + title: SpanEndPayload + description: Payload for a span end event. 
+ SpanStartPayload: + type: object + properties: + type: + $ref: '#/components/schemas/StructuredLogType' + const: span_start + default: span_start + description: >- + Payload type identifier set to SPAN_START + name: + type: string + description: >- + Human-readable name describing the operation this span represents + parent_span_id: + type: string + description: >- + (Optional) Unique identifier for the parent span, if this is a child span + additionalProperties: false + required: + - type + - name + title: SpanStartPayload + description: Payload for a span start event. + StructuredLogEvent: + type: object + properties: + trace_id: + type: string + description: >- + Unique identifier for the trace this event belongs to + span_id: + type: string + description: >- + Unique identifier for the span this event belongs to + timestamp: + type: string + format: date-time + description: Timestamp when the event occurred + attributes: + type: object + additionalProperties: + oneOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + description: >- + (Optional) Key-value pairs containing additional metadata about the event + type: + $ref: '#/components/schemas/EventType' + const: structured_log + default: structured_log + description: >- + Event type identifier set to STRUCTURED_LOG + payload: + $ref: '#/components/schemas/StructuredLogPayload' + description: >- + The structured payload data for the log event + additionalProperties: false + required: + - trace_id + - span_id + - timestamp + - type + - payload + title: StructuredLogEvent + description: >- + A structured log event containing typed payload data. 
+ StructuredLogPayload: + oneOf: + - $ref: '#/components/schemas/SpanStartPayload' + - $ref: '#/components/schemas/SpanEndPayload' + discriminator: + propertyName: type + mapping: + span_start: '#/components/schemas/SpanStartPayload' + span_end: '#/components/schemas/SpanEndPayload' + StructuredLogType: + type: string + enum: + - span_start + - span_end + title: StructuredLogType + description: >- + The type of structured log event payload. + UnstructuredLogEvent: + type: object + properties: + trace_id: + type: string + description: >- + Unique identifier for the trace this event belongs to + span_id: + type: string + description: >- + Unique identifier for the span this event belongs to + timestamp: + type: string + format: date-time + description: Timestamp when the event occurred + attributes: + type: object + additionalProperties: + oneOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + description: >- + (Optional) Key-value pairs containing additional metadata about the event + type: + $ref: '#/components/schemas/EventType' + const: unstructured_log + default: unstructured_log + description: >- + Event type identifier set to UNSTRUCTURED_LOG + message: + type: string + description: The log message text + severity: + $ref: '#/components/schemas/LogSeverity' + description: The severity level of the log message + additionalProperties: false + required: + - trace_id + - span_id + - timestamp + - type + - message + - severity + title: UnstructuredLogEvent + description: >- + An unstructured log event containing a simple text message. + LogEventRequest: + type: object + properties: + event: + $ref: '#/components/schemas/Event' + description: The event to log. + ttl_seconds: + type: integer + description: The time to live of the event. 
+ additionalProperties: false + required: + - event + - ttl_seconds + title: LogEventRequest + VectorStoreChunkingStrategy: + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + VectorStoreChunkingStrategyAuto: + type: object + properties: + type: + type: string + const: auto + default: auto + description: >- + Strategy type, always "auto" for automatic chunking + additionalProperties: false + required: + - type + title: VectorStoreChunkingStrategyAuto + description: >- + Automatic chunking strategy for vector store files. + VectorStoreChunkingStrategyStatic: + type: object + properties: + type: + type: string + const: static + default: static + description: >- + Strategy type, always "static" for static chunking + static: + $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' + description: >- + Configuration parameters for the static chunking strategy + additionalProperties: false + required: + - type + - static + title: VectorStoreChunkingStrategyStatic + description: >- + Static chunking strategy with configurable parameters. + VectorStoreChunkingStrategyStaticConfig: + type: object + properties: + chunk_overlap_tokens: + type: integer + default: 400 + description: >- + Number of tokens to overlap between adjacent chunks + max_chunk_size_tokens: + type: integer + default: 800 + description: >- + Maximum number of tokens per chunk, must be between 100 and 4096 + additionalProperties: false + required: + - chunk_overlap_tokens + - max_chunk_size_tokens + title: VectorStoreChunkingStrategyStaticConfig + description: >- + Configuration for static chunking strategy. 
+ OpenaiAttachFileToVectorStoreRequest: + type: object + properties: + file_id: + type: string + description: >- + The ID of the file to attach to the vector store. + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The key-value attributes stored with the file, which can be used for filtering. + chunking_strategy: + $ref: '#/components/schemas/VectorStoreChunkingStrategy' + description: >- + The chunking strategy to use for the file. + additionalProperties: false + required: + - file_id + title: OpenaiAttachFileToVectorStoreRequest + VectorStoreFileLastError: + type: object + properties: + code: + oneOf: + - type: string + const: server_error + - type: string + const: rate_limit_exceeded + description: >- + Error code indicating the type of failure + message: + type: string + description: >- + Human-readable error message describing the failure + additionalProperties: false + required: + - code + - message + title: VectorStoreFileLastError + description: >- + Error information for failed vector store file processing. 
+    VectorStoreFileObject:
+      type: object
+      properties:
+        id:
+          type: string
+          description: Unique identifier for the file
+        object:
+          type: string
+          default: vector_store.file
+          description: >-
+            Object type identifier, always "vector_store.file"
+        attributes:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: >-
+            Key-value attributes associated with the file
+        chunking_strategy:
+          $ref: '#/components/schemas/VectorStoreChunkingStrategy'
+          description: >-
+            Strategy used for splitting the file into chunks
+        created_at:
+          type: integer
+          description: >-
+            Timestamp when the file was added to the vector store
+        last_error:
+          $ref: '#/components/schemas/VectorStoreFileLastError'
+          description: >-
+            (Optional) Error information if file processing failed
+        status:
+          $ref: '#/components/schemas/VectorStoreFileStatus'
+          description: Current processing status of the file
+        usage_bytes:
+          type: integer
+          default: 0
+          description: Storage space used by this file in bytes
+        vector_store_id:
+          type: string
+          description: >-
+            ID of the vector store containing this file
+      additionalProperties: false
+      required:
+        - id
+        - object
+        - attributes
+        - chunking_strategy
+        - created_at
+        - status
+        - usage_bytes
+        - vector_store_id
+      title: VectorStoreFileObject
+      description: OpenAI Vector Store File object.
+    VectorStoreFileStatus:
+      oneOf:
+        - type: string
+          const: completed
+        - type: string
+          const: in_progress
+        - type: string
+          const: cancelled
+        - type: string
+          const: failed
     OpenAIJSONSchema:
       type: object
       properties:
@@ -9582,6 +11706,1606 @@ components:
     title: VectorStoreSearchResponsePage
     description: >-
       Paginated response from searching a vector store.
+    OpenaiUpdateVectorStoreRequest:
+      type: object
+      properties:
+        name:
+          type: string
+          description: The name of the vector store.
+        expires_after:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: >-
+            The expiration policy for a vector store.
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: >-
+            Set of 16 key-value pairs that can be attached to an object.
+      additionalProperties: false
+      title: OpenaiUpdateVectorStoreRequest
+    OpenaiUpdateVectorStoreFileRequest:
+      type: object
+      properties:
+        attributes:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: >-
+            The updated key-value attributes to store with the file.
+      additionalProperties: false
+      required:
+        - attributes
+      title: OpenaiUpdateVectorStoreFileRequest
+    DPOAlignmentConfig:
+      type: object
+      properties:
+        beta:
+          type: number
+          description: Temperature parameter for the DPO loss
+        loss_type:
+          $ref: '#/components/schemas/DPOLossType'
+          default: sigmoid
+          description: The type of loss function to use for DPO
+      additionalProperties: false
+      required:
+        - beta
+        - loss_type
+      title: DPOAlignmentConfig
+      description: >-
+        Configuration for Direct Preference Optimization (DPO) alignment.
+ DPOLossType: + type: string + enum: + - sigmoid + - hinge + - ipo + - kto_pair + title: DPOLossType + DataConfig: + type: object + properties: + dataset_id: + type: string + description: >- + Unique identifier for the training dataset + batch_size: + type: integer + description: Number of samples per training batch + shuffle: + type: boolean + description: >- + Whether to shuffle the dataset during training + data_format: + $ref: '#/components/schemas/DatasetFormat' + description: >- + Format of the dataset (instruct or dialog) + validation_dataset_id: + type: string + description: >- + (Optional) Unique identifier for the validation dataset + packed: + type: boolean + default: false + description: >- + (Optional) Whether to pack multiple samples into a single sequence for + efficiency + train_on_input: + type: boolean + default: false + description: >- + (Optional) Whether to compute loss on input tokens as well as output tokens + additionalProperties: false + required: + - dataset_id + - batch_size + - shuffle + - data_format + title: DataConfig + description: >- + Configuration for training data and data loading. + DatasetFormat: + type: string + enum: + - instruct + - dialog + title: DatasetFormat + description: Format of the training dataset. 
+ EfficiencyConfig: + type: object + properties: + enable_activation_checkpointing: + type: boolean + default: false + description: >- + (Optional) Whether to use activation checkpointing to reduce memory usage + enable_activation_offloading: + type: boolean + default: false + description: >- + (Optional) Whether to offload activations to CPU to save GPU memory + memory_efficient_fsdp_wrap: + type: boolean + default: false + description: >- + (Optional) Whether to use memory-efficient FSDP wrapping + fsdp_cpu_offload: + type: boolean + default: false + description: >- + (Optional) Whether to offload FSDP parameters to CPU + additionalProperties: false + title: EfficiencyConfig + description: >- + Configuration for memory and compute efficiency optimizations. + OptimizerConfig: + type: object + properties: + optimizer_type: + $ref: '#/components/schemas/OptimizerType' + description: >- + Type of optimizer to use (adam, adamw, or sgd) + lr: + type: number + description: Learning rate for the optimizer + weight_decay: + type: number + description: >- + Weight decay coefficient for regularization + num_warmup_steps: + type: integer + description: Number of steps for learning rate warmup + additionalProperties: false + required: + - optimizer_type + - lr + - weight_decay + - num_warmup_steps + title: OptimizerConfig + description: >- + Configuration parameters for the optimization algorithm. + OptimizerType: + type: string + enum: + - adam + - adamw + - sgd + title: OptimizerType + description: >- + Available optimizer algorithms for training. 
+ TrainingConfig: + type: object + properties: + n_epochs: + type: integer + description: Number of training epochs to run + max_steps_per_epoch: + type: integer + default: 1 + description: Maximum number of steps to run per epoch + gradient_accumulation_steps: + type: integer + default: 1 + description: >- + Number of steps to accumulate gradients before updating + max_validation_steps: + type: integer + default: 1 + description: >- + (Optional) Maximum number of validation steps per epoch + data_config: + $ref: '#/components/schemas/DataConfig' + description: >- + (Optional) Configuration for data loading and formatting + optimizer_config: + $ref: '#/components/schemas/OptimizerConfig' + description: >- + (Optional) Configuration for the optimization algorithm + efficiency_config: + $ref: '#/components/schemas/EfficiencyConfig' + description: >- + (Optional) Configuration for memory and compute optimizations + dtype: + type: string + default: bf16 + description: >- + (Optional) Data type for model parameters (bf16, fp16, fp32) + additionalProperties: false + required: + - n_epochs + - max_steps_per_epoch + - gradient_accumulation_steps + title: TrainingConfig + description: >- + Comprehensive configuration for the training process. + PreferenceOptimizeRequest: + type: object + properties: + job_uuid: + type: string + description: The UUID of the job to create. + finetuned_model: + type: string + description: The model to fine-tune. + algorithm_config: + $ref: '#/components/schemas/DPOAlignmentConfig' + description: The algorithm configuration. + training_config: + $ref: '#/components/schemas/TrainingConfig' + description: The training configuration. + hyperparam_search_config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The hyperparam search configuration. 
+ logger_config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The logger configuration. + additionalProperties: false + required: + - job_uuid + - finetuned_model + - algorithm_config + - training_config + - hyperparam_search_config + - logger_config + title: PreferenceOptimizeRequest + PostTrainingJob: + type: object + properties: + job_uuid: + type: string + additionalProperties: false + required: + - job_uuid + title: PostTrainingJob + DefaultRAGQueryGeneratorConfig: + type: object + properties: + type: + type: string + const: default + default: default + description: >- + Type of query generator, always 'default' + separator: + type: string + default: ' ' + description: >- + String separator used to join query terms + additionalProperties: false + required: + - type + - separator + title: DefaultRAGQueryGeneratorConfig + description: >- + Configuration for the default RAG query generator. + LLMRAGQueryGeneratorConfig: + type: object + properties: + type: + type: string + const: llm + default: llm + description: Type of query generator, always 'llm' + model: + type: string + description: >- + Name of the language model to use for query generation + template: + type: string + description: >- + Template string for formatting the query generation prompt + additionalProperties: false + required: + - type + - model + - template + title: LLMRAGQueryGeneratorConfig + description: >- + Configuration for the LLM-based RAG query generator. + RAGQueryConfig: + type: object + properties: + query_generator_config: + $ref: '#/components/schemas/RAGQueryGeneratorConfig' + description: Configuration for the query generator. + max_tokens_in_context: + type: integer + default: 4096 + description: Maximum number of tokens in the context. + max_chunks: + type: integer + default: 5 + description: Maximum number of chunks to retrieve. 
+ chunk_template: + type: string + default: > + Result {index} + + Content: {chunk.content} + + Metadata: {metadata} + description: >- + Template for formatting each retrieved chunk in the context. Available + placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk + content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: + {chunk.content}\nMetadata: {metadata}\n" + mode: + $ref: '#/components/schemas/RAGSearchMode' + default: vector + description: >- + Search mode for retrieval—either "vector", "keyword", or "hybrid". Default + "vector". + ranker: + $ref: '#/components/schemas/Ranker' + description: >- + Configuration for the ranker to use in hybrid search. Defaults to RRF + ranker. + additionalProperties: false + required: + - query_generator_config + - max_tokens_in_context + - max_chunks + - chunk_template + title: RAGQueryConfig + description: >- + Configuration for the RAG query generation. + RAGQueryGeneratorConfig: + oneOf: + - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' + - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' + discriminator: + propertyName: type + mapping: + default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' + llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' + RAGSearchMode: + type: string + enum: + - vector + - keyword + - hybrid + title: RAGSearchMode + description: >- + Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search + for semantic matching - KEYWORD: Uses keyword-based search for exact matching + - HYBRID: Combines both vector and keyword search for better results + RRFRanker: + type: object + properties: + type: + type: string + const: rrf + default: rrf + description: The type of ranker, always "rrf" + impact_factor: + type: number + default: 60.0 + description: >- + The impact factor for RRF scoring. Higher values give more weight to higher-ranked + results. 
Must be greater than 0 + additionalProperties: false + required: + - type + - impact_factor + title: RRFRanker + description: >- + Reciprocal Rank Fusion (RRF) ranker configuration. + Ranker: + oneOf: + - $ref: '#/components/schemas/RRFRanker' + - $ref: '#/components/schemas/WeightedRanker' + discriminator: + propertyName: type + mapping: + rrf: '#/components/schemas/RRFRanker' + weighted: '#/components/schemas/WeightedRanker' + WeightedRanker: + type: object + properties: + type: + type: string + const: weighted + default: weighted + description: The type of ranker, always "weighted" + alpha: + type: number + default: 0.5 + description: >- + Weight factor between 0 and 1. 0 means only use keyword scores, 1 means + only use vector scores, values in between blend both scores. + additionalProperties: false + required: + - type + - alpha + title: WeightedRanker + description: >- + Weighted ranker configuration that combines vector and keyword scores. + QueryRequest: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + The query content to search for in the indexed documents + vector_db_ids: + type: array + items: + type: string + description: >- + List of vector database IDs to search within + query_config: + $ref: '#/components/schemas/RAGQueryConfig' + description: >- + (Optional) Configuration parameters for the query operation + additionalProperties: false + required: + - content + - vector_db_ids + title: QueryRequest + RAGQueryResult: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + (Optional) The retrieved content from the query + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Additional metadata about the query result + additionalProperties: false + required: + - metadata + title: RAGQueryResult + description: >- 
+ Result of a RAG query containing retrieved content and metadata. + QueryChunksRequest: + type: object + properties: + vector_db_id: + type: string + description: >- + The identifier of the vector database to query. + query: + $ref: '#/components/schemas/InterleavedContent' + description: The query to search for. + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the query. + additionalProperties: false + required: + - vector_db_id + - query + title: QueryChunksRequest + QueryChunksResponse: + type: object + properties: + chunks: + type: array + items: + $ref: '#/components/schemas/Chunk' + description: >- + List of content chunks returned from the query + scores: + type: array + items: + type: number + description: >- + Relevance scores corresponding to each returned chunk + additionalProperties: false + required: + - chunks + - scores + title: QueryChunksResponse + description: >- + Response from querying chunks in a vector database. + QueryMetricsRequest: + type: object + properties: + start_time: + type: integer + description: The start time of the metric to query. + end_time: + type: integer + description: The end time of the metric to query. + granularity: + type: string + description: The granularity of the metric to query. + query_type: + type: string + enum: + - range + - instant + description: The type of query to perform. 
+ label_matchers: + type: array + items: + type: object + properties: + name: + type: string + description: The name of the label to match + value: + type: string + description: The value to match against + operator: + type: string + enum: + - '=' + - '!=' + - =~ + - '!~' + description: >- + The comparison operator to use for matching + default: '=' + additionalProperties: false + required: + - name + - value + - operator + title: MetricLabelMatcher + description: >- + A matcher for filtering metrics by label values. + description: >- + The label matchers to apply to the metric. + additionalProperties: false + required: + - start_time + - query_type + title: QueryMetricsRequest + MetricDataPoint: + type: object + properties: + timestamp: + type: integer + description: >- + Unix timestamp when the metric value was recorded + value: + type: number + description: >- + The numeric value of the metric at this timestamp + unit: + type: string + additionalProperties: false + required: + - timestamp + - value + - unit + title: MetricDataPoint + description: >- + A single data point in a metric time series. + MetricLabel: + type: object + properties: + name: + type: string + description: The name of the label + value: + type: string + description: The value of the label + additionalProperties: false + required: + - name + - value + title: MetricLabel + description: A label associated with a metric. + MetricSeries: + type: object + properties: + metric: + type: string + description: The name of the metric + labels: + type: array + items: + $ref: '#/components/schemas/MetricLabel' + description: >- + List of labels associated with this metric series + values: + type: array + items: + $ref: '#/components/schemas/MetricDataPoint' + description: >- + List of data points in chronological order + additionalProperties: false + required: + - metric + - labels + - values + title: MetricSeries + description: A time series of metric data points. 
+ QueryMetricsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/MetricSeries' + description: >- + List of metric series matching the query criteria + additionalProperties: false + required: + - data + title: QueryMetricsResponse + description: >- + Response containing metric time series data. + QueryCondition: + type: object + properties: + key: + type: string + description: The attribute key to filter on + op: + $ref: '#/components/schemas/QueryConditionOp' + description: The comparison operator to apply + value: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The value to compare against + additionalProperties: false + required: + - key + - op + - value + title: QueryCondition + description: A condition for filtering query results. + QueryConditionOp: + type: string + enum: + - eq + - ne + - gt + - lt + title: QueryConditionOp + description: >- + Comparison operators for query conditions. + QuerySpansRequest: + type: object + properties: + attribute_filters: + type: array + items: + $ref: '#/components/schemas/QueryCondition' + description: >- + The attribute filters to apply to the spans. + attributes_to_return: + type: array + items: + type: string + description: The attributes to return in the spans. + max_depth: + type: integer + description: The maximum depth of the tree. + additionalProperties: false + required: + - attribute_filters + - attributes_to_return + title: QuerySpansRequest + QuerySpansResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Span' + description: >- + List of spans matching the query criteria + additionalProperties: false + required: + - data + title: QuerySpansResponse + description: Response containing a list of spans. 
+ QueryTracesRequest: + type: object + properties: + attribute_filters: + type: array + items: + $ref: '#/components/schemas/QueryCondition' + description: >- + The attribute filters to apply to the traces. + limit: + type: integer + description: The limit of traces to return. + offset: + type: integer + description: The offset of the traces to return. + order_by: + type: array + items: + type: string + description: The order by of the traces to return. + additionalProperties: false + title: QueryTracesRequest + QueryTracesResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Trace' + description: >- + List of traces matching the query criteria + additionalProperties: false + required: + - data + title: QueryTracesResponse + description: Response containing a list of traces. + RegisterBenchmarkRequest: + type: object + properties: + benchmark_id: + type: string + description: The ID of the benchmark to register. + dataset_id: + type: string + description: >- + The ID of the dataset to use for the benchmark. + scoring_functions: + type: array + items: + type: string + description: >- + The scoring functions to use for the benchmark. + provider_benchmark_id: + type: string + description: >- + The ID of the provider benchmark to use for the benchmark. + provider_id: + type: string + description: >- + The ID of the provider to use for the benchmark. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The metadata to use for the benchmark. + additionalProperties: false + required: + - benchmark_id + - dataset_id + - scoring_functions + title: RegisterBenchmarkRequest + RegisterDatasetRequest: + type: object + properties: + purpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + description: >- + The purpose of the dataset. 
One of: - "post-training/messages": The dataset + contains a messages column with list of messages for post-training. { + "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", + "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset + contains a question column and an answer column for evaluation. { "question": + "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer": + The dataset contains a messages column with list of messages and an answer + column for evaluation. { "messages": [ {"role": "user", "content": "Hello, + my name is John Doe."}, {"role": "assistant", "content": "Hello, John + Doe. How can I help you today?"}, {"role": "user", "content": "What's + my name?"}, ], "answer": "John Doe" } + source: + $ref: '#/components/schemas/DataSource' + description: >- + The data source of the dataset. Ensure that the data source schema is + compatible with the purpose of the dataset. Examples: - { "type": "uri", + "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": + "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" + } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" + } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": + "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] + } ] } + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The metadata for the dataset. - E.g. {"description": "My dataset"}. + dataset_id: + type: string + description: >- + The ID of the dataset. If not provided, an ID will be generated. + additionalProperties: false + required: + - purpose + - source + title: RegisterDatasetRequest + RegisterModelRequest: + type: object + properties: + model_id: + type: string + description: The identifier of the model to register. 
+ provider_model_id: + type: string + description: >- + The identifier of the model in the provider. + provider_id: + type: string + description: The identifier of the provider. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Any additional metadata for this model. + model_type: + $ref: '#/components/schemas/ModelType' + description: The type of model to register. + additionalProperties: false + required: + - model_id + title: RegisterModelRequest + RegisterScoringFunctionRequest: + type: object + properties: + scoring_fn_id: + type: string + description: >- + The ID of the scoring function to register. + description: + type: string + description: The description of the scoring function. + return_type: + $ref: '#/components/schemas/ParamType' + description: The return type of the scoring function. + provider_scoring_fn_id: + type: string + description: >- + The ID of the provider scoring function to use for the scoring function. + provider_id: + type: string + description: >- + The ID of the provider to use for the scoring function. + params: + $ref: '#/components/schemas/ScoringFnParams' + description: >- + The parameters for the scoring function for benchmark eval, these can + be overridden for app eval. + additionalProperties: false + required: + - scoring_fn_id + - description + - return_type + title: RegisterScoringFunctionRequest + RegisterShieldRequest: + type: object + properties: + shield_id: + type: string + description: >- + The identifier of the shield to register. + provider_shield_id: + type: string + description: >- + The identifier of the shield in the provider. + provider_id: + type: string + description: The identifier of the provider. 
+ params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the shield. + additionalProperties: false + required: + - shield_id + title: RegisterShieldRequest + RegisterToolGroupRequest: + type: object + properties: + toolgroup_id: + type: string + description: The ID of the tool group to register. + provider_id: + type: string + description: >- + The ID of the provider to use for the tool group. + mcp_endpoint: + $ref: '#/components/schemas/URL' + description: >- + The MCP endpoint to use for the tool group. + args: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + A dictionary of arguments to pass to the tool group. + additionalProperties: false + required: + - toolgroup_id + - provider_id + title: RegisterToolGroupRequest + RegisterVectorDbRequest: + type: object + properties: + vector_db_id: + type: string + description: >- + The identifier of the vector database to register. + embedding_model: + type: string + description: The embedding model to use. + embedding_dimension: + type: integer + description: The dimension of the embedding model. + provider_id: + type: string + description: The identifier of the provider. + vector_db_name: + type: string + description: The name of the vector database. + provider_vector_db_id: + type: string + description: >- + The identifier of the vector database in the provider. + additionalProperties: false + required: + - vector_db_id + - embedding_model + title: RegisterVectorDbRequest + RerankRequest: + type: object + properties: + model: + type: string + description: >- + The identifier of the reranking model to use. The model must be a reranking + model registered with Llama Stack and available via the /models endpoint. 
+ query: + oneOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + description: >- + The search query to rank items against. Can be a string, text content + part, or image content part. The input must not exceed the model's max + input token length. + items: + type: array + items: + oneOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + description: >- + List of items to rerank. Each item can be a string, text content part, + or image content part. Each input must not exceed the model's max input + token length. + max_num_results: + type: integer + description: >- + (Optional) Maximum number of results to return. Default: returns all. + additionalProperties: false + required: + - model + - query + - items + title: RerankRequest + RerankData: + type: object + properties: + index: + type: integer + description: >- + The original index of the document in the input list + relevance_score: + type: number + description: >- + The relevance score from the model output. Values are inverted when applicable + so that higher scores indicate greater relevance. + additionalProperties: false + required: + - index + - relevance_score + title: RerankData + description: >- + A single rerank result from a reranking response. + RerankResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/RerankData' + description: >- + List of rerank result objects, sorted by relevance score (descending) + additionalProperties: false + required: + - data + title: RerankResponse + description: Response from a reranking request. 
+ ResumeAgentTurnRequest: + type: object + properties: + tool_responses: + type: array + items: + $ref: '#/components/schemas/ToolResponse' + description: >- + The tool call responses to resume the turn with. + stream: + type: boolean + description: Whether to stream the response. + additionalProperties: false + required: + - tool_responses + title: ResumeAgentTurnRequest + RunEvalRequest: + type: object + properties: + benchmark_config: + $ref: '#/components/schemas/BenchmarkConfig' + description: The configuration for the benchmark. + additionalProperties: false + required: + - benchmark_config + title: RunEvalRequest + RunModerationRequest: + type: object + properties: + input: + oneOf: + - type: string + - type: array + items: + type: string + description: >- + Input (or inputs) to classify. Can be a single string, an array of strings, + or an array of multi-modal input objects similar to other models. + model: + type: string + description: >- + The content moderation model you would like to use. + additionalProperties: false + required: + - input + - model + title: RunModerationRequest + ModerationObject: + type: object + properties: + id: + type: string + description: >- + The unique identifier for the moderation request. + model: + type: string + description: >- + The model used to generate the moderation results. + results: + type: array + items: + $ref: '#/components/schemas/ModerationObjectResults' + description: A list of moderation objects + additionalProperties: false + required: + - id + - model + - results + title: ModerationObject + description: A moderation object. + ModerationObjectResults: + type: object + properties: + flagged: + type: boolean + description: >- + Whether any of the below categories are flagged. + categories: + type: object + additionalProperties: + type: boolean + description: >- + A list of the categories, and whether they are flagged or not. 
+ category_applied_input_types: + type: object + additionalProperties: + type: array + items: + type: string + description: >- + A list of the categories along with the input type(s) that the score applies + to. + category_scores: + type: object + additionalProperties: + type: number + description: >- + A list of the categories along with their scores as predicted by model. + user_message: + type: string + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + additionalProperties: false + required: + - flagged + - metadata + title: ModerationObjectResults + description: A moderation object. + RunShieldRequest: + type: object + properties: + shield_id: + type: string + description: The identifier of the shield to run. + messages: + type: array + items: + $ref: '#/components/schemas/Message' + description: The messages to run the shield on. + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the shield. + additionalProperties: false + required: + - shield_id + - messages + - params + title: RunShieldRequest + RunShieldResponse: + type: object + properties: + violation: + $ref: '#/components/schemas/SafetyViolation' + description: >- + (Optional) Safety violation detected by the shield, if any + additionalProperties: false + title: RunShieldResponse + description: Response from running a safety shield. + SaveSpansToDatasetRequest: + type: object + properties: + attribute_filters: + type: array + items: + $ref: '#/components/schemas/QueryCondition' + description: >- + The attribute filters to apply to the spans. + attributes_to_save: + type: array + items: + type: string + description: The attributes to save to the dataset. + dataset_id: + type: string + description: >- + The ID of the dataset to save the spans to. 
+ max_depth: + type: integer + description: The maximum depth of the tree. + additionalProperties: false + required: + - attribute_filters + - attributes_to_save + - dataset_id + title: SaveSpansToDatasetRequest + ScoreRequest: + type: object + properties: + input_rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The rows to score. + scoring_functions: + type: object + additionalProperties: + oneOf: + - $ref: '#/components/schemas/ScoringFnParams' + - type: 'null' + description: >- + The scoring functions to use for the scoring. + additionalProperties: false + required: + - input_rows + - scoring_functions + title: ScoreRequest + ScoreResponse: + type: object + properties: + results: + type: object + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + description: >- + A map of scoring function name to ScoringResult. + additionalProperties: false + required: + - results + title: ScoreResponse + description: The response from scoring. + ScoreBatchRequest: + type: object + properties: + dataset_id: + type: string + description: The ID of the dataset to score. + scoring_functions: + type: object + additionalProperties: + oneOf: + - $ref: '#/components/schemas/ScoringFnParams' + - type: 'null' + description: >- + The scoring functions to use for the scoring. + save_results_dataset: + type: boolean + description: >- + Whether to save the results to a dataset. 
+ additionalProperties: false + required: + - dataset_id + - scoring_functions + - save_results_dataset + title: ScoreBatchRequest + ScoreBatchResponse: + type: object + properties: + dataset_id: + type: string + description: >- + (Optional) The identifier of the dataset that was scored + results: + type: object + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + description: >- + A map of scoring function name to ScoringResult + additionalProperties: false + required: + - results + title: ScoreBatchResponse + description: >- + Response from batch scoring operations on datasets. + SetDefaultVersionRequest: + type: object + properties: + version: + type: integer + description: The version to set as default. + additionalProperties: false + required: + - version + title: SetDefaultVersionRequest + AlgorithmConfig: + oneOf: + - $ref: '#/components/schemas/LoraFinetuningConfig' + - $ref: '#/components/schemas/QATFinetuningConfig' + discriminator: + propertyName: type + mapping: + LoRA: '#/components/schemas/LoraFinetuningConfig' + QAT: '#/components/schemas/QATFinetuningConfig' + LoraFinetuningConfig: + type: object + properties: + type: + type: string + const: LoRA + default: LoRA + description: Algorithm type identifier, always "LoRA" + lora_attn_modules: + type: array + items: + type: string + description: >- + List of attention module names to apply LoRA to + apply_lora_to_mlp: + type: boolean + description: Whether to apply LoRA to MLP layers + apply_lora_to_output: + type: boolean + description: >- + Whether to apply LoRA to output projection layers + rank: + type: integer + description: >- + Rank of the LoRA adaptation (lower rank = fewer parameters) + alpha: + type: integer + description: >- + LoRA scaling parameter that controls adaptation strength + use_dora: + type: boolean + default: false + description: >- + (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation) + quantize_base: + type: boolean + default: false + 
description: >- + (Optional) Whether to quantize the base model weights + additionalProperties: false + required: + - type + - lora_attn_modules + - apply_lora_to_mlp + - apply_lora_to_output + - rank + - alpha + title: LoraFinetuningConfig + description: >- + Configuration for Low-Rank Adaptation (LoRA) fine-tuning. + QATFinetuningConfig: + type: object + properties: + type: + type: string + const: QAT + default: QAT + description: Algorithm type identifier, always "QAT" + quantizer_name: + type: string + description: >- + Name of the quantization algorithm to use + group_size: + type: integer + description: Size of groups for grouped quantization + additionalProperties: false + required: + - type + - quantizer_name + - group_size + title: QATFinetuningConfig + description: >- + Configuration for Quantization-Aware Training (QAT) fine-tuning. + SupervisedFineTuneRequest: + type: object + properties: + job_uuid: + type: string + description: The UUID of the job to create. + training_config: + $ref: '#/components/schemas/TrainingConfig' + description: The training configuration. + hyperparam_search_config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The hyperparam search configuration. + logger_config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The logger configuration. + model: + type: string + description: The model to fine-tune. + checkpoint_dir: + type: string + description: The directory to save checkpoint(s) to. + algorithm_config: + $ref: '#/components/schemas/AlgorithmConfig' + description: The algorithm configuration. 
+ additionalProperties: false + required: + - job_uuid + - training_config + - hyperparam_search_config + - logger_config + title: SupervisedFineTuneRequest + SyntheticDataGenerateRequest: + type: object + properties: + dialogs: + type: array + items: + $ref: '#/components/schemas/Message' + description: >- + List of conversation messages to use as input for synthetic data generation + filtering_function: + type: string + enum: + - none + - random + - top_k + - top_p + - top_k_top_p + - sigmoid + description: >- + Type of filtering to apply to generated synthetic data samples + model: + type: string + description: >- + (Optional) The identifier of the model to use. The model must be registered + with Llama Stack and available via the /models endpoint + additionalProperties: false + required: + - dialogs + - filtering_function + title: SyntheticDataGenerateRequest + SyntheticDataGenerationResponse: + type: object + properties: + synthetic_data: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + List of generated synthetic data samples that passed the filtering criteria + statistics: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Statistical information about the generation process and filtering + results + additionalProperties: false + required: + - synthetic_data + title: SyntheticDataGenerationResponse + description: >- + Response from the synthetic data generation. Batch of (prompt, response, score) + tuples that pass the threshold. + UpdatePromptRequest: + type: object + properties: + prompt: + type: string + description: The updated prompt text content. + version: + type: integer + description: >- + The current version of the prompt being updated. 
+      variables:
+        type: array
+        items:
+          type: string
+        description: >-
+          Updated list of variable names that can be used in the prompt template.
+      set_as_default:
+        type: boolean
+        description: >-
+          Set the new version as the default (default=True).
+    additionalProperties: false
+    required:
+      - prompt
+      - version
+      - set_as_default
+    title: UpdatePromptRequest
   VersionInfo:
     type: object
     properties:
diff --git a/example.py b/example.py
new file mode 100644
index 000000000..7e968e24a
--- /dev/null
+++ b/example.py
@@ -0,0 +1,257 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import os
+
+# NOTE(review): never commit real API keys — read from the environment instead.
+os.environ["NVIDIA_API_KEY"] = "nvapi-REDACTED"
+# Option 1: Use default NIM URL (will auto-switch to ai.api.nvidia.com for rerank)
+# os.environ["NVIDIA_BASE_URL"] = "https://ai.api.nvidia.com"
+# Option 2: Use AI Foundation URL directly for rerank models
+# os.environ["NVIDIA_BASE_URL"] = "https://ai.api.nvidia.com/v1"
+os.environ["NVIDIA_BASE_URL"] = "https://integrate.api.nvidia.com"
+
+import base64
+import io
+from PIL import Image
+
+from llama_stack.core.library_client import LlamaStackAsLibraryClient
+
+client = LlamaStackAsLibraryClient("nvidia")
+client.initialize()
+
+# # response = client.inference.completion(
+# #     model_id="meta/llama-3.1-8b-instruct",
+# #     content="Complete the sentence using one word: Roses are red, violets are :",
+# #     stream=False,
+# #     sampling_params={
+# #         "max_tokens": 50,
+# #     },
+# # )
+# # print(f"Response: {response.content}")
+
+
+# response = client.inference.chat_completion(
+#     model_id="nvidia/nvidia-nemotron-nano-9b-v2",
+#     messages=[
+#         {
+#             "role": "system",
+#             "content": "/think",
+#         },
+#         {
+#             "role": "user",
+#             "content": "How are you?",
+#         },
+#     ],
+# 
stream=False, +# sampling_params={ +# "max_tokens": 1024, +# }, +# ) +# print(f"Response: {response}") + + +print(client.models.list()) +rerank_response = client.inference.rerank( + model="nvidia/llama-3.2-nv-rerankqa-1b-v2", + query="query", + items=[ + "item_1", + "item_2", + "item_3", + ] +) + +print(rerank_response) +for i, result in enumerate(rerank_response): + print(f"{i+1}. [Index: {result.index}, " + f"Score: {(result.relevance_score):.3f}]") + +# # from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition + +# # tool_definition = ToolDefinition( +# # tool_name="get_weather", +# # description="Get current weather information for a location", +# # parameters={ +# # "location": ToolParamDefinition( +# # param_type="string", +# # description="The city and state, e.g. San Francisco, CA", +# # required=True +# # ), +# # "unit": ToolParamDefinition( +# # param_type="string", +# # description="Temperature unit (celsius or fahrenheit)", +# # required=False, +# # default="celsius" +# # ) +# # } +# # ) + +# # # tool_response = client.inference.chat_completion( +# # # model_id="meta-llama/Llama-3.1-8B-Instruct", +# # # messages=[ +# # # {"role": "user", "content": "What's the weather like in San Francisco?"} +# # # ], +# # # tools=[tool_definition], +# # # ) + +# # # print(f"Tool Response: {tool_response.completion_message.content}") +# # # if tool_response.completion_message.tool_calls: +# # # for tool_call in tool_response.completion_message.tool_calls: +# # # print(f"Tool Called: {tool_call.tool_name}") +# # # print(f"Arguments: {tool_call.arguments}") + + +# # # from llama_stack.apis.inference import JsonSchemaResponseFormat, ResponseFormatType + +# # # person_schema = { +# # # "type": "object", +# # # "properties": { +# # # "name": {"type": "string"}, +# # # "age": {"type": "integer"}, +# # # "occupation": {"type": "string"}, +# # # }, +# # # "required": ["name", "age", "occupation"] +# # # } + +# # # response_format = 
JsonSchemaResponseFormat( +# # # type=ResponseFormatType.json_schema, +# # # json_schema=person_schema +# # # ) + +# # # structured_response = client.inference.chat_completion( +# # # model_id="meta-llama/Llama-3.1-8B-Instruct", +# # # messages=[ +# # # { +# # # "role": "user", +# # # "content": "Create a profile for a fictional person named Alice who is 30 years old and is a software engineer. " +# # # } +# # # ], +# # # response_format=response_format, +# # # ) + +# # # print(f"Structured Response: {structured_response.completion_message.content}") + +# # # print("\n" + "="*50) +# # # print("VISION LANGUAGE MODEL (VLM) EXAMPLE") +# # # print("="*50) + +# # def load_image_as_base64(image_path): +# # with open(image_path, "rb") as image_file: +# # img_bytes = image_file.read() +# # return base64.b64encode(img_bytes).decode("utf-8") + +# # image_path = "/home/jiayin/llama-stack/docs/dog.jpg" +# # demo_image_b64 = load_image_as_base64(image_path) + +# # vlm_response = client.inference.chat_completion( +# # model_id="nvidia/vila", +# # messages=[ +# # { +# # "role": "user", +# # "content": [ +# # { +# # "type": "image", +# # "image": { +# # "data": demo_image_b64, +# # }, +# # }, +# # { +# # "type": "text", +# # "text": "Please describe what you see in this image in detail.", +# # }, +# # ], +# # } +# # ], +# # ) + +# # print(f"VLM Response: {vlm_response.completion_message.content}") + +# # # print("\n" + "="*50) +# # # print("EMBEDDING EXAMPLE") +# # # print("="*50) + +# # # # Embedding example +# # # embedding_response = client.inference.embeddings( +# # # model_id="nvidia/llama-3.2-nv-embedqa-1b-v2", +# # # contents=["Hello world", "How are you today?"], +# # # task_type="query" +# # # ) + +# # # print(f"Number of embeddings: {len(embedding_response.embeddings)}") +# # # print(f"Embedding dimension: {len(embedding_response.embeddings[0])}") +# # # print(f"First few values: {embedding_response.embeddings[0][:5]}") + +# # # # from openai import OpenAI + +# # # # 
client = OpenAI( +# # # # base_url = "http://10.176.230.61:8000/v1", +# # # # api_key = "nvapi-YOUR-API-KEY-HERE" +# # # # ) + +# # # # # completion = client.completions.create( +# # # # # model="meta/llama-3.1-405b-instruct", +# # # # # prompt="How are you?", +# # # # # temperature=0.2, +# # # # # top_p=0.7, +# # # # # max_tokens=1024, +# # # # # stream=False +# # # # # ) + +# # # # # # completion = client.chat.completions.create( +# # # # # # model="meta/llama-3.1-8b-instruct", +# # # # # # messages=[{"role":"user","content":"hi"}], +# # # # # # temperature=0.2, +# # # # # # top_p=0.7, +# # # # # # max_tokens=1024, +# # # # # # stream=True +# # # # # # ) + +# # # # # for chunk in completion: +# # # # # if chunk.choices[0].delta.content is not None: +# # # # # print(chunk.choices[0].delta.content, end="") + + +# # # # # response = client.inference.completion( +# # # # # model_id="meta/llama-3.1-8b-instruct", +# # # # # content="Complete the sentence using one word: Roses are red, violets are :", +# # # # # stream=False, +# # # # # sampling_params={ +# # # # # "max_tokens": 50, +# # # # # }, +# # # # # ) +# # # # # print(f"Response: {response.content}") + + + + +# from openai import OpenAI + +# client = OpenAI( +# base_url = "https://integrate.api.nvidia.com/v1", +# api_key = "nvapi-YOUR-API-KEY-HERE" +# ) + +# completion = client.chat.completions.create( +# model="nvidia/nvidia-nemotron-nano-9b-v2", +# messages=[{"role":"system","content":"/think"}], +# temperature=0.6, +# top_p=0.95, +# max_tokens=2048, +# frequency_penalty=0, +# presence_penalty=0, +# stream=True, +# extra_body={ +# "min_thinking_tokens": 1024, +# "max_thinking_tokens": 2048 +# } +# ) + +# for chunk in completion: +# reasoning = getattr(chunk.choices[0].delta, "reasoning_content", None) +# if reasoning: +# print(reasoning, end="") +# if chunk.choices[0].delta.content is not None: +# 
print(chunk.choices[0].delta.content, end="") diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index e88a16315..e452d8157 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -1016,7 +1016,7 @@ class InferenceProvider(Protocol): ) -> RerankResponse: """Rerank a list of documents based on their relevance to a query. - :param model: The identifier of the reranking model to use. + :param model: The identifier of the reranking model to use. The model must be a reranking model registered with Llama Stack and available via the /models endpoint. :param query: The search query to rank items against. Can be a string, text content part, or image content part. The input must not exceed the model's max input token length. :param items: List of items to rerank. Each item can be a string, text content part, or image content part. Each input must not exceed the model's max input token length. :param max_num_results: (Optional) Maximum number of results to return. Default: returns all. diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py index 210ed9246..359f5bf0c 100644 --- a/llama_stack/apis/models/models.py +++ b/llama_stack/apis/models/models.py @@ -27,10 +27,12 @@ class ModelType(StrEnum): """Enumeration of supported model types in Llama Stack. 
:cvar llm: Large language model for text generation and completion :cvar embedding: Embedding model for converting text to vector representations + :cvar rerank: Reranking model for reordering documents by relevance """ llm = "llm" embedding = "embedding" + rerank = "rerank" @json_schema_type diff --git a/llama_stack/core/routers/inference.py b/llama_stack/core/routers/inference.py index c4338e614..e5826685e 100644 --- a/llama_stack/core/routers/inference.py +++ b/llama_stack/core/routers/inference.py @@ -41,9 +41,14 @@ from llama_stack.apis.inference import ( OpenAIMessageParam, OpenAIResponseFormatParam, Order, + RerankResponse, StopReason, ToolPromptFormat, ) +from llama_stack.apis.inference.inference import ( + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartTextParam, +) from llama_stack.apis.models import Model, ModelType from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry from llama_stack.log import get_logger @@ -179,6 +184,25 @@ class InferenceRouter(Inference): raise ModelTypeError(model_id, model.model_type, expected_model_type) return model + async def rerank( + self, + model: str, + query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam, + items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam], + max_num_results: int | None = None, + ) -> RerankResponse: + """Route rerank requests to the appropriate provider based on the model.""" + logger.debug(f"InferenceRouter.rerank: {model}") + model_obj = await self._get_model(model, ModelType.rerank) + provider = await self.routing_table.get_provider_impl(model_obj.identifier) + return await provider.rerank( + model=model_obj.identifier, + query=query, + items=items, + max_num_results=max_num_results, + ) + + async def openai_completion( self, model: str, diff --git a/llama_stack/providers/remote/inference/nvidia/models.py 
b/llama_stack/providers/remote/inference/nvidia/models.py new file mode 100644 index 000000000..a79a1c6aa --- /dev/null +++ b/llama_stack/providers/remote/inference/nvidia/models.py @@ -0,0 +1,131 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.apis.models import ModelType +from llama_stack.models.llama.sku_types import CoreModelId +from llama_stack.providers.utils.inference.model_registry import ( + ProviderModelEntry, + build_hf_repo_model_entry, +) + +SAFETY_MODELS_ENTRIES = [] + +# https://docs.nvidia.com/nim/large-language-models/latest/supported-llm-agnostic-architectures.html +MODEL_ENTRIES = [ + build_hf_repo_model_entry( + "meta/llama3-8b-instruct", + CoreModelId.llama3_8b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama3-70b-instruct", + CoreModelId.llama3_70b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.1-8b-instruct", + CoreModelId.llama3_1_8b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.1-70b-instruct", + CoreModelId.llama3_1_70b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.1-405b-instruct", + CoreModelId.llama3_1_405b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.2-1b-instruct", + CoreModelId.llama3_2_1b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.2-3b-instruct", + CoreModelId.llama3_2_3b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.2-11b-vision-instruct", + CoreModelId.llama3_2_11b_vision_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.2-90b-vision-instruct", + CoreModelId.llama3_2_90b_vision_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.3-70b-instruct", + CoreModelId.llama3_3_70b_instruct.value, + ), + ProviderModelEntry( + provider_model_id="nvidia/vila", + model_type=ModelType.llm, + ), + # NeMo 
Retriever Text Embedding models - + # + # https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html + # + # +-----------------------------------+--------+-----------+-----------+------------+ + # | Model ID | Max | Publisher | Embedding | Dynamic | + # | | Tokens | | Dimension | Embeddings | + # +-----------------------------------+--------+-----------+-----------+------------+ + # | nvidia/llama-3.2-nv-embedqa-1b-v2 | 8192 | NVIDIA | 2048 | Yes | + # | nvidia/nv-embedqa-e5-v5 | 512 | NVIDIA | 1024 | No | + # | nvidia/nv-embedqa-mistral-7b-v2 | 512 | NVIDIA | 4096 | No | + # | snowflake/arctic-embed-l | 512 | Snowflake | 1024 | No | + # +-----------------------------------+--------+-----------+-----------+------------+ + ProviderModelEntry( + provider_model_id="nvidia/llama-3.2-nv-embedqa-1b-v2", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 2048, + "context_length": 8192, + }, + ), + ProviderModelEntry( + provider_model_id="nvidia/nv-embedqa-e5-v5", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 1024, + "context_length": 512, + }, + ), + ProviderModelEntry( + provider_model_id="nvidia/nv-embedqa-mistral-7b-v2", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 4096, + "context_length": 512, + }, + ), + ProviderModelEntry( + provider_model_id="snowflake/arctic-embed-l", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 1024, + "context_length": 512, + }, + ), + # NVIDIA Reranking models + ProviderModelEntry( + provider_model_id="nv-rerank-qa-mistral-4b:1", + model_type=ModelType.rerank, + metadata={ + "endpoint": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking", + }, + ), + ProviderModelEntry( + provider_model_id="nvidia/nv-rerankqa-mistral-4b-v3", + model_type=ModelType.rerank, + metadata={ + "endpoint": "https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking", + }, + ), + ProviderModelEntry( + 
provider_model_id="nvidia/llama-3.2-nv-rerankqa-1b-v2", + model_type=ModelType.rerank, + metadata={ + "endpoint": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking", + }, + ), + # TODO(mf): how do we handle Nemotron models? + # "Llama3.1-Nemotron-51B-Instruct" -> "meta/llama-3.1-nemotron-51b-instruct", +] + SAFETY_MODELS_ENTRIES diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 2e6c3d769..b2fdec61f 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -12,6 +12,12 @@ from llama_stack.apis.inference import ( OpenAIEmbeddingData, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, + RerankData, + RerankResponse, +) +from llama_stack.apis.inference.inference import ( + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartTextParam, ) from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin @@ -80,6 +86,80 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): """ return f"{self._config.url}/v1" if self._config.append_api_version else self._config.url + async def rerank( + self, + model: str, + query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam, + items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam], + max_num_results: int | None = None, + ) -> RerankResponse: + provider_model_id = await self._get_provider_model_id(model) + + ranking_url = self.get_base_url() + model_obj = await self.model_store.get_model(model) + + if _is_nvidia_hosted(self._config) and "endpoint" in model_obj.metadata: + ranking_url = model_obj.metadata["endpoint"] + + logger.debug(f"Using rerank endpoint: {ranking_url} for model: {provider_model_id}") + + # Convert query to text format + if isinstance(query, str): + query_text = 
query + elif hasattr(query, "text"): + query_text = query.text + else: + raise ValueError("Query must be a string or text content part") + + # Convert items to text format + passages = [] + for item in items: + if isinstance(item, str): + passages.append({"text": item}) + elif hasattr(item, "text"): + passages.append({"text": item.text}) + else: + raise ValueError("Items must be strings or text content parts") + + payload = { + "model": provider_model_id, + "query": {"text": query_text}, + "passages": passages, + } + + headers = { + "Authorization": f"Bearer {self.get_api_key()}", + "Content-Type": "application/json", + } + + import aiohttp + + try: + async with aiohttp.ClientSession() as session: + async with session.post(ranking_url, headers=headers, json=payload) as response: + if response.status != 200: + response_text = await response.text() + raise ConnectionError( + f"NVIDIA rerank API request failed with status {response.status}: {response_text}" + ) + + result = await response.json() + rankings = result.get("rankings", []) + + # Convert to RerankData format + rerank_data = [] + for ranking in rankings: + rerank_data.append(RerankData(index=ranking["index"], relevance_score=ranking["logit"])) + + # Apply max_num_results limit if specified + if max_num_results is not None: + rerank_data = rerank_data[:max_num_results] + + return RerankResponse(data=rerank_data) + + except aiohttp.ClientError as e: + raise ConnectionError(f"Failed to connect to NVIDIA rerank API at {ranking_url}: {e}") from e + async def openai_embeddings( self, model: str,