From bab9d7aaea9ab3fd7d68cabed6e22345f1d7f739 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Wed, 3 Sep 2025 17:34:05 -0700 Subject: [PATCH 01/18] Add rerank API for NVIDIA Inference Provider --- docs/docs/providers/inference/index.mdx | 2 + docs/static/llama-stack-spec.html | 4992 +++++++++++++++++ docs/static/llama-stack-spec.yaml | 3724 ++++++++++++ example.py | 257 + llama_stack/apis/inference/inference.py | 2 +- llama_stack/apis/models/models.py | 2 + llama_stack/core/routers/inference.py | 24 + .../remote/inference/nvidia/models.py | 131 + .../remote/inference/nvidia/nvidia.py | 80 + 9 files changed, 9213 insertions(+), 1 deletion(-) create mode 100644 example.py create mode 100644 llama_stack/providers/remote/inference/nvidia/models.py diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index ebbaf1be1..e96169cad 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -4,6 +4,7 @@ description: "Llama Stack Inference API for generating completions, chat complet -This API provides the raw interface to the underlying models. Two kinds of models are supported: +This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. -- Embedding models: these models generate embeddings to be used for semantic search." +- Embedding models: these models generate embeddings to be used for semantic search. +- Rerank models: these models rerank documents by relevance to a query." sidebar_label: Inference title: Inference --- @@ -17,5 +18,6 @@ Llama Stack Inference API for generating completions, chat completions, and embe -This API provides the raw interface to the underlying models. Two kinds of models are supported: +This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. +- Rerank models: these models rerank documents by relevance to a query. This section contains documentation for all available providers for the **inference** API. diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 96e97035f..b260f01a7 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -4819,6 +4819,2834 @@ "title": "OpenAIUserMessageParam", "description": "A message from the user in an OpenAI-compatible chat completion request."
}, + "OpenAICompletionWithInputMessages": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The ID of the chat completion" + }, + "choices": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChoice" + }, + "description": "List of choices" + }, + "object": { + "type": "string", + "const": "chat.completion", + "default": "chat.completion", + "description": "The object type, which will be \"chat.completion\"" + }, + "created": { + "type": "integer", + "description": "The Unix timestamp in seconds when the chat completion was created" + }, + "model": { + "type": "string", + "description": "The model that was used to generate the chat completion" + }, + "input_messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIMessageParam" + } + } + }, + "additionalProperties": false, + "required": [ + "id", + "choices", + "object", + "created", + "model", + "input_messages" + ], + "title": "OpenAICompletionWithInputMessages" + }, + "DataSource": { + "oneOf": [ + { + "$ref": "#/components/schemas/URIDataSource" + }, + { + "$ref": "#/components/schemas/RowsDataSource" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "uri": "#/components/schemas/URIDataSource", + "rows": "#/components/schemas/RowsDataSource" + } + } + }, + "Dataset": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "dataset", + "default": "dataset", + "description": "Type of resource, always 'dataset' for datasets" + }, + "purpose": { + "type": "string", + "enum": [ + "post-training/messages", + "eval/question-answer", + "eval/messages-answer" + ], + "description": "Purpose of the dataset indicating its intended use" + }, + "source": { + "$ref": "#/components/schemas/DataSource", + "description": "Data source configuration for the dataset" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Additional metadata for the dataset" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type", + "purpose", + "source", + "metadata" + ], + "title": "Dataset", + "description": "Dataset resource for storing and accessing training or evaluation data." + }, + "RowsDataSource": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "rows", + "default": "rows" + }, + "rows": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "The dataset is stored in rows. E.g. - [ {\"messages\": [{\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}]} ]" + } + }, + "additionalProperties": false, + "required": [ + "type", + "rows" + ], + "title": "RowsDataSource", + "description": "A dataset stored in rows." 
+ }, + "URIDataSource": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "uri", + "default": "uri" + }, + "uri": { + "type": "string", + "description": "The dataset can be obtained from a URI. E.g. - \"https://mywebsite.com/mydata.jsonl\" - \"lsfs://mydata.jsonl\" - \"data:csv;base64,{base64_content}\"" + } + }, + "additionalProperties": false, + "required": [ + "type", + "uri" + ], + "title": "URIDataSource", + "description": "A dataset that can be obtained from a URI." + }, + "Model": { + "type": "object", + "properties": { + "identifier": { + "type": "string", + "description": "Unique identifier for this resource in llama stack" + }, + "provider_resource_id": { + "type": "string", + "description": "Unique identifier for this resource in the provider" + }, + "provider_id": { + "type": "string", + "description": "ID of the provider that owns this resource" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "model", + "default": "model", + "description": "The resource type, always 'model' for model resources" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Any additional metadata for this model" + }, + "model_type": { + "$ref": "#/components/schemas/ModelType", + "default": "llm", + "description": "The type of model (LLM or embedding model)" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type", + "metadata", + "model_type" + ], + "title": "Model", + "description": "A model resource representing an AI model registered in Llama Stack." + }, + "ModelType": { + "type": "string", + "enum": [ + "llm", + "embedding", + "rerank" + ], + "title": "ModelType", + "description": "Enumeration of supported model types in Llama Stack." + }, + "AgentTurnInputType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "agent_turn_input", + "default": "agent_turn_input", + "description": "Discriminator type. Always \"agent_turn_input\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "AgentTurnInputType", + "description": "Parameter type for agent turn input." + }, + "ArrayType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "array", + "default": "array", + "description": "Discriminator type. Always \"array\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "ArrayType", + "description": "Parameter type for array values." + }, + "BooleanType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "boolean", + "default": "boolean", + "description": "Discriminator type. Always \"boolean\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "BooleanType", + "description": "Parameter type for boolean values." + }, + "ChatCompletionInputType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "chat_completion_input", + "default": "chat_completion_input", + "description": "Discriminator type. 
Always \"chat_completion_input\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "ChatCompletionInputType", + "description": "Parameter type for chat completion input." + }, + "CompletionInputType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "completion_input", + "default": "completion_input", + "description": "Discriminator type. Always \"completion_input\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "CompletionInputType", + "description": "Parameter type for completion input." + }, + "JsonType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "json", + "default": "json", + "description": "Discriminator type. Always \"json\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "JsonType", + "description": "Parameter type for JSON values." + }, + "NumberType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "number", + "default": "number", + "description": "Discriminator type. Always \"number\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "NumberType", + "description": "Parameter type for numeric values." + }, + "ObjectType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "object", + "default": "object", + "description": "Discriminator type. Always \"object\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "ObjectType", + "description": "Parameter type for object values." + }, + "ParamType": { + "oneOf": [ + { + "$ref": "#/components/schemas/StringType" + }, + { + "$ref": "#/components/schemas/NumberType" + }, + { + "$ref": "#/components/schemas/BooleanType" + }, + { + "$ref": "#/components/schemas/ArrayType" + }, + { + "$ref": "#/components/schemas/ObjectType" + }, + { + "$ref": "#/components/schemas/JsonType" + }, + { + "$ref": "#/components/schemas/UnionType" + }, + { + "$ref": "#/components/schemas/ChatCompletionInputType" + }, + { + "$ref": "#/components/schemas/CompletionInputType" + }, + { + "$ref": "#/components/schemas/AgentTurnInputType" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "string": "#/components/schemas/StringType", + "number": "#/components/schemas/NumberType", + "boolean": "#/components/schemas/BooleanType", + "array": "#/components/schemas/ArrayType", + "object": "#/components/schemas/ObjectType", + "json": "#/components/schemas/JsonType", + "union": "#/components/schemas/UnionType", + "chat_completion_input": "#/components/schemas/ChatCompletionInputType", + "completion_input": "#/components/schemas/CompletionInputType", + "agent_turn_input": "#/components/schemas/AgentTurnInputType" + } + } + }, + "ScoringFn": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "scoring_function", + "default": "scoring_function", + "description": "The resource type, always scoring_function" + }, + "description": { + "type": "string" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + 
"type": "array" + }, + { + "type": "object" + } + ] + } + }, + "return_type": { + "$ref": "#/components/schemas/ParamType" + }, + "params": { + "$ref": "#/components/schemas/ScoringFnParams" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type", + "metadata", + "return_type" + ], + "title": "ScoringFn", + "description": "A scoring function resource for evaluating model outputs." + }, + "StringType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "string", + "default": "string", + "description": "Discriminator type. Always \"string\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "StringType", + "description": "Parameter type for string values." + }, + "UnionType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "union", + "default": "union", + "description": "Discriminator type. Always \"union\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "UnionType", + "description": "Parameter type for union values." + }, + "Shield": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "shield", + "default": "shield", + "description": "The resource type, always shield" + }, + "params": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Configuration parameters for the shield" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type" + ], + "title": "Shield", + "description": "A safety shield resource that can be used to check content." + }, + "Span": { + "type": "object", + "properties": { + "span_id": { + "type": "string", + "description": "Unique identifier for the span" + }, + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace this span belongs to" + }, + "parent_span_id": { + "type": "string", + "description": "(Optional) Unique identifier for the parent span, if this is a child span" + }, + "name": { + "type": "string", + "description": "Human-readable name describing the operation this span represents" + }, + "start_time": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the operation began" + }, + "end_time": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the operation finished, if completed" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Key-value pairs containing additional metadata about the span" + } + }, + "additionalProperties": false, + "required": [ + "span_id", + "trace_id", + "name", + "start_time" + ], + "title": "Span", + "description": "A span representing a single operation within a trace." 
+ }, + "GetSpanTreeRequest": { + "type": "object", + "properties": { + "attributes_to_return": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The attributes to return in the tree." + }, + "max_depth": { + "type": "integer", + "description": "The maximum depth of the tree." + } + }, + "additionalProperties": false, + "title": "GetSpanTreeRequest" + }, + "SpanStatus": { + "type": "string", + "enum": [ + "ok", + "error" + ], + "title": "SpanStatus", + "description": "The status of a span indicating whether it completed successfully or with an error." + }, + "SpanWithStatus": { + "type": "object", + "properties": { + "span_id": { + "type": "string", + "description": "Unique identifier for the span" + }, + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace this span belongs to" + }, + "parent_span_id": { + "type": "string", + "description": "(Optional) Unique identifier for the parent span, if this is a child span" + }, + "name": { + "type": "string", + "description": "Human-readable name describing the operation this span represents" + }, + "start_time": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the operation began" + }, + "end_time": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the operation finished, if completed" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Key-value pairs containing additional metadata about the span" + }, + "status": { + "$ref": "#/components/schemas/SpanStatus", + "description": "(Optional) The current status of the span" + } + }, + "additionalProperties": false, + "required": [ + "span_id", + "trace_id", + "name", + "start_time" + ], + "title": "SpanWithStatus", + "description": "A span that includes status information." + }, + "QuerySpanTreeResponse": { + "type": "object", + "properties": { + "data": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/SpanWithStatus" + }, + "description": "Dictionary mapping span IDs to spans with status information" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "QuerySpanTreeResponse", + "description": "Response containing a tree structure of spans." 
+ }, + "Tool": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "tool", + "default": "tool", + "description": "Type of resource, always 'tool'" + }, + "toolgroup_id": { + "type": "string", + "description": "ID of the tool group this tool belongs to" + }, + "description": { + "type": "string", + "description": "Human-readable description of what the tool does" + }, + "parameters": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolParameter" + }, + "description": "List of parameters this tool accepts" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Additional metadata about the tool" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type", + "toolgroup_id", + "description", + "parameters" + ], + "title": "Tool", + "description": "A tool that can be invoked by agents." + }, + "ToolGroup": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "tool_group", + "default": "tool_group", + "description": "Type of resource, always 'tool_group'" + }, + "mcp_endpoint": { + "$ref": "#/components/schemas/URL", + "description": "(Optional) Model Context Protocol endpoint for remote tools" + }, + "args": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Additional arguments for the tool group" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type" + ], + "title": "ToolGroup", + "description": "A group of related tools managed together." + }, + "Trace": { + "type": "object", + "properties": { + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace" + }, + "root_span_id": { + "type": "string", + "description": "Unique identifier for the root span that started this trace" + }, + "start_time": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the trace began" + }, + "end_time": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the trace finished, if completed" + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "root_span_id", + "start_time" + ], + "title": "Trace", + "description": "A trace representing the complete execution path of a request across multiple operations." 
+ }, + "Checkpoint": { + "type": "object", + "properties": { + "identifier": { + "type": "string", + "description": "Unique identifier for the checkpoint" + }, + "created_at": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the checkpoint was created" + }, + "epoch": { + "type": "integer", + "description": "Training epoch when the checkpoint was saved" + }, + "post_training_job_id": { + "type": "string", + "description": "Identifier of the training job that created this checkpoint" + }, + "path": { + "type": "string", + "description": "File system path where the checkpoint is stored" + }, + "training_metrics": { + "$ref": "#/components/schemas/PostTrainingMetric", + "description": "(Optional) Training metrics associated with this checkpoint" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "created_at", + "epoch", + "post_training_job_id", + "path" + ], + "title": "Checkpoint", + "description": "Checkpoint created during training runs." + }, + "PostTrainingJobArtifactsResponse": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string", + "description": "Unique identifier for the training job" + }, + "checkpoints": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Checkpoint" + }, + "description": "List of model checkpoints created during training" + } + }, + "additionalProperties": false, + "required": [ + "job_uuid", + "checkpoints" + ], + "title": "PostTrainingJobArtifactsResponse", + "description": "Artifacts of a finetuning job." + }, + "PostTrainingMetric": { + "type": "object", + "properties": { + "epoch": { + "type": "integer", + "description": "Training epoch number" + }, + "train_loss": { + "type": "number", + "description": "Loss value on the training dataset" + }, + "validation_loss": { + "type": "number", + "description": "Loss value on the validation dataset" + }, + "perplexity": { + "type": "number", + "description": "Perplexity metric indicating model confidence" + } + }, + "additionalProperties": false, + "required": [ + "epoch", + "train_loss", + "validation_loss", + "perplexity" + ], + "title": "PostTrainingMetric", + "description": "Training metrics captured during post-training jobs." 
+ }, + "PostTrainingJobStatusResponse": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string", + "description": "Unique identifier for the training job" + }, + "status": { + "type": "string", + "enum": [ + "completed", + "in_progress", + "failed", + "scheduled", + "cancelled" + ], + "description": "Current status of the training job" + }, + "scheduled_at": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the job was scheduled" + }, + "started_at": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the job execution began" + }, + "completed_at": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the job finished, if completed" + }, + "resources_allocated": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Information about computational resources allocated to the job" + }, + "checkpoints": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Checkpoint" + }, + "description": "List of model checkpoints created during training" + } + }, + "additionalProperties": false, + "required": [ + "job_uuid", + "status", + "checkpoints" + ], + "title": "PostTrainingJobStatusResponse", + "description": "Status of a finetuning job." + }, + "ListPostTrainingJobsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "job_uuid" + ], + "title": "PostTrainingJob" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListPostTrainingJobsResponse" + }, + "VectorDB": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "vector_db", + "default": "vector_db", + "description": "Type of resource, always 'vector_db' for vector databases" + }, + "embedding_model": { + "type": "string", + "description": "Name of the embedding model to use for vector generation" + }, + "embedding_dimension": { + "type": "integer", + "description": "Dimension of the embedding vectors" + }, + "vector_db_name": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type", + "embedding_model", + "embedding_dimension" + ], + "title": "VectorDB", + "description": "Vector database resource for storing and querying vector embeddings." + }, + "HealthInfo": { + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": [ + "OK", + "Error", + "Not Implemented" + ], + "description": "Current health status of the service" + } + }, + "additionalProperties": false, + "required": [ + "status" + ], + "title": "HealthInfo", + "description": "Health status information for the service." + }, + "RAGDocument": { + "type": "object", + "properties": { + "document_id": { + "type": "string", + "description": "The unique identifier for the document." 
+ }, + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/InterleavedContentItem" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/InterleavedContentItem" + } + }, + { + "$ref": "#/components/schemas/URL" + } + ], + "description": "The content of the document." + }, + "mime_type": { + "type": "string", + "description": "The MIME type of the document." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Additional metadata for the document." + } + }, + "additionalProperties": false, + "required": [ + "document_id", + "content", + "metadata" + ], + "title": "RAGDocument", + "description": "A document to be used for document ingestion in the RAG Tool." + }, + "InsertRequest": { + "type": "object", + "properties": { + "documents": { + "type": "array", + "items": { + "$ref": "#/components/schemas/RAGDocument" + }, + "description": "List of documents to index in the RAG system" + }, + "vector_db_id": { + "type": "string", + "description": "ID of the vector database to store the document embeddings" + }, + "chunk_size_in_tokens": { + "type": "integer", + "description": "(Optional) Size in tokens for document chunking during indexing" + } + }, + "additionalProperties": false, + "required": [ + "documents", + "vector_db_id", + "chunk_size_in_tokens" + ], + "title": "InsertRequest" + }, + "Chunk": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "The content of the chunk, which can be interleaved text, images, or other types." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Metadata associated with the chunk that will be used in the model context during inference." + }, + "embedding": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Optional embedding for the chunk. If not provided, it will be computed later." + }, + "stored_chunk_id": { + "type": "string", + "description": "The chunk ID that is stored in the vector database. Used for backend functionality." + }, + "chunk_metadata": { + "$ref": "#/components/schemas/ChunkMetadata", + "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality." + } + }, + "additionalProperties": false, + "required": [ + "content", + "metadata" + ], + "title": "Chunk", + "description": "A chunk of content that can be inserted into a vector database." + }, + "ChunkMetadata": { + "type": "object", + "properties": { + "chunk_id": { + "type": "string", + "description": "The ID of the chunk. If not set, it will be generated based on the document ID and content." + }, + "document_id": { + "type": "string", + "description": "The ID of the document this chunk belongs to." + }, + "source": { + "type": "string", + "description": "The source of the content, such as a URL, file path, or other identifier." + }, + "created_timestamp": { + "type": "integer", + "description": "An optional timestamp indicating when the chunk was created." 
+ }, + "updated_timestamp": { + "type": "integer", + "description": "An optional timestamp indicating when the chunk was last updated." + }, + "chunk_window": { + "type": "string", + "description": "The window of the chunk, which can be used to group related chunks together." + }, + "chunk_tokenizer": { + "type": "string", + "description": "The tokenizer used to create the chunk. Default is Tiktoken." + }, + "chunk_embedding_model": { + "type": "string", + "description": "The embedding model used to create the chunk's embedding." + }, + "chunk_embedding_dimension": { + "type": "integer", + "description": "The dimension of the embedding vector for the chunk." + }, + "content_token_count": { + "type": "integer", + "description": "The number of tokens in the content of the chunk." + }, + "metadata_token_count": { + "type": "integer", + "description": "The number of tokens in the metadata of the chunk." + } + }, + "additionalProperties": false, + "title": "ChunkMetadata", + "description": "`ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata` is set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not expected to change after. Use `Chunk.metadata` for metadata that will be used in the context during inference." + }, + "InsertChunksRequest": { + "type": "object", + "properties": { + "vector_db_id": { + "type": "string", + "description": "The identifier of the vector database to insert the chunks into." + }, + "chunks": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Chunk" + }, + "description": "The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types. `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional. If `metadata` is provided, you configure how Llama Stack formats the chunk during generation. If `embedding` is not provided, it will be computed later." + }, + "ttl_seconds": { + "type": "integer", + "description": "The time to live of the chunks." + } + }, + "additionalProperties": false, + "required": [ + "vector_db_id", + "chunks" + ], + "title": "InsertChunksRequest" + }, + "ProviderInfo": { + "type": "object", + "properties": { + "api": { + "type": "string", + "description": "The API name this provider implements" + }, + "provider_id": { + "type": "string", + "description": "Unique identifier for the provider" + }, + "provider_type": { + "type": "string", + "description": "The type of provider implementation" + }, + "config": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Configuration parameters for the provider" + }, + "health": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Current health status of the provider" + } + }, + "additionalProperties": false, + "required": [ + "api", + "provider_id", + "provider_type", + "config", + "health" + ], + "title": "ProviderInfo", + "description": "Information about a registered provider including its configuration and health status." 
+ }, + "InvokeToolRequest": { + "type": "object", + "properties": { + "tool_name": { + "type": "string", + "description": "The name of the tool to invoke." + }, + "kwargs": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "A dictionary of arguments to pass to the tool." + } + }, + "additionalProperties": false, + "required": [ + "tool_name", + "kwargs" + ], + "title": "InvokeToolRequest" + }, + "ToolInvocationResult": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "(Optional) The output content from the tool execution" + }, + "error_message": { + "type": "string", + "description": "(Optional) Error message if the tool execution failed" + }, + "error_code": { + "type": "integer", + "description": "(Optional) Numeric error code if the tool execution failed" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Additional metadata about the tool execution" + } + }, + "additionalProperties": false, + "title": "ToolInvocationResult", + "description": "Result of a tool invocation." + }, + "PaginatedResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "The list of items for the current page" + }, + "has_more": { + "type": "boolean", + "description": "Whether there are more items available after this set" + }, + "url": { + "type": "string", + "description": "The URL for accessing this list" + } + }, + "additionalProperties": false, + "required": [ + "data", + "has_more" + ], + "title": "PaginatedResponse", + "description": "A generic paginated response that follows a simple format." + }, + "Job": { + "type": "object", + "properties": { + "job_id": { + "type": "string", + "description": "Unique identifier for the job" + }, + "status": { + "type": "string", + "enum": [ + "completed", + "in_progress", + "failed", + "scheduled", + "cancelled" + ], + "description": "Current execution status of the job" + } + }, + "additionalProperties": false, + "required": [ + "job_id", + "status" + ], + "title": "Job", + "description": "A job execution instance with status tracking." + }, + "ListBenchmarksResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Benchmark" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListBenchmarksResponse" + }, + "Order": { + "type": "string", + "enum": [ + "asc", + "desc" + ], + "title": "Order", + "description": "Sort order for paginated responses." 
+ }, + "ListOpenAIChatCompletionResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The ID of the chat completion" + }, + "choices": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChoice" + }, + "description": "List of choices" + }, + "object": { + "type": "string", + "const": "chat.completion", + "default": "chat.completion", + "description": "The object type, which will be \"chat.completion\"" + }, + "created": { + "type": "integer", + "description": "The Unix timestamp in seconds when the chat completion was created" + }, + "model": { + "type": "string", + "description": "The model that was used to generate the chat completion" + }, + "input_messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIMessageParam" + } + } + }, + "additionalProperties": false, + "required": [ + "id", + "choices", + "object", + "created", + "model", + "input_messages" + ], + "title": "OpenAICompletionWithInputMessages" + }, + "description": "List of chat completion objects with their input messages" + }, + "has_more": { + "type": "boolean", + "description": "Whether there are more completions available beyond this list" + }, + "first_id": { + "type": "string", + "description": "ID of the first completion in this list" + }, + "last_id": { + "type": "string", + "description": "ID of the last completion in this list" + }, + "object": { + "type": "string", + "const": "list", + "default": "list", + "description": "Must be \"list\" to identify this as a list response" + } + }, + "additionalProperties": false, + "required": [ + "data", + "has_more", + "first_id", + "last_id", + "object" + ], + "title": "ListOpenAIChatCompletionResponse", + "description": "Response from listing OpenAI-compatible chat completions." + }, + "ListDatasetsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Dataset" + }, + "description": "List of datasets" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListDatasetsResponse", + "description": "Response from listing datasets." + }, + "ListModelsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Model" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListModelsResponse" + }, + "ListOpenAIResponseInputItem": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInput" + }, + "description": "List of input items" + }, + "object": { + "type": "string", + "const": "list", + "default": "list", + "description": "Object type identifier, always \"list\"" + } + }, + "additionalProperties": false, + "required": [ + "data", + "object" + ], + "title": "ListOpenAIResponseInputItem", + "description": "List container for OpenAI response input items." 
+ }, + "ListOpenAIResponseObject": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseObjectWithInput" + }, + "description": "List of response objects with their input context" + }, + "has_more": { + "type": "boolean", + "description": "Whether there are more results available beyond this page" + }, + "first_id": { + "type": "string", + "description": "Identifier of the first item in this page" + }, + "last_id": { + "type": "string", + "description": "Identifier of the last item in this page" + }, + "object": { + "type": "string", + "const": "list", + "default": "list", + "description": "Object type identifier, always \"list\"" + } + }, + "additionalProperties": false, + "required": [ + "data", + "has_more", + "first_id", + "last_id", + "object" + ], + "title": "ListOpenAIResponseObject", + "description": "Paginated list of OpenAI response objects with navigation metadata." + }, + "OpenAIResponseObjectWithInput": { + "type": "object", + "properties": { + "created_at": { + "type": "integer", + "description": "Unix timestamp when the response was created" + }, + "error": { + "$ref": "#/components/schemas/OpenAIResponseError", + "description": "(Optional) Error details if the response generation failed" + }, + "id": { + "type": "string", + "description": "Unique identifier for this response" + }, + "model": { + "type": "string", + "description": "Model identifier used for generation" + }, + "object": { + "type": "string", + "const": "response", + "default": "response", + "description": "Object type identifier, always \"response\"" + }, + "output": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseOutput" + }, + "description": "List of generated output items (messages, tool calls, etc.)" + }, + "parallel_tool_calls": { + "type": "boolean", + "default": false, + "description": "Whether tool calls can be executed in parallel" + }, + "previous_response_id": { + "type": "string", + "description": "(Optional) ID of the previous response in a conversation" + }, + "status": { + "type": "string", + "description": "Current status of the response generation" + }, + "temperature": { + "type": "number", + "description": "(Optional) Sampling temperature used for generation" + }, + "text": { + "$ref": "#/components/schemas/OpenAIResponseText", + "description": "Text formatting configuration for the response" + }, + "top_p": { + "type": "number", + "description": "(Optional) Nucleus sampling parameter used for generation" + }, + "truncation": { + "type": "string", + "description": "(Optional) Truncation strategy applied to the response" + }, + "user": { + "type": "string", + "description": "(Optional) User identifier associated with the request" + }, + "input": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInput" + }, + "description": "List of input items that led to this response" + } + }, + "additionalProperties": false, + "required": [ + "created_at", + "id", + "model", + "object", + "output", + "parallel_tool_calls", + "status", + "text", + "input" + ], + "title": "OpenAIResponseObjectWithInput", + "description": "OpenAI response object extended with input context information." 
+ }, + "ListPromptsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Prompt" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListPromptsResponse", + "description": "Response model to list prompts." + }, + "ListProvidersResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ProviderInfo" + }, + "description": "List of provider information objects" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListProvidersResponse", + "description": "Response containing a list of all available providers." + }, + "RouteInfo": { + "type": "object", + "properties": { + "route": { + "type": "string", + "description": "The API endpoint path" + }, + "method": { + "type": "string", + "description": "HTTP method for the route" + }, + "provider_types": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of provider types that implement this route" + } + }, + "additionalProperties": false, + "required": [ + "route", + "method", + "provider_types" + ], + "title": "RouteInfo", + "description": "Information about an API route including its path, method, and implementing providers." + }, + "ListRoutesResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/RouteInfo" + }, + "description": "List of available route information objects" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListRoutesResponse", + "description": "Response containing a list of all available API routes." + }, + "ListToolDefsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolDef" + }, + "description": "List of tool definitions" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListToolDefsResponse", + "description": "Response containing a list of tool definitions." + }, + "ListScoringFunctionsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ScoringFn" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListScoringFunctionsResponse" + }, + "ListShieldsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Shield" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListShieldsResponse" + }, + "ListToolGroupsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolGroup" + }, + "description": "List of tool groups" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListToolGroupsResponse", + "description": "Response containing a list of tool groups." + }, + "ListToolsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Tool" + }, + "description": "List of tools" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListToolsResponse", + "description": "Response containing a list of tools." 
+ }, + "ListVectorDBsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/VectorDB" + }, + "description": "List of vector databases" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListVectorDBsResponse", + "description": "Response from listing vector databases." + }, + "Event": { + "oneOf": [ + { + "$ref": "#/components/schemas/UnstructuredLogEvent" + }, + { + "$ref": "#/components/schemas/MetricEvent" + }, + { + "$ref": "#/components/schemas/StructuredLogEvent" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "unstructured_log": "#/components/schemas/UnstructuredLogEvent", + "metric": "#/components/schemas/MetricEvent", + "structured_log": "#/components/schemas/StructuredLogEvent" + } + } + }, + "EventType": { + "type": "string", + "enum": [ + "unstructured_log", + "structured_log", + "metric" + ], + "title": "EventType", + "description": "The type of telemetry event being logged." + }, + "LogSeverity": { + "type": "string", + "enum": [ + "verbose", + "debug", + "info", + "warn", + "error", + "critical" + ], + "title": "LogSeverity", + "description": "The severity level of a log message." + }, + "MetricEvent": { + "type": "object", + "properties": { + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace this event belongs to" + }, + "span_id": { + "type": "string", + "description": "Unique identifier for the span this event belongs to" + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the event occurred" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ] + }, + "description": "(Optional) Key-value pairs containing additional metadata about the event" + }, + "type": { + "$ref": "#/components/schemas/EventType", + "const": "metric", + "default": "metric", + "description": "Event type identifier set to METRIC" + }, + "metric": { + "type": "string", + "description": "The name of the metric being measured" + }, + "value": { + "oneOf": [ + { + "type": "integer" + }, + { + "type": "number" + } + ], + "description": "The numeric value of the metric measurement" + }, + "unit": { + "type": "string", + "description": "The unit of measurement for the metric value" + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "span_id", + "timestamp", + "type", + "metric", + "value", + "unit" + ], + "title": "MetricEvent", + "description": "A metric event containing a measured value." + }, + "SpanEndPayload": { + "type": "object", + "properties": { + "type": { + "$ref": "#/components/schemas/StructuredLogType", + "const": "span_end", + "default": "span_end", + "description": "Payload type identifier set to SPAN_END" + }, + "status": { + "$ref": "#/components/schemas/SpanStatus", + "description": "The final status of the span indicating success or failure" + } + }, + "additionalProperties": false, + "required": [ + "type", + "status" + ], + "title": "SpanEndPayload", + "description": "Payload for a span end event." 
+ }, + "SpanStartPayload": { + "type": "object", + "properties": { + "type": { + "$ref": "#/components/schemas/StructuredLogType", + "const": "span_start", + "default": "span_start", + "description": "Payload type identifier set to SPAN_START" + }, + "name": { + "type": "string", + "description": "Human-readable name describing the operation this span represents" + }, + "parent_span_id": { + "type": "string", + "description": "(Optional) Unique identifier for the parent span, if this is a child span" + } + }, + "additionalProperties": false, + "required": [ + "type", + "name" + ], + "title": "SpanStartPayload", + "description": "Payload for a span start event." + }, + "StructuredLogEvent": { + "type": "object", + "properties": { + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace this event belongs to" + }, + "span_id": { + "type": "string", + "description": "Unique identifier for the span this event belongs to" + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the event occurred" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ] + }, + "description": "(Optional) Key-value pairs containing additional metadata about the event" + }, + "type": { + "$ref": "#/components/schemas/EventType", + "const": "structured_log", + "default": "structured_log", + "description": "Event type identifier set to STRUCTURED_LOG" + }, + "payload": { + "$ref": "#/components/schemas/StructuredLogPayload", + "description": "The structured payload data for the log event" + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "span_id", + "timestamp", + "type", + "payload" + ], + "title": "StructuredLogEvent", + "description": "A structured log event containing typed payload data." + }, + "StructuredLogPayload": { + "oneOf": [ + { + "$ref": "#/components/schemas/SpanStartPayload" + }, + { + "$ref": "#/components/schemas/SpanEndPayload" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "span_start": "#/components/schemas/SpanStartPayload", + "span_end": "#/components/schemas/SpanEndPayload" + } + } + }, + "StructuredLogType": { + "type": "string", + "enum": [ + "span_start", + "span_end" + ], + "title": "StructuredLogType", + "description": "The type of structured log event payload." 
+ }, + "UnstructuredLogEvent": { + "type": "object", + "properties": { + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace this event belongs to" + }, + "span_id": { + "type": "string", + "description": "Unique identifier for the span this event belongs to" + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the event occurred" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ] + }, + "description": "(Optional) Key-value pairs containing additional metadata about the event" + }, + "type": { + "$ref": "#/components/schemas/EventType", + "const": "unstructured_log", + "default": "unstructured_log", + "description": "Event type identifier set to UNSTRUCTURED_LOG" + }, + "message": { + "type": "string", + "description": "The log message text" + }, + "severity": { + "$ref": "#/components/schemas/LogSeverity", + "description": "The severity level of the log message" + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "span_id", + "timestamp", + "type", + "message", + "severity" + ], + "title": "UnstructuredLogEvent", + "description": "An unstructured log event containing a simple text message." + }, + "LogEventRequest": { + "type": "object", + "properties": { + "event": { + "$ref": "#/components/schemas/Event", + "description": "The event to log." + }, + "ttl_seconds": { + "type": "integer", + "description": "The time to live of the event." + } + }, + "additionalProperties": false, + "required": [ + "event", + "ttl_seconds" + ], + "title": "LogEventRequest" + }, + "VectorStoreChunkingStrategy": { + "oneOf": [ + { + "$ref": "#/components/schemas/VectorStoreChunkingStrategyAuto" + }, + { + "$ref": "#/components/schemas/VectorStoreChunkingStrategyStatic" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "auto": "#/components/schemas/VectorStoreChunkingStrategyAuto", + "static": "#/components/schemas/VectorStoreChunkingStrategyStatic" + } + } + }, + "VectorStoreChunkingStrategyAuto": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "auto", + "default": "auto", + "description": "Strategy type, always \"auto\" for automatic chunking" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "VectorStoreChunkingStrategyAuto", + "description": "Automatic chunking strategy for vector store files." + }, + "VectorStoreChunkingStrategyStatic": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "static", + "default": "static", + "description": "Strategy type, always \"static\" for static chunking" + }, + "static": { + "$ref": "#/components/schemas/VectorStoreChunkingStrategyStaticConfig", + "description": "Configuration parameters for the static chunking strategy" + } + }, + "additionalProperties": false, + "required": [ + "type", + "static" + ], + "title": "VectorStoreChunkingStrategyStatic", + "description": "Static chunking strategy with configurable parameters." 
+ }, + "VectorStoreChunkingStrategyStaticConfig": { + "type": "object", + "properties": { + "chunk_overlap_tokens": { + "type": "integer", + "default": 400, + "description": "Number of tokens to overlap between adjacent chunks" + }, + "max_chunk_size_tokens": { + "type": "integer", + "default": 800, + "description": "Maximum number of tokens per chunk, must be between 100 and 4096" + } + }, + "additionalProperties": false, + "required": [ + "chunk_overlap_tokens", + "max_chunk_size_tokens" + ], + "title": "VectorStoreChunkingStrategyStaticConfig", + "description": "Configuration for static chunking strategy." + }, + "OpenaiAttachFileToVectorStoreRequest": { + "type": "object", + "properties": { + "file_id": { + "type": "string", + "description": "The ID of the file to attach to the vector store." + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The key-value attributes stored with the file, which can be used for filtering." + }, + "chunking_strategy": { + "$ref": "#/components/schemas/VectorStoreChunkingStrategy", + "description": "The chunking strategy to use for the file." + } + }, + "additionalProperties": false, + "required": [ + "file_id" + ], + "title": "OpenaiAttachFileToVectorStoreRequest" + }, + "VectorStoreFileLastError": { + "type": "object", + "properties": { + "code": { + "oneOf": [ + { + "type": "string", + "const": "server_error" + }, + { + "type": "string", + "const": "rate_limit_exceeded" + } + ], + "description": "Error code indicating the type of failure" + }, + "message": { + "type": "string", + "description": "Human-readable error message describing the failure" + } + }, + "additionalProperties": false, + "required": [ + "code", + "message" + ], + "title": "VectorStoreFileLastError", + "description": "Error information for failed vector store file processing." 
+ }, + "VectorStoreFileObject": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the file" + }, + "object": { + "type": "string", + "default": "vector_store.file", + "description": "Object type identifier, always \"vector_store.file\"" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Key-value attributes associated with the file" + }, + "chunking_strategy": { + "$ref": "#/components/schemas/VectorStoreChunkingStrategy", + "description": "Strategy used for splitting the file into chunks" + }, + "created_at": { + "type": "integer", + "description": "Timestamp when the file was added to the vector store" + }, + "last_error": { + "$ref": "#/components/schemas/VectorStoreFileLastError", + "description": "(Optional) Error information if file processing failed" + }, + "status": { + "$ref": "#/components/schemas/VectorStoreFileStatus", + "description": "Current processing status of the file" + }, + "usage_bytes": { + "type": "integer", + "default": 0, + "description": "Storage space used by this file in bytes" + }, + "vector_store_id": { + "type": "string", + "description": "ID of the vector store containing this file" + } + }, + "additionalProperties": false, + "required": [ + "id", + "object", + "attributes", + "chunking_strategy", + "created_at", + "status", + "usage_bytes", + "vector_store_id" + ], + "title": "VectorStoreFileObject", + "description": "OpenAI Vector Store File object." + }, + "VectorStoreFileStatus": { + "oneOf": [ + { + "type": "string", + "const": "completed" + }, + { + "type": "string", + "const": "in_progress" + }, + { + "type": "string", + "const": "cancelled" + }, + { + "type": "string", + "const": "failed" + } + ] + }, "OpenAIJSONSchema": { "type": "object", "properties": { @@ -12782,6 +15610,2170 @@ "title": "VectorStoreSearchResponsePage", "description": "Paginated response from searching a vector store." }, +<<<<<<< HEAD +======= + "OpenaiUpdateVectorStoreRequest": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the vector store." + }, + "expires_after": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The expiration policy for a vector store." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Set of 16 key-value pairs that can be attached to an object." + } + }, + "additionalProperties": false, + "title": "OpenaiUpdateVectorStoreRequest" + }, + "OpenaiUpdateVectorStoreFileRequest": { + "type": "object", + "properties": { + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The updated key-value attributes to store with the file." 
+ } + }, + "additionalProperties": false, + "required": [ + "attributes" + ], + "title": "OpenaiUpdateVectorStoreFileRequest" + }, + "DPOAlignmentConfig": { + "type": "object", + "properties": { + "beta": { + "type": "number", + "description": "Temperature parameter for the DPO loss" + }, + "loss_type": { + "$ref": "#/components/schemas/DPOLossType", + "default": "sigmoid", + "description": "The type of loss function to use for DPO" + } + }, + "additionalProperties": false, + "required": [ + "beta", + "loss_type" + ], + "title": "DPOAlignmentConfig", + "description": "Configuration for Direct Preference Optimization (DPO) alignment." + }, + "DPOLossType": { + "type": "string", + "enum": [ + "sigmoid", + "hinge", + "ipo", + "kto_pair" + ], + "title": "DPOLossType" + }, + "DataConfig": { + "type": "object", + "properties": { + "dataset_id": { + "type": "string", + "description": "Unique identifier for the training dataset" + }, + "batch_size": { + "type": "integer", + "description": "Number of samples per training batch" + }, + "shuffle": { + "type": "boolean", + "description": "Whether to shuffle the dataset during training" + }, + "data_format": { + "$ref": "#/components/schemas/DatasetFormat", + "description": "Format of the dataset (instruct or dialog)" + }, + "validation_dataset_id": { + "type": "string", + "description": "(Optional) Unique identifier for the validation dataset" + }, + "packed": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to pack multiple samples into a single sequence for efficiency" + }, + "train_on_input": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to compute loss on input tokens as well as output tokens" + } + }, + "additionalProperties": false, + "required": [ + "dataset_id", + "batch_size", + "shuffle", + "data_format" + ], + "title": "DataConfig", + "description": "Configuration for training data and data loading." + }, + "DatasetFormat": { + "type": "string", + "enum": [ + "instruct", + "dialog" + ], + "title": "DatasetFormat", + "description": "Format of the training dataset." + }, + "EfficiencyConfig": { + "type": "object", + "properties": { + "enable_activation_checkpointing": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to use activation checkpointing to reduce memory usage" + }, + "enable_activation_offloading": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to offload activations to CPU to save GPU memory" + }, + "memory_efficient_fsdp_wrap": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to use memory-efficient FSDP wrapping" + }, + "fsdp_cpu_offload": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to offload FSDP parameters to CPU" + } + }, + "additionalProperties": false, + "title": "EfficiencyConfig", + "description": "Configuration for memory and compute efficiency optimizations." 
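Illustrative values for the two training-data configs defined above; the numbers are examples only, not recommendations.

# Illustrative DPOAlignmentConfig: beta is the DPO loss temperature and
# loss_type must be one of the DPOLossType values.
dpo_config = {
    "beta": 0.1,             # example temperature, not a recommended value
    "loss_type": "sigmoid",  # schema default
}

# Illustrative DataConfig for an instruct-format training dataset.
data_config = {
    "dataset_id": "my-training-set",  # placeholder dataset ID
    "batch_size": 8,
    "shuffle": True,
    "data_format": "instruct",        # or "dialog"
    "packed": False,                  # optional, default False
    "train_on_input": False,          # optional, default False
}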
+ }, + "OptimizerConfig": { + "type": "object", + "properties": { + "optimizer_type": { + "$ref": "#/components/schemas/OptimizerType", + "description": "Type of optimizer to use (adam, adamw, or sgd)" + }, + "lr": { + "type": "number", + "description": "Learning rate for the optimizer" + }, + "weight_decay": { + "type": "number", + "description": "Weight decay coefficient for regularization" + }, + "num_warmup_steps": { + "type": "integer", + "description": "Number of steps for learning rate warmup" + } + }, + "additionalProperties": false, + "required": [ + "optimizer_type", + "lr", + "weight_decay", + "num_warmup_steps" + ], + "title": "OptimizerConfig", + "description": "Configuration parameters for the optimization algorithm." + }, + "OptimizerType": { + "type": "string", + "enum": [ + "adam", + "adamw", + "sgd" + ], + "title": "OptimizerType", + "description": "Available optimizer algorithms for training." + }, + "TrainingConfig": { + "type": "object", + "properties": { + "n_epochs": { + "type": "integer", + "description": "Number of training epochs to run" + }, + "max_steps_per_epoch": { + "type": "integer", + "default": 1, + "description": "Maximum number of steps to run per epoch" + }, + "gradient_accumulation_steps": { + "type": "integer", + "default": 1, + "description": "Number of steps to accumulate gradients before updating" + }, + "max_validation_steps": { + "type": "integer", + "default": 1, + "description": "(Optional) Maximum number of validation steps per epoch" + }, + "data_config": { + "$ref": "#/components/schemas/DataConfig", + "description": "(Optional) Configuration for data loading and formatting" + }, + "optimizer_config": { + "$ref": "#/components/schemas/OptimizerConfig", + "description": "(Optional) Configuration for the optimization algorithm" + }, + "efficiency_config": { + "$ref": "#/components/schemas/EfficiencyConfig", + "description": "(Optional) Configuration for memory and compute optimizations" + }, + "dtype": { + "type": "string", + "default": "bf16", + "description": "(Optional) Data type for model parameters (bf16, fp16, fp32)" + } + }, + "additionalProperties": false, + "required": [ + "n_epochs", + "max_steps_per_epoch", + "gradient_accumulation_steps" + ], + "title": "TrainingConfig", + "description": "Comprehensive configuration for the training process." + }, + "PreferenceOptimizeRequest": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string", + "description": "The UUID of the job to create." + }, + "finetuned_model": { + "type": "string", + "description": "The model to fine-tune." + }, + "algorithm_config": { + "$ref": "#/components/schemas/DPOAlignmentConfig", + "description": "The algorithm configuration." + }, + "training_config": { + "$ref": "#/components/schemas/TrainingConfig", + "description": "The training configuration." + }, + "hyperparam_search_config": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The hyperparam search configuration." + }, + "logger_config": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The logger configuration." 
+ } + }, + "additionalProperties": false, + "required": [ + "job_uuid", + "finetuned_model", + "algorithm_config", + "training_config", + "hyperparam_search_config", + "logger_config" + ], + "title": "PreferenceOptimizeRequest" + }, + "PostTrainingJob": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "job_uuid" + ], + "title": "PostTrainingJob" + }, + "DefaultRAGQueryGeneratorConfig": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "default", + "default": "default", + "description": "Type of query generator, always 'default'" + }, + "separator": { + "type": "string", + "default": " ", + "description": "String separator used to join query terms" + } + }, + "additionalProperties": false, + "required": [ + "type", + "separator" + ], + "title": "DefaultRAGQueryGeneratorConfig", + "description": "Configuration for the default RAG query generator." + }, + "LLMRAGQueryGeneratorConfig": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "llm", + "default": "llm", + "description": "Type of query generator, always 'llm'" + }, + "model": { + "type": "string", + "description": "Name of the language model to use for query generation" + }, + "template": { + "type": "string", + "description": "Template string for formatting the query generation prompt" + } + }, + "additionalProperties": false, + "required": [ + "type", + "model", + "template" + ], + "title": "LLMRAGQueryGeneratorConfig", + "description": "Configuration for the LLM-based RAG query generator." + }, + "RAGQueryConfig": { + "type": "object", + "properties": { + "query_generator_config": { + "$ref": "#/components/schemas/RAGQueryGeneratorConfig", + "description": "Configuration for the query generator." + }, + "max_tokens_in_context": { + "type": "integer", + "default": 4096, + "description": "Maximum number of tokens in the context." + }, + "max_chunks": { + "type": "integer", + "default": 5, + "description": "Maximum number of chunks to retrieve." + }, + "chunk_template": { + "type": "string", + "default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n", + "description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\"" + }, + "mode": { + "$ref": "#/components/schemas/RAGSearchMode", + "default": "vector", + "description": "Search mode for retrieval—either \"vector\", \"keyword\", or \"hybrid\". Default \"vector\"." + }, + "ranker": { + "$ref": "#/components/schemas/Ranker", + "description": "Configuration for the ranker to use in hybrid search. Defaults to RRF ranker." + } + }, + "additionalProperties": false, + "required": [ + "query_generator_config", + "max_tokens_in_context", + "max_chunks", + "chunk_template" + ], + "title": "RAGQueryConfig", + "description": "Configuration for the RAG query generation." 
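The chunk_template field behaves like an ordinary Python format string; a sketch of rendering one retrieved chunk with the default template, using a stand-in object for whatever chunk type the implementation actually passes.

from types import SimpleNamespace

# Stand-in for a retrieved chunk; it only mirrors the attribute the
# template's {chunk.content} placeholder needs.
chunk = SimpleNamespace(content="Paris is the capital of France.")
metadata = {"document_id": "doc-1"}

template = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"
print(template.format(index=1, chunk=chunk, metadata=metadata))
# Result 1
# Content: Paris is the capital of France.
# Metadata: {'document_id': 'doc-1'}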
+ }, + "RAGQueryGeneratorConfig": { + "oneOf": [ + { + "$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig" + }, + { + "$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "default": "#/components/schemas/DefaultRAGQueryGeneratorConfig", + "llm": "#/components/schemas/LLMRAGQueryGeneratorConfig" + } + } + }, + "RAGSearchMode": { + "type": "string", + "enum": [ + "vector", + "keyword", + "hybrid" + ], + "title": "RAGSearchMode", + "description": "Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search for semantic matching - KEYWORD: Uses keyword-based search for exact matching - HYBRID: Combines both vector and keyword search for better results" + }, + "RRFRanker": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "rrf", + "default": "rrf", + "description": "The type of ranker, always \"rrf\"" + }, + "impact_factor": { + "type": "number", + "default": 60.0, + "description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0" + } + }, + "additionalProperties": false, + "required": [ + "type", + "impact_factor" + ], + "title": "RRFRanker", + "description": "Reciprocal Rank Fusion (RRF) ranker configuration." + }, + "Ranker": { + "oneOf": [ + { + "$ref": "#/components/schemas/RRFRanker" + }, + { + "$ref": "#/components/schemas/WeightedRanker" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "rrf": "#/components/schemas/RRFRanker", + "weighted": "#/components/schemas/WeightedRanker" + } + } + }, + "WeightedRanker": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "weighted", + "default": "weighted", + "description": "The type of ranker, always \"weighted\"" + }, + "alpha": { + "type": "number", + "default": 0.5, + "description": "Weight factor between 0 and 1. 0 means only use keyword scores, 1 means only use vector scores, values in between blend both scores." + } + }, + "additionalProperties": false, + "required": [ + "type", + "alpha" + ], + "title": "WeightedRanker", + "description": "Weighted ranker configuration that combines vector and keyword scores." 
+ }, + "QueryRequest": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "The query content to search for in the indexed documents" + }, + "vector_db_ids": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of vector database IDs to search within" + }, + "query_config": { + "$ref": "#/components/schemas/RAGQueryConfig", + "description": "(Optional) Configuration parameters for the query operation" + } + }, + "additionalProperties": false, + "required": [ + "content", + "vector_db_ids" + ], + "title": "QueryRequest" + }, + "RAGQueryResult": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "(Optional) The retrieved content from the query" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Additional metadata about the query result" + } + }, + "additionalProperties": false, + "required": [ + "metadata" + ], + "title": "RAGQueryResult", + "description": "Result of a RAG query containing retrieved content and metadata." + }, + "QueryChunksRequest": { + "type": "object", + "properties": { + "vector_db_id": { + "type": "string", + "description": "The identifier of the vector database to query." + }, + "query": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "The query to search for." + }, + "params": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The parameters of the query." + } + }, + "additionalProperties": false, + "required": [ + "vector_db_id", + "query" + ], + "title": "QueryChunksRequest" + }, + "QueryChunksResponse": { + "type": "object", + "properties": { + "chunks": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Chunk" + }, + "description": "List of content chunks returned from the query" + }, + "scores": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Relevance scores corresponding to each returned chunk" + } + }, + "additionalProperties": false, + "required": [ + "chunks", + "scores" + ], + "title": "QueryChunksResponse", + "description": "Response from querying chunks in a vector database." + }, + "QueryMetricsRequest": { + "type": "object", + "properties": { + "start_time": { + "type": "integer", + "description": "The start time of the metric to query." + }, + "end_time": { + "type": "integer", + "description": "The end time of the metric to query." + }, + "granularity": { + "type": "string", + "description": "The granularity of the metric to query." + }, + "query_type": { + "type": "string", + "enum": [ + "range", + "instant" + ], + "description": "The type of query to perform." 
+ }, + "label_matchers": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the label to match" + }, + "value": { + "type": "string", + "description": "The value to match against" + }, + "operator": { + "type": "string", + "enum": [ + "=", + "!=", + "=~", + "!~" + ], + "description": "The comparison operator to use for matching", + "default": "=" + } + }, + "additionalProperties": false, + "required": [ + "name", + "value", + "operator" + ], + "title": "MetricLabelMatcher", + "description": "A matcher for filtering metrics by label values." + }, + "description": "The label matchers to apply to the metric." + } + }, + "additionalProperties": false, + "required": [ + "start_time", + "query_type" + ], + "title": "QueryMetricsRequest" + }, + "MetricDataPoint": { + "type": "object", + "properties": { + "timestamp": { + "type": "integer", + "description": "Unix timestamp when the metric value was recorded" + }, + "value": { + "type": "number", + "description": "The numeric value of the metric at this timestamp" + }, + "unit": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "timestamp", + "value", + "unit" + ], + "title": "MetricDataPoint", + "description": "A single data point in a metric time series." + }, + "MetricLabel": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the label" + }, + "value": { + "type": "string", + "description": "The value of the label" + } + }, + "additionalProperties": false, + "required": [ + "name", + "value" + ], + "title": "MetricLabel", + "description": "A label associated with a metric." + }, + "MetricSeries": { + "type": "object", + "properties": { + "metric": { + "type": "string", + "description": "The name of the metric" + }, + "labels": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MetricLabel" + }, + "description": "List of labels associated with this metric series" + }, + "values": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MetricDataPoint" + }, + "description": "List of data points in chronological order" + } + }, + "additionalProperties": false, + "required": [ + "metric", + "labels", + "values" + ], + "title": "MetricSeries", + "description": "A time series of metric data points." + }, + "QueryMetricsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MetricSeries" + }, + "description": "List of metric series matching the query criteria" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "QueryMetricsResponse", + "description": "Response containing metric time series data." + }, + "QueryCondition": { + "type": "object", + "properties": { + "key": { + "type": "string", + "description": "The attribute key to filter on" + }, + "op": { + "$ref": "#/components/schemas/QueryConditionOp", + "description": "The comparison operator to apply" + }, + "value": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ], + "description": "The value to compare against" + } + }, + "additionalProperties": false, + "required": [ + "key", + "op", + "value" + ], + "title": "QueryCondition", + "description": "A condition for filtering query results." 
+ }, + "QueryConditionOp": { + "type": "string", + "enum": [ + "eq", + "ne", + "gt", + "lt" + ], + "title": "QueryConditionOp", + "description": "Comparison operators for query conditions." + }, + "QuerySpansRequest": { + "type": "object", + "properties": { + "attribute_filters": { + "type": "array", + "items": { + "$ref": "#/components/schemas/QueryCondition" + }, + "description": "The attribute filters to apply to the spans." + }, + "attributes_to_return": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The attributes to return in the spans." + }, + "max_depth": { + "type": "integer", + "description": "The maximum depth of the tree." + } + }, + "additionalProperties": false, + "required": [ + "attribute_filters", + "attributes_to_return" + ], + "title": "QuerySpansRequest" + }, + "QuerySpansResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Span" + }, + "description": "List of spans matching the query criteria" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "QuerySpansResponse", + "description": "Response containing a list of spans." + }, + "QueryTracesRequest": { + "type": "object", + "properties": { + "attribute_filters": { + "type": "array", + "items": { + "$ref": "#/components/schemas/QueryCondition" + }, + "description": "The attribute filters to apply to the traces." + }, + "limit": { + "type": "integer", + "description": "The limit of traces to return." + }, + "offset": { + "type": "integer", + "description": "The offset of the traces to return." + }, + "order_by": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The order by of the traces to return." + } + }, + "additionalProperties": false, + "title": "QueryTracesRequest" + }, + "QueryTracesResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Trace" + }, + "description": "List of traces matching the query criteria" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "QueryTracesResponse", + "description": "Response containing a list of traces." + }, + "RegisterBenchmarkRequest": { + "type": "object", + "properties": { + "benchmark_id": { + "type": "string", + "description": "The ID of the benchmark to register." + }, + "dataset_id": { + "type": "string", + "description": "The ID of the dataset to use for the benchmark." + }, + "scoring_functions": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The scoring functions to use for the benchmark." + }, + "provider_benchmark_id": { + "type": "string", + "description": "The ID of the provider benchmark to use for the benchmark." + }, + "provider_id": { + "type": "string", + "description": "The ID of the provider to use for the benchmark." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The metadata to use for the benchmark." 
+ } + }, + "additionalProperties": false, + "required": [ + "benchmark_id", + "dataset_id", + "scoring_functions" + ], + "title": "RegisterBenchmarkRequest" + }, + "RegisterDatasetRequest": { + "type": "object", + "properties": { + "purpose": { + "type": "string", + "enum": [ + "post-training/messages", + "eval/question-answer", + "eval/messages-answer" + ], + "description": "The purpose of the dataset. One of: - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } - \"eval/question-answer\": The dataset contains a question column and an answer column for evaluation. { \"question\": \"What is the capital of France?\", \"answer\": \"Paris\" } - \"eval/messages-answer\": The dataset contains a messages column with list of messages and an answer column for evaluation. { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, my name is John Doe.\"}, {\"role\": \"assistant\", \"content\": \"Hello, John Doe. How can I help you today?\"}, {\"role\": \"user\", \"content\": \"What's my name?\"}, ], \"answer\": \"John Doe\" }" + }, + "source": { + "$ref": "#/components/schemas/DataSource", + "description": "The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"data:csv;base64,{base64_content}\" } - { \"type\": \"uri\", \"uri\": \"huggingface://llamastack/simpleqa?split=train\" } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The metadata for the dataset. - E.g. {\"description\": \"My dataset\"}." + }, + "dataset_id": { + "type": "string", + "description": "The ID of the dataset. If not provided, an ID will be generated." + } + }, + "additionalProperties": false, + "required": [ + "purpose", + "source" + ], + "title": "RegisterDatasetRequest" + }, + "RegisterModelRequest": { + "type": "object", + "properties": { + "model_id": { + "type": "string", + "description": "The identifier of the model to register." + }, + "provider_model_id": { + "type": "string", + "description": "The identifier of the model in the provider." + }, + "provider_id": { + "type": "string", + "description": "The identifier of the provider." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Any additional metadata for this model." + }, + "model_type": { + "$ref": "#/components/schemas/ModelType", + "description": "The type of model to register." 
+ } + }, + "additionalProperties": false, + "required": [ + "model_id" + ], + "title": "RegisterModelRequest" + }, + "RegisterScoringFunctionRequest": { + "type": "object", + "properties": { + "scoring_fn_id": { + "type": "string", + "description": "The ID of the scoring function to register." + }, + "description": { + "type": "string", + "description": "The description of the scoring function." + }, + "return_type": { + "$ref": "#/components/schemas/ParamType", + "description": "The return type of the scoring function." + }, + "provider_scoring_fn_id": { + "type": "string", + "description": "The ID of the provider scoring function to use for the scoring function." + }, + "provider_id": { + "type": "string", + "description": "The ID of the provider to use for the scoring function." + }, + "params": { + "$ref": "#/components/schemas/ScoringFnParams", + "description": "The parameters for the scoring function for benchmark eval, these can be overridden for app eval." + } + }, + "additionalProperties": false, + "required": [ + "scoring_fn_id", + "description", + "return_type" + ], + "title": "RegisterScoringFunctionRequest" + }, + "RegisterShieldRequest": { + "type": "object", + "properties": { + "shield_id": { + "type": "string", + "description": "The identifier of the shield to register." + }, + "provider_shield_id": { + "type": "string", + "description": "The identifier of the shield in the provider." + }, + "provider_id": { + "type": "string", + "description": "The identifier of the provider." + }, + "params": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The parameters of the shield." + } + }, + "additionalProperties": false, + "required": [ + "shield_id" + ], + "title": "RegisterShieldRequest" + }, + "RegisterToolGroupRequest": { + "type": "object", + "properties": { + "toolgroup_id": { + "type": "string", + "description": "The ID of the tool group to register." + }, + "provider_id": { + "type": "string", + "description": "The ID of the provider to use for the tool group." + }, + "mcp_endpoint": { + "$ref": "#/components/schemas/URL", + "description": "The MCP endpoint to use for the tool group." + }, + "args": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "A dictionary of arguments to pass to the tool group." + } + }, + "additionalProperties": false, + "required": [ + "toolgroup_id", + "provider_id" + ], + "title": "RegisterToolGroupRequest" + }, + "RegisterVectorDbRequest": { + "type": "object", + "properties": { + "vector_db_id": { + "type": "string", + "description": "The identifier of the vector database to register." + }, + "embedding_model": { + "type": "string", + "description": "The embedding model to use." + }, + "embedding_dimension": { + "type": "integer", + "description": "The dimension of the embedding model." + }, + "provider_id": { + "type": "string", + "description": "The identifier of the provider." + }, + "vector_db_name": { + "type": "string", + "description": "The name of the vector database." + }, + "provider_vector_db_id": { + "type": "string", + "description": "The identifier of the vector database in the provider." 
+ } + }, + "additionalProperties": false, + "required": [ + "vector_db_id", + "embedding_model" + ], + "title": "RegisterVectorDbRequest" + }, + "RerankRequest": { + "type": "object", + "properties": { + "model": { + "type": "string", + "description": "The identifier of the reranking model to use. The model must be a reranking model registered with Llama Stack and available via the /models endpoint." + }, + "query": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam" + }, + { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam" + } + ], + "description": "The search query to rank items against. Can be a string, text content part, or image content part. The input must not exceed the model's max input token length." + }, + "items": { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam" + }, + { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam" + } + ] + }, + "description": "List of items to rerank. Each item can be a string, text content part, or image content part. Each input must not exceed the model's max input token length." + }, + "max_num_results": { + "type": "integer", + "description": "(Optional) Maximum number of results to return. Default: returns all." + } + }, + "additionalProperties": false, + "required": [ + "model", + "query", + "items" + ], + "title": "RerankRequest" + }, + "RerankData": { + "type": "object", + "properties": { + "index": { + "type": "integer", + "description": "The original index of the document in the input list" + }, + "relevance_score": { + "type": "number", + "description": "The relevance score from the model output. Values are inverted when applicable so that higher scores indicate greater relevance." + } + }, + "additionalProperties": false, + "required": [ + "index", + "relevance_score" + ], + "title": "RerankData", + "description": "A single rerank result from a reranking response." + }, + "RerankResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/RerankData" + }, + "description": "List of rerank result objects, sorted by relevance score (descending)" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "RerankResponse", + "description": "Response from a reranking request." + }, + "ResumeAgentTurnRequest": { + "type": "object", + "properties": { + "tool_responses": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolResponse" + }, + "description": "The tool call responses to resume the turn with." + }, + "stream": { + "type": "boolean", + "description": "Whether to stream the response." + } + }, + "additionalProperties": false, + "required": [ + "tool_responses" + ], + "title": "ResumeAgentTurnRequest" + }, + "RunEvalRequest": { + "type": "object", + "properties": { + "benchmark_config": { + "$ref": "#/components/schemas/BenchmarkConfig", + "description": "The configuration for the benchmark." + } + }, + "additionalProperties": false, + "required": [ + "benchmark_config" + ], + "title": "RunEvalRequest" + }, + "RunModerationRequest": { + "type": "object", + "properties": { + "input": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ], + "description": "Input (or inputs) to classify. 
Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models." + }, + "model": { + "type": "string", + "description": "The content moderation model you would like to use." + } + }, + "additionalProperties": false, + "required": [ + "input", + "model" + ], + "title": "RunModerationRequest" + }, + "ModerationObject": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The unique identifier for the moderation request." + }, + "model": { + "type": "string", + "description": "The model used to generate the moderation results." + }, + "results": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ModerationObjectResults" + }, + "description": "A list of moderation objects" + } + }, + "additionalProperties": false, + "required": [ + "id", + "model", + "results" + ], + "title": "ModerationObject", + "description": "A moderation object." + }, + "ModerationObjectResults": { + "type": "object", + "properties": { + "flagged": { + "type": "boolean", + "description": "Whether any of the below categories are flagged." + }, + "categories": { + "type": "object", + "additionalProperties": { + "type": "boolean" + }, + "description": "A list of the categories, and whether they are flagged or not." + }, + "category_applied_input_types": { + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "type": "string" + } + }, + "description": "A list of the categories along with the input type(s) that the score applies to." + }, + "category_scores": { + "type": "object", + "additionalProperties": { + "type": "number" + }, + "description": "A list of the categories along with their scores as predicted by model." + }, + "user_message": { + "type": "string" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "flagged", + "metadata" + ], + "title": "ModerationObjectResults", + "description": "A moderation object." + }, + "RunShieldRequest": { + "type": "object", + "properties": { + "shield_id": { + "type": "string", + "description": "The identifier of the shield to run." + }, + "messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + }, + "description": "The messages to run the shield on." + }, + "params": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The parameters of the shield." + } + }, + "additionalProperties": false, + "required": [ + "shield_id", + "messages", + "params" + ], + "title": "RunShieldRequest" + }, + "RunShieldResponse": { + "type": "object", + "properties": { + "violation": { + "$ref": "#/components/schemas/SafetyViolation", + "description": "(Optional) Safety violation detected by the shield, if any" + } + }, + "additionalProperties": false, + "title": "RunShieldResponse", + "description": "Response from running a safety shield." + }, + "SaveSpansToDatasetRequest": { + "type": "object", + "properties": { + "attribute_filters": { + "type": "array", + "items": { + "$ref": "#/components/schemas/QueryCondition" + }, + "description": "The attribute filters to apply to the spans." 
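Returning to the RerankRequest/RerankData/RerankResponse schemas defined earlier in this hunk — the surface this patch adds — a sketch of a request body and of mapping the sorted response back onto the input items; the model identifier is a placeholder and no particular client binding is implied.

# Illustrative RerankRequest body; only the payload and response shapes
# follow the schemas above.
rerank_request = {
    "model": "nvidia/llama-3.2-nv-rerankqa-1b-v2",  # placeholder rerank model ID
    "query": "What is the capital of France?",
    "items": [
        "Paris is the capital of France.",
        "Berlin is the capital of Germany.",
        "France is in Europe.",
    ],
    "max_num_results": 2,  # optional; omit to return all items
}

# A RerankResponse carries (index, relevance_score) pairs sorted by
# descending relevance; each index refers back to the input list.
response = {"data": [{"index": 0, "relevance_score": 12.5},
                     {"index": 2, "relevance_score": 3.1}]}
top_items = [rerank_request["items"][d["index"]] for d in response["data"]]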
+ }, + "attributes_to_save": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The attributes to save to the dataset." + }, + "dataset_id": { + "type": "string", + "description": "The ID of the dataset to save the spans to." + }, + "max_depth": { + "type": "integer", + "description": "The maximum depth of the tree." + } + }, + "additionalProperties": false, + "required": [ + "attribute_filters", + "attributes_to_save", + "dataset_id" + ], + "title": "SaveSpansToDatasetRequest" + }, + "ScoreRequest": { + "type": "object", + "properties": { + "input_rows": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "The rows to score." + }, + "scoring_functions": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "$ref": "#/components/schemas/ScoringFnParams" + }, + { + "type": "null" + } + ] + }, + "description": "The scoring functions to use for the scoring." + } + }, + "additionalProperties": false, + "required": [ + "input_rows", + "scoring_functions" + ], + "title": "ScoreRequest" + }, + "ScoreResponse": { + "type": "object", + "properties": { + "results": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ScoringResult" + }, + "description": "A map of scoring function name to ScoringResult." + } + }, + "additionalProperties": false, + "required": [ + "results" + ], + "title": "ScoreResponse", + "description": "The response from scoring." + }, + "ScoreBatchRequest": { + "type": "object", + "properties": { + "dataset_id": { + "type": "string", + "description": "The ID of the dataset to score." + }, + "scoring_functions": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "$ref": "#/components/schemas/ScoringFnParams" + }, + { + "type": "null" + } + ] + }, + "description": "The scoring functions to use for the scoring." + }, + "save_results_dataset": { + "type": "boolean", + "description": "Whether to save the results to a dataset." + } + }, + "additionalProperties": false, + "required": [ + "dataset_id", + "scoring_functions", + "save_results_dataset" + ], + "title": "ScoreBatchRequest" + }, + "ScoreBatchResponse": { + "type": "object", + "properties": { + "dataset_id": { + "type": "string", + "description": "(Optional) The identifier of the dataset that was scored" + }, + "results": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ScoringResult" + }, + "description": "A map of scoring function name to ScoringResult" + } + }, + "additionalProperties": false, + "required": [ + "results" + ], + "title": "ScoreBatchResponse", + "description": "Response from batch scoring operations on datasets." + }, + "SetDefaultVersionRequest": { + "type": "object", + "properties": { + "version": { + "type": "integer", + "description": "The version to set as default." 
+ } + }, + "additionalProperties": false, + "required": [ + "version" + ], + "title": "SetDefaultVersionRequest" + }, + "AlgorithmConfig": { + "oneOf": [ + { + "$ref": "#/components/schemas/LoraFinetuningConfig" + }, + { + "$ref": "#/components/schemas/QATFinetuningConfig" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "LoRA": "#/components/schemas/LoraFinetuningConfig", + "QAT": "#/components/schemas/QATFinetuningConfig" + } + } + }, + "LoraFinetuningConfig": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "LoRA", + "default": "LoRA", + "description": "Algorithm type identifier, always \"LoRA\"" + }, + "lora_attn_modules": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of attention module names to apply LoRA to" + }, + "apply_lora_to_mlp": { + "type": "boolean", + "description": "Whether to apply LoRA to MLP layers" + }, + "apply_lora_to_output": { + "type": "boolean", + "description": "Whether to apply LoRA to output projection layers" + }, + "rank": { + "type": "integer", + "description": "Rank of the LoRA adaptation (lower rank = fewer parameters)" + }, + "alpha": { + "type": "integer", + "description": "LoRA scaling parameter that controls adaptation strength" + }, + "use_dora": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)" + }, + "quantize_base": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to quantize the base model weights" + } + }, + "additionalProperties": false, + "required": [ + "type", + "lora_attn_modules", + "apply_lora_to_mlp", + "apply_lora_to_output", + "rank", + "alpha" + ], + "title": "LoraFinetuningConfig", + "description": "Configuration for Low-Rank Adaptation (LoRA) fine-tuning." + }, + "QATFinetuningConfig": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "QAT", + "default": "QAT", + "description": "Algorithm type identifier, always \"QAT\"" + }, + "quantizer_name": { + "type": "string", + "description": "Name of the quantization algorithm to use" + }, + "group_size": { + "type": "integer", + "description": "Size of groups for grouped quantization" + } + }, + "additionalProperties": false, + "required": [ + "type", + "quantizer_name", + "group_size" + ], + "title": "QATFinetuningConfig", + "description": "Configuration for Quantization-Aware Training (QAT) fine-tuning." + }, + "SupervisedFineTuneRequest": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string", + "description": "The UUID of the job to create." + }, + "training_config": { + "$ref": "#/components/schemas/TrainingConfig", + "description": "The training configuration." + }, + "hyperparam_search_config": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The hyperparam search configuration." + }, + "logger_config": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The logger configuration." + }, + "model": { + "type": "string", + "description": "The model to fine-tune." 
+ }, + "checkpoint_dir": { + "type": "string", + "description": "The directory to save checkpoint(s) to." + }, + "algorithm_config": { + "$ref": "#/components/schemas/AlgorithmConfig", + "description": "The algorithm configuration." + } + }, + "additionalProperties": false, + "required": [ + "job_uuid", + "training_config", + "hyperparam_search_config", + "logger_config" + ], + "title": "SupervisedFineTuneRequest" + }, + "SyntheticDataGenerateRequest": { + "type": "object", + "properties": { + "dialogs": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + }, + "description": "List of conversation messages to use as input for synthetic data generation" + }, + "filtering_function": { + "type": "string", + "enum": [ + "none", + "random", + "top_k", + "top_p", + "top_k_top_p", + "sigmoid" + ], + "description": "Type of filtering to apply to generated synthetic data samples" + }, + "model": { + "type": "string", + "description": "(Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint" + } + }, + "additionalProperties": false, + "required": [ + "dialogs", + "filtering_function" + ], + "title": "SyntheticDataGenerateRequest" + }, + "SyntheticDataGenerationResponse": { + "type": "object", + "properties": { + "synthetic_data": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "List of generated synthetic data samples that passed the filtering criteria" + }, + "statistics": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Statistical information about the generation process and filtering results" + } + }, + "additionalProperties": false, + "required": [ + "synthetic_data" + ], + "title": "SyntheticDataGenerationResponse", + "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold." + }, + "UpdatePromptRequest": { + "type": "object", + "properties": { + "prompt": { + "type": "string", + "description": "The updated prompt text content." + }, + "version": { + "type": "integer", + "description": "The current version of the prompt being updated." + }, + "variables": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Updated list of variable names that can be used in the prompt template." + }, + "set_as_default": { + "type": "boolean", + "description": "Set the new version as the default (default=True)." + } + }, + "additionalProperties": false, + "required": [ + "prompt", + "version", + "set_as_default" + ], + "title": "UpdatePromptRequest" + }, +>>>>>>> f7acfa0f (Add rerank API for NVIDIA Inference Provider) "VersionInfo": { "type": "object", "properties": { diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index b9e03d614..ebe142557 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -3634,6 +3634,2130 @@ components: title: OpenAIUserMessageParam description: >- A message from the user in an OpenAI-compatible chat completion request. 
+<<<<<<< HEAD +======= + OpenAICompletionWithInputMessages: + type: object + properties: + id: + type: string + description: The ID of the chat completion + choices: + type: array + items: + $ref: '#/components/schemas/OpenAIChoice' + description: List of choices + object: + type: string + const: chat.completion + default: chat.completion + description: >- + The object type, which will be "chat.completion" + created: + type: integer + description: >- + The Unix timestamp in seconds when the chat completion was created + model: + type: string + description: >- + The model that was used to generate the chat completion + input_messages: + type: array + items: + $ref: '#/components/schemas/OpenAIMessageParam' + additionalProperties: false + required: + - id + - choices + - object + - created + - model + - input_messages + title: OpenAICompletionWithInputMessages + DataSource: + oneOf: + - $ref: '#/components/schemas/URIDataSource' + - $ref: '#/components/schemas/RowsDataSource' + discriminator: + propertyName: type + mapping: + uri: '#/components/schemas/URIDataSource' + rows: '#/components/schemas/RowsDataSource' + Dataset: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: dataset + default: dataset + description: >- + Type of resource, always 'dataset' for datasets + purpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + description: >- + Purpose of the dataset indicating its intended use + source: + $ref: '#/components/schemas/DataSource' + description: >- + Data source configuration for the dataset + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Additional metadata for the dataset + additionalProperties: false + required: + - identifier + - provider_id + - type + - purpose + - source + - metadata + title: Dataset + description: >- + Dataset resource for storing and accessing training or evaluation data. + RowsDataSource: + type: object + properties: + type: + type: string + const: rows + default: rows + rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user", + "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, + world!"}]} ] + additionalProperties: false + required: + - type + - rows + title: RowsDataSource + description: A dataset stored in rows. + URIDataSource: + type: object + properties: + type: + type: string + const: uri + default: uri + uri: + type: string + description: >- + The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl" + - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}" + additionalProperties: false + required: + - type + - uri + title: URIDataSource + description: >- + A dataset that can be obtained from a URI. 
+ Model: + type: object + properties: + identifier: + type: string + description: >- + Unique identifier for this resource in llama stack + provider_resource_id: + type: string + description: >- + Unique identifier for this resource in the provider + provider_id: + type: string + description: >- + ID of the provider that owns this resource + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: model + default: model + description: >- + The resource type, always 'model' for model resources + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Any additional metadata for this model + model_type: + $ref: '#/components/schemas/ModelType' + default: llm + description: >- + The type of model (LLM or embedding model) + additionalProperties: false + required: + - identifier + - provider_id + - type + - metadata + - model_type + title: Model + description: >- + A model resource representing an AI model registered in Llama Stack. + ModelType: + type: string + enum: + - llm + - embedding + - rerank + title: ModelType + description: >- + Enumeration of supported model types in Llama Stack. + AgentTurnInputType: + type: object + properties: + type: + type: string + const: agent_turn_input + default: agent_turn_input + description: >- + Discriminator type. Always "agent_turn_input" + additionalProperties: false + required: + - type + title: AgentTurnInputType + description: Parameter type for agent turn input. + ArrayType: + type: object + properties: + type: + type: string + const: array + default: array + description: Discriminator type. Always "array" + additionalProperties: false + required: + - type + title: ArrayType + description: Parameter type for array values. + BooleanType: + type: object + properties: + type: + type: string + const: boolean + default: boolean + description: Discriminator type. Always "boolean" + additionalProperties: false + required: + - type + title: BooleanType + description: Parameter type for boolean values. + ChatCompletionInputType: + type: object + properties: + type: + type: string + const: chat_completion_input + default: chat_completion_input + description: >- + Discriminator type. Always "chat_completion_input" + additionalProperties: false + required: + - type + title: ChatCompletionInputType + description: >- + Parameter type for chat completion input. + CompletionInputType: + type: object + properties: + type: + type: string + const: completion_input + default: completion_input + description: >- + Discriminator type. Always "completion_input" + additionalProperties: false + required: + - type + title: CompletionInputType + description: Parameter type for completion input. + JsonType: + type: object + properties: + type: + type: string + const: json + default: json + description: Discriminator type. Always "json" + additionalProperties: false + required: + - type + title: JsonType + description: Parameter type for JSON values. + NumberType: + type: object + properties: + type: + type: string + const: number + default: number + description: Discriminator type. Always "number" + additionalProperties: false + required: + - type + title: NumberType + description: Parameter type for numeric values. + ObjectType: + type: object + properties: + type: + type: string + const: object + default: object + description: Discriminator type. 
Always "object" + additionalProperties: false + required: + - type + title: ObjectType + description: Parameter type for object values. + ParamType: + oneOf: + - $ref: '#/components/schemas/StringType' + - $ref: '#/components/schemas/NumberType' + - $ref: '#/components/schemas/BooleanType' + - $ref: '#/components/schemas/ArrayType' + - $ref: '#/components/schemas/ObjectType' + - $ref: '#/components/schemas/JsonType' + - $ref: '#/components/schemas/UnionType' + - $ref: '#/components/schemas/ChatCompletionInputType' + - $ref: '#/components/schemas/CompletionInputType' + - $ref: '#/components/schemas/AgentTurnInputType' + discriminator: + propertyName: type + mapping: + string: '#/components/schemas/StringType' + number: '#/components/schemas/NumberType' + boolean: '#/components/schemas/BooleanType' + array: '#/components/schemas/ArrayType' + object: '#/components/schemas/ObjectType' + json: '#/components/schemas/JsonType' + union: '#/components/schemas/UnionType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + agent_turn_input: '#/components/schemas/AgentTurnInputType' + ScoringFn: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: scoring_function + default: scoring_function + description: >- + The resource type, always scoring_function + description: + type: string + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + return_type: + $ref: '#/components/schemas/ParamType' + params: + $ref: '#/components/schemas/ScoringFnParams' + additionalProperties: false + required: + - identifier + - provider_id + - type + - metadata + - return_type + title: ScoringFn + description: >- + A scoring function resource for evaluating model outputs. + StringType: + type: object + properties: + type: + type: string + const: string + default: string + description: Discriminator type. Always "string" + additionalProperties: false + required: + - type + title: StringType + description: Parameter type for string values. + UnionType: + type: object + properties: + type: + type: string + const: union + default: union + description: Discriminator type. Always "union" + additionalProperties: false + required: + - type + title: UnionType + description: Parameter type for union values. + Shield: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: shield + default: shield + description: The resource type, always shield + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Configuration parameters for the shield + additionalProperties: false + required: + - identifier + - provider_id + - type + title: Shield + description: >- + A safety shield resource that can be used to check content. 
+ Span: + type: object + properties: + span_id: + type: string + description: Unique identifier for the span + trace_id: + type: string + description: >- + Unique identifier for the trace this span belongs to + parent_span_id: + type: string + description: >- + (Optional) Unique identifier for the parent span, if this is a child span + name: + type: string + description: >- + Human-readable name describing the operation this span represents + start_time: + type: string + format: date-time + description: Timestamp when the operation began + end_time: + type: string + format: date-time + description: >- + (Optional) Timestamp when the operation finished, if completed + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Key-value pairs containing additional metadata about the span + additionalProperties: false + required: + - span_id + - trace_id + - name + - start_time + title: Span + description: >- + A span representing a single operation within a trace. + GetSpanTreeRequest: + type: object + properties: + attributes_to_return: + type: array + items: + type: string + description: The attributes to return in the tree. + max_depth: + type: integer + description: The maximum depth of the tree. + additionalProperties: false + title: GetSpanTreeRequest + SpanStatus: + type: string + enum: + - ok + - error + title: SpanStatus + description: >- + The status of a span indicating whether it completed successfully or with + an error. + SpanWithStatus: + type: object + properties: + span_id: + type: string + description: Unique identifier for the span + trace_id: + type: string + description: >- + Unique identifier for the trace this span belongs to + parent_span_id: + type: string + description: >- + (Optional) Unique identifier for the parent span, if this is a child span + name: + type: string + description: >- + Human-readable name describing the operation this span represents + start_time: + type: string + format: date-time + description: Timestamp when the operation began + end_time: + type: string + format: date-time + description: >- + (Optional) Timestamp when the operation finished, if completed + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Key-value pairs containing additional metadata about the span + status: + $ref: '#/components/schemas/SpanStatus' + description: >- + (Optional) The current status of the span + additionalProperties: false + required: + - span_id + - trace_id + - name + - start_time + title: SpanWithStatus + description: A span that includes status information. + QuerySpanTreeResponse: + type: object + properties: + data: + type: object + additionalProperties: + $ref: '#/components/schemas/SpanWithStatus' + description: >- + Dictionary mapping span IDs to spans with status information + additionalProperties: false + required: + - data + title: QuerySpanTreeResponse + description: >- + Response containing a tree structure of spans. 
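For orientation, a small sketch of the span-tree payloads these schemas describe; all values are invented, and only the field names and `SpanStatus` values come from the `SpanWithStatus`/`QuerySpanTreeResponse` schemas above.

    # Illustrative SpanWithStatus / QuerySpanTreeResponse shapes (values invented).
    span = {
        "span_id": "a1b2c3",
        "trace_id": "t-001",
        "name": "inference.rerank",
        "start_time": "2025-09-03T17:34:05Z",
        "end_time": "2025-09-03T17:34:06Z",
        "status": "ok",                          # SpanStatus: "ok" or "error"
        "attributes": {"model": "my-reranker"},  # hypothetical attribute
    }
    tree = {"data": {span["span_id"]: span}}  # QuerySpanTreeResponse maps span IDs to spans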
+ Tool: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: tool + default: tool + description: Type of resource, always 'tool' + toolgroup_id: + type: string + description: >- + ID of the tool group this tool belongs to + description: + type: string + description: >- + Human-readable description of what the tool does + parameters: + type: array + items: + $ref: '#/components/schemas/ToolParameter' + description: List of parameters this tool accepts + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional metadata about the tool + additionalProperties: false + required: + - identifier + - provider_id + - type + - toolgroup_id + - description + - parameters + title: Tool + description: A tool that can be invoked by agents. + ToolGroup: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: tool_group + default: tool_group + description: Type of resource, always 'tool_group' + mcp_endpoint: + $ref: '#/components/schemas/URL' + description: >- + (Optional) Model Context Protocol endpoint for remote tools + args: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional arguments for the tool group + additionalProperties: false + required: + - identifier + - provider_id + - type + title: ToolGroup + description: >- + A group of related tools managed together. + Trace: + type: object + properties: + trace_id: + type: string + description: Unique identifier for the trace + root_span_id: + type: string + description: >- + Unique identifier for the root span that started this trace + start_time: + type: string + format: date-time + description: Timestamp when the trace began + end_time: + type: string + format: date-time + description: >- + (Optional) Timestamp when the trace finished, if completed + additionalProperties: false + required: + - trace_id + - root_span_id + - start_time + title: Trace + description: >- + A trace representing the complete execution path of a request across multiple + operations. + Checkpoint: + type: object + properties: + identifier: + type: string + description: Unique identifier for the checkpoint + created_at: + type: string + format: date-time + description: >- + Timestamp when the checkpoint was created + epoch: + type: integer + description: >- + Training epoch when the checkpoint was saved + post_training_job_id: + type: string + description: >- + Identifier of the training job that created this checkpoint + path: + type: string + description: >- + File system path where the checkpoint is stored + training_metrics: + $ref: '#/components/schemas/PostTrainingMetric' + description: >- + (Optional) Training metrics associated with this checkpoint + additionalProperties: false + required: + - identifier + - created_at + - epoch + - post_training_job_id + - path + title: Checkpoint + description: Checkpoint created during training runs. 
+ PostTrainingJobArtifactsResponse: + type: object + properties: + job_uuid: + type: string + description: Unique identifier for the training job + checkpoints: + type: array + items: + $ref: '#/components/schemas/Checkpoint' + description: >- + List of model checkpoints created during training + additionalProperties: false + required: + - job_uuid + - checkpoints + title: PostTrainingJobArtifactsResponse + description: Artifacts of a finetuning job. + PostTrainingMetric: + type: object + properties: + epoch: + type: integer + description: Training epoch number + train_loss: + type: number + description: Loss value on the training dataset + validation_loss: + type: number + description: Loss value on the validation dataset + perplexity: + type: number + description: >- + Perplexity metric indicating model confidence + additionalProperties: false + required: + - epoch + - train_loss + - validation_loss + - perplexity + title: PostTrainingMetric + description: >- + Training metrics captured during post-training jobs. + PostTrainingJobStatusResponse: + type: object + properties: + job_uuid: + type: string + description: Unique identifier for the training job + status: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + description: Current status of the training job + scheduled_at: + type: string + format: date-time + description: >- + (Optional) Timestamp when the job was scheduled + started_at: + type: string + format: date-time + description: >- + (Optional) Timestamp when the job execution began + completed_at: + type: string + format: date-time + description: >- + (Optional) Timestamp when the job finished, if completed + resources_allocated: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Information about computational resources allocated to the + job + checkpoints: + type: array + items: + $ref: '#/components/schemas/Checkpoint' + description: >- + List of model checkpoints created during training + additionalProperties: false + required: + - job_uuid + - status + - checkpoints + title: PostTrainingJobStatusResponse + description: Status of a finetuning job. + ListPostTrainingJobsResponse: + type: object + properties: + data: + type: array + items: + type: object + properties: + job_uuid: + type: string + additionalProperties: false + required: + - job_uuid + title: PostTrainingJob + additionalProperties: false + required: + - data + title: ListPostTrainingJobsResponse + VectorDB: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: vector_db + default: vector_db + description: >- + Type of resource, always 'vector_db' for vector databases + embedding_model: + type: string + description: >- + Name of the embedding model to use for vector generation + embedding_dimension: + type: integer + description: Dimension of the embedding vectors + vector_db_name: + type: string + additionalProperties: false + required: + - identifier + - provider_id + - type + - embedding_model + - embedding_dimension + title: VectorDB + description: >- + Vector database resource for storing and querying vector embeddings. 
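A quick sketch of a `VectorDB` resource as described above; the field names come from the schema, while the provider, embedding model name, and dimension are illustrative assumptions.

    # Illustrative VectorDB resource (provider, model name, and dimension assumed).
    vector_db = {
        "identifier": "docs-index",
        "provider_id": "faiss",
        "type": "vector_db",
        "embedding_model": "all-MiniLM-L6-v2",  # hypothetical embedding model
        "embedding_dimension": 384,             # must match the model's output size
    }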
+ HealthInfo: + type: object + properties: + status: + type: string + enum: + - OK + - Error + - Not Implemented + description: Current health status of the service + additionalProperties: false + required: + - status + title: HealthInfo + description: >- + Health status information for the service. + RAGDocument: + type: object + properties: + document_id: + type: string + description: The unique identifier for the document. + content: + oneOf: + - type: string + - $ref: '#/components/schemas/InterleavedContentItem' + - type: array + items: + $ref: '#/components/schemas/InterleavedContentItem' + - $ref: '#/components/schemas/URL' + description: The content of the document. + mime_type: + type: string + description: The MIME type of the document. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Additional metadata for the document. + additionalProperties: false + required: + - document_id + - content + - metadata + title: RAGDocument + description: >- + A document to be used for document ingestion in the RAG Tool. + InsertRequest: + type: object + properties: + documents: + type: array + items: + $ref: '#/components/schemas/RAGDocument' + description: >- + List of documents to index in the RAG system + vector_db_id: + type: string + description: >- + ID of the vector database to store the document embeddings + chunk_size_in_tokens: + type: integer + description: >- + (Optional) Size in tokens for document chunking during indexing + additionalProperties: false + required: + - documents + - vector_db_id + - chunk_size_in_tokens + title: InsertRequest + Chunk: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + The content of the chunk, which can be interleaved text, images, or other + types. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Metadata associated with the chunk that will be used in the model context + during inference. + embedding: + type: array + items: + type: number + description: >- + Optional embedding for the chunk. If not provided, it will be computed + later. + stored_chunk_id: + type: string + description: >- + The chunk ID that is stored in the vector database. Used for backend functionality. + chunk_metadata: + $ref: '#/components/schemas/ChunkMetadata' + description: >- + Metadata for the chunk that will NOT be used in the context during inference. + The `chunk_metadata` is required for backend functionality. + additionalProperties: false + required: + - content + - metadata + title: Chunk + description: >- + A chunk of content that can be inserted into a vector database. + ChunkMetadata: + type: object + properties: + chunk_id: + type: string + description: >- + The ID of the chunk. If not set, it will be generated based on the document + ID and content. + document_id: + type: string + description: >- + The ID of the document this chunk belongs to. + source: + type: string + description: >- + The source of the content, such as a URL, file path, or other identifier. + created_timestamp: + type: integer + description: >- + An optional timestamp indicating when the chunk was created. + updated_timestamp: + type: integer + description: >- + An optional timestamp indicating when the chunk was last updated.
+ chunk_window: + type: string + description: >- + The window of the chunk, which can be used to group related chunks together. + chunk_tokenizer: + type: string + description: >- + The tokenizer used to create the chunk. Default is Tiktoken. + chunk_embedding_model: + type: string + description: >- + The embedding model used to create the chunk's embedding. + chunk_embedding_dimension: + type: integer + description: >- + The dimension of the embedding vector for the chunk. + content_token_count: + type: integer + description: >- + The number of tokens in the content of the chunk. + metadata_token_count: + type: integer + description: >- + The number of tokens in the metadata of the chunk. + additionalProperties: false + title: ChunkMetadata + description: >- + `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional + information about the chunk that will not be used in the context during + inference, but is required for backend functionality. The `ChunkMetadata` is + set during chunk creation in `MemoryToolRuntimeImpl().insert()` and is not + expected to change afterward. Use `Chunk.metadata` for metadata that will + be used in the context during inference. + InsertChunksRequest: + type: object + properties: + vector_db_id: + type: string + description: >- + The identifier of the vector database to insert the chunks into. + chunks: + type: array + items: + $ref: '#/components/schemas/Chunk' + description: >- + The chunks to insert. Each `Chunk` should contain content which can be + interleaved text, images, or other types. `metadata`: `dict[str, Any]` + and `embedding`: `List[float]` are optional. If `metadata` is provided, + it configures how Llama Stack formats the chunk during generation. If + `embedding` is not provided, it will be computed later. + ttl_seconds: + type: integer + description: The time to live of the chunks. + additionalProperties: false + required: + - vector_db_id + - chunks + title: InsertChunksRequest + ProviderInfo: + type: object + properties: + api: + type: string + description: The API name this provider implements + provider_id: + type: string + description: Unique identifier for the provider + provider_type: + type: string + description: The type of provider implementation + config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Configuration parameters for the provider + health: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Current health status of the provider + additionalProperties: false + required: + - api + - provider_id + - provider_type + - config + - health + title: ProviderInfo + description: >- + Information about a registered provider including its configuration and health + status. + InvokeToolRequest: + type: object + properties: + tool_name: + type: string + description: The name of the tool to invoke. + kwargs: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + A dictionary of arguments to pass to the tool.
+ additionalProperties: false + required: + - tool_name + - kwargs + title: InvokeToolRequest + ToolInvocationResult: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + (Optional) The output content from the tool execution + error_message: + type: string + description: >- + (Optional) Error message if the tool execution failed + error_code: + type: integer + description: >- + (Optional) Numeric error code if the tool execution failed + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional metadata about the tool execution + additionalProperties: false + title: ToolInvocationResult + description: Result of a tool invocation. + PaginatedResponse: + type: object + properties: + data: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The list of items for the current page + has_more: + type: boolean + description: >- + Whether there are more items available after this set + url: + type: string + description: The URL for accessing this list + additionalProperties: false + required: + - data + - has_more + title: PaginatedResponse + description: >- + A generic paginated response that follows a simple format. + Job: + type: object + properties: + job_id: + type: string + description: Unique identifier for the job + status: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + description: Current execution status of the job + additionalProperties: false + required: + - job_id + - status + title: Job + description: >- + A job execution instance with status tracking. + ListBenchmarksResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Benchmark' + additionalProperties: false + required: + - data + title: ListBenchmarksResponse + Order: + type: string + enum: + - asc + - desc + title: Order + description: Sort order for paginated responses. 
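A minimal sketch of the tool-invocation request/response pair defined above; the field names come from `InvokeToolRequest`/`ToolInvocationResult`, while the tool name and arguments are hypothetical.

    # Hypothetical InvokeToolRequest payload and the result fields to expect.
    invoke_req = {
        "tool_name": "web_search",                      # hypothetical tool
        "kwargs": {"query": "llama stack rerank API"},  # hypothetical arguments
    }
    # A ToolInvocationResult carries optional `content` and `metadata`;
    # failures are reported through `error_message` / `error_code`.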
+ ListOpenAIChatCompletionResponse: + type: object + properties: + data: + type: array + items: + type: object + properties: + id: + type: string + description: The ID of the chat completion + choices: + type: array + items: + $ref: '#/components/schemas/OpenAIChoice' + description: List of choices + object: + type: string + const: chat.completion + default: chat.completion + description: >- + The object type, which will be "chat.completion" + created: + type: integer + description: >- + The Unix timestamp in seconds when the chat completion was created + model: + type: string + description: >- + The model that was used to generate the chat completion + input_messages: + type: array + items: + $ref: '#/components/schemas/OpenAIMessageParam' + additionalProperties: false + required: + - id + - choices + - object + - created + - model + - input_messages + title: OpenAICompletionWithInputMessages + description: >- + List of chat completion objects with their input messages + has_more: + type: boolean + description: >- + Whether there are more completions available beyond this list + first_id: + type: string + description: ID of the first completion in this list + last_id: + type: string + description: ID of the last completion in this list + object: + type: string + const: list + default: list + description: >- + Must be "list" to identify this as a list response + additionalProperties: false + required: + - data + - has_more + - first_id + - last_id + - object + title: ListOpenAIChatCompletionResponse + description: >- + Response from listing OpenAI-compatible chat completions. + ListDatasetsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Dataset' + description: List of datasets + additionalProperties: false + required: + - data + title: ListDatasetsResponse + description: Response from listing datasets. + ListModelsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Model' + additionalProperties: false + required: + - data + title: ListModelsResponse + ListOpenAIResponseInputItem: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: List of input items + object: + type: string + const: list + default: list + description: Object type identifier, always "list" + additionalProperties: false + required: + - data + - object + title: ListOpenAIResponseInputItem + description: >- + List container for OpenAI response input items. + ListOpenAIResponseObject: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseObjectWithInput' + description: >- + List of response objects with their input context + has_more: + type: boolean + description: >- + Whether there are more results available beyond this page + first_id: + type: string + description: >- + Identifier of the first item in this page + last_id: + type: string + description: Identifier of the last item in this page + object: + type: string + const: list + default: list + description: Object type identifier, always "list" + additionalProperties: false + required: + - data + - has_more + - first_id + - last_id + - object + title: ListOpenAIResponseObject + description: >- + Paginated list of OpenAI response objects with navigation metadata. 
+ OpenAIResponseObjectWithInput: + type: object + properties: + created_at: + type: integer + description: >- + Unix timestamp when the response was created + error: + $ref: '#/components/schemas/OpenAIResponseError' + description: >- + (Optional) Error details if the response generation failed + id: + type: string + description: Unique identifier for this response + model: + type: string + description: Model identifier used for generation + object: + type: string + const: response + default: response + description: >- + Object type identifier, always "response" + output: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseOutput' + description: >- + List of generated output items (messages, tool calls, etc.) + parallel_tool_calls: + type: boolean + default: false + description: >- + Whether tool calls can be executed in parallel + previous_response_id: + type: string + description: >- + (Optional) ID of the previous response in a conversation + status: + type: string + description: >- + Current status of the response generation + temperature: + type: number + description: >- + (Optional) Sampling temperature used for generation + text: + $ref: '#/components/schemas/OpenAIResponseText' + description: >- + Text formatting configuration for the response + top_p: + type: number + description: >- + (Optional) Nucleus sampling parameter used for generation + truncation: + type: string + description: >- + (Optional) Truncation strategy applied to the response + user: + type: string + description: >- + (Optional) User identifier associated with the request + input: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: >- + List of input items that led to this response + additionalProperties: false + required: + - created_at + - id + - model + - object + - output + - parallel_tool_calls + - status + - text + - input + title: OpenAIResponseObjectWithInput + description: >- + OpenAI response object extended with input context information. + ListPromptsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Prompt' + additionalProperties: false + required: + - data + title: ListPromptsResponse + description: Response model to list prompts. + ListProvidersResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ProviderInfo' + description: List of provider information objects + additionalProperties: false + required: + - data + title: ListProvidersResponse + description: >- + Response containing a list of all available providers. + RouteInfo: + type: object + properties: + route: + type: string + description: The API endpoint path + method: + type: string + description: HTTP method for the route + provider_types: + type: array + items: + type: string + description: >- + List of provider types that implement this route + additionalProperties: false + required: + - route + - method + - provider_types + title: RouteInfo + description: >- + Information about an API route including its path, method, and implementing + providers. + ListRoutesResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/RouteInfo' + description: >- + List of available route information objects + additionalProperties: false + required: + - data + title: ListRoutesResponse + description: >- + Response containing a list of all available API routes. 
+ ListToolDefsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ToolDef' + description: List of tool definitions + additionalProperties: false + required: + - data + title: ListToolDefsResponse + description: >- + Response containing a list of tool definitions. + ListScoringFunctionsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ScoringFn' + additionalProperties: false + required: + - data + title: ListScoringFunctionsResponse + ListShieldsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Shield' + additionalProperties: false + required: + - data + title: ListShieldsResponse + ListToolGroupsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ToolGroup' + description: List of tool groups + additionalProperties: false + required: + - data + title: ListToolGroupsResponse + description: >- + Response containing a list of tool groups. + ListToolsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Tool' + description: List of tools + additionalProperties: false + required: + - data + title: ListToolsResponse + description: Response containing a list of tools. + ListVectorDBsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/VectorDB' + description: List of vector databases + additionalProperties: false + required: + - data + title: ListVectorDBsResponse + description: Response from listing vector databases. + Event: + oneOf: + - $ref: '#/components/schemas/UnstructuredLogEvent' + - $ref: '#/components/schemas/MetricEvent' + - $ref: '#/components/schemas/StructuredLogEvent' + discriminator: + propertyName: type + mapping: + unstructured_log: '#/components/schemas/UnstructuredLogEvent' + metric: '#/components/schemas/MetricEvent' + structured_log: '#/components/schemas/StructuredLogEvent' + EventType: + type: string + enum: + - unstructured_log + - structured_log + - metric + title: EventType + description: >- + The type of telemetry event being logged. + LogSeverity: + type: string + enum: + - verbose + - debug + - info + - warn + - error + - critical + title: LogSeverity + description: The severity level of a log message. 
+ MetricEvent: + type: object + properties: + trace_id: + type: string + description: >- + Unique identifier for the trace this event belongs to + span_id: + type: string + description: >- + Unique identifier for the span this event belongs to + timestamp: + type: string + format: date-time + description: Timestamp when the event occurred + attributes: + type: object + additionalProperties: + oneOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + description: >- + (Optional) Key-value pairs containing additional metadata about the event + type: + $ref: '#/components/schemas/EventType' + const: metric + default: metric + description: Event type identifier set to METRIC + metric: + type: string + description: The name of the metric being measured + value: + oneOf: + - type: integer + - type: number + description: >- + The numeric value of the metric measurement + unit: + type: string + description: >- + The unit of measurement for the metric value + additionalProperties: false + required: + - trace_id + - span_id + - timestamp + - type + - metric + - value + - unit + title: MetricEvent + description: >- + A metric event containing a measured value. + SpanEndPayload: + type: object + properties: + type: + $ref: '#/components/schemas/StructuredLogType' + const: span_end + default: span_end + description: Payload type identifier set to SPAN_END + status: + $ref: '#/components/schemas/SpanStatus' + description: >- + The final status of the span indicating success or failure + additionalProperties: false + required: + - type + - status + title: SpanEndPayload + description: Payload for a span end event. + SpanStartPayload: + type: object + properties: + type: + $ref: '#/components/schemas/StructuredLogType' + const: span_start + default: span_start + description: >- + Payload type identifier set to SPAN_START + name: + type: string + description: >- + Human-readable name describing the operation this span represents + parent_span_id: + type: string + description: >- + (Optional) Unique identifier for the parent span, if this is a child span + additionalProperties: false + required: + - type + - name + title: SpanStartPayload + description: Payload for a span start event. + StructuredLogEvent: + type: object + properties: + trace_id: + type: string + description: >- + Unique identifier for the trace this event belongs to + span_id: + type: string + description: >- + Unique identifier for the span this event belongs to + timestamp: + type: string + format: date-time + description: Timestamp when the event occurred + attributes: + type: object + additionalProperties: + oneOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + description: >- + (Optional) Key-value pairs containing additional metadata about the event + type: + $ref: '#/components/schemas/EventType' + const: structured_log + default: structured_log + description: >- + Event type identifier set to STRUCTURED_LOG + payload: + $ref: '#/components/schemas/StructuredLogPayload' + description: >- + The structured payload data for the log event + additionalProperties: false + required: + - trace_id + - span_id + - timestamp + - type + - payload + title: StructuredLogEvent + description: >- + A structured log event containing typed payload data. 
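A sketch of a `MetricEvent` payload per the schema above; it would be submitted through the `LogEventRequest` schema defined below. Values are invented, field names come from the schemas.

    # Illustrative MetricEvent (values invented; field names from the schema).
    metric_event = {
        "type": "metric",
        "trace_id": "t-001",
        "span_id": "a1b2c3",
        "timestamp": "2025-09-03T17:34:05Z",
        "metric": "prompt_tokens",
        "value": 128,
        "unit": "tokens",
    }
    log_request = {"event": metric_event, "ttl_seconds": 3600}  # LogEventRequest shape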
+ StructuredLogPayload: + oneOf: + - $ref: '#/components/schemas/SpanStartPayload' + - $ref: '#/components/schemas/SpanEndPayload' + discriminator: + propertyName: type + mapping: + span_start: '#/components/schemas/SpanStartPayload' + span_end: '#/components/schemas/SpanEndPayload' + StructuredLogType: + type: string + enum: + - span_start + - span_end + title: StructuredLogType + description: >- + The type of structured log event payload. + UnstructuredLogEvent: + type: object + properties: + trace_id: + type: string + description: >- + Unique identifier for the trace this event belongs to + span_id: + type: string + description: >- + Unique identifier for the span this event belongs to + timestamp: + type: string + format: date-time + description: Timestamp when the event occurred + attributes: + type: object + additionalProperties: + oneOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + description: >- + (Optional) Key-value pairs containing additional metadata about the event + type: + $ref: '#/components/schemas/EventType' + const: unstructured_log + default: unstructured_log + description: >- + Event type identifier set to UNSTRUCTURED_LOG + message: + type: string + description: The log message text + severity: + $ref: '#/components/schemas/LogSeverity' + description: The severity level of the log message + additionalProperties: false + required: + - trace_id + - span_id + - timestamp + - type + - message + - severity + title: UnstructuredLogEvent + description: >- + An unstructured log event containing a simple text message. + LogEventRequest: + type: object + properties: + event: + $ref: '#/components/schemas/Event' + description: The event to log. + ttl_seconds: + type: integer + description: The time to live of the event. + additionalProperties: false + required: + - event + - ttl_seconds + title: LogEventRequest + VectorStoreChunkingStrategy: + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + VectorStoreChunkingStrategyAuto: + type: object + properties: + type: + type: string + const: auto + default: auto + description: >- + Strategy type, always "auto" for automatic chunking + additionalProperties: false + required: + - type + title: VectorStoreChunkingStrategyAuto + description: >- + Automatic chunking strategy for vector store files. + VectorStoreChunkingStrategyStatic: + type: object + properties: + type: + type: string + const: static + default: static + description: >- + Strategy type, always "static" for static chunking + static: + $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' + description: >- + Configuration parameters for the static chunking strategy + additionalProperties: false + required: + - type + - static + title: VectorStoreChunkingStrategyStatic + description: >- + Static chunking strategy with configurable parameters. 
+ VectorStoreChunkingStrategyStaticConfig: + type: object + properties: + chunk_overlap_tokens: + type: integer + default: 400 + description: >- + Number of tokens to overlap between adjacent chunks + max_chunk_size_tokens: + type: integer + default: 800 + description: >- + Maximum number of tokens per chunk, must be between 100 and 4096 + additionalProperties: false + required: + - chunk_overlap_tokens + - max_chunk_size_tokens + title: VectorStoreChunkingStrategyStaticConfig + description: >- + Configuration for static chunking strategy. + OpenaiAttachFileToVectorStoreRequest: + type: object + properties: + file_id: + type: string + description: >- + The ID of the file to attach to the vector store. + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The key-value attributes stored with the file, which can be used for filtering. + chunking_strategy: + $ref: '#/components/schemas/VectorStoreChunkingStrategy' + description: >- + The chunking strategy to use for the file. + additionalProperties: false + required: + - file_id + title: OpenaiAttachFileToVectorStoreRequest + VectorStoreFileLastError: + type: object + properties: + code: + oneOf: + - type: string + const: server_error + - type: string + const: rate_limit_exceeded + description: >- + Error code indicating the type of failure + message: + type: string + description: >- + Human-readable error message describing the failure + additionalProperties: false + required: + - code + - message + title: VectorStoreFileLastError + description: >- + Error information for failed vector store file processing. + VectorStoreFileObject: + type: object + properties: + id: + type: string + description: Unique identifier for the file + object: + type: string + default: vector_store.file + description: >- + Object type identifier, always "vector_store.file" + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Key-value attributes associated with the file + chunking_strategy: + $ref: '#/components/schemas/VectorStoreChunkingStrategy' + description: >- + Strategy used for splitting the file into chunks + created_at: + type: integer + description: >- + Timestamp when the file was added to the vector store + last_error: + $ref: '#/components/schemas/VectorStoreFileLastError' + description: >- + (Optional) Error information if file processing failed + status: + $ref: '#/components/schemas/VectorStoreFileStatus' + description: Current processing status of the file + usage_bytes: + type: integer + default: 0 + description: Storage space used by this file in bytes + vector_store_id: + type: string + description: >- + ID of the vector store containing this file + additionalProperties: false + required: + - id + - object + - attributes + - chunking_strategy + - created_at + - status + - usage_bytes + - vector_store_id + title: VectorStoreFileObject + description: OpenAI Vector Store File object. 
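A sketch of `OpenaiAttachFileToVectorStoreRequest` using the static chunking strategy above; the file ID and attributes are hypothetical, while the token limits are the schema defaults.

    # Hypothetical attach-file request with an explicit static chunking strategy.
    attach_req = {
        "file_id": "file_123",                    # hypothetical file ID
        "chunking_strategy": {
            "type": "static",
            "static": {
                "max_chunk_size_tokens": 800,     # schema default (must be 100-4096)
                "chunk_overlap_tokens": 400,      # schema default
            },
        },
        "attributes": {"source": "user-manual"},  # optional filter attributes
    }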
+ VectorStoreFileStatus: + oneOf: + - type: string + const: completed + - type: string + const: in_progress + - type: string + const: cancelled + - type: string + const: failed OpenAIJSONSchema: type: object properties: @@ -9582,6 +11706,1606 @@ components: title: VectorStoreSearchResponsePage description: >- Paginated response from searching a vector store. + OpenaiUpdateVectorStoreRequest: + type: object + properties: + name: + type: string + description: The name of the vector store. + expires_after: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The expiration policy for a vector store. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Set of 16 key-value pairs that can be attached to an object. + additionalProperties: false + title: OpenaiUpdateVectorStoreRequest + OpenaiUpdateVectorStoreFileRequest: + type: object + properties: + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The updated key-value attributes to store with the file. + additionalProperties: false + required: + - attributes + title: OpenaiUpdateVectorStoreFileRequest + DPOAlignmentConfig: + type: object + properties: + beta: + type: number + description: Temperature parameter for the DPO loss + loss_type: + $ref: '#/components/schemas/DPOLossType' + default: sigmoid + description: The type of loss function to use for DPO + additionalProperties: false + required: + - beta + - loss_type + title: DPOAlignmentConfig + description: >- + Configuration for Direct Preference Optimization (DPO) alignment. + DPOLossType: + type: string + enum: + - sigmoid + - hinge + - ipo + - kto_pair + title: DPOLossType + DataConfig: + type: object + properties: + dataset_id: + type: string + description: >- + Unique identifier for the training dataset + batch_size: + type: integer + description: Number of samples per training batch + shuffle: + type: boolean + description: >- + Whether to shuffle the dataset during training + data_format: + $ref: '#/components/schemas/DatasetFormat' + description: >- + Format of the dataset (instruct or dialog) + validation_dataset_id: + type: string + description: >- + (Optional) Unique identifier for the validation dataset + packed: + type: boolean + default: false + description: >- + (Optional) Whether to pack multiple samples into a single sequence for + efficiency + train_on_input: + type: boolean + default: false + description: >- + (Optional) Whether to compute loss on input tokens as well as output tokens + additionalProperties: false + required: + - dataset_id + - batch_size + - shuffle + - data_format + title: DataConfig + description: >- + Configuration for training data and data loading. + DatasetFormat: + type: string + enum: + - instruct + - dialog + title: DatasetFormat + description: Format of the training dataset.
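To make the post-training configs concrete, a sketch of the `DPOAlignmentConfig` and `DataConfig` payloads above; the dataset ID, batch size, and beta value are illustrative, the field names and enum values come from the schemas.

    # Illustrative DPO + data configs (dataset ID, batch size, and beta assumed).
    dpo_config = {"beta": 0.1, "loss_type": "sigmoid"}  # DPOLossType default
    data_config = {
        "dataset_id": "my-preference-data",             # hypothetical dataset
        "batch_size": 8,
        "shuffle": True,
        "data_format": "instruct",                      # DatasetFormat: instruct | dialog
    }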
+ EfficiencyConfig: + type: object + properties: + enable_activation_checkpointing: + type: boolean + default: false + description: >- + (Optional) Whether to use activation checkpointing to reduce memory usage + enable_activation_offloading: + type: boolean + default: false + description: >- + (Optional) Whether to offload activations to CPU to save GPU memory + memory_efficient_fsdp_wrap: + type: boolean + default: false + description: >- + (Optional) Whether to use memory-efficient FSDP wrapping + fsdp_cpu_offload: + type: boolean + default: false + description: >- + (Optional) Whether to offload FSDP parameters to CPU + additionalProperties: false + title: EfficiencyConfig + description: >- + Configuration for memory and compute efficiency optimizations. + OptimizerConfig: + type: object + properties: + optimizer_type: + $ref: '#/components/schemas/OptimizerType' + description: >- + Type of optimizer to use (adam, adamw, or sgd) + lr: + type: number + description: Learning rate for the optimizer + weight_decay: + type: number + description: >- + Weight decay coefficient for regularization + num_warmup_steps: + type: integer + description: Number of steps for learning rate warmup + additionalProperties: false + required: + - optimizer_type + - lr + - weight_decay + - num_warmup_steps + title: OptimizerConfig + description: >- + Configuration parameters for the optimization algorithm. + OptimizerType: + type: string + enum: + - adam + - adamw + - sgd + title: OptimizerType + description: >- + Available optimizer algorithms for training. + TrainingConfig: + type: object + properties: + n_epochs: + type: integer + description: Number of training epochs to run + max_steps_per_epoch: + type: integer + default: 1 + description: Maximum number of steps to run per epoch + gradient_accumulation_steps: + type: integer + default: 1 + description: >- + Number of steps to accumulate gradients before updating + max_validation_steps: + type: integer + default: 1 + description: >- + (Optional) Maximum number of validation steps per epoch + data_config: + $ref: '#/components/schemas/DataConfig' + description: >- + (Optional) Configuration for data loading and formatting + optimizer_config: + $ref: '#/components/schemas/OptimizerConfig' + description: >- + (Optional) Configuration for the optimization algorithm + efficiency_config: + $ref: '#/components/schemas/EfficiencyConfig' + description: >- + (Optional) Configuration for memory and compute optimizations + dtype: + type: string + default: bf16 + description: >- + (Optional) Data type for model parameters (bf16, fp16, fp32) + additionalProperties: false + required: + - n_epochs + - max_steps_per_epoch + - gradient_accumulation_steps + title: TrainingConfig + description: >- + Comprehensive configuration for the training process. + PreferenceOptimizeRequest: + type: object + properties: + job_uuid: + type: string + description: The UUID of the job to create. + finetuned_model: + type: string + description: The model to fine-tune. + algorithm_config: + $ref: '#/components/schemas/DPOAlignmentConfig' + description: The algorithm configuration. + training_config: + $ref: '#/components/schemas/TrainingConfig' + description: The training configuration. + hyperparam_search_config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The hyperparam search configuration. 
+ logger_config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The logger configuration. + additionalProperties: false + required: + - job_uuid + - finetuned_model + - algorithm_config + - training_config + - hyperparam_search_config + - logger_config + title: PreferenceOptimizeRequest + PostTrainingJob: + type: object + properties: + job_uuid: + type: string + additionalProperties: false + required: + - job_uuid + title: PostTrainingJob + DefaultRAGQueryGeneratorConfig: + type: object + properties: + type: + type: string + const: default + default: default + description: >- + Type of query generator, always 'default' + separator: + type: string + default: ' ' + description: >- + String separator used to join query terms + additionalProperties: false + required: + - type + - separator + title: DefaultRAGQueryGeneratorConfig + description: >- + Configuration for the default RAG query generator. + LLMRAGQueryGeneratorConfig: + type: object + properties: + type: + type: string + const: llm + default: llm + description: Type of query generator, always 'llm' + model: + type: string + description: >- + Name of the language model to use for query generation + template: + type: string + description: >- + Template string for formatting the query generation prompt + additionalProperties: false + required: + - type + - model + - template + title: LLMRAGQueryGeneratorConfig + description: >- + Configuration for the LLM-based RAG query generator. + RAGQueryConfig: + type: object + properties: + query_generator_config: + $ref: '#/components/schemas/RAGQueryGeneratorConfig' + description: Configuration for the query generator. + max_tokens_in_context: + type: integer + default: 4096 + description: Maximum number of tokens in the context. + max_chunks: + type: integer + default: 5 + description: Maximum number of chunks to retrieve. + chunk_template: + type: string + default: > + Result {index} + + Content: {chunk.content} + + Metadata: {metadata} + description: >- + Template for formatting each retrieved chunk in the context. Available + placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk + content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: + {chunk.content}\nMetadata: {metadata}\n" + mode: + $ref: '#/components/schemas/RAGSearchMode' + default: vector + description: >- + Search mode for retrieval—either "vector", "keyword", or "hybrid". Default + "vector". + ranker: + $ref: '#/components/schemas/Ranker' + description: >- + Configuration for the ranker to use in hybrid search. Defaults to RRF + ranker. + additionalProperties: false + required: + - query_generator_config + - max_tokens_in_context + - max_chunks + - chunk_template + title: RAGQueryConfig + description: >- + Configuration for the RAG query generation. 
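A sketch of a full `RAGQueryConfig` per the schema above, combining hybrid search mode with the RRF ranker; all values shown are the schema defaults except `mode`.

    # RAGQueryConfig using hybrid search with the default RRF ranker.
    query_config = {
        "query_generator_config": {"type": "default", "separator": " "},
        "max_tokens_in_context": 4096,
        "max_chunks": 5,
        "chunk_template": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n",
        "mode": "hybrid",                                  # schema default is "vector"
        "ranker": {"type": "rrf", "impact_factor": 60.0},  # schema default ranker
    }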
+ RAGQueryGeneratorConfig: + oneOf: + - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' + - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' + discriminator: + propertyName: type + mapping: + default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' + llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' + RAGSearchMode: + type: string + enum: + - vector + - keyword + - hybrid + title: RAGSearchMode + description: >- + Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search + for semantic matching - KEYWORD: Uses keyword-based search for exact matching + - HYBRID: Combines both vector and keyword search for better results + RRFRanker: + type: object + properties: + type: + type: string + const: rrf + default: rrf + description: The type of ranker, always "rrf" + impact_factor: + type: number + default: 60.0 + description: >- + The impact factor for RRF scoring. Higher values give more weight to higher-ranked + results. Must be greater than 0 + additionalProperties: false + required: + - type + - impact_factor + title: RRFRanker + description: >- + Reciprocal Rank Fusion (RRF) ranker configuration. + Ranker: + oneOf: + - $ref: '#/components/schemas/RRFRanker' + - $ref: '#/components/schemas/WeightedRanker' + discriminator: + propertyName: type + mapping: + rrf: '#/components/schemas/RRFRanker' + weighted: '#/components/schemas/WeightedRanker' + WeightedRanker: + type: object + properties: + type: + type: string + const: weighted + default: weighted + description: The type of ranker, always "weighted" + alpha: + type: number + default: 0.5 + description: >- + Weight factor between 0 and 1. 0 means only use keyword scores, 1 means + only use vector scores, values in between blend both scores. + additionalProperties: false + required: + - type + - alpha + title: WeightedRanker + description: >- + Weighted ranker configuration that combines vector and keyword scores. + QueryRequest: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + The query content to search for in the indexed documents + vector_db_ids: + type: array + items: + type: string + description: >- + List of vector database IDs to search within + query_config: + $ref: '#/components/schemas/RAGQueryConfig' + description: >- + (Optional) Configuration parameters for the query operation + additionalProperties: false + required: + - content + - vector_db_ids + title: QueryRequest + RAGQueryResult: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + (Optional) The retrieved content from the query + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Additional metadata about the query result + additionalProperties: false + required: + - metadata + title: RAGQueryResult + description: >- + Result of a RAG query containing retrieved content and metadata. + QueryChunksRequest: + type: object + properties: + vector_db_id: + type: string + description: >- + The identifier of the vector database to query. + query: + $ref: '#/components/schemas/InterleavedContent' + description: The query to search for. + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the query. 
+ additionalProperties: false + required: + - vector_db_id + - query + title: QueryChunksRequest + QueryChunksResponse: + type: object + properties: + chunks: + type: array + items: + $ref: '#/components/schemas/Chunk' + description: >- + List of content chunks returned from the query + scores: + type: array + items: + type: number + description: >- + Relevance scores corresponding to each returned chunk + additionalProperties: false + required: + - chunks + - scores + title: QueryChunksResponse + description: >- + Response from querying chunks in a vector database. + QueryMetricsRequest: + type: object + properties: + start_time: + type: integer + description: The start time of the metric to query. + end_time: + type: integer + description: The end time of the metric to query. + granularity: + type: string + description: The granularity of the metric to query. + query_type: + type: string + enum: + - range + - instant + description: The type of query to perform. + label_matchers: + type: array + items: + type: object + properties: + name: + type: string + description: The name of the label to match + value: + type: string + description: The value to match against + operator: + type: string + enum: + - '=' + - '!=' + - =~ + - '!~' + description: >- + The comparison operator to use for matching + default: '=' + additionalProperties: false + required: + - name + - value + - operator + title: MetricLabelMatcher + description: >- + A matcher for filtering metrics by label values. + description: >- + The label matchers to apply to the metric. + additionalProperties: false + required: + - start_time + - query_type + title: QueryMetricsRequest + MetricDataPoint: + type: object + properties: + timestamp: + type: integer + description: >- + Unix timestamp when the metric value was recorded + value: + type: number + description: >- + The numeric value of the metric at this timestamp + unit: + type: string + additionalProperties: false + required: + - timestamp + - value + - unit + title: MetricDataPoint + description: >- + A single data point in a metric time series. + MetricLabel: + type: object + properties: + name: + type: string + description: The name of the label + value: + type: string + description: The value of the label + additionalProperties: false + required: + - name + - value + title: MetricLabel + description: A label associated with a metric. + MetricSeries: + type: object + properties: + metric: + type: string + description: The name of the metric + labels: + type: array + items: + $ref: '#/components/schemas/MetricLabel' + description: >- + List of labels associated with this metric series + values: + type: array + items: + $ref: '#/components/schemas/MetricDataPoint' + description: >- + List of data points in chronological order + additionalProperties: false + required: + - metric + - labels + - values + title: MetricSeries + description: A time series of metric data points. + QueryMetricsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/MetricSeries' + description: >- + List of metric series matching the query criteria + additionalProperties: false + required: + - data + title: QueryMetricsResponse + description: >- + Response containing metric time series data. 
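A sketch of a `QueryMetricsRequest` with a label matcher, per the schemas above; the time window, label name, and regex value are invented.

    # Hypothetical range query for metrics, filtering by a label matcher.
    metrics_query = {
        "start_time": 1725000000,   # Unix seconds (invented)
        "end_time": 1725003600,
        "query_type": "range",
        "label_matchers": [
            {"name": "model_id", "value": "nvidia/.*", "operator": "=~"}
        ],
    }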
+ QueryCondition: + type: object + properties: + key: + type: string + description: The attribute key to filter on + op: + $ref: '#/components/schemas/QueryConditionOp' + description: The comparison operator to apply + value: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The value to compare against + additionalProperties: false + required: + - key + - op + - value + title: QueryCondition + description: A condition for filtering query results. + QueryConditionOp: + type: string + enum: + - eq + - ne + - gt + - lt + title: QueryConditionOp + description: >- + Comparison operators for query conditions. + QuerySpansRequest: + type: object + properties: + attribute_filters: + type: array + items: + $ref: '#/components/schemas/QueryCondition' + description: >- + The attribute filters to apply to the spans. + attributes_to_return: + type: array + items: + type: string + description: The attributes to return in the spans. + max_depth: + type: integer + description: The maximum depth of the tree. + additionalProperties: false + required: + - attribute_filters + - attributes_to_return + title: QuerySpansRequest + QuerySpansResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Span' + description: >- + List of spans matching the query criteria + additionalProperties: false + required: + - data + title: QuerySpansResponse + description: Response containing a list of spans. + QueryTracesRequest: + type: object + properties: + attribute_filters: + type: array + items: + $ref: '#/components/schemas/QueryCondition' + description: >- + The attribute filters to apply to the traces. + limit: + type: integer + description: The limit of traces to return. + offset: + type: integer + description: The offset of the traces to return. + order_by: + type: array + items: + type: string + description: The order by of the traces to return. + additionalProperties: false + title: QueryTracesRequest + QueryTracesResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Trace' + description: >- + List of traces matching the query criteria + additionalProperties: false + required: + - data + title: QueryTracesResponse + description: Response containing a list of traces. + RegisterBenchmarkRequest: + type: object + properties: + benchmark_id: + type: string + description: The ID of the benchmark to register. + dataset_id: + type: string + description: >- + The ID of the dataset to use for the benchmark. + scoring_functions: + type: array + items: + type: string + description: >- + The scoring functions to use for the benchmark. + provider_benchmark_id: + type: string + description: >- + The ID of the provider benchmark to use for the benchmark. + provider_id: + type: string + description: >- + The ID of the provider to use for the benchmark. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The metadata to use for the benchmark. + additionalProperties: false + required: + - benchmark_id + - dataset_id + - scoring_functions + title: RegisterBenchmarkRequest + RegisterDatasetRequest: + type: object + properties: + purpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + description: >- + The purpose of the dataset. 
One of: - "post-training/messages": The dataset + contains a messages column with list of messages for post-training. { + "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", + "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset + contains a question column and an answer column for evaluation. { "question": + "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer": + The dataset contains a messages column with list of messages and an answer + column for evaluation. { "messages": [ {"role": "user", "content": "Hello, + my name is John Doe."}, {"role": "assistant", "content": "Hello, John + Doe. How can I help you today?"}, {"role": "user", "content": "What's + my name?"}, ], "answer": "John Doe" } + source: + $ref: '#/components/schemas/DataSource' + description: >- + The data source of the dataset. Ensure that the data source schema is + compatible with the purpose of the dataset. Examples: - { "type": "uri", + "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": + "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" + } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" + } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": + "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] + } ] } + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The metadata for the dataset. - E.g. {"description": "My dataset"}. + dataset_id: + type: string + description: >- + The ID of the dataset. If not provided, an ID will be generated. + additionalProperties: false + required: + - purpose + - source + title: RegisterDatasetRequest + RegisterModelRequest: + type: object + properties: + model_id: + type: string + description: The identifier of the model to register. + provider_model_id: + type: string + description: >- + The identifier of the model in the provider. + provider_id: + type: string + description: The identifier of the provider. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Any additional metadata for this model. + model_type: + $ref: '#/components/schemas/ModelType' + description: The type of model to register. + additionalProperties: false + required: + - model_id + title: RegisterModelRequest + RegisterScoringFunctionRequest: + type: object + properties: + scoring_fn_id: + type: string + description: >- + The ID of the scoring function to register. + description: + type: string + description: The description of the scoring function. + return_type: + $ref: '#/components/schemas/ParamType' + description: The return type of the scoring function. + provider_scoring_fn_id: + type: string + description: >- + The ID of the provider scoring function to use for the scoring function. + provider_id: + type: string + description: >- + The ID of the provider to use for the scoring function. + params: + $ref: '#/components/schemas/ScoringFnParams' + description: >- + The parameters for the scoring function for benchmark eval, these can + be overridden for app eval. 
+ additionalProperties: false + required: + - scoring_fn_id + - description + - return_type + title: RegisterScoringFunctionRequest + RegisterShieldRequest: + type: object + properties: + shield_id: + type: string + description: >- + The identifier of the shield to register. + provider_shield_id: + type: string + description: >- + The identifier of the shield in the provider. + provider_id: + type: string + description: The identifier of the provider. + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the shield. + additionalProperties: false + required: + - shield_id + title: RegisterShieldRequest + RegisterToolGroupRequest: + type: object + properties: + toolgroup_id: + type: string + description: The ID of the tool group to register. + provider_id: + type: string + description: >- + The ID of the provider to use for the tool group. + mcp_endpoint: + $ref: '#/components/schemas/URL' + description: >- + The MCP endpoint to use for the tool group. + args: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + A dictionary of arguments to pass to the tool group. + additionalProperties: false + required: + - toolgroup_id + - provider_id + title: RegisterToolGroupRequest + RegisterVectorDbRequest: + type: object + properties: + vector_db_id: + type: string + description: >- + The identifier of the vector database to register. + embedding_model: + type: string + description: The embedding model to use. + embedding_dimension: + type: integer + description: The dimension of the embedding model. + provider_id: + type: string + description: The identifier of the provider. + vector_db_name: + type: string + description: The name of the vector database. + provider_vector_db_id: + type: string + description: >- + The identifier of the vector database in the provider. + additionalProperties: false + required: + - vector_db_id + - embedding_model + title: RegisterVectorDbRequest + RerankRequest: + type: object + properties: + model: + type: string + description: >- + The identifier of the reranking model to use. The model must be a reranking + model registered with Llama Stack and available via the /models endpoint. + query: + oneOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + description: >- + The search query to rank items against. Can be a string, text content + part, or image content part. The input must not exceed the model's max + input token length. + items: + type: array + items: + oneOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + description: >- + List of items to rerank. Each item can be a string, text content part, + or image content part. Each input must not exceed the model's max input + token length. + max_num_results: + type: integer + description: >- + (Optional) Maximum number of results to return. Default: returns all. 
+ additionalProperties: false + required: + - model + - query + - items + title: RerankRequest + RerankData: + type: object + properties: + index: + type: integer + description: >- + The original index of the document in the input list + relevance_score: + type: number + description: >- + The relevance score from the model output. Values are inverted when applicable + so that higher scores indicate greater relevance. + additionalProperties: false + required: + - index + - relevance_score + title: RerankData + description: >- + A single rerank result from a reranking response. + RerankResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/RerankData' + description: >- + List of rerank result objects, sorted by relevance score (descending) + additionalProperties: false + required: + - data + title: RerankResponse + description: Response from a reranking request. + ResumeAgentTurnRequest: + type: object + properties: + tool_responses: + type: array + items: + $ref: '#/components/schemas/ToolResponse' + description: >- + The tool call responses to resume the turn with. + stream: + type: boolean + description: Whether to stream the response. + additionalProperties: false + required: + - tool_responses + title: ResumeAgentTurnRequest + RunEvalRequest: + type: object + properties: + benchmark_config: + $ref: '#/components/schemas/BenchmarkConfig' + description: The configuration for the benchmark. + additionalProperties: false + required: + - benchmark_config + title: RunEvalRequest + RunModerationRequest: + type: object + properties: + input: + oneOf: + - type: string + - type: array + items: + type: string + description: >- + Input (or inputs) to classify. Can be a single string, an array of strings, + or an array of multi-modal input objects similar to other models. + model: + type: string + description: >- + The content moderation model you would like to use. + additionalProperties: false + required: + - input + - model + title: RunModerationRequest + ModerationObject: + type: object + properties: + id: + type: string + description: >- + The unique identifier for the moderation request. + model: + type: string + description: >- + The model used to generate the moderation results. + results: + type: array + items: + $ref: '#/components/schemas/ModerationObjectResults' + description: A list of moderation objects + additionalProperties: false + required: + - id + - model + - results + title: ModerationObject + description: A moderation object. + ModerationObjectResults: + type: object + properties: + flagged: + type: boolean + description: >- + Whether any of the below categories are flagged. + categories: + type: object + additionalProperties: + type: boolean + description: >- + A list of the categories, and whether they are flagged or not. + category_applied_input_types: + type: object + additionalProperties: + type: array + items: + type: string + description: >- + A list of the categories along with the input type(s) that the score applies + to. + category_scores: + type: object + additionalProperties: + type: number + description: >- + A list of the categories along with their scores as predicted by model. + user_message: + type: string + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + additionalProperties: false + required: + - flagged + - metadata + title: ModerationObjectResults + description: A moderation object. 
+ RunShieldRequest: + type: object + properties: + shield_id: + type: string + description: The identifier of the shield to run. + messages: + type: array + items: + $ref: '#/components/schemas/Message' + description: The messages to run the shield on. + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the shield. + additionalProperties: false + required: + - shield_id + - messages + - params + title: RunShieldRequest + RunShieldResponse: + type: object + properties: + violation: + $ref: '#/components/schemas/SafetyViolation' + description: >- + (Optional) Safety violation detected by the shield, if any + additionalProperties: false + title: RunShieldResponse + description: Response from running a safety shield. + SaveSpansToDatasetRequest: + type: object + properties: + attribute_filters: + type: array + items: + $ref: '#/components/schemas/QueryCondition' + description: >- + The attribute filters to apply to the spans. + attributes_to_save: + type: array + items: + type: string + description: The attributes to save to the dataset. + dataset_id: + type: string + description: >- + The ID of the dataset to save the spans to. + max_depth: + type: integer + description: The maximum depth of the tree. + additionalProperties: false + required: + - attribute_filters + - attributes_to_save + - dataset_id + title: SaveSpansToDatasetRequest + ScoreRequest: + type: object + properties: + input_rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The rows to score. + scoring_functions: + type: object + additionalProperties: + oneOf: + - $ref: '#/components/schemas/ScoringFnParams' + - type: 'null' + description: >- + The scoring functions to use for the scoring. + additionalProperties: false + required: + - input_rows + - scoring_functions + title: ScoreRequest + ScoreResponse: + type: object + properties: + results: + type: object + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + description: >- + A map of scoring function name to ScoringResult. + additionalProperties: false + required: + - results + title: ScoreResponse + description: The response from scoring. + ScoreBatchRequest: + type: object + properties: + dataset_id: + type: string + description: The ID of the dataset to score. + scoring_functions: + type: object + additionalProperties: + oneOf: + - $ref: '#/components/schemas/ScoringFnParams' + - type: 'null' + description: >- + The scoring functions to use for the scoring. + save_results_dataset: + type: boolean + description: >- + Whether to save the results to a dataset. + additionalProperties: false + required: + - dataset_id + - scoring_functions + - save_results_dataset + title: ScoreBatchRequest + ScoreBatchResponse: + type: object + properties: + dataset_id: + type: string + description: >- + (Optional) The identifier of the dataset that was scored + results: + type: object + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + description: >- + A map of scoring function name to ScoringResult + additionalProperties: false + required: + - results + title: ScoreBatchResponse + description: >- + Response from batch scoring operations on datasets. 
+ SetDefaultVersionRequest: + type: object + properties: + version: + type: integer + description: The version to set as default. + additionalProperties: false + required: + - version + title: SetDefaultVersionRequest + AlgorithmConfig: + oneOf: + - $ref: '#/components/schemas/LoraFinetuningConfig' + - $ref: '#/components/schemas/QATFinetuningConfig' + discriminator: + propertyName: type + mapping: + LoRA: '#/components/schemas/LoraFinetuningConfig' + QAT: '#/components/schemas/QATFinetuningConfig' + LoraFinetuningConfig: + type: object + properties: + type: + type: string + const: LoRA + default: LoRA + description: Algorithm type identifier, always "LoRA" + lora_attn_modules: + type: array + items: + type: string + description: >- + List of attention module names to apply LoRA to + apply_lora_to_mlp: + type: boolean + description: Whether to apply LoRA to MLP layers + apply_lora_to_output: + type: boolean + description: >- + Whether to apply LoRA to output projection layers + rank: + type: integer + description: >- + Rank of the LoRA adaptation (lower rank = fewer parameters) + alpha: + type: integer + description: >- + LoRA scaling parameter that controls adaptation strength + use_dora: + type: boolean + default: false + description: >- + (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation) + quantize_base: + type: boolean + default: false + description: >- + (Optional) Whether to quantize the base model weights + additionalProperties: false + required: + - type + - lora_attn_modules + - apply_lora_to_mlp + - apply_lora_to_output + - rank + - alpha + title: LoraFinetuningConfig + description: >- + Configuration for Low-Rank Adaptation (LoRA) fine-tuning. + QATFinetuningConfig: + type: object + properties: + type: + type: string + const: QAT + default: QAT + description: Algorithm type identifier, always "QAT" + quantizer_name: + type: string + description: >- + Name of the quantization algorithm to use + group_size: + type: integer + description: Size of groups for grouped quantization + additionalProperties: false + required: + - type + - quantizer_name + - group_size + title: QATFinetuningConfig + description: >- + Configuration for Quantization-Aware Training (QAT) fine-tuning. + SupervisedFineTuneRequest: + type: object + properties: + job_uuid: + type: string + description: The UUID of the job to create. + training_config: + $ref: '#/components/schemas/TrainingConfig' + description: The training configuration. + hyperparam_search_config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The hyperparam search configuration. + logger_config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The logger configuration. + model: + type: string + description: The model to fine-tune. + checkpoint_dir: + type: string + description: The directory to save checkpoint(s) to. + algorithm_config: + $ref: '#/components/schemas/AlgorithmConfig' + description: The algorithm configuration. 
+      additionalProperties: false
+      required:
+        - job_uuid
+        - training_config
+        - hyperparam_search_config
+        - logger_config
+      title: SupervisedFineTuneRequest
+    SyntheticDataGenerateRequest:
+      type: object
+      properties:
+        dialogs:
+          type: array
+          items:
+            $ref: '#/components/schemas/Message'
+          description: >-
+            List of conversation messages to use as input for synthetic data generation
+        filtering_function:
+          type: string
+          enum:
+            - none
+            - random
+            - top_k
+            - top_p
+            - top_k_top_p
+            - sigmoid
+          description: >-
+            Type of filtering to apply to generated synthetic data samples
+        model:
+          type: string
+          description: >-
+            (Optional) The identifier of the model to use. The model must be registered
+            with Llama Stack and available via the /models endpoint
+      additionalProperties: false
+      required:
+        - dialogs
+        - filtering_function
+      title: SyntheticDataGenerateRequest
+    SyntheticDataGenerationResponse:
+      type: object
+      properties:
+        synthetic_data:
+          type: array
+          items:
+            type: object
+            additionalProperties:
+              oneOf:
+                - type: 'null'
+                - type: boolean
+                - type: number
+                - type: string
+                - type: array
+                - type: object
+          description: >-
+            List of generated synthetic data samples that passed the filtering criteria
+        statistics:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: >-
+            (Optional) Statistical information about the generation process and filtering
+            results
+      additionalProperties: false
+      required:
+        - synthetic_data
+      title: SyntheticDataGenerationResponse
+      description: >-
+        Response from the synthetic data generation. Batch of (prompt, response, score)
+        tuples that pass the threshold.
+    UpdatePromptRequest:
+      type: object
+      properties:
+        prompt:
+          type: string
+          description: The updated prompt text content.
+        version:
+          type: integer
+          description: >-
+            The current version of the prompt being updated.
+        variables:
+          type: array
+          items:
+            type: string
+          description: >-
+            Updated list of variable names that can be used in the prompt template.
+        set_as_default:
+          type: boolean
+          description: >-
+            Set the new version as the default (default=True).
+      additionalProperties: false
+      required:
+        - prompt
+        - version
+        - set_as_default
+      title: UpdatePromptRequest
     VersionInfo:
       type: object
       properties:
diff --git a/example.py b/example.py
new file mode 100644
index 000000000..7e968e24a
--- /dev/null
+++ b/example.py
@@ -0,0 +1,39 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
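+
+# Minimal end-to-end example of the new rerank API, run against an
+# NVIDIA-hosted NIM reranking endpoint via the library client.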
+
+import os
+
+# NOTE: replace the placeholder below with your own NVIDIA API key.
+os.environ["NVIDIA_API_KEY"] = "nvapi-<your-api-key>"
+# Option 1: Use default NIM URL (will auto-switch to ai.api.nvidia.com for rerank)
+# os.environ["NVIDIA_BASE_URL"] = "https://ai.api.nvidia.com"
+# Option 2: Use AI Foundation URL directly for rerank models
+# os.environ["NVIDIA_BASE_URL"] = "https://ai.api.nvidia.com/v1"
+os.environ["NVIDIA_BASE_URL"] = "https://integrate.api.nvidia.com"
+
+from llama_stack.core.library_client import LlamaStackAsLibraryClient
+
+client = LlamaStackAsLibraryClient("nvidia")
+client.initialize()
+
+print(client.models.list())
+
+rerank_response = client.inference.rerank(
+    model="nvidia/llama-3.2-nv-rerankqa-1b-v2",
+    query="query",
+    items=[
+        "item_1",
+        "item_2",
+        "item_3",
+    ],
+)
+
+print(rerank_response)
+for i, result in enumerate(rerank_response.data):
+    print(f"{i + 1}. [Index: {result.index}, Score: {result.relevance_score:.3f}]")
diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py
index e88a16315..e452d8157 100644
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@@ -1016,7 +1016,7 @@ class InferenceProvider(Protocol):
     ) -> RerankResponse:
         """Rerank a list of documents based on their relevance to a query.
 
-        :param model: The identifier of the reranking model to use.
+        :param model: The identifier of the reranking model to use. The model must be a reranking model registered with Llama Stack and available via the /models endpoint.
         :param query: The search query to rank items against. Can be a string, text content part, or image content part. The input must not exceed the model's max input token length.
         :param items: List of items to rerank. Each item can be a string, text content part, or image content part. Each input must not exceed the model's max input token length.
        :param max_num_results: (Optional) Maximum number of results to return. Default: returns all.
diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py
index 210ed9246..359f5bf0c 100644
--- a/llama_stack/apis/models/models.py
+++ b/llama_stack/apis/models/models.py
@@ -27,10 +27,12 @@ class ModelType(StrEnum):
     """Enumeration of supported model types in Llama Stack.
:cvar llm: Large language model for text generation and completion :cvar embedding: Embedding model for converting text to vector representations + :cvar rerank: Reranking model for reordering documents by relevance """ llm = "llm" embedding = "embedding" + rerank = "rerank" @json_schema_type diff --git a/llama_stack/core/routers/inference.py b/llama_stack/core/routers/inference.py index c4338e614..e5826685e 100644 --- a/llama_stack/core/routers/inference.py +++ b/llama_stack/core/routers/inference.py @@ -41,9 +41,14 @@ from llama_stack.apis.inference import ( OpenAIMessageParam, OpenAIResponseFormatParam, Order, + RerankResponse, StopReason, ToolPromptFormat, ) +from llama_stack.apis.inference.inference import ( + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartTextParam, +) from llama_stack.apis.models import Model, ModelType from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry from llama_stack.log import get_logger @@ -179,6 +184,25 @@ class InferenceRouter(Inference): raise ModelTypeError(model_id, model.model_type, expected_model_type) return model + async def rerank( + self, + model: str, + query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam, + items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam], + max_num_results: int | None = None, + ) -> RerankResponse: + """Route rerank requests to the appropriate provider based on the model.""" + logger.debug(f"InferenceRouter.rerank: {model}") + model_obj = await self._get_model(model, ModelType.rerank) + provider = await self.routing_table.get_provider_impl(model_obj.identifier) + return await provider.rerank( + model=model_obj.identifier, + query=query, + items=items, + max_num_results=max_num_results, + ) + + async def openai_completion( self, model: str, diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py new file mode 100644 index 000000000..a79a1c6aa --- /dev/null +++ b/llama_stack/providers/remote/inference/nvidia/models.py @@ -0,0 +1,131 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
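+
+# Model entries advertised by the NVIDIA inference provider. The rerank
+# entries below carry a hosted reranking endpoint in their metadata so the
+# adapter can route rerank calls to model-specific URLs.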
+ +from llama_stack.apis.models import ModelType +from llama_stack.models.llama.sku_types import CoreModelId +from llama_stack.providers.utils.inference.model_registry import ( + ProviderModelEntry, + build_hf_repo_model_entry, +) + +SAFETY_MODELS_ENTRIES = [] + +# https://docs.nvidia.com/nim/large-language-models/latest/supported-llm-agnostic-architectures.html +MODEL_ENTRIES = [ + build_hf_repo_model_entry( + "meta/llama3-8b-instruct", + CoreModelId.llama3_8b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama3-70b-instruct", + CoreModelId.llama3_70b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.1-8b-instruct", + CoreModelId.llama3_1_8b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.1-70b-instruct", + CoreModelId.llama3_1_70b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.1-405b-instruct", + CoreModelId.llama3_1_405b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.2-1b-instruct", + CoreModelId.llama3_2_1b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.2-3b-instruct", + CoreModelId.llama3_2_3b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.2-11b-vision-instruct", + CoreModelId.llama3_2_11b_vision_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.2-90b-vision-instruct", + CoreModelId.llama3_2_90b_vision_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.3-70b-instruct", + CoreModelId.llama3_3_70b_instruct.value, + ), + ProviderModelEntry( + provider_model_id="nvidia/vila", + model_type=ModelType.llm, + ), + # NeMo Retriever Text Embedding models - + # + # https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html + # + # +-----------------------------------+--------+-----------+-----------+------------+ + # | Model ID | Max | Publisher | Embedding | Dynamic | + # | | Tokens | | Dimension | Embeddings | + # +-----------------------------------+--------+-----------+-----------+------------+ + # | nvidia/llama-3.2-nv-embedqa-1b-v2 | 8192 | NVIDIA | 2048 | Yes | + # | nvidia/nv-embedqa-e5-v5 | 512 | NVIDIA | 1024 | No | + # | nvidia/nv-embedqa-mistral-7b-v2 | 512 | NVIDIA | 4096 | No | + # | snowflake/arctic-embed-l | 512 | Snowflake | 1024 | No | + # +-----------------------------------+--------+-----------+-----------+------------+ + ProviderModelEntry( + provider_model_id="nvidia/llama-3.2-nv-embedqa-1b-v2", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 2048, + "context_length": 8192, + }, + ), + ProviderModelEntry( + provider_model_id="nvidia/nv-embedqa-e5-v5", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 1024, + "context_length": 512, + }, + ), + ProviderModelEntry( + provider_model_id="nvidia/nv-embedqa-mistral-7b-v2", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 4096, + "context_length": 512, + }, + ), + ProviderModelEntry( + provider_model_id="snowflake/arctic-embed-l", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 1024, + "context_length": 512, + }, + ), + # NVIDIA Reranking models + ProviderModelEntry( + provider_model_id="nv-rerank-qa-mistral-4b:1", + model_type=ModelType.rerank, + metadata={ + "endpoint": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking", + }, + ), + ProviderModelEntry( + provider_model_id="nvidia/nv-rerankqa-mistral-4b-v3", + model_type=ModelType.rerank, + metadata={ + "endpoint": "https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking", + }, + 
), + ProviderModelEntry( + provider_model_id="nvidia/llama-3.2-nv-rerankqa-1b-v2", + model_type=ModelType.rerank, + metadata={ + "endpoint": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking", + }, + ), + # TODO(mf): how do we handle Nemotron models? + # "Llama3.1-Nemotron-51B-Instruct" -> "meta/llama-3.1-nemotron-51b-instruct", +] + SAFETY_MODELS_ENTRIES diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 2e6c3d769..b2fdec61f 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -12,6 +12,12 @@ from llama_stack.apis.inference import ( OpenAIEmbeddingData, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, + RerankData, + RerankResponse, +) +from llama_stack.apis.inference.inference import ( + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartTextParam, ) from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin @@ -80,6 +86,80 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): """ return f"{self._config.url}/v1" if self._config.append_api_version else self._config.url + async def rerank( + self, + model: str, + query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam, + items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam], + max_num_results: int | None = None, + ) -> RerankResponse: + provider_model_id = await self._get_provider_model_id(model) + + ranking_url = self.get_base_url() + model_obj = await self.model_store.get_model(model) + + if _is_nvidia_hosted(self._config) and "endpoint" in model_obj.metadata: + ranking_url = model_obj.metadata["endpoint"] + + logger.debug(f"Using rerank endpoint: {ranking_url} for model: {provider_model_id}") + + # Convert query to text format + if isinstance(query, str): + query_text = query + elif hasattr(query, "text"): + query_text = query.text + else: + raise ValueError("Query must be a string or text content part") + + # Convert items to text format + passages = [] + for item in items: + if isinstance(item, str): + passages.append({"text": item}) + elif hasattr(item, "text"): + passages.append({"text": item.text}) + else: + raise ValueError("Items must be strings or text content parts") + + payload = { + "model": provider_model_id, + "query": {"text": query_text}, + "passages": passages, + } + + headers = { + "Authorization": f"Bearer {self.get_api_key()}", + "Content-Type": "application/json", + } + + import aiohttp + + try: + async with aiohttp.ClientSession() as session: + async with session.post(ranking_url, headers=headers, json=payload) as response: + if response.status != 200: + response_text = await response.text() + raise ConnectionError( + f"NVIDIA rerank API request failed with status {response.status}: {response_text}" + ) + + result = await response.json() + rankings = result.get("rankings", []) + + # Convert to RerankData format + rerank_data = [] + for ranking in rankings: + rerank_data.append(RerankData(index=ranking["index"], relevance_score=ranking["logit"])) + + # Apply max_num_results limit if specified + if max_num_results is not None: + rerank_data = rerank_data[:max_num_results] + + return RerankResponse(data=rerank_data) + + except aiohttp.ClientError as e: + raise ConnectionError(f"Failed to connect to NVIDIA rerank API at {ranking_url}: {e}") from e + async 
def openai_embeddings( self, model: str, From d7cbeb4b8c5942cfda4f096dd1fd45eeb35d1349 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Thu, 4 Sep 2025 18:08:35 -0700 Subject: [PATCH 02/18] Add tests --- docs/docs/providers/inference/index.mdx | 6 +- .../remote/inference/nvidia/nvidia.py | 2 +- tests/integration/conftest.py | 5 + tests/integration/fixtures/common.py | 10 +- tests/integration/inference/test_rerank.py | 147 ++++++++++++++ .../providers/nvidia/test_rerank_inference.py | 180 ++++++++++++++++++ 6 files changed, 345 insertions(+), 5 deletions(-) create mode 100644 tests/integration/inference/test_rerank.py create mode 100644 tests/unit/providers/nvidia/test_rerank_inference.py diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index e96169cad..d9d30ab78 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -1,9 +1,9 @@ --- description: "Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Two kinds of models are supported: + This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. - - Embedding models: these models generate embeddings to be used for semantic search." + - Embedding models: these models generate embeddings to be used for semantic search. - Rerank models: these models rerank the documents by relevance." sidebar_label: Inference title: Inference @@ -15,7 +15,7 @@ title: Inference Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Two kinds of models are supported: + This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. - Rerank models: these models rerank the documents by relevance. diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index b2fdec61f..8dc5e0a11 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -151,7 +151,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): for ranking in rankings: rerank_data.append(RerankData(index=ranking["index"], relevance_score=ranking["logit"])) - # Apply max_num_results limit if specified + # Apply max_num_results limit if max_num_results is not None: rerank_data = rerank_data[:max_num_results] diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 4735264c3..2ad4f7e4c 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -120,6 +120,10 @@ def pytest_addoption(parser): "--embedding-model", help="comma-separated list of embedding models. Fixture name: embedding_model_id", ) + parser.addoption( + "--rerank-model", + help="comma-separated list of rerank models. Fixture name: rerank_model_id", + ) parser.addoption( "--safety-shield", help="comma-separated list of safety shields. 
Fixture name: shield_id", @@ -198,6 +202,7 @@ def pytest_generate_tests(metafunc): "shield_id": ("--safety-shield", "shield"), "judge_model_id": ("--judge-model", "judge"), "embedding_dimension": ("--embedding-dimension", "dim"), + "rerank_model_id": ("--rerank-model", "rerank"), } # Collect all parameters and their values diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index 68aa2b60b..27283afe7 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -119,6 +119,7 @@ def client_with_models( embedding_model_id, embedding_dimension, judge_model_id, + rerank_model_id, ): client = llama_stack_client @@ -151,6 +152,13 @@ def client_with_models( model_type="embedding", metadata={"embedding_dimension": embedding_dimension or 384}, ) + if rerank_model_id and rerank_model_id not in model_ids: + rerank_provider = providers[0] + client.models.register( + model_id=rerank_model_id, + provider_id=rerank_provider.provider_id, + model_type="rerank", + ) return client @@ -166,7 +174,7 @@ def model_providers(llama_stack_client): @pytest.fixture(autouse=True) def skip_if_no_model(request): - model_fixtures = ["text_model_id", "vision_model_id", "embedding_model_id", "judge_model_id", "shield_id"] + model_fixtures = ["text_model_id", "vision_model_id", "embedding_model_id", "judge_model_id", "shield_id", "rerank_model_id"] test_func = request.node.function actual_params = inspect.signature(test_func).parameters.keys() diff --git a/tests/integration/inference/test_rerank.py b/tests/integration/inference/test_rerank.py new file mode 100644 index 000000000..0c536b539 --- /dev/null +++ b/tests/integration/inference/test_rerank.py @@ -0,0 +1,147 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import pytest +from llama_stack_client import BadRequestError as LlamaStackBadRequestError +from llama_stack_client.types import RerankResponse +from llama_stack_client.types.shared.interleaved_content import ( + ImageContentItem, + ImageContentItemImage, + ImageContentItemImageURL, + TextContentItem, +) + +from llama_stack.core.library_client import LlamaStackAsLibraryClient + +# Test data +DUMMY_STRING = "string_1" +DUMMY_STRING2 = "string_2" +DUMMY_TEXT = TextContentItem(text=DUMMY_STRING, type="text") +DUMMY_TEXT2 = TextContentItem(text=DUMMY_STRING2, type="text") +DUMMY_IMAGE_URL = ImageContentItem( + image=ImageContentItemImage(url=ImageContentItemImageURL(uri="https://example.com/image.jpg")), type="image" +) +DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image") + +SUPPORTED_PROVIDERS = {"remote::nvidia"} +PROVIDERS_SUPPORTING_MEDIA = {} # Providers that support media input for rerank models + + +def _validate_rerank_response(response: RerankResponse, items: list) -> None: + """ + Validate that a rerank response has the correct structure and ordering. 
+ + Args: + response: The RerankResponse to validate + items: The original items list that was ranked + + Raises: + AssertionError: If any validation fails + """ + seen = set() + last_score = float("inf") + for d in response.data: + assert 0 <= d.index < len(items), f"Index {d.index} out of bounds for {len(items)} items" + assert d.index not in seen, f"Duplicate index {d.index} found" + seen.add(d.index) + assert isinstance(d.relevance_score, float), f"Score must be float, got {type(d.relevance_score)}" + assert d.relevance_score <= last_score, f"Scores not in descending order: {d.relevance_score} > {last_score}" + last_score = d.relevance_score + + +@pytest.mark.parametrize( + "query,items", + [ + (DUMMY_STRING, [DUMMY_STRING, DUMMY_STRING2]), + (DUMMY_TEXT, [DUMMY_TEXT, DUMMY_TEXT2]), + (DUMMY_STRING, [DUMMY_STRING2, DUMMY_TEXT]), + (DUMMY_TEXT, [DUMMY_STRING, DUMMY_TEXT2]), + ], + ids=[ + "string-query-string-items", + "text-query-text-items", + "mixed-content-1", + "mixed-content-2", + ], +) +def test_rerank_text(llama_stack_client, rerank_model_id, query, items, inference_provider_type): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet. ") + + response = llama_stack_client.inference.rerank(model=rerank_model_id, query=query, items=items) + assert isinstance(response, RerankResponse) + assert len(response.data) <= len(items) + _validate_rerank_response(response, items) + + +@pytest.mark.parametrize( + "query,items", + [ + (DUMMY_IMAGE_URL, [DUMMY_STRING]), + (DUMMY_IMAGE_BASE64, [DUMMY_TEXT]), + (DUMMY_TEXT, [DUMMY_IMAGE_URL]), + (DUMMY_IMAGE_BASE64, [DUMMY_IMAGE_URL, DUMMY_STRING, DUMMY_IMAGE_BASE64, DUMMY_TEXT]), + (DUMMY_TEXT, [DUMMY_IMAGE_URL, DUMMY_STRING, DUMMY_IMAGE_BASE64, DUMMY_TEXT]), + ], + ids=[ + "image-query-url", + "image-query-base64", + "text-query-image-item", + "mixed-content-1", + "mixed-content-2", + ], +) +def test_rerank_image(llama_stack_client, rerank_model_id, query, items, inference_provider_type): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet. ") + + if rerank_model_id not in PROVIDERS_SUPPORTING_MEDIA: + error_type = ( + ValueError if isinstance(llama_stack_client, LlamaStackAsLibraryClient) else LlamaStackBadRequestError + ) + with pytest.raises(error_type): + llama_stack_client.inference.rerank(model=rerank_model_id, query=query, items=items) + else: + response = llama_stack_client.inference.rerank(model=rerank_model_id, query=query, items=items) + + assert isinstance(response, RerankResponse) + assert len(response.data) <= len(items) + _validate_rerank_response(response, items) + + +def test_rerank_max_results(llama_stack_client, rerank_model_id, inference_provider_type): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet. 
") + + items = [DUMMY_STRING, DUMMY_STRING2, DUMMY_TEXT, DUMMY_TEXT2] + max_num_results = 2 + + response = llama_stack_client.inference.rerank( + model=rerank_model_id, + query=DUMMY_STRING, + items=items, + max_num_results=max_num_results, + ) + + assert isinstance(response, RerankResponse) + assert len(response.data) == max_num_results + _validate_rerank_response(response, items) + + +def test_rerank_max_results_larger_than_items(llama_stack_client, rerank_model_id, inference_provider_type): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support rerank yet") + + items = [DUMMY_STRING, DUMMY_STRING2] + response = llama_stack_client.inference.rerank( + model=rerank_model_id, + query=DUMMY_STRING, + items=items, + max_num_results=10, # Larger than items length + ) + + assert isinstance(response, RerankResponse) + assert len(response.data) <= len(items) # Should return at most len(items) diff --git a/tests/unit/providers/nvidia/test_rerank_inference.py b/tests/unit/providers/nvidia/test_rerank_inference.py new file mode 100644 index 000000000..03c54a732 --- /dev/null +++ b/tests/unit/providers/nvidia/test_rerank_inference.py @@ -0,0 +1,180 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from unittest.mock import AsyncMock, patch + +import aiohttp +import pytest + +from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig +from llama_stack.providers.remote.inference.nvidia.nvidia import NVIDIAInferenceAdapter + + +class MockResponse: + def __init__(self, status=200, json_data=None, text_data="OK"): + self.status = status + self._json_data = json_data or {"rankings": []} + self._text_data = text_data + + async def json(self): + return self._json_data + + async def text(self): + return self._text_data + + +class MockSession: + def __init__(self, response): + self.response = response + self.post_calls = [] + + async def __aenter__(self): + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + return False + + def post(self, url, **kwargs): + self.post_calls.append((url, kwargs)) + + class PostContext: + def __init__(self, response): + self.response = response + + async def __aenter__(self): + return self.response + + async def __aexit__(self, exc_type, exc_val, exc_tb): + return False + + return PostContext(self.response) + + +def create_adapter(config=None, model_metadata=None): + if config is None: + config = NVIDIAConfig(api_key="test-key") + + adapter = NVIDIAInferenceAdapter(config) + + class MockModel: + provider_resource_id = "test-model" + metadata = model_metadata or {} + + adapter.model_store = AsyncMock() + adapter.model_store.get_model = AsyncMock(return_value=MockModel()) + + return adapter + + +@pytest.mark.asyncio +async def test_rerank_basic_functionality(): + adapter = create_adapter() + mock_response = MockResponse(json_data={"rankings": [{"index": 0, "logit": 0.5}]}) + mock_session = MockSession(mock_response) + + with patch("aiohttp.ClientSession", return_value=mock_session): + result = await adapter.rerank(model="test-model", query="test query", items=["item1", "item2"]) + + assert len(result.data) == 1 + assert result.data[0].index == 0 + assert result.data[0].relevance_score == 0.5 + + url, kwargs = mock_session.post_calls[0] + payload = kwargs["json"] + assert payload["model"] == "test-model" + assert 
payload["query"] == {"text": "test query"} + assert payload["passages"] == [{"text": "item1"}, {"text": "item2"}] + + +@pytest.mark.asyncio +async def test_missing_rankings_key(): + adapter = create_adapter() + mock_session = MockSession(MockResponse(json_data={})) + + with patch("aiohttp.ClientSession", return_value=mock_session): + result = await adapter.rerank(model="test-model", query="q", items=["a"]) + + assert len(result.data) == 0 + + +@pytest.mark.asyncio +async def test_hosted_with_endpoint(): + adapter = create_adapter( + config=NVIDIAConfig(api_key="key"), model_metadata={"endpoint": "https://model.endpoint/rerank"} + ) + mock_session = MockSession(MockResponse()) + + with patch("aiohttp.ClientSession", return_value=mock_session): + await adapter.rerank(model="test-model", query="q", items=["a"]) + + url, _ = mock_session.post_calls[0] + assert url == "https://model.endpoint/rerank" + + +@pytest.mark.asyncio +async def test_hosted_without_endpoint(): + adapter = create_adapter( + config=NVIDIAConfig(api_key="key"), # This creates hosted config (integrate.api.nvidia.com). + model_metadata={}, # No "endpoint" key + ) + mock_session = MockSession(MockResponse()) + + with patch("aiohttp.ClientSession", return_value=mock_session): + await adapter.rerank(model="test-model", query="q", items=["a"]) + + url, _ = mock_session.post_calls[0] + assert "https://integrate.api.nvidia.com" in url + + +@pytest.mark.asyncio +async def test_self_hosted_ignores_endpoint(): + adapter = create_adapter( + config=NVIDIAConfig(url="http://localhost:8000", api_key=None), + model_metadata={"endpoint": "https://model.endpoint/rerank"}, # This should be ignored. + ) + mock_session = MockSession(MockResponse()) + + with patch("aiohttp.ClientSession", return_value=mock_session): + await adapter.rerank(model="test-model", query="q", items=["a"]) + + url, _ = mock_session.post_calls[0] + assert "http://localhost:8000" in url + assert "model.endpoint/rerank" not in url + + +@pytest.mark.asyncio +async def test_max_num_results(): + adapter = create_adapter() + rankings = [{"index": 0, "logit": 0.8}, {"index": 1, "logit": 0.6}] + mock_session = MockSession(MockResponse(json_data={"rankings": rankings})) + + with patch("aiohttp.ClientSession", return_value=mock_session): + result = await adapter.rerank(model="test-model", query="q", items=["a", "b"], max_num_results=1) + + assert len(result.data) == 1 + assert result.data[0].index == 0 + assert result.data[0].relevance_score == 0.8 + + +@pytest.mark.asyncio +async def test_http_error(): + adapter = create_adapter() + mock_session = MockSession(MockResponse(status=500, text_data="Server Error")) + + with patch("aiohttp.ClientSession", return_value=mock_session): + with pytest.raises(ConnectionError, match="status 500.*Server Error"): + await adapter.rerank(model="test-model", query="q", items=["a"]) + + +@pytest.mark.asyncio +async def test_client_error(): + adapter = create_adapter() + mock_session = AsyncMock() + mock_session.__aenter__.side_effect = aiohttp.ClientError("Network error") + + with patch("aiohttp.ClientSession", return_value=mock_session): + with pytest.raises(ConnectionError, match="Failed to connect.*Network error"): + await adapter.rerank(model="test-model", query="q", items=["a"]) From 8c9b7aa764a936b9617d5f8a82d74beb302ed835 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Fri, 5 Sep 2025 16:52:42 -0700 Subject: [PATCH 03/18] Add example documentation --- .../remote/inference/nvidia/NVIDIA.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) 
diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md index 625be6088..c683c7a68 100644 --- a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md +++ b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md @@ -188,3 +188,22 @@ vlm_response = client.chat.completions.create( print(f"VLM Response: {vlm_response.choices[0].message.content}") ``` + +### Rerank Example + +The following example shows how to rerank documents using an NVIDIA NIM. + +```python +rerank_response = client.inference.rerank( + model="nvidia/llama-3.2-nv-rerankqa-1b-v2", + query="query", + items=[ + "item_1", + "item_2", + "item_3", + ], +) + +for i, result in enumerate(rerank_response.data): + print(f"{i+1}. [Index: {result.index}, Score: {result.relevance_score:.3f}]") +``` \ No newline at end of file From 3f6bbbb5609b6da2c5d46cc541fe414e859adccf Mon Sep 17 00:00:00 2001 From: Jiayi Date: Tue, 9 Sep 2025 14:15:24 -0700 Subject: [PATCH 04/18] Remove pre-commit auto formatted changes --- tests/unit/providers/nvidia/test_rerank_inference.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/unit/providers/nvidia/test_rerank_inference.py b/tests/unit/providers/nvidia/test_rerank_inference.py index 03c54a732..687ffd502 100644 --- a/tests/unit/providers/nvidia/test_rerank_inference.py +++ b/tests/unit/providers/nvidia/test_rerank_inference.py @@ -69,7 +69,6 @@ def create_adapter(config=None, model_metadata=None): return adapter -@pytest.mark.asyncio async def test_rerank_basic_functionality(): adapter = create_adapter() mock_response = MockResponse(json_data={"rankings": [{"index": 0, "logit": 0.5}]}) @@ -89,7 +88,6 @@ async def test_rerank_basic_functionality(): assert payload["passages"] == [{"text": "item1"}, {"text": "item2"}] -@pytest.mark.asyncio async def test_missing_rankings_key(): adapter = create_adapter() mock_session = MockSession(MockResponse(json_data={})) @@ -100,7 +98,6 @@ async def test_missing_rankings_key(): assert len(result.data) == 0 -@pytest.mark.asyncio async def test_hosted_with_endpoint(): adapter = create_adapter( config=NVIDIAConfig(api_key="key"), model_metadata={"endpoint": "https://model.endpoint/rerank"} @@ -114,7 +111,6 @@ async def test_hosted_with_endpoint(): assert url == "https://model.endpoint/rerank" -@pytest.mark.asyncio async def test_hosted_without_endpoint(): adapter = create_adapter( config=NVIDIAConfig(api_key="key"), # This creates hosted config (integrate.api.nvidia.com). 
@@ -129,7 +125,6 @@ async def test_hosted_without_endpoint(): assert "https://integrate.api.nvidia.com" in url -@pytest.mark.asyncio async def test_self_hosted_ignores_endpoint(): adapter = create_adapter( config=NVIDIAConfig(url="http://localhost:8000", api_key=None), @@ -145,7 +140,6 @@ async def test_self_hosted_ignores_endpoint(): assert "model.endpoint/rerank" not in url -@pytest.mark.asyncio async def test_max_num_results(): adapter = create_adapter() rankings = [{"index": 0, "logit": 0.8}, {"index": 1, "logit": 0.6}] @@ -159,7 +153,6 @@ async def test_max_num_results(): assert result.data[0].relevance_score == 0.8 -@pytest.mark.asyncio async def test_http_error(): adapter = create_adapter() mock_session = MockSession(MockResponse(status=500, text_data="Server Error")) @@ -169,7 +162,6 @@ async def test_http_error(): await adapter.rerank(model="test-model", query="q", items=["a"]) -@pytest.mark.asyncio async def test_client_error(): adapter = create_adapter() mock_session = AsyncMock() From d78e30fe8b95a30a92f86e6b1ee9d69ceca2ae00 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Tue, 9 Sep 2025 15:01:49 -0700 Subject: [PATCH 05/18] Fix aiohttp location --- llama_stack/providers/remote/inference/nvidia/nvidia.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 8dc5e0a11..f629d8c19 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -5,6 +5,7 @@ # the root directory of this source tree. +import aiohttp from openai import NOT_GIVEN from llama_stack.apis.inference import ( @@ -132,8 +133,6 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): "Content-Type": "application/json", } - import aiohttp - try: async with aiohttp.ClientSession() as session: async with session.post(ranking_url, headers=headers, json=payload) as response: From f66718be8033b685abbe829753df539fc652e27d Mon Sep 17 00:00:00 2001 From: Jiayi Date: Tue, 9 Sep 2025 16:21:00 -0700 Subject: [PATCH 06/18] Update tests --- tests/integration/fixtures/common.py | 11 +++++++++-- tests/integration/inference/test_rerank.py | 20 ++++++++++---------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index 27283afe7..8f4c564c8 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -153,10 +153,17 @@ def client_with_models( metadata={"embedding_dimension": embedding_dimension or 384}, ) if rerank_model_id and rerank_model_id not in model_ids: - rerank_provider = providers[0] + selected_provider = None + for p in providers: + # Currently only NVIDIA inference provider supports reranking + if p.provider_type == "remote::nvidia": + selected_provider = p + break + + selected_provider = selected_provider or providers[0] client.models.register( model_id=rerank_model_id, - provider_id=rerank_provider.provider_id, + provider_id=selected_provider.provider_id, model_type="rerank", ) return client diff --git a/tests/integration/inference/test_rerank.py b/tests/integration/inference/test_rerank.py index 0c536b539..27f3074ad 100644 --- a/tests/integration/inference/test_rerank.py +++ b/tests/integration/inference/test_rerank.py @@ -67,11 +67,11 @@ def _validate_rerank_response(response: RerankResponse, items: list) -> None: "mixed-content-2", ], ) -def test_rerank_text(llama_stack_client, 
rerank_model_id, query, items, inference_provider_type): +def test_rerank_text(client_with_models, rerank_model_id, query, items, inference_provider_type): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet. ") - response = llama_stack_client.inference.rerank(model=rerank_model_id, query=query, items=items) + response = client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items) assert isinstance(response, RerankResponse) assert len(response.data) <= len(items) _validate_rerank_response(response, items) @@ -94,32 +94,32 @@ def test_rerank_text(llama_stack_client, rerank_model_id, query, items, inferenc "mixed-content-2", ], ) -def test_rerank_image(llama_stack_client, rerank_model_id, query, items, inference_provider_type): +def test_rerank_image(client_with_models, rerank_model_id, query, items, inference_provider_type): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet. ") if rerank_model_id not in PROVIDERS_SUPPORTING_MEDIA: error_type = ( - ValueError if isinstance(llama_stack_client, LlamaStackAsLibraryClient) else LlamaStackBadRequestError + ValueError if isinstance(client_with_models, LlamaStackAsLibraryClient) else LlamaStackBadRequestError ) with pytest.raises(error_type): - llama_stack_client.inference.rerank(model=rerank_model_id, query=query, items=items) + client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items) else: - response = llama_stack_client.inference.rerank(model=rerank_model_id, query=query, items=items) + response = client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items) assert isinstance(response, RerankResponse) assert len(response.data) <= len(items) _validate_rerank_response(response, items) -def test_rerank_max_results(llama_stack_client, rerank_model_id, inference_provider_type): +def test_rerank_max_results(client_with_models, rerank_model_id, inference_provider_type): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet. 
") items = [DUMMY_STRING, DUMMY_STRING2, DUMMY_TEXT, DUMMY_TEXT2] max_num_results = 2 - response = llama_stack_client.inference.rerank( + response = client_with_models.inference.rerank( model=rerank_model_id, query=DUMMY_STRING, items=items, @@ -131,12 +131,12 @@ def test_rerank_max_results(llama_stack_client, rerank_model_id, inference_provi _validate_rerank_response(response, items) -def test_rerank_max_results_larger_than_items(llama_stack_client, rerank_model_id, inference_provider_type): +def test_rerank_max_results_larger_than_items(client_with_models, rerank_model_id, inference_provider_type): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support rerank yet") items = [DUMMY_STRING, DUMMY_STRING2] - response = llama_stack_client.inference.rerank( + response = client_with_models.inference.rerank( model=rerank_model_id, query=DUMMY_STRING, items=items, From 78375889ec474f9f1916a0e1163b7ef9b2b5eba5 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Wed, 10 Sep 2025 11:39:39 -0700 Subject: [PATCH 07/18] Update index.md --- docs/docs/providers/inference/index.mdx | 2 +- llama_stack/apis/inference/inference.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index d9d30ab78..1cbeb12f0 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -18,6 +18,6 @@ Llama Stack Inference API for generating completions, chat completions, and embe This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. - - Rerank models: these models rerank the documents by relevance. + - Rerank models: these models reorder the documents by relevance. This section contains documentation for all available providers for the **inference** API. diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index e452d8157..7bd9f5918 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -1159,9 +1159,10 @@ class InferenceProvider(Protocol): class Inference(InferenceProvider): """Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Two kinds of models are supported: + This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. + - Rerank models: these models reorder the documents by relevance. 
""" @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) From d1b4e090effd61a29c7102ba9042b749e0727971 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Wed, 10 Sep 2025 11:50:55 -0700 Subject: [PATCH 08/18] Update docs to include rerank models --- docs/static/llama-stack-spec.html | 2 +- docs/static/llama-stack-spec.yaml | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index b260f01a7..8192a9cf6 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -17875,7 +17875,7 @@ }, { "name": "Inference", - "description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.", + "description": "This API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents by relevance.", "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings." }, { diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index ebe142557..895b939ab 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -13452,13 +13452,15 @@ tags: description: '' - name: Inference description: >- - This API provides the raw interface to the underlying models. Two kinds of models - are supported: + This API provides the raw interface to the underlying models. Three kinds of + models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. + + - Rerank models: these models reorder the documents by relevance. x-displayName: >- Llama Stack Inference API for generating completions, chat completions, and embeddings. From a0e6e82c1e68b95934985e98e95f7d4ab7b7d53e Mon Sep 17 00:00:00 2001 From: Jiayi Date: Wed, 10 Sep 2025 17:18:59 -0700 Subject: [PATCH 09/18] Add rerank semantic validation tests --- tests/integration/inference/test_rerank.py | 66 ++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/tests/integration/inference/test_rerank.py b/tests/integration/inference/test_rerank.py index 27f3074ad..f1b9311a4 100644 --- a/tests/integration/inference/test_rerank.py +++ b/tests/integration/inference/test_rerank.py @@ -52,6 +52,28 @@ def _validate_rerank_response(response: RerankResponse, items: list) -> None: last_score = d.relevance_score +def _validate_semantic_ranking(response: RerankResponse, items: list, expected_first_item: str) -> None: + """ + Validate that the expected most relevant item ranks first. 
+ + Args: + response: The RerankResponse to validate + items: The original items list that was ranked + expected_first_item: The expected first item in the ranking + + Raises: + AssertionError: If any validation fails + """ + if not response.data: + raise AssertionError("No ranking data returned in response") + + actual_first_index = response.data[0].index + actual_first_item = items[actual_first_index] + assert actual_first_item == expected_first_item, ( + f"Expected '{expected_first_item}' to rank first, but '{actual_first_item}' ranked first instead." + ) + + @pytest.mark.parametrize( "query,items", [ @@ -145,3 +167,47 @@ def test_rerank_max_results_larger_than_items(client_with_models, rerank_model_i assert isinstance(response, RerankResponse) assert len(response.data) <= len(items) # Should return at most len(items) + + +@pytest.mark.parametrize( + "query,items,expected_first_item", + [ + ( + "What is a reranking model? ", + [ + "A reranking model reranks a list of items based on the query. ", + "Machine learning algorithms learn patterns from data. ", + "Python is a programming language. ", + ], + "A reranking model reranks a list of items based on the query. ", + ), + ( + "What is C++?", + [ + "Learning new things is interesting. ", + "C++ is a programming language. ", + "Books provide knowledge and entertainment. ", + ], + "C++ is a programming language. ", + ), + ( + "What are good learning habits? ", + [ + "Cooking pasta is a fun activity. ", + "Plants need water and sunlight. ", + "Good learning habits include reading daily and taking notes. ", + ], + "Good learning habits include reading daily and taking notes. ", + ), + ], +) +def test_rerank_semantic_correctness( + client_with_models, rerank_model_id, query, items, expected_first_item, inference_provider_type +): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet.") + + response = client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items) + + _validate_rerank_response(response, items) + _validate_semantic_ranking(response, items, expected_first_item) From 35384770705f81702b1cbe3913bdece9191c53f0 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Fri, 12 Sep 2025 19:55:04 -0700 Subject: [PATCH 10/18] Update docs --- docs/docs/providers/inference/index.mdx | 2 +- docs/static/llama-stack-spec.html | 2 +- docs/static/llama-stack-spec.yaml | 3 +- llama_stack/apis/inference/inference.py | 2 +- llama_stack/apis/models/models.py | 2 +- llama_stack/core/routers/inference.py | 1 - .../remote/inference/nvidia/models.py | 131 ------------------ 7 files changed, 6 insertions(+), 137 deletions(-) delete mode 100644 llama_stack/providers/remote/inference/nvidia/models.py diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index 1cbeb12f0..98ba10cc7 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -18,6 +18,6 @@ Llama Stack Inference API for generating completions, chat completions, and embe This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. - - Rerank models: these models reorder the documents by relevance. + - Rerank models: these models reorder the documents based on their relevance to a query. 
This section contains documentation for all available providers for the **inference** API. diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 8192a9cf6..0fdf3f415 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -17875,7 +17875,7 @@ }, { "name": "Inference", - "description": "This API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents by relevance.", + "description": "This API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents based on their relevance to a query.", "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings." }, { diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index 895b939ab..ec0409849 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -13460,7 +13460,8 @@ tags: - Embedding models: these models generate embeddings to be used for semantic search. - - Rerank models: these models reorder the documents by relevance. + - Rerank models: these models reorder the documents based on their relevance + to a query. x-displayName: >- Llama Stack Inference API for generating completions, chat completions, and embeddings. diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index 7bd9f5918..6260ba552 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -1162,7 +1162,7 @@ class Inference(InferenceProvider): This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. - - Rerank models: these models reorder the documents by relevance. + - Rerank models: these models reorder the documents based on their relevance to a query. """ @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True) diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py index 359f5bf0c..1275e90e3 100644 --- a/llama_stack/apis/models/models.py +++ b/llama_stack/apis/models/models.py @@ -27,7 +27,7 @@ class ModelType(StrEnum): """Enumeration of supported model types in Llama Stack. 
:cvar llm: Large language model for text generation and completion :cvar embedding: Embedding model for converting text to vector representations - :cvar rerank: Reranking model for reordering documents by relevance + :cvar rerank: Reranking model for reordering documents based on their relevance to a query """ llm = "llm" diff --git a/llama_stack/core/routers/inference.py b/llama_stack/core/routers/inference.py index e5826685e..c1d4203c2 100644 --- a/llama_stack/core/routers/inference.py +++ b/llama_stack/core/routers/inference.py @@ -191,7 +191,6 @@ class InferenceRouter(Inference): items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam], max_num_results: int | None = None, ) -> RerankResponse: - """Route rerank requests to the appropriate provider based on the model.""" logger.debug(f"InferenceRouter.rerank: {model}") model_obj = await self._get_model(model, ModelType.rerank) provider = await self.routing_table.get_provider_impl(model_obj.identifier) diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py deleted file mode 100644 index a79a1c6aa..000000000 --- a/llama_stack/providers/remote/inference/nvidia/models.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from llama_stack.apis.models import ModelType -from llama_stack.models.llama.sku_types import CoreModelId -from llama_stack.providers.utils.inference.model_registry import ( - ProviderModelEntry, - build_hf_repo_model_entry, -) - -SAFETY_MODELS_ENTRIES = [] - -# https://docs.nvidia.com/nim/large-language-models/latest/supported-llm-agnostic-architectures.html -MODEL_ENTRIES = [ - build_hf_repo_model_entry( - "meta/llama3-8b-instruct", - CoreModelId.llama3_8b_instruct.value, - ), - build_hf_repo_model_entry( - "meta/llama3-70b-instruct", - CoreModelId.llama3_70b_instruct.value, - ), - build_hf_repo_model_entry( - "meta/llama-3.1-8b-instruct", - CoreModelId.llama3_1_8b_instruct.value, - ), - build_hf_repo_model_entry( - "meta/llama-3.1-70b-instruct", - CoreModelId.llama3_1_70b_instruct.value, - ), - build_hf_repo_model_entry( - "meta/llama-3.1-405b-instruct", - CoreModelId.llama3_1_405b_instruct.value, - ), - build_hf_repo_model_entry( - "meta/llama-3.2-1b-instruct", - CoreModelId.llama3_2_1b_instruct.value, - ), - build_hf_repo_model_entry( - "meta/llama-3.2-3b-instruct", - CoreModelId.llama3_2_3b_instruct.value, - ), - build_hf_repo_model_entry( - "meta/llama-3.2-11b-vision-instruct", - CoreModelId.llama3_2_11b_vision_instruct.value, - ), - build_hf_repo_model_entry( - "meta/llama-3.2-90b-vision-instruct", - CoreModelId.llama3_2_90b_vision_instruct.value, - ), - build_hf_repo_model_entry( - "meta/llama-3.3-70b-instruct", - CoreModelId.llama3_3_70b_instruct.value, - ), - ProviderModelEntry( - provider_model_id="nvidia/vila", - model_type=ModelType.llm, - ), - # NeMo Retriever Text Embedding models - - # - # https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html - # - # +-----------------------------------+--------+-----------+-----------+------------+ - # | Model ID | Max | Publisher | Embedding | Dynamic | - # | | Tokens | | Dimension | Embeddings | - # +-----------------------------------+--------+-----------+-----------+------------+ - # | nvidia/llama-3.2-nv-embedqa-1b-v2 | 8192 | NVIDIA | 
2048 | Yes | - # | nvidia/nv-embedqa-e5-v5 | 512 | NVIDIA | 1024 | No | - # | nvidia/nv-embedqa-mistral-7b-v2 | 512 | NVIDIA | 4096 | No | - # | snowflake/arctic-embed-l | 512 | Snowflake | 1024 | No | - # +-----------------------------------+--------+-----------+-----------+------------+ - ProviderModelEntry( - provider_model_id="nvidia/llama-3.2-nv-embedqa-1b-v2", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 2048, - "context_length": 8192, - }, - ), - ProviderModelEntry( - provider_model_id="nvidia/nv-embedqa-e5-v5", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 1024, - "context_length": 512, - }, - ), - ProviderModelEntry( - provider_model_id="nvidia/nv-embedqa-mistral-7b-v2", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 4096, - "context_length": 512, - }, - ), - ProviderModelEntry( - provider_model_id="snowflake/arctic-embed-l", - model_type=ModelType.embedding, - metadata={ - "embedding_dimension": 1024, - "context_length": 512, - }, - ), - # NVIDIA Reranking models - ProviderModelEntry( - provider_model_id="nv-rerank-qa-mistral-4b:1", - model_type=ModelType.rerank, - metadata={ - "endpoint": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking", - }, - ), - ProviderModelEntry( - provider_model_id="nvidia/nv-rerankqa-mistral-4b-v3", - model_type=ModelType.rerank, - metadata={ - "endpoint": "https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking", - }, - ), - ProviderModelEntry( - provider_model_id="nvidia/llama-3.2-nv-rerankqa-1b-v2", - model_type=ModelType.rerank, - metadata={ - "endpoint": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking", - }, - ), - # TODO(mf): how do we handle Nemotron models? - # "Llama3.1-Nemotron-51B-Instruct" -> "meta/llama-3.1-nemotron-51b-instruct", -] + SAFETY_MODELS_ENTRIES From 816b68fdc7cc83288a4548f3c73c6285fe5c86d9 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Sun, 28 Sep 2025 14:45:16 -0700 Subject: [PATCH 11/18] Add rerank models to the dynamic model list; Fix integration tests --- docs/docs/providers/batches/index.mdx | 12 +- docs/docs/providers/inference/index.mdx | 1 + .../remote/inference/nvidia/NVIDIA.md | 4 +- .../remote/inference/nvidia/nvidia.py | 39 +++++ .../providers/utils/inference/openai_mixin.py | 12 ++ tests/integration/inference/test_rerank.py | 33 ++--- .../providers/nvidia/test_rerank_inference.py | 35 ++++- .../utils/inference/test_openai_mixin.py | 136 ++++++++++++++++++ 8 files changed, 247 insertions(+), 25 deletions(-) diff --git a/docs/docs/providers/batches/index.mdx b/docs/docs/providers/batches/index.mdx index 2c64b277f..85213ab17 100644 --- a/docs/docs/providers/batches/index.mdx +++ b/docs/docs/providers/batches/index.mdx @@ -18,14 +18,14 @@ title: Batches ## Overview The Batches API enables efficient processing of multiple requests in a single operation, - particularly useful for processing large datasets, batch evaluation workflows, and - cost-effective inference at scale. +particularly useful for processing large datasets, batch evaluation workflows, and +cost-effective inference at scale. - The API is designed to allow use of openai client libraries for seamless integration. +The API is designed to allow use of openai client libraries for seamless integration. 
- This API provides the following extensions: - - idempotent batch creation +This API provides the following extensions: + - idempotent batch creation - Note: This API is currently under active development and may undergo changes. +Note: This API is currently under active development and may undergo changes. This section contains documentation for all available providers for the **batches** API. diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index 98ba10cc7..065f620df 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -5,6 +5,7 @@ description: "Llama Stack Inference API for generating completions, chat complet - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. - Rerank models: these models rerank the documents by relevance." + sidebar_label: Inference title: Inference --- diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md index c683c7a68..dcc9d3909 100644 --- a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md +++ b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md @@ -204,6 +204,6 @@ rerank_response = client.inference.rerank( ], ) -for i, result in enumerate(rerank_response.data): - print(f"{i+1}. [Index: {result.index}, Score: {result.relevance_score:.3f}]") +for i, result in enumerate(rerank_response): + print(f"{i+1}. [Index: {result.index}, " f"Score: {(result.relevance_score):.3f}]") ``` \ No newline at end of file diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index f629d8c19..ae9245bfe 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -20,6 +20,7 @@ from llama_stack.apis.inference.inference import ( OpenAIChatCompletionContentPartImageParam, OpenAIChatCompletionContentPartTextParam, ) +from llama_stack.apis.models import Model, ModelType from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin @@ -51,6 +52,18 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): "snowflake/arctic-embed-l": {"embedding_dimension": 512, "context_length": 1024}, } + rerank_model_list = [ + "nv-rerank-qa-mistral-4b:1", + "nvidia/nv-rerankqa-mistral-4b-v3", + "nvidia/llama-3.2-nv-rerankqa-1b-v2", + ] + + _rerank_model_endpoints = { + "nv-rerank-qa-mistral-4b:1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking", + "nvidia/nv-rerankqa-mistral-4b-v3": "https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking", + "nvidia/llama-3.2-nv-rerankqa-1b-v2": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking", + } + def __init__(self, config: NVIDIAConfig) -> None: logger.info(f"Initializing NVIDIAInferenceAdapter({config.url})...") @@ -69,6 +82,8 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): # "Consider removing the api_key from the configuration." # ) + super().__init__() + self._config = config def get_api_key(self) -> str: @@ -87,6 +102,30 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): """ return f"{self._config.url}/v1" if self._config.append_api_version else self._config.url + async def list_models(self) -> list[Model] | None: + """ + List available NVIDIA models by combining: + 1. 
Dynamic models from https://integrate.api.nvidia.com/v1/models + 2. Static rerank models (which use different API endpoints) + """ + models = await super().list_models() or [] + + existing_ids = {m.identifier for m in models} + for model_id, _ in self._rerank_model_endpoints.items(): + if self.allowed_models and model_id not in self.allowed_models: + continue + if model_id not in existing_ids: + model = Model( + provider_id=self.__provider_id__, # type: ignore[attr-defined] + provider_resource_id=model_id, + identifier=model_id, + model_type=ModelType.rerank, + ) + models.append(model) + self._model_cache[model_id] = model + + return models + async def rerank( self, model: str, diff --git a/llama_stack/providers/utils/inference/openai_mixin.py b/llama_stack/providers/utils/inference/openai_mixin.py index 4354b067e..da56374c5 100644 --- a/llama_stack/providers/utils/inference/openai_mixin.py +++ b/llama_stack/providers/utils/inference/openai_mixin.py @@ -63,6 +63,10 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC): # Format: {"model_id": {"embedding_dimension": 1536, "context_length": 8192}} embedding_model_metadata: dict[str, dict[str, int]] = {} + # List of rerank model IDs for this provider + # Can be set by subclasses or instances to provide rerank models + rerank_model_list: list[str] = [] + # Cache of available models keyed by model ID # This is set in list_models() and used in check_model_availability() _model_cache: dict[str, Model] = {} @@ -400,6 +404,14 @@ class OpenAIMixin(ModelsProtocolPrivate, NeedsRequestProviderData, ABC): model_type=ModelType.embedding, metadata=metadata, ) + elif m.id in self.rerank_model_list: + # This is a rerank model + model = Model( + provider_id=self.__provider_id__, # type: ignore[attr-defined] + provider_resource_id=m.id, + identifier=m.id, + model_type=ModelType.rerank, + ) else: # This is an LLM model = Model( diff --git a/tests/integration/inference/test_rerank.py b/tests/integration/inference/test_rerank.py index f1b9311a4..ea17a54cb 100644 --- a/tests/integration/inference/test_rerank.py +++ b/tests/integration/inference/test_rerank.py @@ -6,7 +6,7 @@ import pytest from llama_stack_client import BadRequestError as LlamaStackBadRequestError -from llama_stack_client.types import RerankResponse +from llama_stack_client.types import InferenceRerankResponse from llama_stack_client.types.shared.interleaved_content import ( ImageContentItem, ImageContentItemImage, @@ -30,12 +30,12 @@ SUPPORTED_PROVIDERS = {"remote::nvidia"} PROVIDERS_SUPPORTING_MEDIA = {} # Providers that support media input for rerank models -def _validate_rerank_response(response: RerankResponse, items: list) -> None: +def _validate_rerank_response(response: InferenceRerankResponse, items: list) -> None: """ Validate that a rerank response has the correct structure and ordering. 
Args: - response: The RerankResponse to validate + response: The InferenceRerankResponse to validate items: The original items list that was ranked Raises: @@ -43,7 +43,7 @@ def _validate_rerank_response(response: RerankResponse, items: list) -> None: """ seen = set() last_score = float("inf") - for d in response.data: + for d in response: assert 0 <= d.index < len(items), f"Index {d.index} out of bounds for {len(items)} items" assert d.index not in seen, f"Duplicate index {d.index} found" seen.add(d.index) @@ -52,22 +52,22 @@ def _validate_rerank_response(response: RerankResponse, items: list) -> None: last_score = d.relevance_score -def _validate_semantic_ranking(response: RerankResponse, items: list, expected_first_item: str) -> None: +def _validate_semantic_ranking(response: InferenceRerankResponse, items: list, expected_first_item: str) -> None: """ Validate that the expected most relevant item ranks first. Args: - response: The RerankResponse to validate + response: The InferenceRerankResponse to validate items: The original items list that was ranked expected_first_item: The expected first item in the ranking Raises: AssertionError: If any validation fails """ - if not response.data: + if not response: raise AssertionError("No ranking data returned in response") - actual_first_index = response.data[0].index + actual_first_index = response[0].index actual_first_item = items[actual_first_index] assert actual_first_item == expected_first_item, ( f"Expected '{expected_first_item}' to rank first, but '{actual_first_item}' ranked first instead." @@ -94,8 +94,9 @@ def test_rerank_text(client_with_models, rerank_model_id, query, items, inferenc pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet. ") response = client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items) - assert isinstance(response, RerankResponse) - assert len(response.data) <= len(items) + assert isinstance(response, list) + # TODO: Add type validation for response items once InferenceRerankResponseItem is exported from llama stack client. 
+ assert len(response) <= len(items) _validate_rerank_response(response, items) @@ -129,8 +130,8 @@ def test_rerank_image(client_with_models, rerank_model_id, query, items, inferen else: response = client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items) - assert isinstance(response, RerankResponse) - assert len(response.data) <= len(items) + assert isinstance(response, list) + assert len(response) <= len(items) _validate_rerank_response(response, items) @@ -148,8 +149,8 @@ def test_rerank_max_results(client_with_models, rerank_model_id, inference_provi max_num_results=max_num_results, ) - assert isinstance(response, RerankResponse) - assert len(response.data) == max_num_results + assert isinstance(response, list) + assert len(response) == max_num_results _validate_rerank_response(response, items) @@ -165,8 +166,8 @@ def test_rerank_max_results_larger_than_items(client_with_models, rerank_model_i max_num_results=10, # Larger than items length ) - assert isinstance(response, RerankResponse) - assert len(response.data) <= len(items) # Should return at most len(items) + assert isinstance(response, list) + assert len(response) <= len(items) # Should return at most len(items) @pytest.mark.parametrize( diff --git a/tests/unit/providers/nvidia/test_rerank_inference.py b/tests/unit/providers/nvidia/test_rerank_inference.py index 687ffd502..f34518609 100644 --- a/tests/unit/providers/nvidia/test_rerank_inference.py +++ b/tests/unit/providers/nvidia/test_rerank_inference.py @@ -4,11 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from unittest.mock import AsyncMock, patch +from unittest.mock import AsyncMock, MagicMock, patch import aiohttp import pytest +from llama_stack.apis.models import ModelType from llama_stack.providers.remote.inference.nvidia.config import NVIDIAConfig from llama_stack.providers.remote.inference.nvidia.nvidia import NVIDIAInferenceAdapter @@ -170,3 +171,35 @@ async def test_client_error(): with patch("aiohttp.ClientSession", return_value=mock_session): with pytest.raises(ConnectionError, match="Failed to connect.*Network error"): await adapter.rerank(model="test-model", query="q", items=["a"]) + + +async def test_list_models_adds_rerank_models(): + """Test that list_models adds rerank models to the dynamic model list.""" + adapter = create_adapter() + adapter.__provider_id__ = "nvidia" + + # Mock the list_models from the superclass to return some dynamic models + base_models = [ + MagicMock(identifier="llm-1", model_type=ModelType.llm), + MagicMock(identifier="embedding-1", model_type=ModelType.embedding), + ] + + with patch.object(NVIDIAInferenceAdapter.__bases__[0], "list_models", return_value=base_models): + result = await adapter.list_models() + + assert result is not None + + # Check that the rerank models are added + model_ids = [m.identifier for m in result] + assert "nv-rerank-qa-mistral-4b:1" in model_ids + assert "nvidia/nv-rerankqa-mistral-4b-v3" in model_ids + assert "nvidia/llama-3.2-nv-rerankqa-1b-v2" in model_ids + + rerank_models = [m for m in result if m.model_type == ModelType.rerank] + + assert len(rerank_models) == 3 + + for rerank_model in rerank_models: + assert rerank_model.provider_id == "nvidia" + assert rerank_model.metadata == {} + assert rerank_model.identifier in adapter._model_cache diff --git a/tests/unit/providers/utils/inference/test_openai_mixin.py b/tests/unit/providers/utils/inference/test_openai_mixin.py index 
4856f510b..ae723dcc2 100644 --- a/tests/unit/providers/utils/inference/test_openai_mixin.py +++ b/tests/unit/providers/utils/inference/test_openai_mixin.py @@ -35,6 +35,40 @@ class OpenAIMixinWithEmbeddingsImpl(OpenAIMixinImpl): } +class OpenAIMixinWithRerankImpl(OpenAIMixin): + """Test implementation with rerank model list""" + + rerank_model_list = ["rerank-model-1", "rerank-model-2"] + + def __init__(self): + self.__provider_id__ = "test-provider" + + def get_api_key(self) -> str: + raise NotImplementedError("This method should be mocked in tests") + + def get_base_url(self) -> str: + raise NotImplementedError("This method should be mocked in tests") + + +class OpenAIMixinWithEmbeddingsAndRerankImpl(OpenAIMixin): + """Test implementation with both embedding model metadata and rerank model list""" + + embedding_model_metadata = { + "text-embedding-3-small": {"embedding_dimension": 1536, "context_length": 8192}, + "text-embedding-ada-002": {"embedding_dimension": 1536, "context_length": 8192}, + } + + rerank_model_list = ["rerank-model-1", "rerank-model-2"] + + __provider_id__ = "test-provider" + + def get_api_key(self) -> str: + raise NotImplementedError("This method should be mocked in tests") + + def get_base_url(self) -> str: + raise NotImplementedError("This method should be mocked in tests") + + @pytest.fixture def mixin(): """Create a test instance of OpenAIMixin with mocked model_store""" @@ -56,6 +90,18 @@ def mixin_with_embeddings(): return OpenAIMixinWithEmbeddingsImpl() +@pytest.fixture +def mixin_with_rerank(): + """Create a test instance of OpenAIMixin with rerank model list""" + return OpenAIMixinWithRerankImpl() + + +@pytest.fixture +def mixin_with_embeddings_and_rerank(): + """Create a test instance of OpenAIMixin with both embedding model metadata and rerank model list""" + return OpenAIMixinWithEmbeddingsAndRerankImpl() + + @pytest.fixture def mock_models(): """Create multiple mock OpenAI model objects""" @@ -317,6 +363,96 @@ class TestOpenAIMixinEmbeddingModelMetadata: assert llm_model.provider_resource_id == "gpt-4" +class TestOpenAIMixinRerankModelList: + """Test cases for rerank_model_list attribute functionality""" + + async def test_rerank_model_identified(self, mixin_with_rerank, mock_client_context): + """Test that models in rerank_model_list are correctly identified as rerank models""" + # Create mock models: 1 rerank model and 1 LLM + mock_rerank_model = MagicMock(id="rerank-model-1") + mock_llm_model = MagicMock(id="gpt-4") + mock_models = [mock_rerank_model, mock_llm_model] + + mock_client = MagicMock() + + async def mock_models_list(): + for model in mock_models: + yield model + + mock_client.models.list.return_value = mock_models_list() + + with mock_client_context(mixin_with_rerank, mock_client): + result = await mixin_with_rerank.list_models() + + assert result is not None + assert len(result) == 2 + + # Find the models in the result + rerank_model = next(m for m in result if m.identifier == "rerank-model-1") + llm_model = next(m for m in result if m.identifier == "gpt-4") + + # Check rerank model + assert rerank_model.model_type == ModelType.rerank + assert rerank_model.metadata == {} # No metadata for rerank models + assert rerank_model.provider_id == "test-provider" + assert rerank_model.provider_resource_id == "rerank-model-1" + + # Check LLM model + assert llm_model.model_type == ModelType.llm + assert llm_model.metadata == {} # No metadata for LLMs + assert llm_model.provider_id == "test-provider" + assert llm_model.provider_resource_id == "gpt-4" 
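Outside the unit tests, the practical effect of `rerank_model_list` is that rerank models show up in the unified model listing next to LLM and embedding models. A minimal client-side sketch, assuming a local server at the default port:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local deployment

# Models a provider declares in rerank_model_list are tagged with
# model_type "rerank" when list_models() runs.
rerank_models = [m for m in client.models.list() if m.model_type == "rerank"]
for m in rerank_models:
    print(m.identifier, m.provider_id)
```
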
+ + +class TestOpenAIMixinMixedModelTypes: + """Test cases for mixed model types (LLM, embedding, rerank)""" + + async def test_mixed_model_types_identification(self, mixin_with_embeddings_and_rerank, mock_client_context): + """Test that LLM, embedding, and rerank models are correctly identified with proper types and metadata""" + # Create mock models: 1 embedding, 1 rerank, 1 LLM + mock_embedding_model = MagicMock(id="text-embedding-3-small") + mock_rerank_model = MagicMock(id="rerank-model-1") + mock_llm_model = MagicMock(id="gpt-4") + mock_models = [mock_embedding_model, mock_rerank_model, mock_llm_model] + + mock_client = MagicMock() + + async def mock_models_list(): + for model in mock_models: + yield model + + mock_client.models.list.return_value = mock_models_list() + + with mock_client_context(mixin_with_embeddings_and_rerank, mock_client): + result = await mixin_with_embeddings_and_rerank.list_models() + + assert result is not None + assert len(result) == 3 + + # Find the models in the result + embedding_model = next(m for m in result if m.identifier == "text-embedding-3-small") + rerank_model = next(m for m in result if m.identifier == "rerank-model-1") + llm_model = next(m for m in result if m.identifier == "gpt-4") + + # Check embedding model + assert embedding_model.model_type == ModelType.embedding + assert embedding_model.metadata == {"embedding_dimension": 1536, "context_length": 8192} + assert embedding_model.provider_id == "test-provider" + assert embedding_model.provider_resource_id == "text-embedding-3-small" + + # Check rerank model + assert rerank_model.model_type == ModelType.rerank + assert rerank_model.metadata == {} # No metadata for rerank models + assert rerank_model.provider_id == "test-provider" + assert rerank_model.provider_resource_id == "rerank-model-1" + + # Check LLM model + assert llm_model.model_type == ModelType.llm + assert llm_model.metadata == {} # No metadata for LLMs + assert llm_model.provider_id == "test-provider" + assert llm_model.provider_resource_id == "gpt-4" + + class TestOpenAIMixinAllowedModels: """Test cases for allowed_models filtering functionality""" From cf386ad8f8072dbb0609b4792a002689d769fa49 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Sun, 28 Sep 2025 15:11:23 -0700 Subject: [PATCH 12/18] Address comments --- docs/docs/providers/batches/index.mdx | 12 ++++++------ .../providers/remote/inference/nvidia/nvidia.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/docs/providers/batches/index.mdx b/docs/docs/providers/batches/index.mdx index 85213ab17..2c64b277f 100644 --- a/docs/docs/providers/batches/index.mdx +++ b/docs/docs/providers/batches/index.mdx @@ -18,14 +18,14 @@ title: Batches ## Overview The Batches API enables efficient processing of multiple requests in a single operation, -particularly useful for processing large datasets, batch evaluation workflows, and -cost-effective inference at scale. + particularly useful for processing large datasets, batch evaluation workflows, and + cost-effective inference at scale. -The API is designed to allow use of openai client libraries for seamless integration. + The API is designed to allow use of openai client libraries for seamless integration. -This API provides the following extensions: - - idempotent batch creation + This API provides the following extensions: + - idempotent batch creation -Note: This API is currently under active development and may undergo changes. + Note: This API is currently under active development and may undergo changes. 
This section contains documentation for all available providers for the **batches** API. diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index ae9245bfe..1fc6a23b1 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -146,7 +146,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): # Convert query to text format if isinstance(query, str): query_text = query - elif hasattr(query, "text"): + elif isinstance(query, OpenAIChatCompletionContentPartTextParam): query_text = query.text else: raise ValueError("Query must be a string or text content part") @@ -156,7 +156,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): for item in items: if isinstance(item, str): passages.append({"text": item}) - elif hasattr(item, "text"): + elif isinstance(item, OpenAIChatCompletionContentPartTextParam): passages.append({"text": item.text}) else: raise ValueError("Items must be strings or text content parts") From f85743dcca1e8d594d7c54c05d52224128db3682 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Sun, 28 Sep 2025 15:48:29 -0700 Subject: [PATCH 13/18] Add nvidia model cache --- llama_stack/providers/remote/inference/nvidia/nvidia.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 1fc6a23b1..f6fca4014 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -108,8 +108,10 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): 1. Dynamic models from https://integrate.api.nvidia.com/v1/models 2. 
Static rerank models (which use different API endpoints) """ - models = await super().list_models() or [] + self._model_cache = {} + models = await super().list_models() + # Add rerank models existing_ids = {m.identifier for m in models} for model_id, _ in self._rerank_model_endpoints.items(): if self.allowed_models and model_id not in self.allowed_models: From 2fb8756fe2e1f4d157d2bcae0363bd74345ca9dc Mon Sep 17 00:00:00 2001 From: Jiayi Date: Sun, 28 Sep 2025 17:45:54 -0700 Subject: [PATCH 14/18] Fix rerank model endpoint issue --- .../remote/inference/nvidia/nvidia.py | 5 ++-- .../providers/nvidia/test_rerank_inference.py | 27 +++++++++++++++---- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index f6fca4014..15e50ff97 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -138,10 +138,9 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): provider_model_id = await self._get_provider_model_id(model) ranking_url = self.get_base_url() - model_obj = await self.model_store.get_model(model) - if _is_nvidia_hosted(self._config) and "endpoint" in model_obj.metadata: - ranking_url = model_obj.metadata["endpoint"] + if _is_nvidia_hosted(self._config) and provider_model_id in self._rerank_model_endpoints: + ranking_url = self._rerank_model_endpoints[provider_model_id] logger.debug(f"Using rerank endpoint: {ranking_url} for model: {provider_model_id}") diff --git a/tests/unit/providers/nvidia/test_rerank_inference.py b/tests/unit/providers/nvidia/test_rerank_inference.py index f34518609..60891e496 100644 --- a/tests/unit/providers/nvidia/test_rerank_inference.py +++ b/tests/unit/providers/nvidia/test_rerank_inference.py @@ -54,7 +54,7 @@ class MockSession: return PostContext(self.response) -def create_adapter(config=None, model_metadata=None): +def create_adapter(config=None, rerank_endpoints=None): if config is None: config = NVIDIAConfig(api_key="test-key") @@ -62,11 +62,14 @@ def create_adapter(config=None, model_metadata=None): class MockModel: provider_resource_id = "test-model" - metadata = model_metadata or {} + metadata = {} adapter.model_store = AsyncMock() adapter.model_store.get_model = AsyncMock(return_value=MockModel()) + if rerank_endpoints is not None: + adapter._rerank_model_endpoints = rerank_endpoints + return adapter @@ -101,7 +104,7 @@ async def test_missing_rankings_key(): async def test_hosted_with_endpoint(): adapter = create_adapter( - config=NVIDIAConfig(api_key="key"), model_metadata={"endpoint": "https://model.endpoint/rerank"} + config=NVIDIAConfig(api_key="key"), rerank_endpoints={"test-model": "https://model.endpoint/rerank"} ) mock_session = MockSession(MockResponse()) @@ -115,7 +118,7 @@ async def test_hosted_with_endpoint(): async def test_hosted_without_endpoint(): adapter = create_adapter( config=NVIDIAConfig(api_key="key"), # This creates hosted config (integrate.api.nvidia.com). 
- model_metadata={}, # No "endpoint" key + rerank_endpoints={}, # No endpoint mapping for test-model ) mock_session = MockSession(MockResponse()) @@ -126,10 +129,24 @@ async def test_hosted_without_endpoint(): assert "https://integrate.api.nvidia.com" in url +async def test_hosted_model_not_in_endpoint_mapping(): + adapter = create_adapter( + config=NVIDIAConfig(api_key="key"), rerank_endpoints={"other-model": "https://other.endpoint/rerank"} + ) + mock_session = MockSession(MockResponse()) + + with patch("aiohttp.ClientSession", return_value=mock_session): + await adapter.rerank(model="test-model", query="q", items=["a"]) + + url, _ = mock_session.post_calls[0] + assert "https://integrate.api.nvidia.com" in url + assert url != "https://other.endpoint/rerank" + + async def test_self_hosted_ignores_endpoint(): adapter = create_adapter( config=NVIDIAConfig(url="http://localhost:8000", api_key=None), - model_metadata={"endpoint": "https://model.endpoint/rerank"}, # This should be ignored. + rerank_endpoints={"test-model": "https://model.endpoint/rerank"}, # This should be ignored for self-hosted. ) mock_session = MockSession(MockResponse()) From f2a398dcba567290f1d290d660dff4bab0c2e699 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Mon, 29 Sep 2025 11:30:46 -0700 Subject: [PATCH 15/18] Add skip_if_provider_doesnt_support_rerank --- tests/integration/inference/test_rerank.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/integration/inference/test_rerank.py b/tests/integration/inference/test_rerank.py index ea17a54cb..4931c3d6c 100644 --- a/tests/integration/inference/test_rerank.py +++ b/tests/integration/inference/test_rerank.py @@ -26,10 +26,15 @@ DUMMY_IMAGE_URL = ImageContentItem( ) DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image") -SUPPORTED_PROVIDERS = {"remote::nvidia"} PROVIDERS_SUPPORTING_MEDIA = {} # Providers that support media input for rerank models +def skip_if_provider_doesnt_support_rerank(inference_provider_type): + supported_providers = {"remote::nvidia"} + if inference_provider_type not in supported_providers: + pytest.skip(f"{inference_provider_type} doesn't support rerank models") + + def _validate_rerank_response(response: InferenceRerankResponse, items: list) -> None: """ Validate that a rerank response has the correct structure and ordering. @@ -90,8 +95,7 @@ def _validate_semantic_ranking(response: InferenceRerankResponse, items: list, e ], ) def test_rerank_text(client_with_models, rerank_model_id, query, items, inference_provider_type): - if inference_provider_type not in SUPPORTED_PROVIDERS: - pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet. ") + skip_if_provider_doesnt_support_rerank(inference_provider_type) response = client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items) assert isinstance(response, list) @@ -118,8 +122,7 @@ def test_rerank_text(client_with_models, rerank_model_id, query, items, inferenc ], ) def test_rerank_image(client_with_models, rerank_model_id, query, items, inference_provider_type): - if inference_provider_type not in SUPPORTED_PROVIDERS: - pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet. 
") + skip_if_provider_doesnt_support_rerank(inference_provider_type) if rerank_model_id not in PROVIDERS_SUPPORTING_MEDIA: error_type = ( @@ -136,8 +139,7 @@ def test_rerank_image(client_with_models, rerank_model_id, query, items, inferen def test_rerank_max_results(client_with_models, rerank_model_id, inference_provider_type): - if inference_provider_type not in SUPPORTED_PROVIDERS: - pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet. ") + skip_if_provider_doesnt_support_rerank(inference_provider_type) items = [DUMMY_STRING, DUMMY_STRING2, DUMMY_TEXT, DUMMY_TEXT2] max_num_results = 2 @@ -155,8 +157,7 @@ def test_rerank_max_results(client_with_models, rerank_model_id, inference_provi def test_rerank_max_results_larger_than_items(client_with_models, rerank_model_id, inference_provider_type): - if inference_provider_type not in SUPPORTED_PROVIDERS: - pytest.xfail(f"{inference_provider_type} doesn't support rerank yet") + skip_if_provider_doesnt_support_rerank(inference_provider_type) items = [DUMMY_STRING, DUMMY_STRING2] response = client_with_models.inference.rerank( @@ -205,8 +206,7 @@ def test_rerank_max_results_larger_than_items(client_with_models, rerank_model_i def test_rerank_semantic_correctness( client_with_models, rerank_model_id, query, items, expected_first_item, inference_provider_type ): - if inference_provider_type not in SUPPORTED_PROVIDERS: - pytest.xfail(f"{inference_provider_type} doesn't support rerank models yet.") + skip_if_provider_doesnt_support_rerank(inference_provider_type) response = client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items) From 96bd6c1836f7476fef280af8bbaa045c1d31d5db Mon Sep 17 00:00:00 2001 From: Jiayi Date: Mon, 29 Sep 2025 11:43:49 -0700 Subject: [PATCH 16/18] Refactor openai mixin tests with model checking loop --- .../utils/inference/test_openai_mixin.py | 113 ++++++++++-------- 1 file changed, 64 insertions(+), 49 deletions(-) diff --git a/tests/unit/providers/utils/inference/test_openai_mixin.py b/tests/unit/providers/utils/inference/test_openai_mixin.py index ae723dcc2..937caa1c0 100644 --- a/tests/unit/providers/utils/inference/test_openai_mixin.py +++ b/tests/unit/providers/utils/inference/test_openai_mixin.py @@ -153,6 +153,19 @@ def mock_client_context(): return _mock_client_context +def _assert_models_match_expected(actual_models, expected_models): + """Verify the models match expected attributes. 
+ + Args: + actual_models: List of models to verify + expected_models: Mapping of model identifier to expected attribute values + """ + for identifier, expected_attrs in expected_models.items(): + model = next(m for m in actual_models if m.identifier == identifier) + for attr_name, expected_value in expected_attrs.items(): + assert getattr(model, attr_name) == expected_value + + class TestOpenAIMixinListModels: """Test cases for the list_models method""" @@ -346,21 +359,22 @@ class TestOpenAIMixinEmbeddingModelMetadata: assert result is not None assert len(result) == 2 - # Find the models in the result - embedding_model = next(m for m in result if m.identifier == "text-embedding-3-small") - llm_model = next(m for m in result if m.identifier == "gpt-4") + expected_models = { + "text-embedding-3-small": { + "model_type": ModelType.embedding, + "metadata": {"embedding_dimension": 1536, "context_length": 8192}, + "provider_id": "test-provider", + "provider_resource_id": "text-embedding-3-small", + }, + "gpt-4": { + "model_type": ModelType.llm, + "metadata": {}, + "provider_id": "test-provider", + "provider_resource_id": "gpt-4", + }, + } - # Check embedding model - assert embedding_model.model_type == ModelType.embedding - assert embedding_model.metadata == {"embedding_dimension": 1536, "context_length": 8192} - assert embedding_model.provider_id == "test-provider" - assert embedding_model.provider_resource_id == "text-embedding-3-small" - - # Check LLM model - assert llm_model.model_type == ModelType.llm - assert llm_model.metadata == {} # No metadata for LLMs - assert llm_model.provider_id == "test-provider" - assert llm_model.provider_resource_id == "gpt-4" + _assert_models_match_expected(result, expected_models) class TestOpenAIMixinRerankModelList: @@ -387,21 +401,22 @@ class TestOpenAIMixinRerankModelList: assert result is not None assert len(result) == 2 - # Find the models in the result - rerank_model = next(m for m in result if m.identifier == "rerank-model-1") - llm_model = next(m for m in result if m.identifier == "gpt-4") + expected_models = { + "rerank-model-1": { + "model_type": ModelType.rerank, + "metadata": {}, + "provider_id": "test-provider", + "provider_resource_id": "rerank-model-1", + }, + "gpt-4": { + "model_type": ModelType.llm, + "metadata": {}, + "provider_id": "test-provider", + "provider_resource_id": "gpt-4", + }, + } - # Check rerank model - assert rerank_model.model_type == ModelType.rerank - assert rerank_model.metadata == {} # No metadata for rerank models - assert rerank_model.provider_id == "test-provider" - assert rerank_model.provider_resource_id == "rerank-model-1" - - # Check LLM model - assert llm_model.model_type == ModelType.llm - assert llm_model.metadata == {} # No metadata for LLMs - assert llm_model.provider_id == "test-provider" - assert llm_model.provider_resource_id == "gpt-4" + _assert_models_match_expected(result, expected_models) class TestOpenAIMixinMixedModelTypes: @@ -429,28 +444,28 @@ class TestOpenAIMixinMixedModelTypes: assert result is not None assert len(result) == 3 - # Find the models in the result - embedding_model = next(m for m in result if m.identifier == "text-embedding-3-small") - rerank_model = next(m for m in result if m.identifier == "rerank-model-1") - llm_model = next(m for m in result if m.identifier == "gpt-4") + expected_models = { + "text-embedding-3-small": { + "model_type": ModelType.embedding, + "metadata": {"embedding_dimension": 1536, "context_length": 8192}, + "provider_id": "test-provider", + 
"provider_resource_id": "text-embedding-3-small", + }, + "rerank-model-1": { + "model_type": ModelType.rerank, + "metadata": {}, + "provider_id": "test-provider", + "provider_resource_id": "rerank-model-1", + }, + "gpt-4": { + "model_type": ModelType.llm, + "metadata": {}, + "provider_id": "test-provider", + "provider_resource_id": "gpt-4", + }, + } - # Check embedding model - assert embedding_model.model_type == ModelType.embedding - assert embedding_model.metadata == {"embedding_dimension": 1536, "context_length": 8192} - assert embedding_model.provider_id == "test-provider" - assert embedding_model.provider_resource_id == "text-embedding-3-small" - - # Check rerank model - assert rerank_model.model_type == ModelType.rerank - assert rerank_model.metadata == {} # No metadata for rerank models - assert rerank_model.provider_id == "test-provider" - assert rerank_model.provider_resource_id == "rerank-model-1" - - # Check LLM model - assert llm_model.model_type == ModelType.llm - assert llm_model.metadata == {} # No metadata for LLMs - assert llm_model.provider_id == "test-provider" - assert llm_model.provider_resource_id == "gpt-4" + _assert_models_match_expected(result, expected_models) class TestOpenAIMixinAllowedModels: From bb2eb33fc3509028f932a8e32f9cf66e383ba53b Mon Sep 17 00:00:00 2001 From: Jiayi Date: Tue, 30 Sep 2025 12:09:09 -0700 Subject: [PATCH 17/18] Fix pre-commit after rebasing --- docs/docs/providers/agents/index.mdx | 2 +- .../static/experimental-llama-stack-spec.html | 2 +- .../static/experimental-llama-stack-spec.yaml | 3 +- docs/static/llama-stack-spec.html | 4995 +---------------- docs/static/llama-stack-spec.yaml | 3725 +----------- llama_stack/core/routers/inference.py | 1 - tests/integration/fixtures/common.py | 9 +- 7 files changed, 15 insertions(+), 8722 deletions(-) diff --git a/docs/docs/providers/agents/index.mdx b/docs/docs/providers/agents/index.mdx index 06eb104af..200d0119f 100644 --- a/docs/docs/providers/agents/index.mdx +++ b/docs/docs/providers/agents/index.mdx @@ -14,4 +14,4 @@ Agents APIs for creating and interacting with agentic systems. -This section contains documentation for all available providers for the **agents** API. +This section contains documentation for all available providers for the **agents** API. \ No newline at end of file diff --git a/docs/static/experimental-llama-stack-spec.html b/docs/static/experimental-llama-stack-spec.html index a84226c05..574107a6d 100644 --- a/docs/static/experimental-llama-stack-spec.html +++ b/docs/static/experimental-llama-stack-spec.html @@ -4992,7 +4992,7 @@ "properties": { "model": { "type": "string", - "description": "The identifier of the reranking model to use." + "description": "The identifier of the reranking model to use. The model must be a reranking model registered with Llama Stack and available via the /models endpoint." }, "query": { "oneOf": [ diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml index a08c0cc87..aae356d6d 100644 --- a/docs/static/experimental-llama-stack-spec.yaml +++ b/docs/static/experimental-llama-stack-spec.yaml @@ -3657,7 +3657,8 @@ components: model: type: string description: >- - The identifier of the reranking model to use. + The identifier of the reranking model to use. The model must be a reranking + model registered with Llama Stack and available via the /models endpoint. 
query: oneOf: - type: string diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 0fdf3f415..2ee665123 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -4819,2834 +4819,6 @@ "title": "OpenAIUserMessageParam", "description": "A message from the user in an OpenAI-compatible chat completion request." }, - "OpenAICompletionWithInputMessages": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The ID of the chat completion" - }, - "choices": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OpenAIChoice" - }, - "description": "List of choices" - }, - "object": { - "type": "string", - "const": "chat.completion", - "default": "chat.completion", - "description": "The object type, which will be \"chat.completion\"" - }, - "created": { - "type": "integer", - "description": "The Unix timestamp in seconds when the chat completion was created" - }, - "model": { - "type": "string", - "description": "The model that was used to generate the chat completion" - }, - "input_messages": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OpenAIMessageParam" - } - } - }, - "additionalProperties": false, - "required": [ - "id", - "choices", - "object", - "created", - "model", - "input_messages" - ], - "title": "OpenAICompletionWithInputMessages" - }, - "DataSource": { - "oneOf": [ - { - "$ref": "#/components/schemas/URIDataSource" - }, - { - "$ref": "#/components/schemas/RowsDataSource" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "uri": "#/components/schemas/URIDataSource", - "rows": "#/components/schemas/RowsDataSource" - } - } - }, - "Dataset": { - "type": "object", - "properties": { - "identifier": { - "type": "string" - }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "model", - "shield", - "vector_db", - "dataset", - "scoring_function", - "benchmark", - "tool", - "tool_group", - "prompt" - ], - "const": "dataset", - "default": "dataset", - "description": "Type of resource, always 'dataset' for datasets" - }, - "purpose": { - "type": "string", - "enum": [ - "post-training/messages", - "eval/question-answer", - "eval/messages-answer" - ], - "description": "Purpose of the dataset indicating its intended use" - }, - "source": { - "$ref": "#/components/schemas/DataSource", - "description": "Data source configuration for the dataset" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Additional metadata for the dataset" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_id", - "type", - "purpose", - "source", - "metadata" - ], - "title": "Dataset", - "description": "Dataset resource for storing and accessing training or evaluation data." 
- }, - "RowsDataSource": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "rows", - "default": "rows" - }, - "rows": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "description": "The dataset is stored in rows. E.g. - [ {\"messages\": [{\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}]} ]" - } - }, - "additionalProperties": false, - "required": [ - "type", - "rows" - ], - "title": "RowsDataSource", - "description": "A dataset stored in rows." - }, - "URIDataSource": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "uri", - "default": "uri" - }, - "uri": { - "type": "string", - "description": "The dataset can be obtained from a URI. E.g. - \"https://mywebsite.com/mydata.jsonl\" - \"lsfs://mydata.jsonl\" - \"data:csv;base64,{base64_content}\"" - } - }, - "additionalProperties": false, - "required": [ - "type", - "uri" - ], - "title": "URIDataSource", - "description": "A dataset that can be obtained from a URI." - }, - "Model": { - "type": "object", - "properties": { - "identifier": { - "type": "string", - "description": "Unique identifier for this resource in llama stack" - }, - "provider_resource_id": { - "type": "string", - "description": "Unique identifier for this resource in the provider" - }, - "provider_id": { - "type": "string", - "description": "ID of the provider that owns this resource" - }, - "type": { - "type": "string", - "enum": [ - "model", - "shield", - "vector_db", - "dataset", - "scoring_function", - "benchmark", - "tool", - "tool_group", - "prompt" - ], - "const": "model", - "default": "model", - "description": "The resource type, always 'model' for model resources" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Any additional metadata for this model" - }, - "model_type": { - "$ref": "#/components/schemas/ModelType", - "default": "llm", - "description": "The type of model (LLM or embedding model)" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_id", - "type", - "metadata", - "model_type" - ], - "title": "Model", - "description": "A model resource representing an AI model registered in Llama Stack." - }, - "ModelType": { - "type": "string", - "enum": [ - "llm", - "embedding", - "rerank" - ], - "title": "ModelType", - "description": "Enumeration of supported model types in Llama Stack." - }, - "AgentTurnInputType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "agent_turn_input", - "default": "agent_turn_input", - "description": "Discriminator type. Always \"agent_turn_input\"" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "AgentTurnInputType", - "description": "Parameter type for agent turn input." - }, - "ArrayType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "array", - "default": "array", - "description": "Discriminator type. 
Always \"array\"" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "ArrayType", - "description": "Parameter type for array values." - }, - "BooleanType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "boolean", - "default": "boolean", - "description": "Discriminator type. Always \"boolean\"" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "BooleanType", - "description": "Parameter type for boolean values." - }, - "ChatCompletionInputType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "chat_completion_input", - "default": "chat_completion_input", - "description": "Discriminator type. Always \"chat_completion_input\"" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "ChatCompletionInputType", - "description": "Parameter type for chat completion input." - }, - "CompletionInputType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "completion_input", - "default": "completion_input", - "description": "Discriminator type. Always \"completion_input\"" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "CompletionInputType", - "description": "Parameter type for completion input." - }, - "JsonType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "json", - "default": "json", - "description": "Discriminator type. Always \"json\"" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "JsonType", - "description": "Parameter type for JSON values." - }, - "NumberType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "number", - "default": "number", - "description": "Discriminator type. Always \"number\"" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "NumberType", - "description": "Parameter type for numeric values." - }, - "ObjectType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "object", - "default": "object", - "description": "Discriminator type. Always \"object\"" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "ObjectType", - "description": "Parameter type for object values." 
- }, - "ParamType": { - "oneOf": [ - { - "$ref": "#/components/schemas/StringType" - }, - { - "$ref": "#/components/schemas/NumberType" - }, - { - "$ref": "#/components/schemas/BooleanType" - }, - { - "$ref": "#/components/schemas/ArrayType" - }, - { - "$ref": "#/components/schemas/ObjectType" - }, - { - "$ref": "#/components/schemas/JsonType" - }, - { - "$ref": "#/components/schemas/UnionType" - }, - { - "$ref": "#/components/schemas/ChatCompletionInputType" - }, - { - "$ref": "#/components/schemas/CompletionInputType" - }, - { - "$ref": "#/components/schemas/AgentTurnInputType" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "string": "#/components/schemas/StringType", - "number": "#/components/schemas/NumberType", - "boolean": "#/components/schemas/BooleanType", - "array": "#/components/schemas/ArrayType", - "object": "#/components/schemas/ObjectType", - "json": "#/components/schemas/JsonType", - "union": "#/components/schemas/UnionType", - "chat_completion_input": "#/components/schemas/ChatCompletionInputType", - "completion_input": "#/components/schemas/CompletionInputType", - "agent_turn_input": "#/components/schemas/AgentTurnInputType" - } - } - }, - "ScoringFn": { - "type": "object", - "properties": { - "identifier": { - "type": "string" - }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "model", - "shield", - "vector_db", - "dataset", - "scoring_function", - "benchmark", - "tool", - "tool_group", - "prompt" - ], - "const": "scoring_function", - "default": "scoring_function", - "description": "The resource type, always scoring_function" - }, - "description": { - "type": "string" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "return_type": { - "$ref": "#/components/schemas/ParamType" - }, - "params": { - "$ref": "#/components/schemas/ScoringFnParams" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_id", - "type", - "metadata", - "return_type" - ], - "title": "ScoringFn", - "description": "A scoring function resource for evaluating model outputs." - }, - "StringType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "string", - "default": "string", - "description": "Discriminator type. Always \"string\"" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "StringType", - "description": "Parameter type for string values." - }, - "UnionType": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "union", - "default": "union", - "description": "Discriminator type. Always \"union\"" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "UnionType", - "description": "Parameter type for union values." 
- }, - "Shield": { - "type": "object", - "properties": { - "identifier": { - "type": "string" - }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "model", - "shield", - "vector_db", - "dataset", - "scoring_function", - "benchmark", - "tool", - "tool_group", - "prompt" - ], - "const": "shield", - "default": "shield", - "description": "The resource type, always shield" - }, - "params": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Configuration parameters for the shield" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_id", - "type" - ], - "title": "Shield", - "description": "A safety shield resource that can be used to check content." - }, - "Span": { - "type": "object", - "properties": { - "span_id": { - "type": "string", - "description": "Unique identifier for the span" - }, - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this span belongs to" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the operation began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the operation finished, if completed" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the span" - } - }, - "additionalProperties": false, - "required": [ - "span_id", - "trace_id", - "name", - "start_time" - ], - "title": "Span", - "description": "A span representing a single operation within a trace." - }, - "GetSpanTreeRequest": { - "type": "object", - "properties": { - "attributes_to_return": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to return in the tree." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "title": "GetSpanTreeRequest" - }, - "SpanStatus": { - "type": "string", - "enum": [ - "ok", - "error" - ], - "title": "SpanStatus", - "description": "The status of a span indicating whether it completed successfully or with an error." 
- }, - "SpanWithStatus": { - "type": "object", - "properties": { - "span_id": { - "type": "string", - "description": "Unique identifier for the span" - }, - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this span belongs to" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the operation began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the operation finished, if completed" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the span" - }, - "status": { - "$ref": "#/components/schemas/SpanStatus", - "description": "(Optional) The current status of the span" - } - }, - "additionalProperties": false, - "required": [ - "span_id", - "trace_id", - "name", - "start_time" - ], - "title": "SpanWithStatus", - "description": "A span that includes status information." - }, - "QuerySpanTreeResponse": { - "type": "object", - "properties": { - "data": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/SpanWithStatus" - }, - "description": "Dictionary mapping span IDs to spans with status information" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QuerySpanTreeResponse", - "description": "Response containing a tree structure of spans." - }, - "Tool": { - "type": "object", - "properties": { - "identifier": { - "type": "string" - }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "model", - "shield", - "vector_db", - "dataset", - "scoring_function", - "benchmark", - "tool", - "tool_group", - "prompt" - ], - "const": "tool", - "default": "tool", - "description": "Type of resource, always 'tool'" - }, - "toolgroup_id": { - "type": "string", - "description": "ID of the tool group this tool belongs to" - }, - "description": { - "type": "string", - "description": "Human-readable description of what the tool does" - }, - "parameters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ToolParameter" - }, - "description": "List of parameters this tool accepts" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Additional metadata about the tool" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_id", - "type", - "toolgroup_id", - "description", - "parameters" - ], - "title": "Tool", - "description": "A tool that can be invoked by agents." 
- }, - "ToolGroup": { - "type": "object", - "properties": { - "identifier": { - "type": "string" - }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "model", - "shield", - "vector_db", - "dataset", - "scoring_function", - "benchmark", - "tool", - "tool_group", - "prompt" - ], - "const": "tool_group", - "default": "tool_group", - "description": "Type of resource, always 'tool_group'" - }, - "mcp_endpoint": { - "$ref": "#/components/schemas/URL", - "description": "(Optional) Model Context Protocol endpoint for remote tools" - }, - "args": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Additional arguments for the tool group" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_id", - "type" - ], - "title": "ToolGroup", - "description": "A group of related tools managed together." - }, - "Trace": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace" - }, - "root_span_id": { - "type": "string", - "description": "Unique identifier for the root span that started this trace" - }, - "start_time": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the trace began" - }, - "end_time": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the trace finished, if completed" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "root_span_id", - "start_time" - ], - "title": "Trace", - "description": "A trace representing the complete execution path of a request across multiple operations." - }, - "Checkpoint": { - "type": "object", - "properties": { - "identifier": { - "type": "string", - "description": "Unique identifier for the checkpoint" - }, - "created_at": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the checkpoint was created" - }, - "epoch": { - "type": "integer", - "description": "Training epoch when the checkpoint was saved" - }, - "post_training_job_id": { - "type": "string", - "description": "Identifier of the training job that created this checkpoint" - }, - "path": { - "type": "string", - "description": "File system path where the checkpoint is stored" - }, - "training_metrics": { - "$ref": "#/components/schemas/PostTrainingMetric", - "description": "(Optional) Training metrics associated with this checkpoint" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "created_at", - "epoch", - "post_training_job_id", - "path" - ], - "title": "Checkpoint", - "description": "Checkpoint created during training runs." - }, - "PostTrainingJobArtifactsResponse": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string", - "description": "Unique identifier for the training job" - }, - "checkpoints": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Checkpoint" - }, - "description": "List of model checkpoints created during training" - } - }, - "additionalProperties": false, - "required": [ - "job_uuid", - "checkpoints" - ], - "title": "PostTrainingJobArtifactsResponse", - "description": "Artifacts of a finetuning job." 
- }, - "PostTrainingMetric": { - "type": "object", - "properties": { - "epoch": { - "type": "integer", - "description": "Training epoch number" - }, - "train_loss": { - "type": "number", - "description": "Loss value on the training dataset" - }, - "validation_loss": { - "type": "number", - "description": "Loss value on the validation dataset" - }, - "perplexity": { - "type": "number", - "description": "Perplexity metric indicating model confidence" - } - }, - "additionalProperties": false, - "required": [ - "epoch", - "train_loss", - "validation_loss", - "perplexity" - ], - "title": "PostTrainingMetric", - "description": "Training metrics captured during post-training jobs." - }, - "PostTrainingJobStatusResponse": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string", - "description": "Unique identifier for the training job" - }, - "status": { - "type": "string", - "enum": [ - "completed", - "in_progress", - "failed", - "scheduled", - "cancelled" - ], - "description": "Current status of the training job" - }, - "scheduled_at": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the job was scheduled" - }, - "started_at": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the job execution began" - }, - "completed_at": { - "type": "string", - "format": "date-time", - "description": "(Optional) Timestamp when the job finished, if completed" - }, - "resources_allocated": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Information about computational resources allocated to the job" - }, - "checkpoints": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Checkpoint" - }, - "description": "List of model checkpoints created during training" - } - }, - "additionalProperties": false, - "required": [ - "job_uuid", - "status", - "checkpoints" - ], - "title": "PostTrainingJobStatusResponse", - "description": "Status of a finetuning job." 
- }, - "ListPostTrainingJobsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "job_uuid" - ], - "title": "PostTrainingJob" - } - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListPostTrainingJobsResponse" - }, - "VectorDB": { - "type": "object", - "properties": { - "identifier": { - "type": "string" - }, - "provider_resource_id": { - "type": "string" - }, - "provider_id": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "model", - "shield", - "vector_db", - "dataset", - "scoring_function", - "benchmark", - "tool", - "tool_group", - "prompt" - ], - "const": "vector_db", - "default": "vector_db", - "description": "Type of resource, always 'vector_db' for vector databases" - }, - "embedding_model": { - "type": "string", - "description": "Name of the embedding model to use for vector generation" - }, - "embedding_dimension": { - "type": "integer", - "description": "Dimension of the embedding vectors" - }, - "vector_db_name": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "identifier", - "provider_id", - "type", - "embedding_model", - "embedding_dimension" - ], - "title": "VectorDB", - "description": "Vector database resource for storing and querying vector embeddings." - }, - "HealthInfo": { - "type": "object", - "properties": { - "status": { - "type": "string", - "enum": [ - "OK", - "Error", - "Not Implemented" - ], - "description": "Current health status of the service" - } - }, - "additionalProperties": false, - "required": [ - "status" - ], - "title": "HealthInfo", - "description": "Health status information for the service." - }, - "RAGDocument": { - "type": "object", - "properties": { - "document_id": { - "type": "string", - "description": "The unique identifier for the document." - }, - "content": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/InterleavedContentItem" - }, - { - "type": "array", - "items": { - "$ref": "#/components/schemas/InterleavedContentItem" - } - }, - { - "$ref": "#/components/schemas/URL" - } - ], - "description": "The content of the document." - }, - "mime_type": { - "type": "string", - "description": "The MIME type of the document." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Additional metadata for the document." - } - }, - "additionalProperties": false, - "required": [ - "document_id", - "content", - "metadata" - ], - "title": "RAGDocument", - "description": "A document to be used for document ingestion in the RAG Tool." 
- }, - "InsertRequest": { - "type": "object", - "properties": { - "documents": { - "type": "array", - "items": { - "$ref": "#/components/schemas/RAGDocument" - }, - "description": "List of documents to index in the RAG system" - }, - "vector_db_id": { - "type": "string", - "description": "ID of the vector database to store the document embeddings" - }, - "chunk_size_in_tokens": { - "type": "integer", - "description": "(Optional) Size in tokens for document chunking during indexing" - } - }, - "additionalProperties": false, - "required": [ - "documents", - "vector_db_id", - "chunk_size_in_tokens" - ], - "title": "InsertRequest" - }, - "Chunk": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The content of the chunk, which can be interleaved text, images, or other types." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Metadata associated with the chunk that will be used in the model context during inference." - }, - "embedding": { - "type": "array", - "items": { - "type": "number" - }, - "description": "Optional embedding for the chunk. If not provided, it will be computed later." - }, - "stored_chunk_id": { - "type": "string", - "description": "The chunk ID that is stored in the vector database. Used for backend functionality." - }, - "chunk_metadata": { - "$ref": "#/components/schemas/ChunkMetadata", - "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality." - } - }, - "additionalProperties": false, - "required": [ - "content", - "metadata" - ], - "title": "Chunk", - "description": "A chunk of content that can be inserted into a vector database." - }, - "ChunkMetadata": { - "type": "object", - "properties": { - "chunk_id": { - "type": "string", - "description": "The ID of the chunk. If not set, it will be generated based on the document ID and content." - }, - "document_id": { - "type": "string", - "description": "The ID of the document this chunk belongs to." - }, - "source": { - "type": "string", - "description": "The source of the content, such as a URL, file path, or other identifier." - }, - "created_timestamp": { - "type": "integer", - "description": "An optional timestamp indicating when the chunk was created." - }, - "updated_timestamp": { - "type": "integer", - "description": "An optional timestamp indicating when the chunk was last updated." - }, - "chunk_window": { - "type": "string", - "description": "The window of the chunk, which can be used to group related chunks together." - }, - "chunk_tokenizer": { - "type": "string", - "description": "The tokenizer used to create the chunk. Default is Tiktoken." - }, - "chunk_embedding_model": { - "type": "string", - "description": "The embedding model used to create the chunk's embedding." - }, - "chunk_embedding_dimension": { - "type": "integer", - "description": "The dimension of the embedding vector for the chunk." - }, - "content_token_count": { - "type": "integer", - "description": "The number of tokens in the content of the chunk." - }, - "metadata_token_count": { - "type": "integer", - "description": "The number of tokens in the metadata of the chunk." 
- } - }, - "additionalProperties": false, - "title": "ChunkMetadata", - "description": "`ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata` is set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not expected to change after. Use `Chunk.metadata` for metadata that will be used in the context during inference." - }, - "InsertChunksRequest": { - "type": "object", - "properties": { - "vector_db_id": { - "type": "string", - "description": "The identifier of the vector database to insert the chunks into." - }, - "chunks": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Chunk" - }, - "description": "The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types. `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional. If `metadata` is provided, you configure how Llama Stack formats the chunk during generation. If `embedding` is not provided, it will be computed later." - }, - "ttl_seconds": { - "type": "integer", - "description": "The time to live of the chunks." - } - }, - "additionalProperties": false, - "required": [ - "vector_db_id", - "chunks" - ], - "title": "InsertChunksRequest" - }, - "ProviderInfo": { - "type": "object", - "properties": { - "api": { - "type": "string", - "description": "The API name this provider implements" - }, - "provider_id": { - "type": "string", - "description": "Unique identifier for the provider" - }, - "provider_type": { - "type": "string", - "description": "The type of provider implementation" - }, - "config": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Configuration parameters for the provider" - }, - "health": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Current health status of the provider" - } - }, - "additionalProperties": false, - "required": [ - "api", - "provider_id", - "provider_type", - "config", - "health" - ], - "title": "ProviderInfo", - "description": "Information about a registered provider including its configuration and health status." - }, - "InvokeToolRequest": { - "type": "object", - "properties": { - "tool_name": { - "type": "string", - "description": "The name of the tool to invoke." - }, - "kwargs": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "A dictionary of arguments to pass to the tool." 
- } - }, - "additionalProperties": false, - "required": [ - "tool_name", - "kwargs" - ], - "title": "InvokeToolRequest" - }, - "ToolInvocationResult": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "(Optional) The output content from the tool execution" - }, - "error_message": { - "type": "string", - "description": "(Optional) Error message if the tool execution failed" - }, - "error_code": { - "type": "integer", - "description": "(Optional) Numeric error code if the tool execution failed" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Additional metadata about the tool execution" - } - }, - "additionalProperties": false, - "title": "ToolInvocationResult", - "description": "Result of a tool invocation." - }, - "PaginatedResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "description": "The list of items for the current page" - }, - "has_more": { - "type": "boolean", - "description": "Whether there are more items available after this set" - }, - "url": { - "type": "string", - "description": "The URL for accessing this list" - } - }, - "additionalProperties": false, - "required": [ - "data", - "has_more" - ], - "title": "PaginatedResponse", - "description": "A generic paginated response that follows a simple format." - }, - "Job": { - "type": "object", - "properties": { - "job_id": { - "type": "string", - "description": "Unique identifier for the job" - }, - "status": { - "type": "string", - "enum": [ - "completed", - "in_progress", - "failed", - "scheduled", - "cancelled" - ], - "description": "Current execution status of the job" - } - }, - "additionalProperties": false, - "required": [ - "job_id", - "status" - ], - "title": "Job", - "description": "A job execution instance with status tracking." - }, - "ListBenchmarksResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Benchmark" - } - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListBenchmarksResponse" - }, - "Order": { - "type": "string", - "enum": [ - "asc", - "desc" - ], - "title": "Order", - "description": "Sort order for paginated responses." 
- }, - "ListOpenAIChatCompletionResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The ID of the chat completion" - }, - "choices": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OpenAIChoice" - }, - "description": "List of choices" - }, - "object": { - "type": "string", - "const": "chat.completion", - "default": "chat.completion", - "description": "The object type, which will be \"chat.completion\"" - }, - "created": { - "type": "integer", - "description": "The Unix timestamp in seconds when the chat completion was created" - }, - "model": { - "type": "string", - "description": "The model that was used to generate the chat completion" - }, - "input_messages": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OpenAIMessageParam" - } - } - }, - "additionalProperties": false, - "required": [ - "id", - "choices", - "object", - "created", - "model", - "input_messages" - ], - "title": "OpenAICompletionWithInputMessages" - }, - "description": "List of chat completion objects with their input messages" - }, - "has_more": { - "type": "boolean", - "description": "Whether there are more completions available beyond this list" - }, - "first_id": { - "type": "string", - "description": "ID of the first completion in this list" - }, - "last_id": { - "type": "string", - "description": "ID of the last completion in this list" - }, - "object": { - "type": "string", - "const": "list", - "default": "list", - "description": "Must be \"list\" to identify this as a list response" - } - }, - "additionalProperties": false, - "required": [ - "data", - "has_more", - "first_id", - "last_id", - "object" - ], - "title": "ListOpenAIChatCompletionResponse", - "description": "Response from listing OpenAI-compatible chat completions." - }, - "ListDatasetsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Dataset" - }, - "description": "List of datasets" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListDatasetsResponse", - "description": "Response from listing datasets." - }, - "ListModelsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Model" - } - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListModelsResponse" - }, - "ListOpenAIResponseInputItem": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OpenAIResponseInput" - }, - "description": "List of input items" - }, - "object": { - "type": "string", - "const": "list", - "default": "list", - "description": "Object type identifier, always \"list\"" - } - }, - "additionalProperties": false, - "required": [ - "data", - "object" - ], - "title": "ListOpenAIResponseInputItem", - "description": "List container for OpenAI response input items." 
- }, - "ListOpenAIResponseObject": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OpenAIResponseObjectWithInput" - }, - "description": "List of response objects with their input context" - }, - "has_more": { - "type": "boolean", - "description": "Whether there are more results available beyond this page" - }, - "first_id": { - "type": "string", - "description": "Identifier of the first item in this page" - }, - "last_id": { - "type": "string", - "description": "Identifier of the last item in this page" - }, - "object": { - "type": "string", - "const": "list", - "default": "list", - "description": "Object type identifier, always \"list\"" - } - }, - "additionalProperties": false, - "required": [ - "data", - "has_more", - "first_id", - "last_id", - "object" - ], - "title": "ListOpenAIResponseObject", - "description": "Paginated list of OpenAI response objects with navigation metadata." - }, - "OpenAIResponseObjectWithInput": { - "type": "object", - "properties": { - "created_at": { - "type": "integer", - "description": "Unix timestamp when the response was created" - }, - "error": { - "$ref": "#/components/schemas/OpenAIResponseError", - "description": "(Optional) Error details if the response generation failed" - }, - "id": { - "type": "string", - "description": "Unique identifier for this response" - }, - "model": { - "type": "string", - "description": "Model identifier used for generation" - }, - "object": { - "type": "string", - "const": "response", - "default": "response", - "description": "Object type identifier, always \"response\"" - }, - "output": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OpenAIResponseOutput" - }, - "description": "List of generated output items (messages, tool calls, etc.)" - }, - "parallel_tool_calls": { - "type": "boolean", - "default": false, - "description": "Whether tool calls can be executed in parallel" - }, - "previous_response_id": { - "type": "string", - "description": "(Optional) ID of the previous response in a conversation" - }, - "status": { - "type": "string", - "description": "Current status of the response generation" - }, - "temperature": { - "type": "number", - "description": "(Optional) Sampling temperature used for generation" - }, - "text": { - "$ref": "#/components/schemas/OpenAIResponseText", - "description": "Text formatting configuration for the response" - }, - "top_p": { - "type": "number", - "description": "(Optional) Nucleus sampling parameter used for generation" - }, - "truncation": { - "type": "string", - "description": "(Optional) Truncation strategy applied to the response" - }, - "user": { - "type": "string", - "description": "(Optional) User identifier associated with the request" - }, - "input": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OpenAIResponseInput" - }, - "description": "List of input items that led to this response" - } - }, - "additionalProperties": false, - "required": [ - "created_at", - "id", - "model", - "object", - "output", - "parallel_tool_calls", - "status", - "text", - "input" - ], - "title": "OpenAIResponseObjectWithInput", - "description": "OpenAI response object extended with input context information." 
- }, - "ListPromptsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Prompt" - } - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListPromptsResponse", - "description": "Response model to list prompts." - }, - "ListProvidersResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ProviderInfo" - }, - "description": "List of provider information objects" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListProvidersResponse", - "description": "Response containing a list of all available providers." - }, - "RouteInfo": { - "type": "object", - "properties": { - "route": { - "type": "string", - "description": "The API endpoint path" - }, - "method": { - "type": "string", - "description": "HTTP method for the route" - }, - "provider_types": { - "type": "array", - "items": { - "type": "string" - }, - "description": "List of provider types that implement this route" - } - }, - "additionalProperties": false, - "required": [ - "route", - "method", - "provider_types" - ], - "title": "RouteInfo", - "description": "Information about an API route including its path, method, and implementing providers." - }, - "ListRoutesResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/RouteInfo" - }, - "description": "List of available route information objects" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListRoutesResponse", - "description": "Response containing a list of all available API routes." - }, - "ListToolDefsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ToolDef" - }, - "description": "List of tool definitions" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListToolDefsResponse", - "description": "Response containing a list of tool definitions." - }, - "ListScoringFunctionsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ScoringFn" - } - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListScoringFunctionsResponse" - }, - "ListShieldsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Shield" - } - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListShieldsResponse" - }, - "ListToolGroupsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ToolGroup" - }, - "description": "List of tool groups" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListToolGroupsResponse", - "description": "Response containing a list of tool groups." - }, - "ListToolsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Tool" - }, - "description": "List of tools" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListToolsResponse", - "description": "Response containing a list of tools." 
- }, - "ListVectorDBsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/VectorDB" - }, - "description": "List of vector databases" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "ListVectorDBsResponse", - "description": "Response from listing vector databases." - }, - "Event": { - "oneOf": [ - { - "$ref": "#/components/schemas/UnstructuredLogEvent" - }, - { - "$ref": "#/components/schemas/MetricEvent" - }, - { - "$ref": "#/components/schemas/StructuredLogEvent" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "unstructured_log": "#/components/schemas/UnstructuredLogEvent", - "metric": "#/components/schemas/MetricEvent", - "structured_log": "#/components/schemas/StructuredLogEvent" - } - } - }, - "EventType": { - "type": "string", - "enum": [ - "unstructured_log", - "structured_log", - "metric" - ], - "title": "EventType", - "description": "The type of telemetry event being logged." - }, - "LogSeverity": { - "type": "string", - "enum": [ - "verbose", - "debug", - "info", - "warn", - "error", - "critical" - ], - "title": "LogSeverity", - "description": "The severity level of a log message." - }, - "MetricEvent": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this event belongs to" - }, - "span_id": { - "type": "string", - "description": "Unique identifier for the span this event belongs to" - }, - "timestamp": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the event occurred" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the event" - }, - "type": { - "$ref": "#/components/schemas/EventType", - "const": "metric", - "default": "metric", - "description": "Event type identifier set to METRIC" - }, - "metric": { - "type": "string", - "description": "The name of the metric being measured" - }, - "value": { - "oneOf": [ - { - "type": "integer" - }, - { - "type": "number" - } - ], - "description": "The numeric value of the metric measurement" - }, - "unit": { - "type": "string", - "description": "The unit of measurement for the metric value" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "span_id", - "timestamp", - "type", - "metric", - "value", - "unit" - ], - "title": "MetricEvent", - "description": "A metric event containing a measured value." - }, - "SpanEndPayload": { - "type": "object", - "properties": { - "type": { - "$ref": "#/components/schemas/StructuredLogType", - "const": "span_end", - "default": "span_end", - "description": "Payload type identifier set to SPAN_END" - }, - "status": { - "$ref": "#/components/schemas/SpanStatus", - "description": "The final status of the span indicating success or failure" - } - }, - "additionalProperties": false, - "required": [ - "type", - "status" - ], - "title": "SpanEndPayload", - "description": "Payload for a span end event." 
- }, - "SpanStartPayload": { - "type": "object", - "properties": { - "type": { - "$ref": "#/components/schemas/StructuredLogType", - "const": "span_start", - "default": "span_start", - "description": "Payload type identifier set to SPAN_START" - }, - "name": { - "type": "string", - "description": "Human-readable name describing the operation this span represents" - }, - "parent_span_id": { - "type": "string", - "description": "(Optional) Unique identifier for the parent span, if this is a child span" - } - }, - "additionalProperties": false, - "required": [ - "type", - "name" - ], - "title": "SpanStartPayload", - "description": "Payload for a span start event." - }, - "StructuredLogEvent": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this event belongs to" - }, - "span_id": { - "type": "string", - "description": "Unique identifier for the span this event belongs to" - }, - "timestamp": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the event occurred" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the event" - }, - "type": { - "$ref": "#/components/schemas/EventType", - "const": "structured_log", - "default": "structured_log", - "description": "Event type identifier set to STRUCTURED_LOG" - }, - "payload": { - "$ref": "#/components/schemas/StructuredLogPayload", - "description": "The structured payload data for the log event" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "span_id", - "timestamp", - "type", - "payload" - ], - "title": "StructuredLogEvent", - "description": "A structured log event containing typed payload data." - }, - "StructuredLogPayload": { - "oneOf": [ - { - "$ref": "#/components/schemas/SpanStartPayload" - }, - { - "$ref": "#/components/schemas/SpanEndPayload" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "span_start": "#/components/schemas/SpanStartPayload", - "span_end": "#/components/schemas/SpanEndPayload" - } - } - }, - "StructuredLogType": { - "type": "string", - "enum": [ - "span_start", - "span_end" - ], - "title": "StructuredLogType", - "description": "The type of structured log event payload." 
- }, - "UnstructuredLogEvent": { - "type": "object", - "properties": { - "trace_id": { - "type": "string", - "description": "Unique identifier for the trace this event belongs to" - }, - "span_id": { - "type": "string", - "description": "Unique identifier for the span this event belongs to" - }, - "timestamp": { - "type": "string", - "format": "date-time", - "description": "Timestamp when the event occurred" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - } - ] - }, - "description": "(Optional) Key-value pairs containing additional metadata about the event" - }, - "type": { - "$ref": "#/components/schemas/EventType", - "const": "unstructured_log", - "default": "unstructured_log", - "description": "Event type identifier set to UNSTRUCTURED_LOG" - }, - "message": { - "type": "string", - "description": "The log message text" - }, - "severity": { - "$ref": "#/components/schemas/LogSeverity", - "description": "The severity level of the log message" - } - }, - "additionalProperties": false, - "required": [ - "trace_id", - "span_id", - "timestamp", - "type", - "message", - "severity" - ], - "title": "UnstructuredLogEvent", - "description": "An unstructured log event containing a simple text message." - }, - "LogEventRequest": { - "type": "object", - "properties": { - "event": { - "$ref": "#/components/schemas/Event", - "description": "The event to log." - }, - "ttl_seconds": { - "type": "integer", - "description": "The time to live of the event." - } - }, - "additionalProperties": false, - "required": [ - "event", - "ttl_seconds" - ], - "title": "LogEventRequest" - }, - "VectorStoreChunkingStrategy": { - "oneOf": [ - { - "$ref": "#/components/schemas/VectorStoreChunkingStrategyAuto" - }, - { - "$ref": "#/components/schemas/VectorStoreChunkingStrategyStatic" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "auto": "#/components/schemas/VectorStoreChunkingStrategyAuto", - "static": "#/components/schemas/VectorStoreChunkingStrategyStatic" - } - } - }, - "VectorStoreChunkingStrategyAuto": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "auto", - "default": "auto", - "description": "Strategy type, always \"auto\" for automatic chunking" - } - }, - "additionalProperties": false, - "required": [ - "type" - ], - "title": "VectorStoreChunkingStrategyAuto", - "description": "Automatic chunking strategy for vector store files." - }, - "VectorStoreChunkingStrategyStatic": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "static", - "default": "static", - "description": "Strategy type, always \"static\" for static chunking" - }, - "static": { - "$ref": "#/components/schemas/VectorStoreChunkingStrategyStaticConfig", - "description": "Configuration parameters for the static chunking strategy" - } - }, - "additionalProperties": false, - "required": [ - "type", - "static" - ], - "title": "VectorStoreChunkingStrategyStatic", - "description": "Static chunking strategy with configurable parameters." 
- }, - "VectorStoreChunkingStrategyStaticConfig": { - "type": "object", - "properties": { - "chunk_overlap_tokens": { - "type": "integer", - "default": 400, - "description": "Number of tokens to overlap between adjacent chunks" - }, - "max_chunk_size_tokens": { - "type": "integer", - "default": 800, - "description": "Maximum number of tokens per chunk, must be between 100 and 4096" - } - }, - "additionalProperties": false, - "required": [ - "chunk_overlap_tokens", - "max_chunk_size_tokens" - ], - "title": "VectorStoreChunkingStrategyStaticConfig", - "description": "Configuration for static chunking strategy." - }, - "OpenaiAttachFileToVectorStoreRequest": { - "type": "object", - "properties": { - "file_id": { - "type": "string", - "description": "The ID of the file to attach to the vector store." - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The key-value attributes stored with the file, which can be used for filtering." - }, - "chunking_strategy": { - "$ref": "#/components/schemas/VectorStoreChunkingStrategy", - "description": "The chunking strategy to use for the file." - } - }, - "additionalProperties": false, - "required": [ - "file_id" - ], - "title": "OpenaiAttachFileToVectorStoreRequest" - }, - "VectorStoreFileLastError": { - "type": "object", - "properties": { - "code": { - "oneOf": [ - { - "type": "string", - "const": "server_error" - }, - { - "type": "string", - "const": "rate_limit_exceeded" - } - ], - "description": "Error code indicating the type of failure" - }, - "message": { - "type": "string", - "description": "Human-readable error message describing the failure" - } - }, - "additionalProperties": false, - "required": [ - "code", - "message" - ], - "title": "VectorStoreFileLastError", - "description": "Error information for failed vector store file processing." 
- }, - "VectorStoreFileObject": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "Unique identifier for the file" - }, - "object": { - "type": "string", - "default": "vector_store.file", - "description": "Object type identifier, always \"vector_store.file\"" - }, - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Key-value attributes associated with the file" - }, - "chunking_strategy": { - "$ref": "#/components/schemas/VectorStoreChunkingStrategy", - "description": "Strategy used for splitting the file into chunks" - }, - "created_at": { - "type": "integer", - "description": "Timestamp when the file was added to the vector store" - }, - "last_error": { - "$ref": "#/components/schemas/VectorStoreFileLastError", - "description": "(Optional) Error information if file processing failed" - }, - "status": { - "$ref": "#/components/schemas/VectorStoreFileStatus", - "description": "Current processing status of the file" - }, - "usage_bytes": { - "type": "integer", - "default": 0, - "description": "Storage space used by this file in bytes" - }, - "vector_store_id": { - "type": "string", - "description": "ID of the vector store containing this file" - } - }, - "additionalProperties": false, - "required": [ - "id", - "object", - "attributes", - "chunking_strategy", - "created_at", - "status", - "usage_bytes", - "vector_store_id" - ], - "title": "VectorStoreFileObject", - "description": "OpenAI Vector Store File object." - }, - "VectorStoreFileStatus": { - "oneOf": [ - { - "type": "string", - "const": "completed" - }, - { - "type": "string", - "const": "in_progress" - }, - { - "type": "string", - "const": "cancelled" - }, - { - "type": "string", - "const": "failed" - } - ] - }, "OpenAIJSONSchema": { "type": "object", "properties": { @@ -9657,7 +6829,8 @@ "type": "string", "enum": [ "llm", - "embedding" + "embedding", + "rerank" ], "title": "ModelType", "description": "Enumeration of supported model types in Llama Stack." @@ -15610,2170 +12783,6 @@ "title": "VectorStoreSearchResponsePage", "description": "Paginated response from searching a vector store." }, -<<<<<<< HEAD -======= - "OpenaiUpdateVectorStoreRequest": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The name of the vector store." - }, - "expires_after": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The expiration policy for a vector store." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Set of 16 key-value pairs that can be attached to an object." 
- } - }, - "additionalProperties": false, - "title": "OpenaiUpdateVectorStoreRequest" - }, - "OpenaiUpdateVectorStoreFileRequest": { - "type": "object", - "properties": { - "attributes": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The updated key-value attributes to store with the file." - } - }, - "additionalProperties": false, - "required": [ - "attributes" - ], - "title": "OpenaiUpdateVectorStoreFileRequest" - }, - "DPOAlignmentConfig": { - "type": "object", - "properties": { - "beta": { - "type": "number", - "description": "Temperature parameter for the DPO loss" - }, - "loss_type": { - "$ref": "#/components/schemas/DPOLossType", - "default": "sigmoid", - "description": "The type of loss function to use for DPO" - } - }, - "additionalProperties": false, - "required": [ - "beta", - "loss_type" - ], - "title": "DPOAlignmentConfig", - "description": "Configuration for Direct Preference Optimization (DPO) alignment." - }, - "DPOLossType": { - "type": "string", - "enum": [ - "sigmoid", - "hinge", - "ipo", - "kto_pair" - ], - "title": "DPOLossType" - }, - "DataConfig": { - "type": "object", - "properties": { - "dataset_id": { - "type": "string", - "description": "Unique identifier for the training dataset" - }, - "batch_size": { - "type": "integer", - "description": "Number of samples per training batch" - }, - "shuffle": { - "type": "boolean", - "description": "Whether to shuffle the dataset during training" - }, - "data_format": { - "$ref": "#/components/schemas/DatasetFormat", - "description": "Format of the dataset (instruct or dialog)" - }, - "validation_dataset_id": { - "type": "string", - "description": "(Optional) Unique identifier for the validation dataset" - }, - "packed": { - "type": "boolean", - "default": false, - "description": "(Optional) Whether to pack multiple samples into a single sequence for efficiency" - }, - "train_on_input": { - "type": "boolean", - "default": false, - "description": "(Optional) Whether to compute loss on input tokens as well as output tokens" - } - }, - "additionalProperties": false, - "required": [ - "dataset_id", - "batch_size", - "shuffle", - "data_format" - ], - "title": "DataConfig", - "description": "Configuration for training data and data loading." - }, - "DatasetFormat": { - "type": "string", - "enum": [ - "instruct", - "dialog" - ], - "title": "DatasetFormat", - "description": "Format of the training dataset." - }, - "EfficiencyConfig": { - "type": "object", - "properties": { - "enable_activation_checkpointing": { - "type": "boolean", - "default": false, - "description": "(Optional) Whether to use activation checkpointing to reduce memory usage" - }, - "enable_activation_offloading": { - "type": "boolean", - "default": false, - "description": "(Optional) Whether to offload activations to CPU to save GPU memory" - }, - "memory_efficient_fsdp_wrap": { - "type": "boolean", - "default": false, - "description": "(Optional) Whether to use memory-efficient FSDP wrapping" - }, - "fsdp_cpu_offload": { - "type": "boolean", - "default": false, - "description": "(Optional) Whether to offload FSDP parameters to CPU" - } - }, - "additionalProperties": false, - "title": "EfficiencyConfig", - "description": "Configuration for memory and compute efficiency optimizations." 
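A sketch of a `DPOAlignmentConfig` and the `DataConfig` it would train against, using only fields defined in the schemas above; the dataset ID and numeric values are illustrative placeholders.

```python
# DPOAlignmentConfig: both fields are required by the schema.
dpo_config = {
    "beta": 0.1,             # temperature parameter for the DPO loss
    "loss_type": "sigmoid",  # DPOLossType: sigmoid, hinge, ipo, or kto_pair
}

# DataConfig: dataset_id, batch_size, shuffle, and data_format are required.
data_config = {
    "dataset_id": "my-preference-dataset",  # hypothetical dataset ID
    "batch_size": 8,
    "shuffle": True,
    "data_format": "dialog",   # DatasetFormat: instruct or dialog
    "packed": False,           # optional: pack samples into one sequence
    "train_on_input": False,   # optional: compute loss on input tokens too
}
```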
- }, - "OptimizerConfig": { - "type": "object", - "properties": { - "optimizer_type": { - "$ref": "#/components/schemas/OptimizerType", - "description": "Type of optimizer to use (adam, adamw, or sgd)" - }, - "lr": { - "type": "number", - "description": "Learning rate for the optimizer" - }, - "weight_decay": { - "type": "number", - "description": "Weight decay coefficient for regularization" - }, - "num_warmup_steps": { - "type": "integer", - "description": "Number of steps for learning rate warmup" - } - }, - "additionalProperties": false, - "required": [ - "optimizer_type", - "lr", - "weight_decay", - "num_warmup_steps" - ], - "title": "OptimizerConfig", - "description": "Configuration parameters for the optimization algorithm." - }, - "OptimizerType": { - "type": "string", - "enum": [ - "adam", - "adamw", - "sgd" - ], - "title": "OptimizerType", - "description": "Available optimizer algorithms for training." - }, - "TrainingConfig": { - "type": "object", - "properties": { - "n_epochs": { - "type": "integer", - "description": "Number of training epochs to run" - }, - "max_steps_per_epoch": { - "type": "integer", - "default": 1, - "description": "Maximum number of steps to run per epoch" - }, - "gradient_accumulation_steps": { - "type": "integer", - "default": 1, - "description": "Number of steps to accumulate gradients before updating" - }, - "max_validation_steps": { - "type": "integer", - "default": 1, - "description": "(Optional) Maximum number of validation steps per epoch" - }, - "data_config": { - "$ref": "#/components/schemas/DataConfig", - "description": "(Optional) Configuration for data loading and formatting" - }, - "optimizer_config": { - "$ref": "#/components/schemas/OptimizerConfig", - "description": "(Optional) Configuration for the optimization algorithm" - }, - "efficiency_config": { - "$ref": "#/components/schemas/EfficiencyConfig", - "description": "(Optional) Configuration for memory and compute optimizations" - }, - "dtype": { - "type": "string", - "default": "bf16", - "description": "(Optional) Data type for model parameters (bf16, fp16, fp32)" - } - }, - "additionalProperties": false, - "required": [ - "n_epochs", - "max_steps_per_epoch", - "gradient_accumulation_steps" - ], - "title": "TrainingConfig", - "description": "Comprehensive configuration for the training process." - }, - "PreferenceOptimizeRequest": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string", - "description": "The UUID of the job to create." - }, - "finetuned_model": { - "type": "string", - "description": "The model to fine-tune." - }, - "algorithm_config": { - "$ref": "#/components/schemas/DPOAlignmentConfig", - "description": "The algorithm configuration." - }, - "training_config": { - "$ref": "#/components/schemas/TrainingConfig", - "description": "The training configuration." - }, - "hyperparam_search_config": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The hyperparam search configuration." - }, - "logger_config": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The logger configuration." 
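A sketch of an `OptimizerConfig` nested inside a `TrainingConfig`, mirroring the required/optional split in the schemas above; the hyperparameter values are placeholders, not recommendations.

```python
# OptimizerConfig: all four fields are required by the schema.
optimizer_config = {
    "optimizer_type": "adamw",  # OptimizerType: adam, adamw, or sgd
    "lr": 1e-5,
    "weight_decay": 0.01,
    "num_warmup_steps": 100,
}

# TrainingConfig: n_epochs, max_steps_per_epoch, and
# gradient_accumulation_steps are required; the rest are optional.
training_config = {
    "n_epochs": 1,
    "max_steps_per_epoch": 100,
    "gradient_accumulation_steps": 4,
    "optimizer_config": optimizer_config,
    "dtype": "bf16",  # optional: bf16, fp16, or fp32
}
```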
- } - }, - "additionalProperties": false, - "required": [ - "job_uuid", - "finetuned_model", - "algorithm_config", - "training_config", - "hyperparam_search_config", - "logger_config" - ], - "title": "PreferenceOptimizeRequest" - }, - "PostTrainingJob": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "job_uuid" - ], - "title": "PostTrainingJob" - }, - "DefaultRAGQueryGeneratorConfig": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "default", - "default": "default", - "description": "Type of query generator, always 'default'" - }, - "separator": { - "type": "string", - "default": " ", - "description": "String separator used to join query terms" - } - }, - "additionalProperties": false, - "required": [ - "type", - "separator" - ], - "title": "DefaultRAGQueryGeneratorConfig", - "description": "Configuration for the default RAG query generator." - }, - "LLMRAGQueryGeneratorConfig": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "llm", - "default": "llm", - "description": "Type of query generator, always 'llm'" - }, - "model": { - "type": "string", - "description": "Name of the language model to use for query generation" - }, - "template": { - "type": "string", - "description": "Template string for formatting the query generation prompt" - } - }, - "additionalProperties": false, - "required": [ - "type", - "model", - "template" - ], - "title": "LLMRAGQueryGeneratorConfig", - "description": "Configuration for the LLM-based RAG query generator." - }, - "RAGQueryConfig": { - "type": "object", - "properties": { - "query_generator_config": { - "$ref": "#/components/schemas/RAGQueryGeneratorConfig", - "description": "Configuration for the query generator." - }, - "max_tokens_in_context": { - "type": "integer", - "default": 4096, - "description": "Maximum number of tokens in the context." - }, - "max_chunks": { - "type": "integer", - "default": 5, - "description": "Maximum number of chunks to retrieve." - }, - "chunk_template": { - "type": "string", - "default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n", - "description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\"" - }, - "mode": { - "$ref": "#/components/schemas/RAGSearchMode", - "default": "vector", - "description": "Search mode for retrieval—either \"vector\", \"keyword\", or \"hybrid\". Default \"vector\"." - }, - "ranker": { - "$ref": "#/components/schemas/Ranker", - "description": "Configuration for the ranker to use in hybrid search. Defaults to RRF ranker." - } - }, - "additionalProperties": false, - "required": [ - "query_generator_config", - "max_tokens_in_context", - "max_chunks", - "chunk_template" - ], - "title": "RAGQueryConfig", - "description": "Configuration for the RAG query generation." 
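A sketch of how the `chunk_template` from `RAGQueryConfig` above expands for each retrieved chunk. `Chunk` here is a stand-in dataclass rather than the actual llama_stack type; the template string and its placeholders are taken verbatim from the schema.

```python
from dataclasses import dataclass, field

@dataclass
class Chunk:
    # Stand-in for the real Chunk type: only the fields the template needs.
    content: str
    metadata: dict = field(default_factory=dict)

# Default chunk_template from the RAGQueryConfig schema.
template = "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"

chunks = [Chunk("Paris is the capital of France.", {"doc": "geo.txt"})]
context = "".join(
    template.format(index=i + 1, chunk=c, metadata=c.metadata)  # {index} is 1-based
    for i, c in enumerate(chunks)
)
print(context)
```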
- }, - "RAGQueryGeneratorConfig": { - "oneOf": [ - { - "$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig" - }, - { - "$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "default": "#/components/schemas/DefaultRAGQueryGeneratorConfig", - "llm": "#/components/schemas/LLMRAGQueryGeneratorConfig" - } - } - }, - "RAGSearchMode": { - "type": "string", - "enum": [ - "vector", - "keyword", - "hybrid" - ], - "title": "RAGSearchMode", - "description": "Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search for semantic matching - KEYWORD: Uses keyword-based search for exact matching - HYBRID: Combines both vector and keyword search for better results" - }, - "RRFRanker": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "rrf", - "default": "rrf", - "description": "The type of ranker, always \"rrf\"" - }, - "impact_factor": { - "type": "number", - "default": 60.0, - "description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0" - } - }, - "additionalProperties": false, - "required": [ - "type", - "impact_factor" - ], - "title": "RRFRanker", - "description": "Reciprocal Rank Fusion (RRF) ranker configuration." - }, - "Ranker": { - "oneOf": [ - { - "$ref": "#/components/schemas/RRFRanker" - }, - { - "$ref": "#/components/schemas/WeightedRanker" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "rrf": "#/components/schemas/RRFRanker", - "weighted": "#/components/schemas/WeightedRanker" - } - } - }, - "WeightedRanker": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "weighted", - "default": "weighted", - "description": "The type of ranker, always \"weighted\"" - }, - "alpha": { - "type": "number", - "default": 0.5, - "description": "Weight factor between 0 and 1. 0 means only use keyword scores, 1 means only use vector scores, values in between blend both scores." - } - }, - "additionalProperties": false, - "required": [ - "type", - "alpha" - ], - "title": "WeightedRanker", - "description": "Weighted ranker configuration that combines vector and keyword scores." 
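A sketch of the two hybrid-search rankers described above. For RRF this assumes the standard reciprocal-rank-fusion form, `1 / (impact_factor + rank)`, with `impact_factor` playing the usual smoothing-constant role; the weighted ranker blends scores as `alpha * vector + (1 - alpha) * keyword`, matching the `alpha` semantics in the schema. This mirrors the schema fields, not the provider's internal implementation.

```python
def rrf_score(ranks: list[int], impact_factor: float = 60.0) -> float:
    # ranks are the 1-based positions of the same document in each result list
    # (e.g. one rank from vector search, one from keyword search).
    return sum(1.0 / (impact_factor + r) for r in ranks)

def weighted_score(vector: float, keyword: float, alpha: float = 0.5) -> float:
    # alpha=0 -> keyword scores only, alpha=1 -> vector scores only.
    return alpha * vector + (1 - alpha) * keyword

print(rrf_score([1, 3]))         # document ranked 1st by one search, 3rd by the other
print(weighted_score(0.9, 0.4))  # blend of vector and keyword scores
```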
- }, - "QueryRequest": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The query content to search for in the indexed documents" - }, - "vector_db_ids": { - "type": "array", - "items": { - "type": "string" - }, - "description": "List of vector database IDs to search within" - }, - "query_config": { - "$ref": "#/components/schemas/RAGQueryConfig", - "description": "(Optional) Configuration parameters for the query operation" - } - }, - "additionalProperties": false, - "required": [ - "content", - "vector_db_ids" - ], - "title": "QueryRequest" - }, - "RAGQueryResult": { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "(Optional) The retrieved content from the query" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Additional metadata about the query result" - } - }, - "additionalProperties": false, - "required": [ - "metadata" - ], - "title": "RAGQueryResult", - "description": "Result of a RAG query containing retrieved content and metadata." - }, - "QueryChunksRequest": { - "type": "object", - "properties": { - "vector_db_id": { - "type": "string", - "description": "The identifier of the vector database to query." - }, - "query": { - "$ref": "#/components/schemas/InterleavedContent", - "description": "The query to search for." - }, - "params": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The parameters of the query." - } - }, - "additionalProperties": false, - "required": [ - "vector_db_id", - "query" - ], - "title": "QueryChunksRequest" - }, - "QueryChunksResponse": { - "type": "object", - "properties": { - "chunks": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Chunk" - }, - "description": "List of content chunks returned from the query" - }, - "scores": { - "type": "array", - "items": { - "type": "number" - }, - "description": "Relevance scores corresponding to each returned chunk" - } - }, - "additionalProperties": false, - "required": [ - "chunks", - "scores" - ], - "title": "QueryChunksResponse", - "description": "Response from querying chunks in a vector database." - }, - "QueryMetricsRequest": { - "type": "object", - "properties": { - "start_time": { - "type": "integer", - "description": "The start time of the metric to query." - }, - "end_time": { - "type": "integer", - "description": "The end time of the metric to query." - }, - "granularity": { - "type": "string", - "description": "The granularity of the metric to query." - }, - "query_type": { - "type": "string", - "enum": [ - "range", - "instant" - ], - "description": "The type of query to perform." 
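A sketch of a `QueryChunksRequest` body and of how the parallel `chunks`/`scores` arrays in `QueryChunksResponse` line up. The top-level field names come from the schemas above; the `params` keys are an assumption, since the schema leaves them as a free-form mapping, and the IDs and scores are hypothetical.

```python
request = {
    "vector_db_id": "my-vector-db",                  # hypothetical DB ID
    "query": "What is the capital of France?",
    "params": {"max_chunks": 5, "mode": "vector"},   # free-form per the schema
}

# QueryChunksResponse pairs chunks with relevance scores by position.
response = {
    "chunks": [{"content": "Paris is the capital of France."}],
    "scores": [0.87],
}

for chunk, score in zip(response["chunks"], response["scores"]):
    print(f"{score:.2f}  {chunk['content']}")
```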
- }, - "label_matchers": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The name of the label to match" - }, - "value": { - "type": "string", - "description": "The value to match against" - }, - "operator": { - "type": "string", - "enum": [ - "=", - "!=", - "=~", - "!~" - ], - "description": "The comparison operator to use for matching", - "default": "=" - } - }, - "additionalProperties": false, - "required": [ - "name", - "value", - "operator" - ], - "title": "MetricLabelMatcher", - "description": "A matcher for filtering metrics by label values." - }, - "description": "The label matchers to apply to the metric." - } - }, - "additionalProperties": false, - "required": [ - "start_time", - "query_type" - ], - "title": "QueryMetricsRequest" - }, - "MetricDataPoint": { - "type": "object", - "properties": { - "timestamp": { - "type": "integer", - "description": "Unix timestamp when the metric value was recorded" - }, - "value": { - "type": "number", - "description": "The numeric value of the metric at this timestamp" - }, - "unit": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "timestamp", - "value", - "unit" - ], - "title": "MetricDataPoint", - "description": "A single data point in a metric time series." - }, - "MetricLabel": { - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "The name of the label" - }, - "value": { - "type": "string", - "description": "The value of the label" - } - }, - "additionalProperties": false, - "required": [ - "name", - "value" - ], - "title": "MetricLabel", - "description": "A label associated with a metric." - }, - "MetricSeries": { - "type": "object", - "properties": { - "metric": { - "type": "string", - "description": "The name of the metric" - }, - "labels": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricLabel" - }, - "description": "List of labels associated with this metric series" - }, - "values": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricDataPoint" - }, - "description": "List of data points in chronological order" - } - }, - "additionalProperties": false, - "required": [ - "metric", - "labels", - "values" - ], - "title": "MetricSeries", - "description": "A time series of metric data points." - }, - "QueryMetricsResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MetricSeries" - }, - "description": "List of metric series matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QueryMetricsResponse", - "description": "Response containing metric time series data." - }, - "QueryCondition": { - "type": "object", - "properties": { - "key": { - "type": "string", - "description": "The attribute key to filter on" - }, - "op": { - "$ref": "#/components/schemas/QueryConditionOp", - "description": "The comparison operator to apply" - }, - "value": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ], - "description": "The value to compare against" - } - }, - "additionalProperties": false, - "required": [ - "key", - "op", - "value" - ], - "title": "QueryCondition", - "description": "A condition for filtering query results." 
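A sketch of a `QueryMetricsRequest` with one label matcher, using the enum values defined above (`query_type`: range or instant; `operator`: `=`, `!=`, `=~`, `!~`). The label name, the regex value, and the `"1m"` granularity string are assumptions, since the schema leaves those free-form.

```python
import time

now = int(time.time())
metrics_request = {
    "start_time": now - 3600,  # required: one hour ago
    "end_time": now,           # optional
    "granularity": "1m",       # optional free-form string; "1m" is a guess
    "query_type": "range",     # required: "range" or "instant"
    "label_matchers": [
        # MetricLabelMatcher: "=~" is the regex-match operator from the enum.
        {"name": "model_id", "value": "llama-3.*", "operator": "=~"},
    ],
}
```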
- }, - "QueryConditionOp": { - "type": "string", - "enum": [ - "eq", - "ne", - "gt", - "lt" - ], - "title": "QueryConditionOp", - "description": "Comparison operators for query conditions." - }, - "QuerySpansRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the spans." - }, - "attributes_to_return": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to return in the spans." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "required": [ - "attribute_filters", - "attributes_to_return" - ], - "title": "QuerySpansRequest" - }, - "QuerySpansResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Span" - }, - "description": "List of spans matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QuerySpansResponse", - "description": "Response containing a list of spans." - }, - "QueryTracesRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the traces." - }, - "limit": { - "type": "integer", - "description": "The limit of traces to return." - }, - "offset": { - "type": "integer", - "description": "The offset of the traces to return." - }, - "order_by": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The order by of the traces to return." - } - }, - "additionalProperties": false, - "title": "QueryTracesRequest" - }, - "QueryTracesResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Trace" - }, - "description": "List of traces matching the query criteria" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "QueryTracesResponse", - "description": "Response containing a list of traces." - }, - "RegisterBenchmarkRequest": { - "type": "object", - "properties": { - "benchmark_id": { - "type": "string", - "description": "The ID of the benchmark to register." - }, - "dataset_id": { - "type": "string", - "description": "The ID of the dataset to use for the benchmark." - }, - "scoring_functions": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The scoring functions to use for the benchmark." - }, - "provider_benchmark_id": { - "type": "string", - "description": "The ID of the provider benchmark to use for the benchmark." - }, - "provider_id": { - "type": "string", - "description": "The ID of the provider to use for the benchmark." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The metadata to use for the benchmark." 
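A sketch of a `QueryTracesRequest` filtering on a `QueryCondition`, built only from fields in the schemas above; the attribute key, its value, and the ordering column are hypothetical, and the `op` values come from `QueryConditionOp` (eq, ne, gt, lt).

```python
traces_request = {
    "attribute_filters": [
        # QueryCondition: key, op, and value are all required.
        {"key": "session_id", "op": "eq", "value": "abc-123"},
    ],
    "limit": 20,                # optional: max traces to return
    "offset": 0,                # optional: pagination offset
    "order_by": ["start_time"], # optional: hypothetical ordering column
}
```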
- } - }, - "additionalProperties": false, - "required": [ - "benchmark_id", - "dataset_id", - "scoring_functions" - ], - "title": "RegisterBenchmarkRequest" - }, - "RegisterDatasetRequest": { - "type": "object", - "properties": { - "purpose": { - "type": "string", - "enum": [ - "post-training/messages", - "eval/question-answer", - "eval/messages-answer" - ], - "description": "The purpose of the dataset. One of: - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } - \"eval/question-answer\": The dataset contains a question column and an answer column for evaluation. { \"question\": \"What is the capital of France?\", \"answer\": \"Paris\" } - \"eval/messages-answer\": The dataset contains a messages column with list of messages and an answer column for evaluation. { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, my name is John Doe.\"}, {\"role\": \"assistant\", \"content\": \"Hello, John Doe. How can I help you today?\"}, {\"role\": \"user\", \"content\": \"What's my name?\"}, ], \"answer\": \"John Doe\" }" - }, - "source": { - "$ref": "#/components/schemas/DataSource", - "description": "The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"data:csv;base64,{base64_content}\" } - { \"type\": \"uri\", \"uri\": \"huggingface://llamastack/simpleqa?split=train\" } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The metadata for the dataset. - E.g. {\"description\": \"My dataset\"}." - }, - "dataset_id": { - "type": "string", - "description": "The ID of the dataset. If not provided, an ID will be generated." - } - }, - "additionalProperties": false, - "required": [ - "purpose", - "source" - ], - "title": "RegisterDatasetRequest" - }, - "RegisterModelRequest": { - "type": "object", - "properties": { - "model_id": { - "type": "string", - "description": "The identifier of the model to register." - }, - "provider_model_id": { - "type": "string", - "description": "The identifier of the model in the provider." - }, - "provider_id": { - "type": "string", - "description": "The identifier of the provider." - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "Any additional metadata for this model." - }, - "model_type": { - "$ref": "#/components/schemas/ModelType", - "description": "The type of model to register." 
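A sketch of a `RegisterModelRequest` body for a reranking model, exercising the new `rerank` member of `ModelType` that this patch adds; only `model_id` is required by the schema. The model and provider identifiers are illustrative, not a claim about the exact names the NVIDIA provider registers.

```python
register_model_request = {
    "model_id": "nvidia/llama-3.2-nv-rerankqa-1b-v2",  # illustrative model ID
    "provider_id": "nvidia",                           # illustrative provider ID
    "model_type": "rerank",  # ModelType was previously limited to llm | embedding
    "metadata": {},
}
```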
- } - }, - "additionalProperties": false, - "required": [ - "model_id" - ], - "title": "RegisterModelRequest" - }, - "RegisterScoringFunctionRequest": { - "type": "object", - "properties": { - "scoring_fn_id": { - "type": "string", - "description": "The ID of the scoring function to register." - }, - "description": { - "type": "string", - "description": "The description of the scoring function." - }, - "return_type": { - "$ref": "#/components/schemas/ParamType", - "description": "The return type of the scoring function." - }, - "provider_scoring_fn_id": { - "type": "string", - "description": "The ID of the provider scoring function to use for the scoring function." - }, - "provider_id": { - "type": "string", - "description": "The ID of the provider to use for the scoring function." - }, - "params": { - "$ref": "#/components/schemas/ScoringFnParams", - "description": "The parameters for the scoring function for benchmark eval, these can be overridden for app eval." - } - }, - "additionalProperties": false, - "required": [ - "scoring_fn_id", - "description", - "return_type" - ], - "title": "RegisterScoringFunctionRequest" - }, - "RegisterShieldRequest": { - "type": "object", - "properties": { - "shield_id": { - "type": "string", - "description": "The identifier of the shield to register." - }, - "provider_shield_id": { - "type": "string", - "description": "The identifier of the shield in the provider." - }, - "provider_id": { - "type": "string", - "description": "The identifier of the provider." - }, - "params": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The parameters of the shield." - } - }, - "additionalProperties": false, - "required": [ - "shield_id" - ], - "title": "RegisterShieldRequest" - }, - "RegisterToolGroupRequest": { - "type": "object", - "properties": { - "toolgroup_id": { - "type": "string", - "description": "The ID of the tool group to register." - }, - "provider_id": { - "type": "string", - "description": "The ID of the provider to use for the tool group." - }, - "mcp_endpoint": { - "$ref": "#/components/schemas/URL", - "description": "The MCP endpoint to use for the tool group." - }, - "args": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "A dictionary of arguments to pass to the tool group." - } - }, - "additionalProperties": false, - "required": [ - "toolgroup_id", - "provider_id" - ], - "title": "RegisterToolGroupRequest" - }, - "RegisterVectorDbRequest": { - "type": "object", - "properties": { - "vector_db_id": { - "type": "string", - "description": "The identifier of the vector database to register." - }, - "embedding_model": { - "type": "string", - "description": "The embedding model to use." - }, - "embedding_dimension": { - "type": "integer", - "description": "The dimension of the embedding model." - }, - "provider_id": { - "type": "string", - "description": "The identifier of the provider." - }, - "vector_db_name": { - "type": "string", - "description": "The name of the vector database." - }, - "provider_vector_db_id": { - "type": "string", - "description": "The identifier of the vector database in the provider." 
- } - }, - "additionalProperties": false, - "required": [ - "vector_db_id", - "embedding_model" - ], - "title": "RegisterVectorDbRequest" - }, - "RerankRequest": { - "type": "object", - "properties": { - "model": { - "type": "string", - "description": "The identifier of the reranking model to use. The model must be a reranking model registered with Llama Stack and available via the /models endpoint." - }, - "query": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam" - }, - { - "$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam" - } - ], - "description": "The search query to rank items against. Can be a string, text content part, or image content part. The input must not exceed the model's max input token length." - }, - "items": { - "type": "array", - "items": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam" - }, - { - "$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam" - } - ] - }, - "description": "List of items to rerank. Each item can be a string, text content part, or image content part. Each input must not exceed the model's max input token length." - }, - "max_num_results": { - "type": "integer", - "description": "(Optional) Maximum number of results to return. Default: returns all." - } - }, - "additionalProperties": false, - "required": [ - "model", - "query", - "items" - ], - "title": "RerankRequest" - }, - "RerankData": { - "type": "object", - "properties": { - "index": { - "type": "integer", - "description": "The original index of the document in the input list" - }, - "relevance_score": { - "type": "number", - "description": "The relevance score from the model output. Values are inverted when applicable so that higher scores indicate greater relevance." - } - }, - "additionalProperties": false, - "required": [ - "index", - "relevance_score" - ], - "title": "RerankData", - "description": "A single rerank result from a reranking response." - }, - "RerankResponse": { - "type": "object", - "properties": { - "data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/RerankData" - }, - "description": "List of rerank result objects, sorted by relevance score (descending)" - } - }, - "additionalProperties": false, - "required": [ - "data" - ], - "title": "RerankResponse", - "description": "Response from a reranking request." - }, - "ResumeAgentTurnRequest": { - "type": "object", - "properties": { - "tool_responses": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ToolResponse" - }, - "description": "The tool call responses to resume the turn with." - }, - "stream": { - "type": "boolean", - "description": "Whether to stream the response." - } - }, - "additionalProperties": false, - "required": [ - "tool_responses" - ], - "title": "ResumeAgentTurnRequest" - }, - "RunEvalRequest": { - "type": "object", - "properties": { - "benchmark_config": { - "$ref": "#/components/schemas/BenchmarkConfig", - "description": "The configuration for the benchmark." - } - }, - "additionalProperties": false, - "required": [ - "benchmark_config" - ], - "title": "RunEvalRequest" - }, - "RunModerationRequest": { - "type": "object", - "properties": { - "input": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ], - "description": "Input (or inputs) to classify. 
Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models." - }, - "model": { - "type": "string", - "description": "The content moderation model you would like to use." - } - }, - "additionalProperties": false, - "required": [ - "input", - "model" - ], - "title": "RunModerationRequest" - }, - "ModerationObject": { - "type": "object", - "properties": { - "id": { - "type": "string", - "description": "The unique identifier for the moderation request." - }, - "model": { - "type": "string", - "description": "The model used to generate the moderation results." - }, - "results": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ModerationObjectResults" - }, - "description": "A list of moderation objects" - } - }, - "additionalProperties": false, - "required": [ - "id", - "model", - "results" - ], - "title": "ModerationObject", - "description": "A moderation object." - }, - "ModerationObjectResults": { - "type": "object", - "properties": { - "flagged": { - "type": "boolean", - "description": "Whether any of the below categories are flagged." - }, - "categories": { - "type": "object", - "additionalProperties": { - "type": "boolean" - }, - "description": "A list of the categories, and whether they are flagged or not." - }, - "category_applied_input_types": { - "type": "object", - "additionalProperties": { - "type": "array", - "items": { - "type": "string" - } - }, - "description": "A list of the categories along with the input type(s) that the score applies to." - }, - "category_scores": { - "type": "object", - "additionalProperties": { - "type": "number" - }, - "description": "A list of the categories along with their scores as predicted by model." - }, - "user_message": { - "type": "string" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "flagged", - "metadata" - ], - "title": "ModerationObjectResults", - "description": "A moderation object." - }, - "RunShieldRequest": { - "type": "object", - "properties": { - "shield_id": { - "type": "string", - "description": "The identifier of the shield to run." - }, - "messages": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Message" - }, - "description": "The messages to run the shield on." - }, - "params": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The parameters of the shield." - } - }, - "additionalProperties": false, - "required": [ - "shield_id", - "messages", - "params" - ], - "title": "RunShieldRequest" - }, - "RunShieldResponse": { - "type": "object", - "properties": { - "violation": { - "$ref": "#/components/schemas/SafetyViolation", - "description": "(Optional) Safety violation detected by the shield, if any" - } - }, - "additionalProperties": false, - "title": "RunShieldResponse", - "description": "Response from running a safety shield." - }, - "SaveSpansToDatasetRequest": { - "type": "object", - "properties": { - "attribute_filters": { - "type": "array", - "items": { - "$ref": "#/components/schemas/QueryCondition" - }, - "description": "The attribute filters to apply to the spans." 
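A sketch of the rerank API this patch introduces, using only fields from the `RerankRequest`/`RerankData`/`RerankResponse` schemas above. The endpoint path and transport are not shown in this hunk, so this sticks to payload shape; the model ID, documents, and scores are illustrative.

```python
rerank_request = {
    "model": "nvidia/llama-3.2-nv-rerankqa-1b-v2",  # a registered rerank model
    "query": "What is the capital of France?",      # string, text, or image part
    "items": [
        "Paris is the capital of France.",
        "The Eiffel Tower is in Paris.",
        "Berlin is the capital of Germany.",
    ],
    "max_num_results": 2,  # optional; omitting it returns all items
}

# RerankResponse.data is sorted by relevance_score (descending); each
# RerankData.index points back into the original `items` list.
rerank_response = {
    "data": [
        {"index": 0, "relevance_score": 0.98},
        {"index": 2, "relevance_score": 0.31},
    ]
}

for result in rerank_response["data"]:
    print(result["relevance_score"], rerank_request["items"][result["index"]])
```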
- }, - "attributes_to_save": { - "type": "array", - "items": { - "type": "string" - }, - "description": "The attributes to save to the dataset." - }, - "dataset_id": { - "type": "string", - "description": "The ID of the dataset to save the spans to." - }, - "max_depth": { - "type": "integer", - "description": "The maximum depth of the tree." - } - }, - "additionalProperties": false, - "required": [ - "attribute_filters", - "attributes_to_save", - "dataset_id" - ], - "title": "SaveSpansToDatasetRequest" - }, - "ScoreRequest": { - "type": "object", - "properties": { - "input_rows": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "description": "The rows to score." - }, - "scoring_functions": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "$ref": "#/components/schemas/ScoringFnParams" - }, - { - "type": "null" - } - ] - }, - "description": "The scoring functions to use for the scoring." - } - }, - "additionalProperties": false, - "required": [ - "input_rows", - "scoring_functions" - ], - "title": "ScoreRequest" - }, - "ScoreResponse": { - "type": "object", - "properties": { - "results": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/ScoringResult" - }, - "description": "A map of scoring function name to ScoringResult." - } - }, - "additionalProperties": false, - "required": [ - "results" - ], - "title": "ScoreResponse", - "description": "The response from scoring." - }, - "ScoreBatchRequest": { - "type": "object", - "properties": { - "dataset_id": { - "type": "string", - "description": "The ID of the dataset to score." - }, - "scoring_functions": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "$ref": "#/components/schemas/ScoringFnParams" - }, - { - "type": "null" - } - ] - }, - "description": "The scoring functions to use for the scoring." - }, - "save_results_dataset": { - "type": "boolean", - "description": "Whether to save the results to a dataset." - } - }, - "additionalProperties": false, - "required": [ - "dataset_id", - "scoring_functions", - "save_results_dataset" - ], - "title": "ScoreBatchRequest" - }, - "ScoreBatchResponse": { - "type": "object", - "properties": { - "dataset_id": { - "type": "string", - "description": "(Optional) The identifier of the dataset that was scored" - }, - "results": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/ScoringResult" - }, - "description": "A map of scoring function name to ScoringResult" - } - }, - "additionalProperties": false, - "required": [ - "results" - ], - "title": "ScoreBatchResponse", - "description": "Response from batch scoring operations on datasets." - }, - "SetDefaultVersionRequest": { - "type": "object", - "properties": { - "version": { - "type": "integer", - "description": "The version to set as default." 
- } - }, - "additionalProperties": false, - "required": [ - "version" - ], - "title": "SetDefaultVersionRequest" - }, - "AlgorithmConfig": { - "oneOf": [ - { - "$ref": "#/components/schemas/LoraFinetuningConfig" - }, - { - "$ref": "#/components/schemas/QATFinetuningConfig" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "LoRA": "#/components/schemas/LoraFinetuningConfig", - "QAT": "#/components/schemas/QATFinetuningConfig" - } - } - }, - "LoraFinetuningConfig": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "LoRA", - "default": "LoRA", - "description": "Algorithm type identifier, always \"LoRA\"" - }, - "lora_attn_modules": { - "type": "array", - "items": { - "type": "string" - }, - "description": "List of attention module names to apply LoRA to" - }, - "apply_lora_to_mlp": { - "type": "boolean", - "description": "Whether to apply LoRA to MLP layers" - }, - "apply_lora_to_output": { - "type": "boolean", - "description": "Whether to apply LoRA to output projection layers" - }, - "rank": { - "type": "integer", - "description": "Rank of the LoRA adaptation (lower rank = fewer parameters)" - }, - "alpha": { - "type": "integer", - "description": "LoRA scaling parameter that controls adaptation strength" - }, - "use_dora": { - "type": "boolean", - "default": false, - "description": "(Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)" - }, - "quantize_base": { - "type": "boolean", - "default": false, - "description": "(Optional) Whether to quantize the base model weights" - } - }, - "additionalProperties": false, - "required": [ - "type", - "lora_attn_modules", - "apply_lora_to_mlp", - "apply_lora_to_output", - "rank", - "alpha" - ], - "title": "LoraFinetuningConfig", - "description": "Configuration for Low-Rank Adaptation (LoRA) fine-tuning." - }, - "QATFinetuningConfig": { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "QAT", - "default": "QAT", - "description": "Algorithm type identifier, always \"QAT\"" - }, - "quantizer_name": { - "type": "string", - "description": "Name of the quantization algorithm to use" - }, - "group_size": { - "type": "integer", - "description": "Size of groups for grouped quantization" - } - }, - "additionalProperties": false, - "required": [ - "type", - "quantizer_name", - "group_size" - ], - "title": "QATFinetuningConfig", - "description": "Configuration for Quantization-Aware Training (QAT) fine-tuning." - }, - "SupervisedFineTuneRequest": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string", - "description": "The UUID of the job to create." - }, - "training_config": { - "$ref": "#/components/schemas/TrainingConfig", - "description": "The training configuration." - }, - "hyperparam_search_config": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The hyperparam search configuration." - }, - "logger_config": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "The logger configuration." - }, - "model": { - "type": "string", - "description": "The model to fine-tune." 
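A sketch of a `LoraFinetuningConfig` with the `type` discriminator and required fields from the schema above. The attention-module names follow common Llama projection naming but are assumptions here, and the rank/alpha values are placeholders.

```python
lora_config = {
    "type": "LoRA",  # discriminator value selecting LoraFinetuningConfig
    "lora_attn_modules": ["q_proj", "k_proj", "v_proj", "o_proj"],  # assumed names
    "apply_lora_to_mlp": True,
    "apply_lora_to_output": False,
    "rank": 8,    # lower rank = fewer trainable parameters
    "alpha": 16,  # scaling parameter controlling adaptation strength
    "use_dora": False,       # optional: weight-decomposed LoRA
    "quantize_base": False,  # optional: quantize base model weights
}
```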
- }, - "checkpoint_dir": { - "type": "string", - "description": "The directory to save checkpoint(s) to." - }, - "algorithm_config": { - "$ref": "#/components/schemas/AlgorithmConfig", - "description": "The algorithm configuration." - } - }, - "additionalProperties": false, - "required": [ - "job_uuid", - "training_config", - "hyperparam_search_config", - "logger_config" - ], - "title": "SupervisedFineTuneRequest" - }, - "SyntheticDataGenerateRequest": { - "type": "object", - "properties": { - "dialogs": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Message" - }, - "description": "List of conversation messages to use as input for synthetic data generation" - }, - "filtering_function": { - "type": "string", - "enum": [ - "none", - "random", - "top_k", - "top_p", - "top_k_top_p", - "sigmoid" - ], - "description": "Type of filtering to apply to generated synthetic data samples" - }, - "model": { - "type": "string", - "description": "(Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint" - } - }, - "additionalProperties": false, - "required": [ - "dialogs", - "filtering_function" - ], - "title": "SyntheticDataGenerateRequest" - }, - "SyntheticDataGenerationResponse": { - "type": "object", - "properties": { - "synthetic_data": { - "type": "array", - "items": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - }, - "description": "List of generated synthetic data samples that passed the filtering criteria" - }, - "statistics": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - }, - "description": "(Optional) Statistical information about the generation process and filtering results" - } - }, - "additionalProperties": false, - "required": [ - "synthetic_data" - ], - "title": "SyntheticDataGenerationResponse", - "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold." - }, - "UpdatePromptRequest": { - "type": "object", - "properties": { - "prompt": { - "type": "string", - "description": "The updated prompt text content." - }, - "version": { - "type": "integer", - "description": "The current version of the prompt being updated." - }, - "variables": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Updated list of variable names that can be used in the prompt template." - }, - "set_as_default": { - "type": "boolean", - "description": "Set the new version as the default (default=True)." - } - }, - "additionalProperties": false, - "required": [ - "prompt", - "version", - "set_as_default" - ], - "title": "UpdatePromptRequest" - }, ->>>>>>> f7acfa0f (Add rerank API for NVIDIA Inference Provider) "VersionInfo": { "type": "object", "properties": { diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml index ec0409849..566ac7de9 100644 --- a/docs/static/llama-stack-spec.yaml +++ b/docs/static/llama-stack-spec.yaml @@ -3634,2130 +3634,6 @@ components: title: OpenAIUserMessageParam description: >- A message from the user in an OpenAI-compatible chat completion request. 
-<<<<<<< HEAD -======= - OpenAICompletionWithInputMessages: - type: object - properties: - id: - type: string - description: The ID of the chat completion - choices: - type: array - items: - $ref: '#/components/schemas/OpenAIChoice' - description: List of choices - object: - type: string - const: chat.completion - default: chat.completion - description: >- - The object type, which will be "chat.completion" - created: - type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created - model: - type: string - description: >- - The model that was used to generate the chat completion - input_messages: - type: array - items: - $ref: '#/components/schemas/OpenAIMessageParam' - additionalProperties: false - required: - - id - - choices - - object - - created - - model - - input_messages - title: OpenAICompletionWithInputMessages - DataSource: - oneOf: - - $ref: '#/components/schemas/URIDataSource' - - $ref: '#/components/schemas/RowsDataSource' - discriminator: - propertyName: type - mapping: - uri: '#/components/schemas/URIDataSource' - rows: '#/components/schemas/RowsDataSource' - Dataset: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - enum: - - model - - shield - - vector_db - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: dataset - default: dataset - description: >- - Type of resource, always 'dataset' for datasets - purpose: - type: string - enum: - - post-training/messages - - eval/question-answer - - eval/messages-answer - description: >- - Purpose of the dataset indicating its intended use - source: - $ref: '#/components/schemas/DataSource' - description: >- - Data source configuration for the dataset - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the dataset - additionalProperties: false - required: - - identifier - - provider_id - - type - - purpose - - source - - metadata - title: Dataset - description: >- - Dataset resource for storing and accessing training or evaluation data. - RowsDataSource: - type: object - properties: - type: - type: string - const: rows - default: rows - rows: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user", - "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, - world!"}]} ] - additionalProperties: false - required: - - type - - rows - title: RowsDataSource - description: A dataset stored in rows. - URIDataSource: - type: object - properties: - type: - type: string - const: uri - default: uri - uri: - type: string - description: >- - The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl" - - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}" - additionalProperties: false - required: - - type - - uri - title: URIDataSource - description: >- - A dataset that can be obtained from a URI. 
- Model: - type: object - properties: - identifier: - type: string - description: >- - Unique identifier for this resource in llama stack - provider_resource_id: - type: string - description: >- - Unique identifier for this resource in the provider - provider_id: - type: string - description: >- - ID of the provider that owns this resource - type: - type: string - enum: - - model - - shield - - vector_db - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: model - default: model - description: >- - The resource type, always 'model' for model resources - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Any additional metadata for this model - model_type: - $ref: '#/components/schemas/ModelType' - default: llm - description: >- - The type of model (LLM or embedding model) - additionalProperties: false - required: - - identifier - - provider_id - - type - - metadata - - model_type - title: Model - description: >- - A model resource representing an AI model registered in Llama Stack. - ModelType: - type: string - enum: - - llm - - embedding - - rerank - title: ModelType - description: >- - Enumeration of supported model types in Llama Stack. - AgentTurnInputType: - type: object - properties: - type: - type: string - const: agent_turn_input - default: agent_turn_input - description: >- - Discriminator type. Always "agent_turn_input" - additionalProperties: false - required: - - type - title: AgentTurnInputType - description: Parameter type for agent turn input. - ArrayType: - type: object - properties: - type: - type: string - const: array - default: array - description: Discriminator type. Always "array" - additionalProperties: false - required: - - type - title: ArrayType - description: Parameter type for array values. - BooleanType: - type: object - properties: - type: - type: string - const: boolean - default: boolean - description: Discriminator type. Always "boolean" - additionalProperties: false - required: - - type - title: BooleanType - description: Parameter type for boolean values. - ChatCompletionInputType: - type: object - properties: - type: - type: string - const: chat_completion_input - default: chat_completion_input - description: >- - Discriminator type. Always "chat_completion_input" - additionalProperties: false - required: - - type - title: ChatCompletionInputType - description: >- - Parameter type for chat completion input. - CompletionInputType: - type: object - properties: - type: - type: string - const: completion_input - default: completion_input - description: >- - Discriminator type. Always "completion_input" - additionalProperties: false - required: - - type - title: CompletionInputType - description: Parameter type for completion input. - JsonType: - type: object - properties: - type: - type: string - const: json - default: json - description: Discriminator type. Always "json" - additionalProperties: false - required: - - type - title: JsonType - description: Parameter type for JSON values. - NumberType: - type: object - properties: - type: - type: string - const: number - default: number - description: Discriminator type. Always "number" - additionalProperties: false - required: - - type - title: NumberType - description: Parameter type for numeric values. - ObjectType: - type: object - properties: - type: - type: string - const: object - default: object - description: Discriminator type. 
Always "object" - additionalProperties: false - required: - - type - title: ObjectType - description: Parameter type for object values. - ParamType: - oneOf: - - $ref: '#/components/schemas/StringType' - - $ref: '#/components/schemas/NumberType' - - $ref: '#/components/schemas/BooleanType' - - $ref: '#/components/schemas/ArrayType' - - $ref: '#/components/schemas/ObjectType' - - $ref: '#/components/schemas/JsonType' - - $ref: '#/components/schemas/UnionType' - - $ref: '#/components/schemas/ChatCompletionInputType' - - $ref: '#/components/schemas/CompletionInputType' - - $ref: '#/components/schemas/AgentTurnInputType' - discriminator: - propertyName: type - mapping: - string: '#/components/schemas/StringType' - number: '#/components/schemas/NumberType' - boolean: '#/components/schemas/BooleanType' - array: '#/components/schemas/ArrayType' - object: '#/components/schemas/ObjectType' - json: '#/components/schemas/JsonType' - union: '#/components/schemas/UnionType' - chat_completion_input: '#/components/schemas/ChatCompletionInputType' - completion_input: '#/components/schemas/CompletionInputType' - agent_turn_input: '#/components/schemas/AgentTurnInputType' - ScoringFn: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - enum: - - model - - shield - - vector_db - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: scoring_function - default: scoring_function - description: >- - The resource type, always scoring_function - description: - type: string - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - return_type: - $ref: '#/components/schemas/ParamType' - params: - $ref: '#/components/schemas/ScoringFnParams' - additionalProperties: false - required: - - identifier - - provider_id - - type - - metadata - - return_type - title: ScoringFn - description: >- - A scoring function resource for evaluating model outputs. - StringType: - type: object - properties: - type: - type: string - const: string - default: string - description: Discriminator type. Always "string" - additionalProperties: false - required: - - type - title: StringType - description: Parameter type for string values. - UnionType: - type: object - properties: - type: - type: string - const: union - default: union - description: Discriminator type. Always "union" - additionalProperties: false - required: - - type - title: UnionType - description: Parameter type for union values. - Shield: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - enum: - - model - - shield - - vector_db - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: shield - default: shield - description: The resource type, always shield - params: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Configuration parameters for the shield - additionalProperties: false - required: - - identifier - - provider_id - - type - title: Shield - description: >- - A safety shield resource that can be used to check content. 
- Span: - type: object - properties: - span_id: - type: string - description: Unique identifier for the span - trace_id: - type: string - description: >- - Unique identifier for the trace this span belongs to - parent_span_id: - type: string - description: >- - (Optional) Unique identifier for the parent span, if this is a child span - name: - type: string - description: >- - Human-readable name describing the operation this span represents - start_time: - type: string - format: date-time - description: Timestamp when the operation began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the operation finished, if completed - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value pairs containing additional metadata about the span - additionalProperties: false - required: - - span_id - - trace_id - - name - - start_time - title: Span - description: >- - A span representing a single operation within a trace. - GetSpanTreeRequest: - type: object - properties: - attributes_to_return: - type: array - items: - type: string - description: The attributes to return in the tree. - max_depth: - type: integer - description: The maximum depth of the tree. - additionalProperties: false - title: GetSpanTreeRequest - SpanStatus: - type: string - enum: - - ok - - error - title: SpanStatus - description: >- - The status of a span indicating whether it completed successfully or with - an error. - SpanWithStatus: - type: object - properties: - span_id: - type: string - description: Unique identifier for the span - trace_id: - type: string - description: >- - Unique identifier for the trace this span belongs to - parent_span_id: - type: string - description: >- - (Optional) Unique identifier for the parent span, if this is a child span - name: - type: string - description: >- - Human-readable name describing the operation this span represents - start_time: - type: string - format: date-time - description: Timestamp when the operation began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the operation finished, if completed - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Key-value pairs containing additional metadata about the span - status: - $ref: '#/components/schemas/SpanStatus' - description: >- - (Optional) The current status of the span - additionalProperties: false - required: - - span_id - - trace_id - - name - - start_time - title: SpanWithStatus - description: A span that includes status information. - QuerySpanTreeResponse: - type: object - properties: - data: - type: object - additionalProperties: - $ref: '#/components/schemas/SpanWithStatus' - description: >- - Dictionary mapping span IDs to spans with status information - additionalProperties: false - required: - - data - title: QuerySpanTreeResponse - description: >- - Response containing a tree structure of spans. 
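A sketch of a minimal `Span` as defined in the YAML schema above, plus a `GetSpanTreeRequest` limiting traversal depth. Timestamps are RFC 3339 strings because the schema declares `format: date-time`; the IDs, span name, and attribute key are hypothetical.

```python
# Span: span_id, trace_id, name, and start_time are required.
span = {
    "span_id": "span-001",
    "trace_id": "trace-abc",
    "name": "inference.chat_completion",      # hypothetical operation name
    "start_time": "2025-09-03T17:34:05Z",
    "end_time": "2025-09-03T17:34:06Z",       # optional: set once completed
    "attributes": {"model_id": "llama-3-8b"}, # optional metadata
}

# GetSpanTreeRequest: both fields are optional per the schema.
span_tree_request = {
    "attributes_to_return": ["model_id"],
    "max_depth": 2,
}
```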
- Tool: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - enum: - - model - - shield - - vector_db - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: tool - default: tool - description: Type of resource, always 'tool' - toolgroup_id: - type: string - description: >- - ID of the tool group this tool belongs to - description: - type: string - description: >- - Human-readable description of what the tool does - parameters: - type: array - items: - $ref: '#/components/schemas/ToolParameter' - description: List of parameters this tool accepts - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional metadata about the tool - additionalProperties: false - required: - - identifier - - provider_id - - type - - toolgroup_id - - description - - parameters - title: Tool - description: A tool that can be invoked by agents. - ToolGroup: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - enum: - - model - - shield - - vector_db - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: tool_group - default: tool_group - description: Type of resource, always 'tool_group' - mcp_endpoint: - $ref: '#/components/schemas/URL' - description: >- - (Optional) Model Context Protocol endpoint for remote tools - args: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional arguments for the tool group - additionalProperties: false - required: - - identifier - - provider_id - - type - title: ToolGroup - description: >- - A group of related tools managed together. - Trace: - type: object - properties: - trace_id: - type: string - description: Unique identifier for the trace - root_span_id: - type: string - description: >- - Unique identifier for the root span that started this trace - start_time: - type: string - format: date-time - description: Timestamp when the trace began - end_time: - type: string - format: date-time - description: >- - (Optional) Timestamp when the trace finished, if completed - additionalProperties: false - required: - - trace_id - - root_span_id - - start_time - title: Trace - description: >- - A trace representing the complete execution path of a request across multiple - operations. - Checkpoint: - type: object - properties: - identifier: - type: string - description: Unique identifier for the checkpoint - created_at: - type: string - format: date-time - description: >- - Timestamp when the checkpoint was created - epoch: - type: integer - description: >- - Training epoch when the checkpoint was saved - post_training_job_id: - type: string - description: >- - Identifier of the training job that created this checkpoint - path: - type: string - description: >- - File system path where the checkpoint is stored - training_metrics: - $ref: '#/components/schemas/PostTrainingMetric' - description: >- - (Optional) Training metrics associated with this checkpoint - additionalProperties: false - required: - - identifier - - created_at - - epoch - - post_training_job_id - - path - title: Checkpoint - description: Checkpoint created during training runs. 
- PostTrainingJobArtifactsResponse: - type: object - properties: - job_uuid: - type: string - description: Unique identifier for the training job - checkpoints: - type: array - items: - $ref: '#/components/schemas/Checkpoint' - description: >- - List of model checkpoints created during training - additionalProperties: false - required: - - job_uuid - - checkpoints - title: PostTrainingJobArtifactsResponse - description: Artifacts of a finetuning job. - PostTrainingMetric: - type: object - properties: - epoch: - type: integer - description: Training epoch number - train_loss: - type: number - description: Loss value on the training dataset - validation_loss: - type: number - description: Loss value on the validation dataset - perplexity: - type: number - description: >- - Perplexity metric indicating model confidence - additionalProperties: false - required: - - epoch - - train_loss - - validation_loss - - perplexity - title: PostTrainingMetric - description: >- - Training metrics captured during post-training jobs. - PostTrainingJobStatusResponse: - type: object - properties: - job_uuid: - type: string - description: Unique identifier for the training job - status: - type: string - enum: - - completed - - in_progress - - failed - - scheduled - - cancelled - description: Current status of the training job - scheduled_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job was scheduled - started_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job execution began - completed_at: - type: string - format: date-time - description: >- - (Optional) Timestamp when the job finished, if completed - resources_allocated: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Information about computational resources allocated to the - job - checkpoints: - type: array - items: - $ref: '#/components/schemas/Checkpoint' - description: >- - List of model checkpoints created during training - additionalProperties: false - required: - - job_uuid - - status - - checkpoints - title: PostTrainingJobStatusResponse - description: Status of a finetuning job. - ListPostTrainingJobsResponse: - type: object - properties: - data: - type: array - items: - type: object - properties: - job_uuid: - type: string - additionalProperties: false - required: - - job_uuid - title: PostTrainingJob - additionalProperties: false - required: - - data - title: ListPostTrainingJobsResponse - VectorDB: - type: object - properties: - identifier: - type: string - provider_resource_id: - type: string - provider_id: - type: string - type: - type: string - enum: - - model - - shield - - vector_db - - dataset - - scoring_function - - benchmark - - tool - - tool_group - - prompt - const: vector_db - default: vector_db - description: >- - Type of resource, always 'vector_db' for vector databases - embedding_model: - type: string - description: >- - Name of the embedding model to use for vector generation - embedding_dimension: - type: integer - description: Dimension of the embedding vectors - vector_db_name: - type: string - additionalProperties: false - required: - - identifier - - provider_id - - type - - embedding_model - - embedding_dimension - title: VectorDB - description: >- - Vector database resource for storing and querying vector embeddings. 
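For reference, the VectorDB resource above is created through the register call whose body is the RegisterVectorDbRequest schema later in this file. A minimal sketch, assuming a stack at localhost:8321 and a /v1/vector-dbs route (both assumptions):

```python
import requests

BASE_URL = "http://localhost:8321"  # hypothetical deployment

# Body follows RegisterVectorDbRequest: vector_db_id and embedding_model are
# required; the remaining fields are optional.
body = {
    "vector_db_id": "my-docs",
    "embedding_model": "all-MiniLM-L6-v2",
    "embedding_dimension": 384,
    "provider_id": "faiss",
}

resp = requests.post(f"{BASE_URL}/v1/vector-dbs", json=body)  # assumed route
resp.raise_for_status()
print(resp.json())  # VectorDB resource with type "vector_db"
```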
- HealthInfo: - type: object - properties: - status: - type: string - enum: - - OK - - Error - - Not Implemented - description: Current health status of the service - additionalProperties: false - required: - - status - title: HealthInfo - description: >- - Health status information for the service. - RAGDocument: - type: object - properties: - document_id: - type: string - description: The unique identifier for the document. - content: - oneOf: - - type: string - - $ref: '#/components/schemas/InterleavedContentItem' - - type: array - items: - $ref: '#/components/schemas/InterleavedContentItem' - - $ref: '#/components/schemas/URL' - description: The content of the document. - mime_type: - type: string - description: The MIME type of the document. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Additional metadata for the document. - additionalProperties: false - required: - - document_id - - content - - metadata - title: RAGDocument - description: >- - A document to be used for document ingestion in the RAG Tool. - InsertRequest: - type: object - properties: - documents: - type: array - items: - $ref: '#/components/schemas/RAGDocument' - description: >- - List of documents to index in the RAG system - vector_db_id: - type: string - description: >- - ID of the vector database to store the document embeddings - chunk_size_in_tokens: - type: integer - description: >- - (Optional) Size in tokens for document chunking during indexing - additionalProperties: false - required: - - documents - - vector_db_id - - chunk_size_in_tokens - title: InsertRequest - Chunk: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The content of the chunk, which can be interleaved text, images, or other - types. - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Metadata associated with the chunk that will be used in the model context - during inference. - embedding: - type: array - items: - type: number - description: >- - Optional embedding for the chunk. If not provided, it will be computed - later. - stored_chunk_id: - type: string - description: >- - The chunk ID that is stored in the vector database. Used for backend functionality. - chunk_metadata: - $ref: '#/components/schemas/ChunkMetadata' - description: >- - Metadata for the chunk that will NOT be used in the context during inference. - The `chunk_metadata` is required backend functionality. - additionalProperties: false - required: - - content - - metadata - title: Chunk - description: >- - A chunk of content that can be inserted into a vector database. - ChunkMetadata: - type: object - properties: - chunk_id: - type: string - description: >- - The ID of the chunk. If not set, it will be generated based on the document - ID and content. - document_id: - type: string - description: >- - The ID of the document this chunk belongs to. - source: - type: string - description: >- - The source of the content, such as a URL, file path, or other identifier. - created_timestamp: - type: integer - description: >- - An optional timestamp indicating when the chunk was created. - updated_timestamp: - type: integer - description: >- - An optional timestamp indicating when the chunk was last updated. 
- chunk_window: - type: string - description: >- - The window of the chunk, which can be used to group related chunks together. - chunk_tokenizer: - type: string - description: >- - The tokenizer used to create the chunk. Default is Tiktoken. - chunk_embedding_model: - type: string - description: >- - The embedding model used to create the chunk's embedding. - chunk_embedding_dimension: - type: integer - description: >- - The dimension of the embedding vector for the chunk. - content_token_count: - type: integer - description: >- - The number of tokens in the content of the chunk. - metadata_token_count: - type: integer - description: >- - The number of tokens in the metadata of the chunk. - additionalProperties: false - title: ChunkMetadata - description: >- - `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional - information about the chunk that will not be used in the context during - inference, but is required for backend functionality. The `ChunkMetadata` is - set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not - expected to change after. Use `Chunk.metadata` for metadata that will - be used in the context during inference. - InsertChunksRequest: - type: object - properties: - vector_db_id: - type: string - description: >- - The identifier of the vector database to insert the chunks into. - chunks: - type: array - items: - $ref: '#/components/schemas/Chunk' - description: >- - The chunks to insert. Each `Chunk` should contain content which can be - interleaved text, images, or other types. `metadata`: `dict[str, Any]` - and `embedding`: `List[float]` are optional. If `metadata` is provided, - you configure how Llama Stack formats the chunk during generation. If - `embedding` is not provided, it will be computed later. - ttl_seconds: - type: integer - description: The time to live of the chunks. - additionalProperties: false - required: - - vector_db_id - - chunks - title: InsertChunksRequest - ProviderInfo: - type: object - properties: - api: - type: string - description: The API name this provider implements - provider_id: - type: string - description: Unique identifier for the provider - provider_type: - type: string - description: The type of provider implementation - config: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Configuration parameters for the provider - health: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: Current health status of the provider - additionalProperties: false - required: - - api - - provider_id - - provider_type - - config - - health - title: ProviderInfo - description: >- - Information about a registered provider including its configuration and health - status. - InvokeToolRequest: - type: object - properties: - tool_name: - type: string - description: The name of the tool to invoke. - kwargs: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - A dictionary of arguments to pass to the tool. 
- additionalProperties: false - required: - - tool_name - - kwargs - title: InvokeToolRequest - ToolInvocationResult: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The output content from the tool execution - error_message: - type: string - description: >- - (Optional) Error message if the tool execution failed - error_code: - type: integer - description: >- - (Optional) Numeric error code if the tool execution failed - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - (Optional) Additional metadata about the tool execution - additionalProperties: false - title: ToolInvocationResult - description: Result of a tool invocation. - PaginatedResponse: - type: object - properties: - data: - type: array - items: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The list of items for the current page - has_more: - type: boolean - description: >- - Whether there are more items available after this set - url: - type: string - description: The URL for accessing this list - additionalProperties: false - required: - - data - - has_more - title: PaginatedResponse - description: >- - A generic paginated response that follows a simple format. - Job: - type: object - properties: - job_id: - type: string - description: Unique identifier for the job - status: - type: string - enum: - - completed - - in_progress - - failed - - scheduled - - cancelled - description: Current execution status of the job - additionalProperties: false - required: - - job_id - - status - title: Job - description: >- - A job execution instance with status tracking. - ListBenchmarksResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Benchmark' - additionalProperties: false - required: - - data - title: ListBenchmarksResponse - Order: - type: string - enum: - - asc - - desc - title: Order - description: Sort order for paginated responses. 
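The Chunk/ChunkMetadata and InsertChunksRequest schemas above translate into a fairly small request. A sketch, assuming the same local stack and a /v1/vector-io/insert route (assumed):

```python
import requests

BASE_URL = "http://localhost:8321"  # hypothetical deployment

chunk = {
    # InterleavedContent: a plain string is the simplest form.
    "content": "Llama Stack now exposes a rerank API.",
    # `metadata` is required and is surfaced in the model context at inference.
    "metadata": {"document_id": "doc-1", "source": "release-notes"},
    # `embedding` is omitted: per the Chunk schema it is computed later.
}

resp = requests.post(
    f"{BASE_URL}/v1/vector-io/insert",  # assumed route for InsertChunksRequest
    json={"vector_db_id": "my-docs", "chunks": [chunk], "ttl_seconds": 3600},
)
resp.raise_for_status()
```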
- ListOpenAIChatCompletionResponse: - type: object - properties: - data: - type: array - items: - type: object - properties: - id: - type: string - description: The ID of the chat completion - choices: - type: array - items: - $ref: '#/components/schemas/OpenAIChoice' - description: List of choices - object: - type: string - const: chat.completion - default: chat.completion - description: >- - The object type, which will be "chat.completion" - created: - type: integer - description: >- - The Unix timestamp in seconds when the chat completion was created - model: - type: string - description: >- - The model that was used to generate the chat completion - input_messages: - type: array - items: - $ref: '#/components/schemas/OpenAIMessageParam' - additionalProperties: false - required: - - id - - choices - - object - - created - - model - - input_messages - title: OpenAICompletionWithInputMessages - description: >- - List of chat completion objects with their input messages - has_more: - type: boolean - description: >- - Whether there are more completions available beyond this list - first_id: - type: string - description: ID of the first completion in this list - last_id: - type: string - description: ID of the last completion in this list - object: - type: string - const: list - default: list - description: >- - Must be "list" to identify this as a list response - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIChatCompletionResponse - description: >- - Response from listing OpenAI-compatible chat completions. - ListDatasetsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Dataset' - description: List of datasets - additionalProperties: false - required: - - data - title: ListDatasetsResponse - description: Response from listing datasets. - ListModelsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Model' - additionalProperties: false - required: - - data - title: ListModelsResponse - ListOpenAIResponseInputItem: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseInput' - description: List of input items - object: - type: string - const: list - default: list - description: Object type identifier, always "list" - additionalProperties: false - required: - - data - - object - title: ListOpenAIResponseInputItem - description: >- - List container for OpenAI response input items. - ListOpenAIResponseObject: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseObjectWithInput' - description: >- - List of response objects with their input context - has_more: - type: boolean - description: >- - Whether there are more results available beyond this page - first_id: - type: string - description: >- - Identifier of the first item in this page - last_id: - type: string - description: Identifier of the last item in this page - object: - type: string - const: list - default: list - description: Object type identifier, always "list" - additionalProperties: false - required: - - data - - has_more - - first_id - - last_id - - object - title: ListOpenAIResponseObject - description: >- - Paginated list of OpenAI response objects with navigation metadata. 
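ListOpenAIChatCompletionResponse is cursor-paginated via has_more/first_id/last_id. A sketch of walking all pages; the listing route and the `after` cursor parameter are assumptions, not documented by this schema:

```python
import requests

BASE_URL = "http://localhost:8321"  # hypothetical deployment
after = None

while True:
    params = {"after": after} if after else {}
    page = requests.get(f"{BASE_URL}/v1/chat/completions", params=params).json()
    for completion in page["data"]:
        print(completion["id"], completion["model"])
    if not page["has_more"]:
        break
    after = page["last_id"]  # resume after the last item of this page
```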
- OpenAIResponseObjectWithInput: - type: object - properties: - created_at: - type: integer - description: >- - Unix timestamp when the response was created - error: - $ref: '#/components/schemas/OpenAIResponseError' - description: >- - (Optional) Error details if the response generation failed - id: - type: string - description: Unique identifier for this response - model: - type: string - description: Model identifier used for generation - object: - type: string - const: response - default: response - description: >- - Object type identifier, always "response" - output: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseOutput' - description: >- - List of generated output items (messages, tool calls, etc.) - parallel_tool_calls: - type: boolean - default: false - description: >- - Whether tool calls can be executed in parallel - previous_response_id: - type: string - description: >- - (Optional) ID of the previous response in a conversation - status: - type: string - description: >- - Current status of the response generation - temperature: - type: number - description: >- - (Optional) Sampling temperature used for generation - text: - $ref: '#/components/schemas/OpenAIResponseText' - description: >- - Text formatting configuration for the response - top_p: - type: number - description: >- - (Optional) Nucleus sampling parameter used for generation - truncation: - type: string - description: >- - (Optional) Truncation strategy applied to the response - user: - type: string - description: >- - (Optional) User identifier associated with the request - input: - type: array - items: - $ref: '#/components/schemas/OpenAIResponseInput' - description: >- - List of input items that led to this response - additionalProperties: false - required: - - created_at - - id - - model - - object - - output - - parallel_tool_calls - - status - - text - - input - title: OpenAIResponseObjectWithInput - description: >- - OpenAI response object extended with input context information. - ListPromptsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Prompt' - additionalProperties: false - required: - - data - title: ListPromptsResponse - description: Response model to list prompts. - ListProvidersResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/ProviderInfo' - description: List of provider information objects - additionalProperties: false - required: - - data - title: ListProvidersResponse - description: >- - Response containing a list of all available providers. - RouteInfo: - type: object - properties: - route: - type: string - description: The API endpoint path - method: - type: string - description: HTTP method for the route - provider_types: - type: array - items: - type: string - description: >- - List of provider types that implement this route - additionalProperties: false - required: - - route - - method - - provider_types - title: RouteInfo - description: >- - Information about an API route including its path, method, and implementing - providers. - ListRoutesResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/RouteInfo' - description: >- - List of available route information objects - additionalProperties: false - required: - - data - title: ListRoutesResponse - description: >- - Response containing a list of all available API routes. 
- ListToolDefsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/ToolDef' - description: List of tool definitions - additionalProperties: false - required: - - data - title: ListToolDefsResponse - description: >- - Response containing a list of tool definitions. - ListScoringFunctionsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/ScoringFn' - additionalProperties: false - required: - - data - title: ListScoringFunctionsResponse - ListShieldsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Shield' - additionalProperties: false - required: - - data - title: ListShieldsResponse - ListToolGroupsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/ToolGroup' - description: List of tool groups - additionalProperties: false - required: - - data - title: ListToolGroupsResponse - description: >- - Response containing a list of tool groups. - ListToolsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/Tool' - description: List of tools - additionalProperties: false - required: - - data - title: ListToolsResponse - description: Response containing a list of tools. - ListVectorDBsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/VectorDB' - description: List of vector databases - additionalProperties: false - required: - - data - title: ListVectorDBsResponse - description: Response from listing vector databases. - Event: - oneOf: - - $ref: '#/components/schemas/UnstructuredLogEvent' - - $ref: '#/components/schemas/MetricEvent' - - $ref: '#/components/schemas/StructuredLogEvent' - discriminator: - propertyName: type - mapping: - unstructured_log: '#/components/schemas/UnstructuredLogEvent' - metric: '#/components/schemas/MetricEvent' - structured_log: '#/components/schemas/StructuredLogEvent' - EventType: - type: string - enum: - - unstructured_log - - structured_log - - metric - title: EventType - description: >- - The type of telemetry event being logged. - LogSeverity: - type: string - enum: - - verbose - - debug - - info - - warn - - error - - critical - title: LogSeverity - description: The severity level of a log message. 
- MetricEvent: - type: object - properties: - trace_id: - type: string - description: >- - Unique identifier for the trace this event belongs to - span_id: - type: string - description: >- - Unique identifier for the span this event belongs to - timestamp: - type: string - format: date-time - description: Timestamp when the event occurred - attributes: - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - description: >- - (Optional) Key-value pairs containing additional metadata about the event - type: - $ref: '#/components/schemas/EventType' - const: metric - default: metric - description: Event type identifier set to METRIC - metric: - type: string - description: The name of the metric being measured - value: - oneOf: - - type: integer - - type: number - description: >- - The numeric value of the metric measurement - unit: - type: string - description: >- - The unit of measurement for the metric value - additionalProperties: false - required: - - trace_id - - span_id - - timestamp - - type - - metric - - value - - unit - title: MetricEvent - description: >- - A metric event containing a measured value. - SpanEndPayload: - type: object - properties: - type: - $ref: '#/components/schemas/StructuredLogType' - const: span_end - default: span_end - description: Payload type identifier set to SPAN_END - status: - $ref: '#/components/schemas/SpanStatus' - description: >- - The final status of the span indicating success or failure - additionalProperties: false - required: - - type - - status - title: SpanEndPayload - description: Payload for a span end event. - SpanStartPayload: - type: object - properties: - type: - $ref: '#/components/schemas/StructuredLogType' - const: span_start - default: span_start - description: >- - Payload type identifier set to SPAN_START - name: - type: string - description: >- - Human-readable name describing the operation this span represents - parent_span_id: - type: string - description: >- - (Optional) Unique identifier for the parent span, if this is a child span - additionalProperties: false - required: - - type - - name - title: SpanStartPayload - description: Payload for a span start event. - StructuredLogEvent: - type: object - properties: - trace_id: - type: string - description: >- - Unique identifier for the trace this event belongs to - span_id: - type: string - description: >- - Unique identifier for the span this event belongs to - timestamp: - type: string - format: date-time - description: Timestamp when the event occurred - attributes: - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - description: >- - (Optional) Key-value pairs containing additional metadata about the event - type: - $ref: '#/components/schemas/EventType' - const: structured_log - default: structured_log - description: >- - Event type identifier set to STRUCTURED_LOG - payload: - $ref: '#/components/schemas/StructuredLogPayload' - description: >- - The structured payload data for the log event - additionalProperties: false - required: - - trace_id - - span_id - - timestamp - - type - - payload - title: StructuredLogEvent - description: >- - A structured log event containing typed payload data. 
- StructuredLogPayload: - oneOf: - - $ref: '#/components/schemas/SpanStartPayload' - - $ref: '#/components/schemas/SpanEndPayload' - discriminator: - propertyName: type - mapping: - span_start: '#/components/schemas/SpanStartPayload' - span_end: '#/components/schemas/SpanEndPayload' - StructuredLogType: - type: string - enum: - - span_start - - span_end - title: StructuredLogType - description: >- - The type of structured log event payload. - UnstructuredLogEvent: - type: object - properties: - trace_id: - type: string - description: >- - Unique identifier for the trace this event belongs to - span_id: - type: string - description: >- - Unique identifier for the span this event belongs to - timestamp: - type: string - format: date-time - description: Timestamp when the event occurred - attributes: - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - description: >- - (Optional) Key-value pairs containing additional metadata about the event - type: - $ref: '#/components/schemas/EventType' - const: unstructured_log - default: unstructured_log - description: >- - Event type identifier set to UNSTRUCTURED_LOG - message: - type: string - description: The log message text - severity: - $ref: '#/components/schemas/LogSeverity' - description: The severity level of the log message - additionalProperties: false - required: - - trace_id - - span_id - - timestamp - - type - - message - - severity - title: UnstructuredLogEvent - description: >- - An unstructured log event containing a simple text message. - LogEventRequest: - type: object - properties: - event: - $ref: '#/components/schemas/Event' - description: The event to log. - ttl_seconds: - type: integer - description: The time to live of the event. - additionalProperties: false - required: - - event - - ttl_seconds - title: LogEventRequest - VectorStoreChunkingStrategy: - oneOf: - - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' - - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' - discriminator: - propertyName: type - mapping: - auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' - static: '#/components/schemas/VectorStoreChunkingStrategyStatic' - VectorStoreChunkingStrategyAuto: - type: object - properties: - type: - type: string - const: auto - default: auto - description: >- - Strategy type, always "auto" for automatic chunking - additionalProperties: false - required: - - type - title: VectorStoreChunkingStrategyAuto - description: >- - Automatic chunking strategy for vector store files. - VectorStoreChunkingStrategyStatic: - type: object - properties: - type: - type: string - const: static - default: static - description: >- - Strategy type, always "static" for static chunking - static: - $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' - description: >- - Configuration parameters for the static chunking strategy - additionalProperties: false - required: - - type - - static - title: VectorStoreChunkingStrategyStatic - description: >- - Static chunking strategy with configurable parameters. 
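The Event union above is the payload of LogEventRequest. A sketch of logging an UnstructuredLogEvent, assuming a /v1/telemetry/events route and ISO-8601 strings for the date-time fields:

```python
import requests
from datetime import datetime, timezone

BASE_URL = "http://localhost:8321"  # hypothetical deployment

event = {
    "type": "unstructured_log",  # discriminator value from the Event union
    "trace_id": "trace-123",
    "span_id": "span-456",
    "timestamp": datetime.now(timezone.utc).isoformat(),
    "message": "rerank request received",
    "severity": "info",
}

resp = requests.post(
    f"{BASE_URL}/v1/telemetry/events",  # assumed route for LogEventRequest
    json={"event": event, "ttl_seconds": 86400},
)
resp.raise_for_status()
```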
- VectorStoreChunkingStrategyStaticConfig: - type: object - properties: - chunk_overlap_tokens: - type: integer - default: 400 - description: >- - Number of tokens to overlap between adjacent chunks - max_chunk_size_tokens: - type: integer - default: 800 - description: >- - Maximum number of tokens per chunk, must be between 100 and 4096 - additionalProperties: false - required: - - chunk_overlap_tokens - - max_chunk_size_tokens - title: VectorStoreChunkingStrategyStaticConfig - description: >- - Configuration for static chunking strategy. - OpenaiAttachFileToVectorStoreRequest: - type: object - properties: - file_id: - type: string - description: >- - The ID of the file to attach to the vector store. - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - The key-value attributes stored with the file, which can be used for filtering. - chunking_strategy: - $ref: '#/components/schemas/VectorStoreChunkingStrategy' - description: >- - The chunking strategy to use for the file. - additionalProperties: false - required: - - file_id - title: OpenaiAttachFileToVectorStoreRequest - VectorStoreFileLastError: - type: object - properties: - code: - oneOf: - - type: string - const: server_error - - type: string - const: rate_limit_exceeded - description: >- - Error code indicating the type of failure - message: - type: string - description: >- - Human-readable error message describing the failure - additionalProperties: false - required: - - code - - message - title: VectorStoreFileLastError - description: >- - Error information for failed vector store file processing. - VectorStoreFileObject: - type: object - properties: - id: - type: string - description: Unique identifier for the file - object: - type: string - default: vector_store.file - description: >- - Object type identifier, always "vector_store.file" - attributes: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Key-value attributes associated with the file - chunking_strategy: - $ref: '#/components/schemas/VectorStoreChunkingStrategy' - description: >- - Strategy used for splitting the file into chunks - created_at: - type: integer - description: >- - Timestamp when the file was added to the vector store - last_error: - $ref: '#/components/schemas/VectorStoreFileLastError' - description: >- - (Optional) Error information if file processing failed - status: - $ref: '#/components/schemas/VectorStoreFileStatus' - description: Current processing status of the file - usage_bytes: - type: integer - default: 0 - description: Storage space used by this file in bytes - vector_store_id: - type: string - description: >- - ID of the vector store containing this file - additionalProperties: false - required: - - id - - object - - attributes - - chunking_strategy - - created_at - - status - - usage_bytes - - vector_store_id - title: VectorStoreFileObject - description: OpenAI Vector Store File object. 
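To make the chunking options concrete, here is a sketch of an OpenaiAttachFileToVectorStoreRequest body that opts into the static strategy (the file ID and attributes are hypothetical):

```python
# Sketch of an OpenaiAttachFileToVectorStoreRequest body using the static
# chunking strategy, with the defaults from the schema above.
body = {
    "file_id": "file-abc123",
    "attributes": {"team": "search"},  # arbitrary key-value filter attributes
    "chunking_strategy": {
        "type": "static",
        "static": {
            "chunk_overlap_tokens": 400,   # schema default
            "max_chunk_size_tokens": 800,  # schema default; must be 100-4096
        },
    },
}
```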
-    VectorStoreFileStatus:
-      oneOf:
-        - type: string
-          const: completed
-        - type: string
-          const: in_progress
-        - type: string
-          const: cancelled
-        - type: string
-          const: failed
->>>>>>> f7acfa0f (Add rerank API for NVIDIA Inference Provider)
     OpenAIJSONSchema:
       type: object
       properties:
@@ -7282,6 +5158,7 @@ components:
       enum:
         - llm
         - embedding
+        - rerank
       title: ModelType
       description: >-
         Enumeration of supported model types in Llama Stack.
@@ -11706,1606 +9583,6 @@ components:
       title: VectorStoreSearchResponsePage
       description: >-
         Paginated response from searching a vector store.
-<<<<<<< HEAD
-=======
-    OpenaiUpdateVectorStoreRequest:
-      type: object
-      properties:
-        name:
-          type: string
-          description: The name of the vector store.
-        expires_after:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            The expiration policy for a vector store.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            Set of 16 key-value pairs that can be attached to an object.
-      additionalProperties: false
-      title: OpenaiUpdateVectorStoreRequest
-    OpenaiUpdateVectorStoreFileRequest:
-      type: object
-      properties:
-        attributes:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            The updated key-value attributes to store with the file.
-      additionalProperties: false
-      required:
-        - attributes
-      title: OpenaiUpdateVectorStoreFileRequest
-    DPOAlignmentConfig:
-      type: object
-      properties:
-        beta:
-          type: number
-          description: Temperature parameter for the DPO loss
-        loss_type:
-          $ref: '#/components/schemas/DPOLossType'
-          default: sigmoid
-          description: The type of loss function to use for DPO
-      additionalProperties: false
-      required:
-        - beta
-        - loss_type
-      title: DPOAlignmentConfig
-      description: >-
-        Configuration for Direct Preference Optimization (DPO) alignment.
-    DPOLossType:
-      type: string
-      enum:
-        - sigmoid
-        - hinge
-        - ipo
-        - kto_pair
-      title: DPOLossType
-    DataConfig:
-      type: object
-      properties:
-        dataset_id:
-          type: string
-          description: >-
-            Unique identifier for the training dataset
-        batch_size:
-          type: integer
-          description: Number of samples per training batch
-        shuffle:
-          type: boolean
-          description: >-
-            Whether to shuffle the dataset during training
-        data_format:
-          $ref: '#/components/schemas/DatasetFormat'
-          description: >-
-            Format of the dataset (instruct or dialog)
-        validation_dataset_id:
-          type: string
-          description: >-
-            (Optional) Unique identifier for the validation dataset
-        packed:
-          type: boolean
-          default: false
-          description: >-
-            (Optional) Whether to pack multiple samples into a single sequence for
-            efficiency
-        train_on_input:
-          type: boolean
-          default: false
-          description: >-
-            (Optional) Whether to compute loss on input tokens as well as output tokens
-      additionalProperties: false
-      required:
-        - dataset_id
-        - batch_size
-        - shuffle
-        - data_format
-      title: DataConfig
-      description: >-
-        Configuration for training data and data loading.
-    DatasetFormat:
-      type: string
-      enum:
-        - instruct
-        - dialog
-      title: DatasetFormat
-      description: Format of the training dataset.
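The `rerank` enum value added above is what lets a rerank model be registered. A sketch using RegisterModelRequest (defined later in this file); the model identifier is an example NVIDIA reranker and the route is assumed:

```python
import requests

BASE_URL = "http://localhost:8321"  # hypothetical deployment

body = {
    "model_id": "nvidia/llama-3.2-nv-rerankqa-1b-v2",  # example model identifier
    "provider_id": "nvidia",
    "model_type": "rerank",  # newly added ModelType value
}

resp = requests.post(f"{BASE_URL}/v1/models", json=body)  # assumed route
resp.raise_for_status()
```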
- EfficiencyConfig: - type: object - properties: - enable_activation_checkpointing: - type: boolean - default: false - description: >- - (Optional) Whether to use activation checkpointing to reduce memory usage - enable_activation_offloading: - type: boolean - default: false - description: >- - (Optional) Whether to offload activations to CPU to save GPU memory - memory_efficient_fsdp_wrap: - type: boolean - default: false - description: >- - (Optional) Whether to use memory-efficient FSDP wrapping - fsdp_cpu_offload: - type: boolean - default: false - description: >- - (Optional) Whether to offload FSDP parameters to CPU - additionalProperties: false - title: EfficiencyConfig - description: >- - Configuration for memory and compute efficiency optimizations. - OptimizerConfig: - type: object - properties: - optimizer_type: - $ref: '#/components/schemas/OptimizerType' - description: >- - Type of optimizer to use (adam, adamw, or sgd) - lr: - type: number - description: Learning rate for the optimizer - weight_decay: - type: number - description: >- - Weight decay coefficient for regularization - num_warmup_steps: - type: integer - description: Number of steps for learning rate warmup - additionalProperties: false - required: - - optimizer_type - - lr - - weight_decay - - num_warmup_steps - title: OptimizerConfig - description: >- - Configuration parameters for the optimization algorithm. - OptimizerType: - type: string - enum: - - adam - - adamw - - sgd - title: OptimizerType - description: >- - Available optimizer algorithms for training. - TrainingConfig: - type: object - properties: - n_epochs: - type: integer - description: Number of training epochs to run - max_steps_per_epoch: - type: integer - default: 1 - description: Maximum number of steps to run per epoch - gradient_accumulation_steps: - type: integer - default: 1 - description: >- - Number of steps to accumulate gradients before updating - max_validation_steps: - type: integer - default: 1 - description: >- - (Optional) Maximum number of validation steps per epoch - data_config: - $ref: '#/components/schemas/DataConfig' - description: >- - (Optional) Configuration for data loading and formatting - optimizer_config: - $ref: '#/components/schemas/OptimizerConfig' - description: >- - (Optional) Configuration for the optimization algorithm - efficiency_config: - $ref: '#/components/schemas/EfficiencyConfig' - description: >- - (Optional) Configuration for memory and compute optimizations - dtype: - type: string - default: bf16 - description: >- - (Optional) Data type for model parameters (bf16, fp16, fp32) - additionalProperties: false - required: - - n_epochs - - max_steps_per_epoch - - gradient_accumulation_steps - title: TrainingConfig - description: >- - Comprehensive configuration for the training process. - PreferenceOptimizeRequest: - type: object - properties: - job_uuid: - type: string - description: The UUID of the job to create. - finetuned_model: - type: string - description: The model to fine-tune. - algorithm_config: - $ref: '#/components/schemas/DPOAlignmentConfig' - description: The algorithm configuration. - training_config: - $ref: '#/components/schemas/TrainingConfig' - description: The training configuration. - hyperparam_search_config: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The hyperparam search configuration. 
- logger_config: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The logger configuration. - additionalProperties: false - required: - - job_uuid - - finetuned_model - - algorithm_config - - training_config - - hyperparam_search_config - - logger_config - title: PreferenceOptimizeRequest - PostTrainingJob: - type: object - properties: - job_uuid: - type: string - additionalProperties: false - required: - - job_uuid - title: PostTrainingJob - DefaultRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: default - default: default - description: >- - Type of query generator, always 'default' - separator: - type: string - default: ' ' - description: >- - String separator used to join query terms - additionalProperties: false - required: - - type - - separator - title: DefaultRAGQueryGeneratorConfig - description: >- - Configuration for the default RAG query generator. - LLMRAGQueryGeneratorConfig: - type: object - properties: - type: - type: string - const: llm - default: llm - description: Type of query generator, always 'llm' - model: - type: string - description: >- - Name of the language model to use for query generation - template: - type: string - description: >- - Template string for formatting the query generation prompt - additionalProperties: false - required: - - type - - model - - template - title: LLMRAGQueryGeneratorConfig - description: >- - Configuration for the LLM-based RAG query generator. - RAGQueryConfig: - type: object - properties: - query_generator_config: - $ref: '#/components/schemas/RAGQueryGeneratorConfig' - description: Configuration for the query generator. - max_tokens_in_context: - type: integer - default: 4096 - description: Maximum number of tokens in the context. - max_chunks: - type: integer - default: 5 - description: Maximum number of chunks to retrieve. - chunk_template: - type: string - default: > - Result {index} - - Content: {chunk.content} - - Metadata: {metadata} - description: >- - Template for formatting each retrieved chunk in the context. Available - placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk - content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: - {chunk.content}\nMetadata: {metadata}\n" - mode: - $ref: '#/components/schemas/RAGSearchMode' - default: vector - description: >- - Search mode for retrieval—either "vector", "keyword", or "hybrid". Default - "vector". - ranker: - $ref: '#/components/schemas/Ranker' - description: >- - Configuration for the ranker to use in hybrid search. Defaults to RRF - ranker. - additionalProperties: false - required: - - query_generator_config - - max_tokens_in_context - - max_chunks - - chunk_template - title: RAGQueryConfig - description: >- - Configuration for the RAG query generation. 
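The post-training schemas above (DPOAlignmentConfig, DataConfig, OptimizerConfig, TrainingConfig) compose into a PreferenceOptimizeRequest. A minimal sketch, with hypothetical dataset and model IDs:

```python
# Minimal PreferenceOptimizeRequest body; all IDs are hypothetical.
body = {
    "job_uuid": "dpo-job-001",
    "finetuned_model": "meta-llama/Llama-3.2-1B-Instruct",
    "algorithm_config": {"beta": 0.1, "loss_type": "sigmoid"},  # DPOAlignmentConfig
    "training_config": {                                        # TrainingConfig
        "n_epochs": 1,
        "max_steps_per_epoch": 100,
        "gradient_accumulation_steps": 1,
        "data_config": {          # DataConfig
            "dataset_id": "pairs-ds",
            "batch_size": 8,
            "shuffle": True,
            "data_format": "instruct",
        },
        "optimizer_config": {     # OptimizerConfig
            "optimizer_type": "adamw",
            "lr": 1e-5,
            "weight_decay": 0.01,
            "num_warmup_steps": 10,
        },
    },
    "hyperparam_search_config": {},
    "logger_config": {},
}
```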
- RAGQueryGeneratorConfig: - oneOf: - - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' - discriminator: - propertyName: type - mapping: - default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' - llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' - RAGSearchMode: - type: string - enum: - - vector - - keyword - - hybrid - title: RAGSearchMode - description: >- - Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search - for semantic matching - KEYWORD: Uses keyword-based search for exact matching - - HYBRID: Combines both vector and keyword search for better results - RRFRanker: - type: object - properties: - type: - type: string - const: rrf - default: rrf - description: The type of ranker, always "rrf" - impact_factor: - type: number - default: 60.0 - description: >- - The impact factor for RRF scoring. Higher values give more weight to higher-ranked - results. Must be greater than 0 - additionalProperties: false - required: - - type - - impact_factor - title: RRFRanker - description: >- - Reciprocal Rank Fusion (RRF) ranker configuration. - Ranker: - oneOf: - - $ref: '#/components/schemas/RRFRanker' - - $ref: '#/components/schemas/WeightedRanker' - discriminator: - propertyName: type - mapping: - rrf: '#/components/schemas/RRFRanker' - weighted: '#/components/schemas/WeightedRanker' - WeightedRanker: - type: object - properties: - type: - type: string - const: weighted - default: weighted - description: The type of ranker, always "weighted" - alpha: - type: number - default: 0.5 - description: >- - Weight factor between 0 and 1. 0 means only use keyword scores, 1 means - only use vector scores, values in between blend both scores. - additionalProperties: false - required: - - type - - alpha - title: WeightedRanker - description: >- - Weighted ranker configuration that combines vector and keyword scores. - QueryRequest: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - The query content to search for in the indexed documents - vector_db_ids: - type: array - items: - type: string - description: >- - List of vector database IDs to search within - query_config: - $ref: '#/components/schemas/RAGQueryConfig' - description: >- - (Optional) Configuration parameters for the query operation - additionalProperties: false - required: - - content - - vector_db_ids - title: QueryRequest - RAGQueryResult: - type: object - properties: - content: - $ref: '#/components/schemas/InterleavedContent' - description: >- - (Optional) The retrieved content from the query - metadata: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: >- - Additional metadata about the query result - additionalProperties: false - required: - - metadata - title: RAGQueryResult - description: >- - Result of a RAG query containing retrieved content and metadata. - QueryChunksRequest: - type: object - properties: - vector_db_id: - type: string - description: >- - The identifier of the vector database to query. - query: - $ref: '#/components/schemas/InterleavedContent' - description: The query to search for. - params: - type: object - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - description: The parameters of the query. 
- additionalProperties: false - required: - - vector_db_id - - query - title: QueryChunksRequest - QueryChunksResponse: - type: object - properties: - chunks: - type: array - items: - $ref: '#/components/schemas/Chunk' - description: >- - List of content chunks returned from the query - scores: - type: array - items: - type: number - description: >- - Relevance scores corresponding to each returned chunk - additionalProperties: false - required: - - chunks - - scores - title: QueryChunksResponse - description: >- - Response from querying chunks in a vector database. - QueryMetricsRequest: - type: object - properties: - start_time: - type: integer - description: The start time of the metric to query. - end_time: - type: integer - description: The end time of the metric to query. - granularity: - type: string - description: The granularity of the metric to query. - query_type: - type: string - enum: - - range - - instant - description: The type of query to perform. - label_matchers: - type: array - items: - type: object - properties: - name: - type: string - description: The name of the label to match - value: - type: string - description: The value to match against - operator: - type: string - enum: - - '=' - - '!=' - - =~ - - '!~' - description: >- - The comparison operator to use for matching - default: '=' - additionalProperties: false - required: - - name - - value - - operator - title: MetricLabelMatcher - description: >- - A matcher for filtering metrics by label values. - description: >- - The label matchers to apply to the metric. - additionalProperties: false - required: - - start_time - - query_type - title: QueryMetricsRequest - MetricDataPoint: - type: object - properties: - timestamp: - type: integer - description: >- - Unix timestamp when the metric value was recorded - value: - type: number - description: >- - The numeric value of the metric at this timestamp - unit: - type: string - additionalProperties: false - required: - - timestamp - - value - - unit - title: MetricDataPoint - description: >- - A single data point in a metric time series. - MetricLabel: - type: object - properties: - name: - type: string - description: The name of the label - value: - type: string - description: The value of the label - additionalProperties: false - required: - - name - - value - title: MetricLabel - description: A label associated with a metric. - MetricSeries: - type: object - properties: - metric: - type: string - description: The name of the metric - labels: - type: array - items: - $ref: '#/components/schemas/MetricLabel' - description: >- - List of labels associated with this metric series - values: - type: array - items: - $ref: '#/components/schemas/MetricDataPoint' - description: >- - List of data points in chronological order - additionalProperties: false - required: - - metric - - labels - - values - title: MetricSeries - description: A time series of metric data points. - QueryMetricsResponse: - type: object - properties: - data: - type: array - items: - $ref: '#/components/schemas/MetricSeries' - description: >- - List of metric series matching the query criteria - additionalProperties: false - required: - - data - title: QueryMetricsResponse - description: >- - Response containing metric time series data. 
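The RRFRanker and WeightedRanker schemas above (used by RAGQueryConfig's `ranker` field) correspond to two standard fusion formulas. A self-contained sketch using the schema defaults; the server-side implementation may differ in detail:

```python
def rrf_score(rank: int, impact_factor: float = 60.0) -> float:
    """Reciprocal Rank Fusion: higher-ranked results (rank 1, 2, ...) score higher."""
    return 1.0 / (impact_factor + rank)

def weighted_score(keyword: float, vector: float, alpha: float = 0.5) -> float:
    """alpha=0 keeps only the keyword score, alpha=1 only the vector score."""
    return (1 - alpha) * keyword + alpha * vector

# A document ranked 1st by keyword search and 3rd by vector search under RRF:
fused = rrf_score(1) + rrf_score(3)
print(round(fused, 4))  # ~0.0323
```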
-    QueryCondition:
-      type: object
-      properties:
-        key:
-          type: string
-          description: The attribute key to filter on
-        op:
-          $ref: '#/components/schemas/QueryConditionOp'
-          description: The comparison operator to apply
-        value:
-          oneOf:
-            - type: 'null'
-            - type: boolean
-            - type: number
-            - type: string
-            - type: array
-            - type: object
-          description: The value to compare against
-      additionalProperties: false
-      required:
-        - key
-        - op
-        - value
-      title: QueryCondition
-      description: A condition for filtering query results.
-    QueryConditionOp:
-      type: string
-      enum:
-        - eq
-        - ne
-        - gt
-        - lt
-      title: QueryConditionOp
-      description: >-
-        Comparison operators for query conditions.
-    QuerySpansRequest:
-      type: object
-      properties:
-        attribute_filters:
-          type: array
-          items:
-            $ref: '#/components/schemas/QueryCondition'
-          description: >-
-            The attribute filters to apply to the spans.
-        attributes_to_return:
-          type: array
-          items:
-            type: string
-          description: The attributes to return in the spans.
-        max_depth:
-          type: integer
-          description: The maximum depth of the tree.
-      additionalProperties: false
-      required:
-        - attribute_filters
-        - attributes_to_return
-      title: QuerySpansRequest
-    QuerySpansResponse:
-      type: object
-      properties:
-        data:
-          type: array
-          items:
-            $ref: '#/components/schemas/Span'
-          description: >-
-            List of spans matching the query criteria
-      additionalProperties: false
-      required:
-        - data
-      title: QuerySpansResponse
-      description: Response containing a list of spans.
-    QueryTracesRequest:
-      type: object
-      properties:
-        attribute_filters:
-          type: array
-          items:
-            $ref: '#/components/schemas/QueryCondition'
-          description: >-
-            The attribute filters to apply to the traces.
-        limit:
-          type: integer
-          description: The limit of traces to return.
-        offset:
-          type: integer
-          description: The offset of the traces to return.
-        order_by:
-          type: array
-          items:
-            type: string
-          description: The order by of the traces to return.
-      additionalProperties: false
-      title: QueryTracesRequest
-    QueryTracesResponse:
-      type: object
-      properties:
-        data:
-          type: array
-          items:
-            $ref: '#/components/schemas/Trace'
-          description: >-
-            List of traces matching the query criteria
-      additionalProperties: false
-      required:
-        - data
-      title: QueryTracesResponse
-      description: Response containing a list of traces.
-    RegisterBenchmarkRequest:
-      type: object
-      properties:
-        benchmark_id:
-          type: string
-          description: The ID of the benchmark to register.
-        dataset_id:
-          type: string
-          description: >-
-            The ID of the dataset to use for the benchmark.
-        scoring_functions:
-          type: array
-          items:
-            type: string
-          description: >-
-            The scoring functions to use for the benchmark.
-        provider_benchmark_id:
-          type: string
-          description: >-
-            The ID of the provider benchmark to use for the benchmark.
-        provider_id:
-          type: string
-          description: >-
-            The ID of the provider to use for the benchmark.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The metadata to use for the benchmark.
-      additionalProperties: false
-      required:
-        - benchmark_id
-        - dataset_id
-        - scoring_functions
-      title: RegisterBenchmarkRequest
-    RegisterDatasetRequest:
-      type: object
-      properties:
-        purpose:
-          type: string
-          enum:
-            - post-training/messages
-            - eval/question-answer
-            - eval/messages-answer
-          description: >-
-            The purpose of the dataset.
-            One of: - "post-training/messages": The dataset
-            contains a messages column with list of messages for post-training. {
-            "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant",
-            "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset
-            contains a question column and an answer column for evaluation. { "question":
-            "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer":
-            The dataset contains a messages column with list of messages and an answer
-            column for evaluation. { "messages": [ {"role": "user", "content": "Hello,
-            my name is John Doe."}, {"role": "assistant", "content": "Hello, John
-            Doe. How can I help you today?"}, {"role": "user", "content": "What's
-            my name?"}, ], "answer": "John Doe" }
-        source:
-          $ref: '#/components/schemas/DataSource'
-          description: >-
-            The data source of the dataset. Ensure that the data source schema is
-            compatible with the purpose of the dataset. Examples: - { "type": "uri",
-            "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri":
-            "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}"
-            } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train"
-            } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content":
-            "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ]
-            } ] }
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            The metadata for the dataset. - E.g. {"description": "My dataset"}.
-        dataset_id:
-          type: string
-          description: >-
-            The ID of the dataset. If not provided, an ID will be generated.
-      additionalProperties: false
-      required:
-        - purpose
-        - source
-      title: RegisterDatasetRequest
-    RegisterModelRequest:
-      type: object
-      properties:
-        model_id:
-          type: string
-          description: The identifier of the model to register.
-        provider_model_id:
-          type: string
-          description: >-
-            The identifier of the model in the provider.
-        provider_id:
-          type: string
-          description: The identifier of the provider.
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: Any additional metadata for this model.
-        model_type:
-          $ref: '#/components/schemas/ModelType'
-          description: The type of model to register.
-      additionalProperties: false
-      required:
-        - model_id
-      title: RegisterModelRequest
-    RegisterScoringFunctionRequest:
-      type: object
-      properties:
-        scoring_fn_id:
-          type: string
-          description: >-
-            The ID of the scoring function to register.
-        description:
-          type: string
-          description: The description of the scoring function.
-        return_type:
-          $ref: '#/components/schemas/ParamType'
-          description: The return type of the scoring function.
-        provider_scoring_fn_id:
-          type: string
-          description: >-
-            The ID of the provider scoring function to use for the scoring function.
-        provider_id:
-          type: string
-          description: >-
-            The ID of the provider to use for the scoring function.
-        params:
-          $ref: '#/components/schemas/ScoringFnParams'
-          description: >-
-            The parameters for the scoring function for benchmark eval, these can
-            be overridden for app eval.
-      additionalProperties: false
-      required:
-        - scoring_fn_id
-        - description
-        - return_type
-      title: RegisterScoringFunctionRequest
-    RegisterShieldRequest:
-      type: object
-      properties:
-        shield_id:
-          type: string
-          description: >-
-            The identifier of the shield to register.
-        provider_shield_id:
-          type: string
-          description: >-
-            The identifier of the shield in the provider.
-        provider_id:
-          type: string
-          description: The identifier of the provider.
-        params:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The parameters of the shield.
-      additionalProperties: false
-      required:
-        - shield_id
-      title: RegisterShieldRequest
-    RegisterToolGroupRequest:
-      type: object
-      properties:
-        toolgroup_id:
-          type: string
-          description: The ID of the tool group to register.
-        provider_id:
-          type: string
-          description: >-
-            The ID of the provider to use for the tool group.
-        mcp_endpoint:
-          $ref: '#/components/schemas/URL'
-          description: >-
-            The MCP endpoint to use for the tool group.
-        args:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            A dictionary of arguments to pass to the tool group.
-      additionalProperties: false
-      required:
-        - toolgroup_id
-        - provider_id
-      title: RegisterToolGroupRequest
-    RegisterVectorDbRequest:
-      type: object
-      properties:
-        vector_db_id:
-          type: string
-          description: >-
-            The identifier of the vector database to register.
-        embedding_model:
-          type: string
-          description: The embedding model to use.
-        embedding_dimension:
-          type: integer
-          description: The dimension of the embedding model.
-        provider_id:
-          type: string
-          description: The identifier of the provider.
-        vector_db_name:
-          type: string
-          description: The name of the vector database.
-        provider_vector_db_id:
-          type: string
-          description: >-
-            The identifier of the vector database in the provider.
-      additionalProperties: false
-      required:
-        - vector_db_id
-        - embedding_model
-      title: RegisterVectorDbRequest
-    RerankRequest:
-      type: object
-      properties:
-        model:
-          type: string
-          description: >-
-            The identifier of the reranking model to use. The model must be a reranking
-            model registered with Llama Stack and available via the /models endpoint.
-        query:
-          oneOf:
-            - type: string
-            - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
-            - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
-          description: >-
-            The search query to rank items against. Can be a string, text content
-            part, or image content part. The input must not exceed the model's max
-            input token length.
-        items:
-          type: array
-          items:
-            oneOf:
-              - type: string
-              - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
-              - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
-          description: >-
-            List of items to rerank. Each item can be a string, text content part,
-            or image content part. Each input must not exceed the model's max input
-            token length.
-        max_num_results:
-          type: integer
-          description: >-
-            (Optional) Maximum number of results to return. Default: returns all.
-      additionalProperties: false
-      required:
-        - model
-        - query
-        - items
-      title: RerankRequest
-    RerankData:
-      type: object
-      properties:
-        index:
-          type: integer
-          description: >-
-            The original index of the document in the input list
-        relevance_score:
-          type: number
-          description: >-
-            The relevance score from the model output. Values are inverted when applicable
-            so that higher scores indicate greater relevance.
-      additionalProperties: false
-      required:
-        - index
-        - relevance_score
-      title: RerankData
-      description: >-
-        A single rerank result from a reranking response.
-    RerankResponse:
-      type: object
-      properties:
-        data:
-          type: array
-          items:
-            $ref: '#/components/schemas/RerankData'
-          description: >-
-            List of rerank result objects, sorted by relevance score (descending)
-      additionalProperties: false
-      required:
-        - data
-      title: RerankResponse
-      description: Response from a reranking request.
-    ResumeAgentTurnRequest:
-      type: object
-      properties:
-        tool_responses:
-          type: array
-          items:
-            $ref: '#/components/schemas/ToolResponse'
-          description: >-
-            The tool call responses to resume the turn with.
-        stream:
-          type: boolean
-          description: Whether to stream the response.
-      additionalProperties: false
-      required:
-        - tool_responses
-      title: ResumeAgentTurnRequest
-    RunEvalRequest:
-      type: object
-      properties:
-        benchmark_config:
-          $ref: '#/components/schemas/BenchmarkConfig'
-          description: The configuration for the benchmark.
-      additionalProperties: false
-      required:
-        - benchmark_config
-      title: RunEvalRequest
-    RunModerationRequest:
-      type: object
-      properties:
-        input:
-          oneOf:
-            - type: string
-            - type: array
-              items:
-                type: string
-          description: >-
-            Input (or inputs) to classify. Can be a single string, an array of strings,
-            or an array of multi-modal input objects similar to other models.
-        model:
-          type: string
-          description: >-
-            The content moderation model you would like to use.
-      additionalProperties: false
-      required:
-        - input
-        - model
-      title: RunModerationRequest
-    ModerationObject:
-      type: object
-      properties:
-        id:
-          type: string
-          description: >-
-            The unique identifier for the moderation request.
-        model:
-          type: string
-          description: >-
-            The model used to generate the moderation results.
-        results:
-          type: array
-          items:
-            $ref: '#/components/schemas/ModerationObjectResults'
-          description: A list of moderation objects
-      additionalProperties: false
-      required:
-        - id
-        - model
-        - results
-      title: ModerationObject
-      description: A moderation object.
-    ModerationObjectResults:
-      type: object
-      properties:
-        flagged:
-          type: boolean
-          description: >-
-            Whether any of the below categories are flagged.
-        categories:
-          type: object
-          additionalProperties:
-            type: boolean
-          description: >-
-            A list of the categories, and whether they are flagged or not.
-        category_applied_input_types:
-          type: object
-          additionalProperties:
-            type: array
-            items:
-              type: string
-          description: >-
-            A list of the categories along with the input type(s) that the score applies
-            to.
-        category_scores:
-          type: object
-          additionalProperties:
-            type: number
-          description: >-
-            A list of the categories along with their scores as predicted by model.
-        user_message:
-          type: string
-        metadata:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-      additionalProperties: false
-      required:
-        - flagged
-        - metadata
-      title: ModerationObjectResults
-      description: A moderation object.
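The RerankRequest, RerankData, and RerankResponse schemas above describe the wire format of the new rerank API. A minimal usage sketch in library-client mode, mirroring the call shape used elsewhere in this series (the model identifier is illustrative and must belong to a registered rerank model):

from llama_stack.core.library_client import LlamaStackAsLibraryClient

# Build an in-process client for a distribution that ships a rerank provider.
client = LlamaStackAsLibraryClient("nvidia")
client.initialize()

# RerankRequest fields: model, query, items, plus the optional max_num_results.
response = client.inference.rerank(
    model="nvidia/llama-3.2-nv-rerankqa-1b-v2",  # illustrative rerank model id
    query="What is the capital of France?",
    items=[
        "Paris is the capital of France.",
        "Tokyo is the capital of Japan.",
        "The Eiffel Tower is in Paris.",
    ],
    max_num_results=2,  # optional; by default all items are returned
)

# Results arrive sorted by relevance_score (descending); index points back
# into the original items list, so documents can be recovered after sorting.
for result in response:
    print(result.index, result.relevance_score)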
-    RunShieldRequest:
-      type: object
-      properties:
-        shield_id:
-          type: string
-          description: The identifier of the shield to run.
-        messages:
-          type: array
-          items:
-            $ref: '#/components/schemas/Message'
-          description: The messages to run the shield on.
-        params:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The parameters of the shield.
-      additionalProperties: false
-      required:
-        - shield_id
-        - messages
-        - params
-      title: RunShieldRequest
-    RunShieldResponse:
-      type: object
-      properties:
-        violation:
-          $ref: '#/components/schemas/SafetyViolation'
-          description: >-
-            (Optional) Safety violation detected by the shield, if any
-      additionalProperties: false
-      title: RunShieldResponse
-      description: Response from running a safety shield.
-    SaveSpansToDatasetRequest:
-      type: object
-      properties:
-        attribute_filters:
-          type: array
-          items:
-            $ref: '#/components/schemas/QueryCondition'
-          description: >-
-            The attribute filters to apply to the spans.
-        attributes_to_save:
-          type: array
-          items:
-            type: string
-          description: The attributes to save to the dataset.
-        dataset_id:
-          type: string
-          description: >-
-            The ID of the dataset to save the spans to.
-        max_depth:
-          type: integer
-          description: The maximum depth of the tree.
-      additionalProperties: false
-      required:
-        - attribute_filters
-        - attributes_to_save
-        - dataset_id
-      title: SaveSpansToDatasetRequest
-    ScoreRequest:
-      type: object
-      properties:
-        input_rows:
-          type: array
-          items:
-            type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: The rows to score.
-        scoring_functions:
-          type: object
-          additionalProperties:
-            oneOf:
-              - $ref: '#/components/schemas/ScoringFnParams'
-              - type: 'null'
-          description: >-
-            The scoring functions to use for the scoring.
-      additionalProperties: false
-      required:
-        - input_rows
-        - scoring_functions
-      title: ScoreRequest
-    ScoreResponse:
-      type: object
-      properties:
-        results:
-          type: object
-          additionalProperties:
-            $ref: '#/components/schemas/ScoringResult'
-          description: >-
-            A map of scoring function name to ScoringResult.
-      additionalProperties: false
-      required:
-        - results
-      title: ScoreResponse
-      description: The response from scoring.
-    ScoreBatchRequest:
-      type: object
-      properties:
-        dataset_id:
-          type: string
-          description: The ID of the dataset to score.
-        scoring_functions:
-          type: object
-          additionalProperties:
-            oneOf:
-              - $ref: '#/components/schemas/ScoringFnParams'
-              - type: 'null'
-          description: >-
-            The scoring functions to use for the scoring.
-        save_results_dataset:
-          type: boolean
-          description: >-
-            Whether to save the results to a dataset.
-      additionalProperties: false
-      required:
-        - dataset_id
-        - scoring_functions
-        - save_results_dataset
-      title: ScoreBatchRequest
-    ScoreBatchResponse:
-      type: object
-      properties:
-        dataset_id:
-          type: string
-          description: >-
-            (Optional) The identifier of the dataset that was scored
-        results:
-          type: object
-          additionalProperties:
-            $ref: '#/components/schemas/ScoringResult'
-          description: >-
-            A map of scoring function name to ScoringResult
-      additionalProperties: false
-      required:
-        - results
-      title: ScoreBatchResponse
-      description: >-
-        Response from batch scoring operations on datasets.
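RunShieldRequest and RunShieldResponse above follow the same request/response pattern as the rerank schemas. A hedged sketch of the corresponding client call, reusing the client built earlier (the shield identifier is hypothetical, and the empty params dict simply satisfies the schema's required field):

# Sketch only: assumes a shield was previously registered as "content_safety".
result = client.safety.run_shield(
    shield_id="content_safety",  # hypothetical shield id
    messages=[{"role": "user", "content": "How do I bake a cake?"}],
    params={},  # required by RunShieldRequest; may be empty
)

# RunShieldResponse.violation is optional: None means nothing was flagged.
if result.violation is not None:
    print(result.violation)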
-    SetDefaultVersionRequest:
-      type: object
-      properties:
-        version:
-          type: integer
-          description: The version to set as default.
-      additionalProperties: false
-      required:
-        - version
-      title: SetDefaultVersionRequest
-    AlgorithmConfig:
-      oneOf:
-        - $ref: '#/components/schemas/LoraFinetuningConfig'
-        - $ref: '#/components/schemas/QATFinetuningConfig'
-      discriminator:
-        propertyName: type
-        mapping:
-          LoRA: '#/components/schemas/LoraFinetuningConfig'
-          QAT: '#/components/schemas/QATFinetuningConfig'
-    LoraFinetuningConfig:
-      type: object
-      properties:
-        type:
-          type: string
-          const: LoRA
-          default: LoRA
-          description: Algorithm type identifier, always "LoRA"
-        lora_attn_modules:
-          type: array
-          items:
-            type: string
-          description: >-
-            List of attention module names to apply LoRA to
-        apply_lora_to_mlp:
-          type: boolean
-          description: Whether to apply LoRA to MLP layers
-        apply_lora_to_output:
-          type: boolean
-          description: >-
-            Whether to apply LoRA to output projection layers
-        rank:
-          type: integer
-          description: >-
-            Rank of the LoRA adaptation (lower rank = fewer parameters)
-        alpha:
-          type: integer
-          description: >-
-            LoRA scaling parameter that controls adaptation strength
-        use_dora:
-          type: boolean
-          default: false
-          description: >-
-            (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)
-        quantize_base:
-          type: boolean
-          default: false
-          description: >-
-            (Optional) Whether to quantize the base model weights
-      additionalProperties: false
-      required:
-        - type
-        - lora_attn_modules
-        - apply_lora_to_mlp
-        - apply_lora_to_output
-        - rank
-        - alpha
-      title: LoraFinetuningConfig
-      description: >-
-        Configuration for Low-Rank Adaptation (LoRA) fine-tuning.
-    QATFinetuningConfig:
-      type: object
-      properties:
-        type:
-          type: string
-          const: QAT
-          default: QAT
-          description: Algorithm type identifier, always "QAT"
-        quantizer_name:
-          type: string
-          description: >-
-            Name of the quantization algorithm to use
-        group_size:
-          type: integer
-          description: Size of groups for grouped quantization
-      additionalProperties: false
-      required:
-        - type
-        - quantizer_name
-        - group_size
-      title: QATFinetuningConfig
-      description: >-
-        Configuration for Quantization-Aware Training (QAT) fine-tuning.
-    SupervisedFineTuneRequest:
-      type: object
-      properties:
-        job_uuid:
-          type: string
-          description: The UUID of the job to create.
-        training_config:
-          $ref: '#/components/schemas/TrainingConfig'
-          description: The training configuration.
-        hyperparam_search_config:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The hyperparam search configuration.
-        logger_config:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: The logger configuration.
-        model:
-          type: string
-          description: The model to fine-tune.
-        checkpoint_dir:
-          type: string
-          description: The directory to save checkpoint(s) to.
-        algorithm_config:
-          $ref: '#/components/schemas/AlgorithmConfig'
-          description: The algorithm configuration.
-      additionalProperties: false
-      required:
-        - job_uuid
-        - training_config
-        - hyperparam_search_config
-        - logger_config
-      title: SupervisedFineTuneRequest
-    SyntheticDataGenerateRequest:
-      type: object
-      properties:
-        dialogs:
-          type: array
-          items:
-            $ref: '#/components/schemas/Message'
-          description: >-
-            List of conversation messages to use as input for synthetic data generation
-        filtering_function:
-          type: string
-          enum:
-            - none
-            - random
-            - top_k
-            - top_p
-            - top_k_top_p
-            - sigmoid
-          description: >-
-            Type of filtering to apply to generated synthetic data samples
-        model:
-          type: string
-          description: >-
-            (Optional) The identifier of the model to use. The model must be registered
-            with Llama Stack and available via the /models endpoint
-      additionalProperties: false
-      required:
-        - dialogs
-        - filtering_function
-      title: SyntheticDataGenerateRequest
-    SyntheticDataGenerationResponse:
-      type: object
-      properties:
-        synthetic_data:
-          type: array
-          items:
-            type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: >-
-            List of generated synthetic data samples that passed the filtering criteria
-        statistics:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: >-
-            (Optional) Statistical information about the generation process and filtering
-            results
-      additionalProperties: false
-      required:
-        - synthetic_data
-      title: SyntheticDataGenerationResponse
-      description: >-
-        Response from the synthetic data generation. Batch of (prompt, response, score)
-        tuples that pass the threshold.
-    UpdatePromptRequest:
-      type: object
-      properties:
-        prompt:
-          type: string
-          description: The updated prompt text content.
-        version:
-          type: integer
-          description: >-
-            The current version of the prompt being updated.
-        variables:
-          type: array
-          items:
-            type: string
-          description: >-
-            Updated list of variable names that can be used in the prompt template.
-        set_as_default:
-          type: boolean
-          description: >-
-            Set the new version as the default (default=True).
-      additionalProperties: false
-      required:
-        - prompt
-        - version
-        - set_as_default
-      title: UpdatePromptRequest
->>>>>>> f7acfa0f (Add rerank API for NVIDIA Inference Provider)
     VersionInfo:
       type: object
       properties:
diff --git a/llama_stack/core/routers/inference.py b/llama_stack/core/routers/inference.py
index c1d4203c2..fcc16332f 100644
--- a/llama_stack/core/routers/inference.py
+++ b/llama_stack/core/routers/inference.py
@@ -201,7 +201,6 @@ class InferenceRouter(Inference):
             max_num_results=max_num_results,
         )
 
-
     async def openai_completion(
         self,
         model: str,
diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py
index 8f4c564c8..dfbcf476d 100644
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@@ -181,7 +181,14 @@ def model_providers(llama_stack_client):
 
 @pytest.fixture(autouse=True)
 def skip_if_no_model(request):
-    model_fixtures = ["text_model_id", "vision_model_id", "embedding_model_id", "judge_model_id", "shield_id", "rerank_model_id"]
+    model_fixtures = [
+        "text_model_id",
+        "vision_model_id",
+        "embedding_model_id",
+        "judge_model_id",
+        "shield_id",
+        "rerank_model_id",
+    ]
     test_func = request.node.function
     actual_params = inspect.signature(test_func).parameters.keys()

From 6b4940806f2aa6b411f14e28f9c1414df9d059e5 Mon Sep 17 00:00:00 2001
From: Jiayi
Date: Wed, 1 Oct 2025 10:37:58 -0700
Subject: [PATCH 18/18] Fix rerank integration test based on client side
 changes

---
 docs/docs/providers/agents/index.mdx         |   2 +-
 docs/docs/providers/inference/index.mdx      |   3 +-
 docs/static/deprecated-llama-stack-spec.html |   2 +-
 docs/static/deprecated-llama-stack-spec.yaml |   7 +-
 docs/static/stainless-llama-stack-spec.html  |   7 +-
 docs/static/stainless-llama-stack-spec.yaml  |  11 +-
 example.py                                   | 257 -------------------
 tests/integration/inference/test_rerank.py   |  14 +-
 8 files changed, 27 insertions(+), 276 deletions(-)
 delete mode 100644 example.py

diff --git a/docs/docs/providers/agents/index.mdx b/docs/docs/providers/agents/index.mdx
index 200d0119f..06eb104af 100644
--- a/docs/docs/providers/agents/index.mdx
+++ b/docs/docs/providers/agents/index.mdx
@@ -14,4 +14,4 @@ Agents APIs for creating and interacting with agentic systems.
 
 
 
-This section contains documentation for all available providers for the **agents** API.
\ No newline at end of file
+This section contains documentation for all available providers for the **agents** API.
diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx
index 065f620df..63741f202 100644
--- a/docs/docs/providers/inference/index.mdx
+++ b/docs/docs/providers/inference/index.mdx
@@ -4,8 +4,7 @@ description: "Llama Stack Inference API for generating completions, chat complet
 This API provides the raw interface to the underlying models. Three kinds of models are supported:
 - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.
 - Embedding models: these models generate embeddings to be used for semantic search.
-  - Rerank models: these models rerank the documents by relevance."
-
+  - Rerank models: these models reorder the documents based on their relevance to a query."
 sidebar_label: Inference
 title: Inference
 ---
diff --git a/docs/static/deprecated-llama-stack-spec.html b/docs/static/deprecated-llama-stack-spec.html
index 7edfe3f5d..f0dd903a6 100644
--- a/docs/static/deprecated-llama-stack-spec.html
+++ b/docs/static/deprecated-llama-stack-spec.html
@@ -13335,7 +13335,7 @@
     },
     {
       "name": "Inference",
-      "description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
+      "description": "This API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents based on their relevance to a query.",
       "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
     },
     {
diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml
index ca832d46b..48863025f 100644
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
@@ -9990,13 +9990,16 @@ tags:
     description: ''
   - name: Inference
     description: >-
-      This API provides the raw interface to the underlying models. Two kinds of models
-      are supported:
+      This API provides the raw interface to the underlying models. Three kinds of
+      models are supported:
 
       - LLM models: these models generate "raw" and "chat" (conversational) completions.
 
       - Embedding models: these models generate embeddings to be used for semantic
      search.
+
+      - Rerank models: these models reorder the documents based on their relevance
+      to a query.
     x-displayName: >-
       Llama Stack Inference API for generating completions, chat completions, and
      embeddings.
diff --git a/docs/static/stainless-llama-stack-spec.html b/docs/static/stainless-llama-stack-spec.html
index 7ec48ef74..6bc67536d 100644
--- a/docs/static/stainless-llama-stack-spec.html
+++ b/docs/static/stainless-llama-stack-spec.html
@@ -8838,7 +8838,8 @@
         "type": "string",
         "enum": [
           "llm",
-          "embedding"
+          "embedding",
+          "rerank"
         ],
         "title": "ModelType",
         "description": "Enumeration of supported model types in Llama Stack."
@@ -17033,7 +17034,7 @@
       "properties": {
         "model": {
           "type": "string",
-          "description": "The identifier of the reranking model to use."
+          "description": "The identifier of the reranking model to use. The model must be a reranking model registered with Llama Stack and available via the /models endpoint."
         },
         "query": {
           "oneOf": [
@@ -18456,7 +18457,7 @@
     },
     {
       "name": "Inference",
-      "description": "This API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
+      "description": "This API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents based on their relevance to a query.",
       "x-displayName": "Llama Stack Inference API for generating completions, chat completions, and embeddings."
     },
     {
diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml
index 3bede159b..8fc70a5cd 100644
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@@ -6603,6 +6603,7 @@ components:
       enum:
         - llm
         - embedding
+        - rerank
       title: ModelType
       description: >-
         Enumeration of supported model types in Llama Stack.
@@ -12693,7 +12694,8 @@ components:
         model:
           type: string
           description: >-
-            The identifier of the reranking model to use.
+            The identifier of the reranking model to use. The model must be a reranking
+            model registered with Llama Stack and available via the /models endpoint.
         query:
           oneOf:
             - type: string
@@ -13774,13 +13776,16 @@ tags:
     description: ''
   - name: Inference
     description: >-
-      This API provides the raw interface to the underlying models. Two kinds of models
-      are supported:
+      This API provides the raw interface to the underlying models. Three kinds of
+      models are supported:
 
       - LLM models: these models generate "raw" and "chat" (conversational) completions.
 
       - Embedding models: these models generate embeddings to be used for semantic
      search.
+
+      - Rerank models: these models reorder the documents based on their relevance
+      to a query.
     x-displayName: >-
       Llama Stack Inference API for generating completions, chat completions, and
      embeddings.
diff --git a/example.py b/example.py
deleted file mode 100644
index 7e968e24a..000000000
--- a/example.py
+++ /dev/null
@@ -1,257 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import os
-
-os.environ["NVIDIA_API_KEY"] = "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"
-# Option 1: Use default NIM URL (will auto-switch to ai.api.nvidia.com for rerank)
-# os.environ["NVIDIA_BASE_URL"] = "https://ai.api.nvidia.com"
-# Option 2: Use AI Foundation URL directly for rerank models
-# os.environ["NVIDIA_BASE_URL"] = "https://ai.api.nvidia.com/v1"
-os.environ["NVIDIA_BASE_URL"] = "https://integrate.api.nvidia.com"
-
-import base64
-import io
-from PIL import Image
-
-from llama_stack.core.library_client import LlamaStackAsLibraryClient
-
-client = LlamaStackAsLibraryClient("nvidia")
-client.initialize()
-
-# # response = client.inference.completion(
-# #     model_id="meta/llama-3.1-8b-instruct",
-# #     content="Complete the sentence using one word: Roses are red, violets are :",
-# #     stream=False,
-# #     sampling_params={
-# #         "max_tokens": 50,
-# #     },
-# # )
-# # print(f"Response: {response.content}")
-
-
-# response = client.inference.chat_completion(
-#     model_id="nvidia/nvidia-nemotron-nano-9b-v2",
-#     messages=[
-#         {
-#             "role": "system",
-#             "content": "/think",
-#         },
-#         {
-#             "role": "user",
-#             "content": "How are you?",
-#         },
-#     ],
-#     stream=False,
-#     sampling_params={
-#         "max_tokens": 1024,
-#     },
-# )
-# print(f"Response: {response}")
-
-
-print(client.models.list())
-rerank_response = client.inference.rerank(
-    model="nvidia/llama-3.2-nv-rerankqa-1b-v2",
-    query="query",
-    items=[
-        "item_1",
-        "item_2",
-        "item_3",
-    ]
-)
-
-print(rerank_response)
-for i, result in enumerate(rerank_response):
-    print(f"{i+1}. [Index: {result.index}, "
-          f"Score: {(result.relevance_score):.3f}]")
-
-# # from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition
-
-# # tool_definition = ToolDefinition(
-# #     tool_name="get_weather",
-# #     description="Get current weather information for a location",
-# #     parameters={
-# #         "location": ToolParamDefinition(
-# #             param_type="string",
-# #             description="The city and state, e.g. San Francisco, CA",
-# #             required=True
-# #         ),
-# #         "unit": ToolParamDefinition(
-# #             param_type="string",
-# #             description="Temperature unit (celsius or fahrenheit)",
-# #             required=False,
-# #             default="celsius"
-# #         )
-# #     }
-# # )
-
-# # # tool_response = client.inference.chat_completion(
-# # #     model_id="meta-llama/Llama-3.1-8B-Instruct",
-# # #     messages=[
-# # #         {"role": "user", "content": "What's the weather like in San Francisco?"}
-# # #     ],
-# # #     tools=[tool_definition],
-# # # )
-
-# # # print(f"Tool Response: {tool_response.completion_message.content}")
-# # # if tool_response.completion_message.tool_calls:
-# # #     for tool_call in tool_response.completion_message.tool_calls:
-# # #         print(f"Tool Called: {tool_call.tool_name}")
-# # #         print(f"Arguments: {tool_call.arguments}")
-
-
-# # # from llama_stack.apis.inference import JsonSchemaResponseFormat, ResponseFormatType
-
-# # # person_schema = {
-# # #     "type": "object",
-# # #     "properties": {
-# # #         "name": {"type": "string"},
-# # #         "age": {"type": "integer"},
-# # #         "occupation": {"type": "string"},
-# # #     },
-# # #     "required": ["name", "age", "occupation"]
-# # # }
-
-# # # response_format = JsonSchemaResponseFormat(
-# # #     type=ResponseFormatType.json_schema,
-# # #     json_schema=person_schema
-# # # )
-
-# # # structured_response = client.inference.chat_completion(
-# # #     model_id="meta-llama/Llama-3.1-8B-Instruct",
-# # #     messages=[
-# # #         {
-# # #             "role": "user",
-# # #             "content": "Create a profile for a fictional person named Alice who is 30 years old and is a software engineer. "
-# # #         }
-# # #     ],
-# # #     response_format=response_format,
-# # # )
-
-# # # print(f"Structured Response: {structured_response.completion_message.content}")
-
-# # # print("\n" + "="*50)
-# # # print("VISION LANGUAGE MODEL (VLM) EXAMPLE")
-# # # print("="*50)
-
-# # def load_image_as_base64(image_path):
-# #     with open(image_path, "rb") as image_file:
-# #         img_bytes = image_file.read()
-# #         return base64.b64encode(img_bytes).decode("utf-8")
-
-# # image_path = "/home/jiayin/llama-stack/docs/dog.jpg"
-# # demo_image_b64 = load_image_as_base64(image_path)
-
-# # vlm_response = client.inference.chat_completion(
-# #     model_id="nvidia/vila",
-# #     messages=[
-# #         {
-# #             "role": "user",
-# #             "content": [
-# #                 {
-# #                     "type": "image",
-# #                     "image": {
-# #                         "data": demo_image_b64,
-# #                     },
-# #                 },
-# #                 {
-# #                     "type": "text",
-# #                     "text": "Please describe what you see in this image in detail.",
-# #                 },
-# #             ],
-# #         }
-# #     ],
-# # )
-
-# # print(f"VLM Response: {vlm_response.completion_message.content}")
-
-# # # print("\n" + "="*50)
-# # # print("EMBEDDING EXAMPLE")
-# # # print("="*50)
-
-# # # # Embedding example
-# # # embedding_response = client.inference.embeddings(
-# # #     model_id="nvidia/llama-3.2-nv-embedqa-1b-v2",
-# # #     contents=["Hello world", "How are you today?"],
-# # #     task_type="query"
-# # # )
-
-# # # print(f"Number of embeddings: {len(embedding_response.embeddings)}")
-# # # print(f"Embedding dimension: {len(embedding_response.embeddings[0])}")
-# # # print(f"First few values: {embedding_response.embeddings[0][:5]}")
-
-# # # # from openai import OpenAI
-
-# # # # client = OpenAI(
-# # # #     base_url = "http://10.176.230.61:8000/v1",
-# # # #     api_key = "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"
-# # # # )
-
-# # # # # completion = client.completions.create(
-# # # # #     model="meta/llama-3.1-405b-instruct",
-# # # # #     prompt="How are you?",
-# # # # #     temperature=0.2,
-# # # # #     top_p=0.7,
-# # # # #     max_tokens=1024,
-# # # # #     stream=False
-# # # # # )
-
-# # # # # # completion = client.chat.completions.create(
-# # # # # #     model="meta/llama-3.1-8b-instruct",
-# # # # # #     messages=[{"role":"user","content":"hi"}],
-# # # # # #     temperature=0.2,
-# # # # # #     top_p=0.7,
-# # # # # #     max_tokens=1024,
-# # # # # #     stream=True
-# # # # # # )
-
-# # # # # for chunk in completion:
-# # # # #     if chunk.choices[0].delta.content is not None:
-# # # # #         print(chunk.choices[0].delta.content, end="")
-
-
-# # # # # response = client.inference.completion(
-# # # # #     model_id="meta/llama-3.1-8b-instruct",
-# # # # #     content="Complete the sentence using one word: Roses are red, violets are :",
-# # # # #     stream=False,
-# # # # #     sampling_params={
-# # # # #         "max_tokens": 50,
-# # # # #     },
-# # # # # )
-# # # # # print(f"Response: {response.content}")
-
-
-
-
-# from openai import OpenAI
-
-# client = OpenAI(
-#     base_url = "https://integrate.api.nvidia.com/v1",
-#     api_key = "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"
-# )
-
-# completion = client.chat.completions.create(
-#     model="nvidia/nvidia-nemotron-nano-9b-v2",
-#     messages=[{"role":"system","content":"/think"}],
-#     temperature=0.6,
-#     top_p=0.95,
-#     max_tokens=2048,
-#     frequency_penalty=0,
-#     presence_penalty=0,
-#     stream=True,
-#     extra_body={
-#         "min_thinking_tokens": 1024,
-#         "max_thinking_tokens": 2048
-#     }
-# )
-
-# for chunk in completion:
-#     reasoning = getattr(chunk.choices[0].delta, "reasoning_content", None)
-#     if reasoning:
-#         print(reasoning, end="")
-#     if chunk.choices[0].delta.content is not None:
-#         print(chunk.choices[0].delta.content, end="")
diff --git a/tests/integration/inference/test_rerank.py b/tests/integration/inference/test_rerank.py
index 4931c3d6c..82f35cd27 100644
--- a/tests/integration/inference/test_rerank.py
+++ b/tests/integration/inference/test_rerank.py
@@ -6,7 +6,7 @@
 import pytest
 from llama_stack_client import BadRequestError as LlamaStackBadRequestError
-from llama_stack_client.types import InferenceRerankResponse
+from llama_stack_client.types.alpha import InferenceRerankResponse
 from llama_stack_client.types.shared.interleaved_content import (
     ImageContentItem,
     ImageContentItemImage,
@@ -97,7 +97,7 @@ def _validate_semantic_ranking(response: InferenceRerankResponse, items: list, e
 def test_rerank_text(client_with_models, rerank_model_id, query, items, inference_provider_type):
     skip_if_provider_doesnt_support_rerank(inference_provider_type)
-    response = client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items)
+    response = client_with_models.alpha.inference.rerank(model=rerank_model_id, query=query, items=items)
     assert isinstance(response, list)
     # TODO: Add type validation for response items once InferenceRerankResponseItem is exported from llama stack client.
     assert len(response) <= len(items)
@@ -129,9 +129,9 @@ def test_rerank_image(client_with_models, rerank_model_id, query, items, inferen
             ValueError if isinstance(client_with_models, LlamaStackAsLibraryClient) else LlamaStackBadRequestError
         )
         with pytest.raises(error_type):
-            client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items)
+            client_with_models.alpha.inference.rerank(model=rerank_model_id, query=query, items=items)
     else:
-        response = client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items)
+        response = client_with_models.alpha.inference.rerank(model=rerank_model_id, query=query, items=items)
         assert isinstance(response, list)
         assert len(response) <= len(items)
@@ -144,7 +144,7 @@ def test_rerank_max_results(client_with_models, rerank_model_id, inference_provi
     items = [DUMMY_STRING, DUMMY_STRING2, DUMMY_TEXT, DUMMY_TEXT2]
     max_num_results = 2
 
-    response = client_with_models.inference.rerank(
+    response = client_with_models.alpha.inference.rerank(
         model=rerank_model_id,
         query=DUMMY_STRING,
         items=items,
@@ -160,7 +160,7 @@ def test_rerank_max_results_larger_than_items(client_with_models, rerank_model_i
     skip_if_provider_doesnt_support_rerank(inference_provider_type)
     items = [DUMMY_STRING, DUMMY_STRING2]
 
-    response = client_with_models.inference.rerank(
+    response = client_with_models.alpha.inference.rerank(
         model=rerank_model_id,
        query=DUMMY_STRING,
         items=items,
@@ -208,7 +208,7 @@ def test_rerank_semantic_correctness(
 ):
     skip_if_provider_doesnt_support_rerank(inference_provider_type)
 
-    response = client_with_models.inference.rerank(model=rerank_model_id, query=query, items=items)
+    response = client_with_models.alpha.inference.rerank(model=rerank_model_id, query=query, items=items)
 
     _validate_rerank_response(response, items)
     _validate_semantic_ranking(response, items, expected_first_item)
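The hunks above capture the client-side change this final commit reacts to: rerank now lives under the alpha namespace (llama_stack_client.types.alpha, client.alpha.inference.rerank), and the new "rerank" ModelType makes rerank models discoverable like any other model. A short end-to-end sketch against a running stack; the server URL is a placeholder, and the model_type/identifier attribute names are assumptions based on the Model resource shown in this series:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

# Rerank models are listed alongside llm and embedding models via /models.
rerank_models = [m for m in client.models.list() if m.model_type == "rerank"]

# Same call shape the integration tests now exercise.
response = client.alpha.inference.rerank(
    model=rerank_models[0].identifier,
    query="best pizza in town",
    items=["a pizzeria review", "a laundromat flyer", "a pasta recipe"],
    max_num_results=1,
)
assert len(response) <= 1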