From bab9d7aaea9ab3fd7d68cabed6e22345f1d7f739 Mon Sep 17 00:00:00 2001 From: Jiayi Date: Wed, 3 Sep 2025 17:34:05 -0700 Subject: [PATCH] Add rerank API for NVIDIA Inference Provider --- docs/docs/providers/inference/index.mdx | 8 +++++--- docs/static/llama-stack-spec.html | 4992 +++++++++++++++++ docs/static/llama-stack-spec.yaml | 3724 ++++++++++++ example.py | 257 + llama_stack/apis/inference/inference.py | 2 +- llama_stack/apis/models/models.py | 2 + llama_stack/core/routers/inference.py | 24 + .../remote/inference/nvidia/models.py | 131 + .../remote/inference/nvidia/nvidia.py | 80 + 9 files changed, 9216 insertions(+), 4 deletions(-) create mode 100644 example.py create mode 100644 llama_stack/providers/remote/inference/nvidia/models.py diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index ebbaf1be1..e96169cad 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -4,6 +4,7 @@ description: "Llama Stack Inference API for generating completions, chat complet -This API provides the raw interface to the underlying models. Two kinds of models are supported: +This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. -- Embedding models: these models generate embeddings to be used for semantic search." +- Embedding models: these models generate embeddings to be used for semantic search. +- Rerank models: these models rerank the documents by relevance." sidebar_label: Inference title: Inference --- @@ -17,5 +18,6 @@ Llama Stack Inference API for generating completions, chat completions, and embe -This API provides the raw interface to the underlying models. Two kinds of models are supported: +This API provides the raw interface to the underlying models. Three kinds of models are supported: - LLM models: these models generate "raw" and "chat" (conversational) completions. - Embedding models: these models generate embeddings to be used for semantic search. +- Rerank models: these models rerank the documents by relevance. This section contains documentation for all available providers for the **inference** API.
diff --git a/docs/static/llama-stack-spec.html b/docs/static/llama-stack-spec.html index 96e97035f..b260f01a7 100644 --- a/docs/static/llama-stack-spec.html +++ b/docs/static/llama-stack-spec.html @@ -4819,6 +4819,2834 @@ "title": "OpenAIUserMessageParam", "description": "A message from the user in an OpenAI-compatible chat completion request." }, + "OpenAICompletionWithInputMessages": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The ID of the chat completion" + }, + "choices": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChoice" + }, + "description": "List of choices" + }, + "object": { + "type": "string", + "const": "chat.completion", + "default": "chat.completion", + "description": "The object type, which will be \"chat.completion\"" + }, + "created": { + "type": "integer", + "description": "The Unix timestamp in seconds when the chat completion was created" + }, + "model": { + "type": "string", + "description": "The model that was used to generate the chat completion" + }, + "input_messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIMessageParam" + } + } + }, + "additionalProperties": false, + "required": [ + "id", + "choices", + "object", + "created", + "model", + "input_messages" + ], + "title": "OpenAICompletionWithInputMessages" + }, + "DataSource": { + "oneOf": [ + { + "$ref": "#/components/schemas/URIDataSource" + }, + { + "$ref": "#/components/schemas/RowsDataSource" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "uri": "#/components/schemas/URIDataSource", + "rows": "#/components/schemas/RowsDataSource" + } + } + }, + "Dataset": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + 
"benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "dataset", + "default": "dataset", + "description": "Type of resource, always 'dataset' for datasets" + }, + "purpose": { + "type": "string", + "enum": [ + "post-training/messages", + "eval/question-answer", + "eval/messages-answer" + ], + "description": "Purpose of the dataset indicating its intended use" + }, + "source": { + "$ref": "#/components/schemas/DataSource", + "description": "Data source configuration for the dataset" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Additional metadata for the dataset" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type", + "purpose", + "source", + "metadata" + ], + "title": "Dataset", + "description": "Dataset resource for storing and accessing training or evaluation data." + }, + "RowsDataSource": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "rows", + "default": "rows" + }, + "rows": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "The dataset is stored in rows. E.g. - [ {\"messages\": [{\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}]} ]" + } + }, + "additionalProperties": false, + "required": [ + "type", + "rows" + ], + "title": "RowsDataSource", + "description": "A dataset stored in rows." 
+ }, + "URIDataSource": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "uri", + "default": "uri" + }, + "uri": { + "type": "string", + "description": "The dataset can be obtained from a URI. E.g. - \"https://mywebsite.com/mydata.jsonl\" - \"lsfs://mydata.jsonl\" - \"data:csv;base64,{base64_content}\"" + } + }, + "additionalProperties": false, + "required": [ + "type", + "uri" + ], + "title": "URIDataSource", + "description": "A dataset that can be obtained from a URI." + }, + "Model": { + "type": "object", + "properties": { + "identifier": { + "type": "string", + "description": "Unique identifier for this resource in llama stack" + }, + "provider_resource_id": { + "type": "string", + "description": "Unique identifier for this resource in the provider" + }, + "provider_id": { + "type": "string", + "description": "ID of the provider that owns this resource" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "model", + "default": "model", + "description": "The resource type, always 'model' for model resources" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Any additional metadata for this model" + }, + "model_type": { + "$ref": "#/components/schemas/ModelType", + "default": "llm", + "description": "The type of model (LLM or embedding model)" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type", + "metadata", + "model_type" + ], + "title": "Model", + "description": "A model resource representing an AI model registered in Llama Stack." 
+ }, + "ModelType": { + "type": "string", + "enum": [ + "llm", + "embedding", + "rerank" + ], + "title": "ModelType", + "description": "Enumeration of supported model types in Llama Stack." + }, + "AgentTurnInputType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "agent_turn_input", + "default": "agent_turn_input", + "description": "Discriminator type. Always \"agent_turn_input\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "AgentTurnInputType", + "description": "Parameter type for agent turn input." + }, + "ArrayType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "array", + "default": "array", + "description": "Discriminator type. Always \"array\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "ArrayType", + "description": "Parameter type for array values." + }, + "BooleanType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "boolean", + "default": "boolean", + "description": "Discriminator type. Always \"boolean\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "BooleanType", + "description": "Parameter type for boolean values." + }, + "ChatCompletionInputType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "chat_completion_input", + "default": "chat_completion_input", + "description": "Discriminator type. Always \"chat_completion_input\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "ChatCompletionInputType", + "description": "Parameter type for chat completion input." + }, + "CompletionInputType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "completion_input", + "default": "completion_input", + "description": "Discriminator type. 
Always \"completion_input\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "CompletionInputType", + "description": "Parameter type for completion input." + }, + "JsonType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "json", + "default": "json", + "description": "Discriminator type. Always \"json\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "JsonType", + "description": "Parameter type for JSON values." + }, + "NumberType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "number", + "default": "number", + "description": "Discriminator type. Always \"number\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "NumberType", + "description": "Parameter type for numeric values." + }, + "ObjectType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "object", + "default": "object", + "description": "Discriminator type. Always \"object\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "ObjectType", + "description": "Parameter type for object values." 
+ }, + "ParamType": { + "oneOf": [ + { + "$ref": "#/components/schemas/StringType" + }, + { + "$ref": "#/components/schemas/NumberType" + }, + { + "$ref": "#/components/schemas/BooleanType" + }, + { + "$ref": "#/components/schemas/ArrayType" + }, + { + "$ref": "#/components/schemas/ObjectType" + }, + { + "$ref": "#/components/schemas/JsonType" + }, + { + "$ref": "#/components/schemas/UnionType" + }, + { + "$ref": "#/components/schemas/ChatCompletionInputType" + }, + { + "$ref": "#/components/schemas/CompletionInputType" + }, + { + "$ref": "#/components/schemas/AgentTurnInputType" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "string": "#/components/schemas/StringType", + "number": "#/components/schemas/NumberType", + "boolean": "#/components/schemas/BooleanType", + "array": "#/components/schemas/ArrayType", + "object": "#/components/schemas/ObjectType", + "json": "#/components/schemas/JsonType", + "union": "#/components/schemas/UnionType", + "chat_completion_input": "#/components/schemas/ChatCompletionInputType", + "completion_input": "#/components/schemas/CompletionInputType", + "agent_turn_input": "#/components/schemas/AgentTurnInputType" + } + } + }, + "ScoringFn": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "scoring_function", + "default": "scoring_function", + "description": "The resource type, always scoring_function" + }, + "description": { + "type": "string" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + 
"return_type": { + "$ref": "#/components/schemas/ParamType" + }, + "params": { + "$ref": "#/components/schemas/ScoringFnParams" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type", + "metadata", + "return_type" + ], + "title": "ScoringFn", + "description": "A scoring function resource for evaluating model outputs." + }, + "StringType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "string", + "default": "string", + "description": "Discriminator type. Always \"string\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "StringType", + "description": "Parameter type for string values." + }, + "UnionType": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "union", + "default": "union", + "description": "Discriminator type. Always \"union\"" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "UnionType", + "description": "Parameter type for union values." 
+ }, + "Shield": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "shield", + "default": "shield", + "description": "The resource type, always shield" + }, + "params": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Configuration parameters for the shield" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type" + ], + "title": "Shield", + "description": "A safety shield resource that can be used to check content." + }, + "Span": { + "type": "object", + "properties": { + "span_id": { + "type": "string", + "description": "Unique identifier for the span" + }, + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace this span belongs to" + }, + "parent_span_id": { + "type": "string", + "description": "(Optional) Unique identifier for the parent span, if this is a child span" + }, + "name": { + "type": "string", + "description": "Human-readable name describing the operation this span represents" + }, + "start_time": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the operation began" + }, + "end_time": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the operation finished, if completed" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, 
+ { + "type": "object" + } + ] + }, + "description": "(Optional) Key-value pairs containing additional metadata about the span" + } + }, + "additionalProperties": false, + "required": [ + "span_id", + "trace_id", + "name", + "start_time" + ], + "title": "Span", + "description": "A span representing a single operation within a trace." + }, + "GetSpanTreeRequest": { + "type": "object", + "properties": { + "attributes_to_return": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The attributes to return in the tree." + }, + "max_depth": { + "type": "integer", + "description": "The maximum depth of the tree." + } + }, + "additionalProperties": false, + "title": "GetSpanTreeRequest" + }, + "SpanStatus": { + "type": "string", + "enum": [ + "ok", + "error" + ], + "title": "SpanStatus", + "description": "The status of a span indicating whether it completed successfully or with an error." + }, + "SpanWithStatus": { + "type": "object", + "properties": { + "span_id": { + "type": "string", + "description": "Unique identifier for the span" + }, + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace this span belongs to" + }, + "parent_span_id": { + "type": "string", + "description": "(Optional) Unique identifier for the parent span, if this is a child span" + }, + "name": { + "type": "string", + "description": "Human-readable name describing the operation this span represents" + }, + "start_time": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the operation began" + }, + "end_time": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the operation finished, if completed" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": 
"(Optional) Key-value pairs containing additional metadata about the span" + }, + "status": { + "$ref": "#/components/schemas/SpanStatus", + "description": "(Optional) The current status of the span" + } + }, + "additionalProperties": false, + "required": [ + "span_id", + "trace_id", + "name", + "start_time" + ], + "title": "SpanWithStatus", + "description": "A span that includes status information." + }, + "QuerySpanTreeResponse": { + "type": "object", + "properties": { + "data": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/SpanWithStatus" + }, + "description": "Dictionary mapping span IDs to spans with status information" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "QuerySpanTreeResponse", + "description": "Response containing a tree structure of spans." + }, + "Tool": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "tool", + "default": "tool", + "description": "Type of resource, always 'tool'" + }, + "toolgroup_id": { + "type": "string", + "description": "ID of the tool group this tool belongs to" + }, + "description": { + "type": "string", + "description": "Human-readable description of what the tool does" + }, + "parameters": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolParameter" + }, + "description": "List of parameters this tool accepts" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Additional metadata about the 
tool" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type", + "toolgroup_id", + "description", + "parameters" + ], + "title": "Tool", + "description": "A tool that can be invoked by agents." + }, + "ToolGroup": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "tool_group", + "default": "tool_group", + "description": "Type of resource, always 'tool_group'" + }, + "mcp_endpoint": { + "$ref": "#/components/schemas/URL", + "description": "(Optional) Model Context Protocol endpoint for remote tools" + }, + "args": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Additional arguments for the tool group" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type" + ], + "title": "ToolGroup", + "description": "A group of related tools managed together." 
+ }, + "Trace": { + "type": "object", + "properties": { + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace" + }, + "root_span_id": { + "type": "string", + "description": "Unique identifier for the root span that started this trace" + }, + "start_time": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the trace began" + }, + "end_time": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the trace finished, if completed" + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "root_span_id", + "start_time" + ], + "title": "Trace", + "description": "A trace representing the complete execution path of a request across multiple operations." + }, + "Checkpoint": { + "type": "object", + "properties": { + "identifier": { + "type": "string", + "description": "Unique identifier for the checkpoint" + }, + "created_at": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the checkpoint was created" + }, + "epoch": { + "type": "integer", + "description": "Training epoch when the checkpoint was saved" + }, + "post_training_job_id": { + "type": "string", + "description": "Identifier of the training job that created this checkpoint" + }, + "path": { + "type": "string", + "description": "File system path where the checkpoint is stored" + }, + "training_metrics": { + "$ref": "#/components/schemas/PostTrainingMetric", + "description": "(Optional) Training metrics associated with this checkpoint" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "created_at", + "epoch", + "post_training_job_id", + "path" + ], + "title": "Checkpoint", + "description": "Checkpoint created during training runs." 
+ }, + "PostTrainingJobArtifactsResponse": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string", + "description": "Unique identifier for the training job" + }, + "checkpoints": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Checkpoint" + }, + "description": "List of model checkpoints created during training" + } + }, + "additionalProperties": false, + "required": [ + "job_uuid", + "checkpoints" + ], + "title": "PostTrainingJobArtifactsResponse", + "description": "Artifacts of a finetuning job." + }, + "PostTrainingMetric": { + "type": "object", + "properties": { + "epoch": { + "type": "integer", + "description": "Training epoch number" + }, + "train_loss": { + "type": "number", + "description": "Loss value on the training dataset" + }, + "validation_loss": { + "type": "number", + "description": "Loss value on the validation dataset" + }, + "perplexity": { + "type": "number", + "description": "Perplexity metric indicating model confidence" + } + }, + "additionalProperties": false, + "required": [ + "epoch", + "train_loss", + "validation_loss", + "perplexity" + ], + "title": "PostTrainingMetric", + "description": "Training metrics captured during post-training jobs." 
+ }, + "PostTrainingJobStatusResponse": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string", + "description": "Unique identifier for the training job" + }, + "status": { + "type": "string", + "enum": [ + "completed", + "in_progress", + "failed", + "scheduled", + "cancelled" + ], + "description": "Current status of the training job" + }, + "scheduled_at": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the job was scheduled" + }, + "started_at": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the job execution began" + }, + "completed_at": { + "type": "string", + "format": "date-time", + "description": "(Optional) Timestamp when the job finished, if completed" + }, + "resources_allocated": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Information about computational resources allocated to the job" + }, + "checkpoints": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Checkpoint" + }, + "description": "List of model checkpoints created during training" + } + }, + "additionalProperties": false, + "required": [ + "job_uuid", + "status", + "checkpoints" + ], + "title": "PostTrainingJobStatusResponse", + "description": "Status of a finetuning job." 
+ }, + "ListPostTrainingJobsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "job_uuid" + ], + "title": "PostTrainingJob" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListPostTrainingJobsResponse" + }, + "VectorDB": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_resource_id": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "model", + "shield", + "vector_db", + "dataset", + "scoring_function", + "benchmark", + "tool", + "tool_group", + "prompt" + ], + "const": "vector_db", + "default": "vector_db", + "description": "Type of resource, always 'vector_db' for vector databases" + }, + "embedding_model": { + "type": "string", + "description": "Name of the embedding model to use for vector generation" + }, + "embedding_dimension": { + "type": "integer", + "description": "Dimension of the embedding vectors" + }, + "vector_db_name": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "provider_id", + "type", + "embedding_model", + "embedding_dimension" + ], + "title": "VectorDB", + "description": "Vector database resource for storing and querying vector embeddings." + }, + "HealthInfo": { + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": [ + "OK", + "Error", + "Not Implemented" + ], + "description": "Current health status of the service" + } + }, + "additionalProperties": false, + "required": [ + "status" + ], + "title": "HealthInfo", + "description": "Health status information for the service." + }, + "RAGDocument": { + "type": "object", + "properties": { + "document_id": { + "type": "string", + "description": "The unique identifier for the document." 
+ }, + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/InterleavedContentItem" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/InterleavedContentItem" + } + }, + { + "$ref": "#/components/schemas/URL" + } + ], + "description": "The content of the document." + }, + "mime_type": { + "type": "string", + "description": "The MIME type of the document." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Additional metadata for the document." + } + }, + "additionalProperties": false, + "required": [ + "document_id", + "content", + "metadata" + ], + "title": "RAGDocument", + "description": "A document to be used for document ingestion in the RAG Tool." + }, + "InsertRequest": { + "type": "object", + "properties": { + "documents": { + "type": "array", + "items": { + "$ref": "#/components/schemas/RAGDocument" + }, + "description": "List of documents to index in the RAG system" + }, + "vector_db_id": { + "type": "string", + "description": "ID of the vector database to store the document embeddings" + }, + "chunk_size_in_tokens": { + "type": "integer", + "description": "(Optional) Size in tokens for document chunking during indexing" + } + }, + "additionalProperties": false, + "required": [ + "documents", + "vector_db_id", + "chunk_size_in_tokens" + ], + "title": "InsertRequest" + }, + "Chunk": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "The content of the chunk, which can be interleaved text, images, or other types." 
+ }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Metadata associated with the chunk that will be used in the model context during inference." + }, + "embedding": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Optional embedding for the chunk. If not provided, it will be computed later." + }, + "stored_chunk_id": { + "type": "string", + "description": "The chunk ID that is stored in the vector database. Used for backend functionality." + }, + "chunk_metadata": { + "$ref": "#/components/schemas/ChunkMetadata", + "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality." + } + }, + "additionalProperties": false, + "required": [ + "content", + "metadata" + ], + "title": "Chunk", + "description": "A chunk of content that can be inserted into a vector database." + }, + "ChunkMetadata": { + "type": "object", + "properties": { + "chunk_id": { + "type": "string", + "description": "The ID of the chunk. If not set, it will be generated based on the document ID and content." + }, + "document_id": { + "type": "string", + "description": "The ID of the document this chunk belongs to." + }, + "source": { + "type": "string", + "description": "The source of the content, such as a URL, file path, or other identifier." + }, + "created_timestamp": { + "type": "integer", + "description": "An optional timestamp indicating when the chunk was created." + }, + "updated_timestamp": { + "type": "integer", + "description": "An optional timestamp indicating when the chunk was last updated." + }, + "chunk_window": { + "type": "string", + "description": "The window of the chunk, which can be used to group related chunks together." 
+ }, + "chunk_tokenizer": { + "type": "string", + "description": "The tokenizer used to create the chunk. Default is Tiktoken." + }, + "chunk_embedding_model": { + "type": "string", + "description": "The embedding model used to create the chunk's embedding." + }, + "chunk_embedding_dimension": { + "type": "integer", + "description": "The dimension of the embedding vector for the chunk." + }, + "content_token_count": { + "type": "integer", + "description": "The number of tokens in the content of the chunk." + }, + "metadata_token_count": { + "type": "integer", + "description": "The number of tokens in the metadata of the chunk." + } + }, + "additionalProperties": false, + "title": "ChunkMetadata", + "description": "`ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional information about the chunk that will not be used in the context during inference, but is required for backend functionality. The `ChunkMetadata` is set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not expected to change after. Use `Chunk.metadata` for metadata that will be used in the context during inference." + }, + "InsertChunksRequest": { + "type": "object", + "properties": { + "vector_db_id": { + "type": "string", + "description": "The identifier of the vector database to insert the chunks into." + }, + "chunks": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Chunk" + }, + "description": "The chunks to insert. Each `Chunk` should contain content which can be interleaved text, images, or other types. `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are optional. If `metadata` is provided, you configure how Llama Stack formats the chunk during generation. If `embedding` is not provided, it will be computed later." + }, + "ttl_seconds": { + "type": "integer", + "description": "The time to live of the chunks." 
+ } + }, + "additionalProperties": false, + "required": [ + "vector_db_id", + "chunks" + ], + "title": "InsertChunksRequest" + }, + "ProviderInfo": { + "type": "object", + "properties": { + "api": { + "type": "string", + "description": "The API name this provider implements" + }, + "provider_id": { + "type": "string", + "description": "Unique identifier for the provider" + }, + "provider_type": { + "type": "string", + "description": "The type of provider implementation" + }, + "config": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Configuration parameters for the provider" + }, + "health": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Current health status of the provider" + } + }, + "additionalProperties": false, + "required": [ + "api", + "provider_id", + "provider_type", + "config", + "health" + ], + "title": "ProviderInfo", + "description": "Information about a registered provider including its configuration and health status." + }, + "InvokeToolRequest": { + "type": "object", + "properties": { + "tool_name": { + "type": "string", + "description": "The name of the tool to invoke." + }, + "kwargs": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "A dictionary of arguments to pass to the tool." 
+ } + }, + "additionalProperties": false, + "required": [ + "tool_name", + "kwargs" + ], + "title": "InvokeToolRequest" + }, + "ToolInvocationResult": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "(Optional) The output content from the tool execution" + }, + "error_message": { + "type": "string", + "description": "(Optional) Error message if the tool execution failed" + }, + "error_code": { + "type": "integer", + "description": "(Optional) Numeric error code if the tool execution failed" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Additional metadata about the tool execution" + } + }, + "additionalProperties": false, + "title": "ToolInvocationResult", + "description": "Result of a tool invocation." + }, + "PaginatedResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "The list of items for the current page" + }, + "has_more": { + "type": "boolean", + "description": "Whether there are more items available after this set" + }, + "url": { + "type": "string", + "description": "The URL for accessing this list" + } + }, + "additionalProperties": false, + "required": [ + "data", + "has_more" + ], + "title": "PaginatedResponse", + "description": "A generic paginated response that follows a simple format." 
+ }, + "Job": { + "type": "object", + "properties": { + "job_id": { + "type": "string", + "description": "Unique identifier for the job" + }, + "status": { + "type": "string", + "enum": [ + "completed", + "in_progress", + "failed", + "scheduled", + "cancelled" + ], + "description": "Current execution status of the job" + } + }, + "additionalProperties": false, + "required": [ + "job_id", + "status" + ], + "title": "Job", + "description": "A job execution instance with status tracking." + }, + "ListBenchmarksResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Benchmark" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListBenchmarksResponse" + }, + "Order": { + "type": "string", + "enum": [ + "asc", + "desc" + ], + "title": "Order", + "description": "Sort order for paginated responses." + }, + "ListOpenAIChatCompletionResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The ID of the chat completion" + }, + "choices": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIChoice" + }, + "description": "List of choices" + }, + "object": { + "type": "string", + "const": "chat.completion", + "default": "chat.completion", + "description": "The object type, which will be \"chat.completion\"" + }, + "created": { + "type": "integer", + "description": "The Unix timestamp in seconds when the chat completion was created" + }, + "model": { + "type": "string", + "description": "The model that was used to generate the chat completion" + }, + "input_messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIMessageParam" + } + } + }, + "additionalProperties": false, + "required": [ + "id", + "choices", + "object", + "created", + "model", + "input_messages" + ], + "title": 
"OpenAICompletionWithInputMessages" + }, + "description": "List of chat completion objects with their input messages" + }, + "has_more": { + "type": "boolean", + "description": "Whether there are more completions available beyond this list" + }, + "first_id": { + "type": "string", + "description": "ID of the first completion in this list" + }, + "last_id": { + "type": "string", + "description": "ID of the last completion in this list" + }, + "object": { + "type": "string", + "const": "list", + "default": "list", + "description": "Must be \"list\" to identify this as a list response" + } + }, + "additionalProperties": false, + "required": [ + "data", + "has_more", + "first_id", + "last_id", + "object" + ], + "title": "ListOpenAIChatCompletionResponse", + "description": "Response from listing OpenAI-compatible chat completions." + }, + "ListDatasetsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Dataset" + }, + "description": "List of datasets" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListDatasetsResponse", + "description": "Response from listing datasets." 
+ }, + "ListModelsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Model" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListModelsResponse" + }, + "ListOpenAIResponseInputItem": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInput" + }, + "description": "List of input items" + }, + "object": { + "type": "string", + "const": "list", + "default": "list", + "description": "Object type identifier, always \"list\"" + } + }, + "additionalProperties": false, + "required": [ + "data", + "object" + ], + "title": "ListOpenAIResponseInputItem", + "description": "List container for OpenAI response input items." + }, + "ListOpenAIResponseObject": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseObjectWithInput" + }, + "description": "List of response objects with their input context" + }, + "has_more": { + "type": "boolean", + "description": "Whether there are more results available beyond this page" + }, + "first_id": { + "type": "string", + "description": "Identifier of the first item in this page" + }, + "last_id": { + "type": "string", + "description": "Identifier of the last item in this page" + }, + "object": { + "type": "string", + "const": "list", + "default": "list", + "description": "Object type identifier, always \"list\"" + } + }, + "additionalProperties": false, + "required": [ + "data", + "has_more", + "first_id", + "last_id", + "object" + ], + "title": "ListOpenAIResponseObject", + "description": "Paginated list of OpenAI response objects with navigation metadata." 
+ }, + "OpenAIResponseObjectWithInput": { + "type": "object", + "properties": { + "created_at": { + "type": "integer", + "description": "Unix timestamp when the response was created" + }, + "error": { + "$ref": "#/components/schemas/OpenAIResponseError", + "description": "(Optional) Error details if the response generation failed" + }, + "id": { + "type": "string", + "description": "Unique identifier for this response" + }, + "model": { + "type": "string", + "description": "Model identifier used for generation" + }, + "object": { + "type": "string", + "const": "response", + "default": "response", + "description": "Object type identifier, always \"response\"" + }, + "output": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseOutput" + }, + "description": "List of generated output items (messages, tool calls, etc.)" + }, + "parallel_tool_calls": { + "type": "boolean", + "default": false, + "description": "Whether tool calls can be executed in parallel" + }, + "previous_response_id": { + "type": "string", + "description": "(Optional) ID of the previous response in a conversation" + }, + "status": { + "type": "string", + "description": "Current status of the response generation" + }, + "temperature": { + "type": "number", + "description": "(Optional) Sampling temperature used for generation" + }, + "text": { + "$ref": "#/components/schemas/OpenAIResponseText", + "description": "Text formatting configuration for the response" + }, + "top_p": { + "type": "number", + "description": "(Optional) Nucleus sampling parameter used for generation" + }, + "truncation": { + "type": "string", + "description": "(Optional) Truncation strategy applied to the response" + }, + "user": { + "type": "string", + "description": "(Optional) User identifier associated with the request" + }, + "input": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OpenAIResponseInput" + }, + "description": "List of input items that led to this response" + } + }, 
+ "additionalProperties": false, + "required": [ + "created_at", + "id", + "model", + "object", + "output", + "parallel_tool_calls", + "status", + "text", + "input" + ], + "title": "OpenAIResponseObjectWithInput", + "description": "OpenAI response object extended with input context information." + }, + "ListPromptsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Prompt" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListPromptsResponse", + "description": "Response model to list prompts." + }, + "ListProvidersResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ProviderInfo" + }, + "description": "List of provider information objects" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListProvidersResponse", + "description": "Response containing a list of all available providers." + }, + "RouteInfo": { + "type": "object", + "properties": { + "route": { + "type": "string", + "description": "The API endpoint path" + }, + "method": { + "type": "string", + "description": "HTTP method for the route" + }, + "provider_types": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of provider types that implement this route" + } + }, + "additionalProperties": false, + "required": [ + "route", + "method", + "provider_types" + ], + "title": "RouteInfo", + "description": "Information about an API route including its path, method, and implementing providers." 
+ }, + "ListRoutesResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/RouteInfo" + }, + "description": "List of available route information objects" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListRoutesResponse", + "description": "Response containing a list of all available API routes." + }, + "ListToolDefsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolDef" + }, + "description": "List of tool definitions" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListToolDefsResponse", + "description": "Response containing a list of tool definitions." + }, + "ListScoringFunctionsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ScoringFn" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListScoringFunctionsResponse" + }, + "ListShieldsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Shield" + } + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListShieldsResponse" + }, + "ListToolGroupsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolGroup" + }, + "description": "List of tool groups" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListToolGroupsResponse", + "description": "Response containing a list of tool groups." 
+ }, + "ListToolsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Tool" + }, + "description": "List of tools" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListToolsResponse", + "description": "Response containing a list of tools." + }, + "ListVectorDBsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/VectorDB" + }, + "description": "List of vector databases" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "ListVectorDBsResponse", + "description": "Response from listing vector databases." + }, + "Event": { + "oneOf": [ + { + "$ref": "#/components/schemas/UnstructuredLogEvent" + }, + { + "$ref": "#/components/schemas/MetricEvent" + }, + { + "$ref": "#/components/schemas/StructuredLogEvent" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "unstructured_log": "#/components/schemas/UnstructuredLogEvent", + "metric": "#/components/schemas/MetricEvent", + "structured_log": "#/components/schemas/StructuredLogEvent" + } + } + }, + "EventType": { + "type": "string", + "enum": [ + "unstructured_log", + "structured_log", + "metric" + ], + "title": "EventType", + "description": "The type of telemetry event being logged." + }, + "LogSeverity": { + "type": "string", + "enum": [ + "verbose", + "debug", + "info", + "warn", + "error", + "critical" + ], + "title": "LogSeverity", + "description": "The severity level of a log message." 
+ }, + "MetricEvent": { + "type": "object", + "properties": { + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace this event belongs to" + }, + "span_id": { + "type": "string", + "description": "Unique identifier for the span this event belongs to" + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the event occurred" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ] + }, + "description": "(Optional) Key-value pairs containing additional metadata about the event" + }, + "type": { + "$ref": "#/components/schemas/EventType", + "const": "metric", + "default": "metric", + "description": "Event type identifier set to METRIC" + }, + "metric": { + "type": "string", + "description": "The name of the metric being measured" + }, + "value": { + "oneOf": [ + { + "type": "integer" + }, + { + "type": "number" + } + ], + "description": "The numeric value of the metric measurement" + }, + "unit": { + "type": "string", + "description": "The unit of measurement for the metric value" + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "span_id", + "timestamp", + "type", + "metric", + "value", + "unit" + ], + "title": "MetricEvent", + "description": "A metric event containing a measured value." 
+ }, + "SpanEndPayload": { + "type": "object", + "properties": { + "type": { + "$ref": "#/components/schemas/StructuredLogType", + "const": "span_end", + "default": "span_end", + "description": "Payload type identifier set to SPAN_END" + }, + "status": { + "$ref": "#/components/schemas/SpanStatus", + "description": "The final status of the span indicating success or failure" + } + }, + "additionalProperties": false, + "required": [ + "type", + "status" + ], + "title": "SpanEndPayload", + "description": "Payload for a span end event." + }, + "SpanStartPayload": { + "type": "object", + "properties": { + "type": { + "$ref": "#/components/schemas/StructuredLogType", + "const": "span_start", + "default": "span_start", + "description": "Payload type identifier set to SPAN_START" + }, + "name": { + "type": "string", + "description": "Human-readable name describing the operation this span represents" + }, + "parent_span_id": { + "type": "string", + "description": "(Optional) Unique identifier for the parent span, if this is a child span" + } + }, + "additionalProperties": false, + "required": [ + "type", + "name" + ], + "title": "SpanStartPayload", + "description": "Payload for a span start event." 
+ }, + "StructuredLogEvent": { + "type": "object", + "properties": { + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace this event belongs to" + }, + "span_id": { + "type": "string", + "description": "Unique identifier for the span this event belongs to" + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the event occurred" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ] + }, + "description": "(Optional) Key-value pairs containing additional metadata about the event" + }, + "type": { + "$ref": "#/components/schemas/EventType", + "const": "structured_log", + "default": "structured_log", + "description": "Event type identifier set to STRUCTURED_LOG" + }, + "payload": { + "$ref": "#/components/schemas/StructuredLogPayload", + "description": "The structured payload data for the log event" + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "span_id", + "timestamp", + "type", + "payload" + ], + "title": "StructuredLogEvent", + "description": "A structured log event containing typed payload data." + }, + "StructuredLogPayload": { + "oneOf": [ + { + "$ref": "#/components/schemas/SpanStartPayload" + }, + { + "$ref": "#/components/schemas/SpanEndPayload" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "span_start": "#/components/schemas/SpanStartPayload", + "span_end": "#/components/schemas/SpanEndPayload" + } + } + }, + "StructuredLogType": { + "type": "string", + "enum": [ + "span_start", + "span_end" + ], + "title": "StructuredLogType", + "description": "The type of structured log event payload." 
+ }, + "UnstructuredLogEvent": { + "type": "object", + "properties": { + "trace_id": { + "type": "string", + "description": "Unique identifier for the trace this event belongs to" + }, + "span_id": { + "type": "string", + "description": "Unique identifier for the span this event belongs to" + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "Timestamp when the event occurred" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ] + }, + "description": "(Optional) Key-value pairs containing additional metadata about the event" + }, + "type": { + "$ref": "#/components/schemas/EventType", + "const": "unstructured_log", + "default": "unstructured_log", + "description": "Event type identifier set to UNSTRUCTURED_LOG" + }, + "message": { + "type": "string", + "description": "The log message text" + }, + "severity": { + "$ref": "#/components/schemas/LogSeverity", + "description": "The severity level of the log message" + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "span_id", + "timestamp", + "type", + "message", + "severity" + ], + "title": "UnstructuredLogEvent", + "description": "An unstructured log event containing a simple text message." + }, + "LogEventRequest": { + "type": "object", + "properties": { + "event": { + "$ref": "#/components/schemas/Event", + "description": "The event to log." + }, + "ttl_seconds": { + "type": "integer", + "description": "The time to live of the event." 
+ } + }, + "additionalProperties": false, + "required": [ + "event", + "ttl_seconds" + ], + "title": "LogEventRequest" + }, + "VectorStoreChunkingStrategy": { + "oneOf": [ + { + "$ref": "#/components/schemas/VectorStoreChunkingStrategyAuto" + }, + { + "$ref": "#/components/schemas/VectorStoreChunkingStrategyStatic" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "auto": "#/components/schemas/VectorStoreChunkingStrategyAuto", + "static": "#/components/schemas/VectorStoreChunkingStrategyStatic" + } + } + }, + "VectorStoreChunkingStrategyAuto": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "auto", + "default": "auto", + "description": "Strategy type, always \"auto\" for automatic chunking" + } + }, + "additionalProperties": false, + "required": [ + "type" + ], + "title": "VectorStoreChunkingStrategyAuto", + "description": "Automatic chunking strategy for vector store files." + }, + "VectorStoreChunkingStrategyStatic": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "static", + "default": "static", + "description": "Strategy type, always \"static\" for static chunking" + }, + "static": { + "$ref": "#/components/schemas/VectorStoreChunkingStrategyStaticConfig", + "description": "Configuration parameters for the static chunking strategy" + } + }, + "additionalProperties": false, + "required": [ + "type", + "static" + ], + "title": "VectorStoreChunkingStrategyStatic", + "description": "Static chunking strategy with configurable parameters." 
+ }, + "VectorStoreChunkingStrategyStaticConfig": { + "type": "object", + "properties": { + "chunk_overlap_tokens": { + "type": "integer", + "default": 400, + "description": "Number of tokens to overlap between adjacent chunks" + }, + "max_chunk_size_tokens": { + "type": "integer", + "default": 800, + "description": "Maximum number of tokens per chunk, must be between 100 and 4096" + } + }, + "additionalProperties": false, + "required": [ + "chunk_overlap_tokens", + "max_chunk_size_tokens" + ], + "title": "VectorStoreChunkingStrategyStaticConfig", + "description": "Configuration for static chunking strategy." + }, + "OpenaiAttachFileToVectorStoreRequest": { + "type": "object", + "properties": { + "file_id": { + "type": "string", + "description": "The ID of the file to attach to the vector store." + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The key-value attributes stored with the file, which can be used for filtering." + }, + "chunking_strategy": { + "$ref": "#/components/schemas/VectorStoreChunkingStrategy", + "description": "The chunking strategy to use for the file." 
+ } + }, + "additionalProperties": false, + "required": [ + "file_id" + ], + "title": "OpenaiAttachFileToVectorStoreRequest" + }, + "VectorStoreFileLastError": { + "type": "object", + "properties": { + "code": { + "oneOf": [ + { + "type": "string", + "const": "server_error" + }, + { + "type": "string", + "const": "rate_limit_exceeded" + } + ], + "description": "Error code indicating the type of failure" + }, + "message": { + "type": "string", + "description": "Human-readable error message describing the failure" + } + }, + "additionalProperties": false, + "required": [ + "code", + "message" + ], + "title": "VectorStoreFileLastError", + "description": "Error information for failed vector store file processing." + }, + "VectorStoreFileObject": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique identifier for the file" + }, + "object": { + "type": "string", + "default": "vector_store.file", + "description": "Object type identifier, always \"vector_store.file\"" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Key-value attributes associated with the file" + }, + "chunking_strategy": { + "$ref": "#/components/schemas/VectorStoreChunkingStrategy", + "description": "Strategy used for splitting the file into chunks" + }, + "created_at": { + "type": "integer", + "description": "Timestamp when the file was added to the vector store" + }, + "last_error": { + "$ref": "#/components/schemas/VectorStoreFileLastError", + "description": "(Optional) Error information if file processing failed" + }, + "status": { + "$ref": "#/components/schemas/VectorStoreFileStatus", + "description": "Current processing status of the file" + }, + "usage_bytes": { + "type": "integer", + "default": 0, + "description": "Storage space 
used by this file in bytes" + }, + "vector_store_id": { + "type": "string", + "description": "ID of the vector store containing this file" + } + }, + "additionalProperties": false, + "required": [ + "id", + "object", + "attributes", + "chunking_strategy", + "created_at", + "status", + "usage_bytes", + "vector_store_id" + ], + "title": "VectorStoreFileObject", + "description": "OpenAI Vector Store File object." + }, + "VectorStoreFileStatus": { + "oneOf": [ + { + "type": "string", + "const": "completed" + }, + { + "type": "string", + "const": "in_progress" + }, + { + "type": "string", + "const": "cancelled" + }, + { + "type": "string", + "const": "failed" + } + ] + }, "OpenAIJSONSchema": { "type": "object", "properties": { @@ -12782,6 +15610,2170 @@ "title": "VectorStoreSearchResponsePage", "description": "Paginated response from searching a vector store." }, + "OpenaiUpdateVectorStoreRequest": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the vector store." + }, + "expires_after": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The expiration policy for a vector store." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Set of 16 key-value pairs that can be attached to an object." 
+ } + }, + "additionalProperties": false, + "title": "OpenaiUpdateVectorStoreRequest" + }, + "OpenaiUpdateVectorStoreFileRequest": { + "type": "object", + "properties": { + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The updated key-value attributes to store with the file." + } + }, + "additionalProperties": false, + "required": [ + "attributes" + ], + "title": "OpenaiUpdateVectorStoreFileRequest" + }, + "DPOAlignmentConfig": { + "type": "object", + "properties": { + "beta": { + "type": "number", + "description": "Temperature parameter for the DPO loss" + }, + "loss_type": { + "$ref": "#/components/schemas/DPOLossType", + "default": "sigmoid", + "description": "The type of loss function to use for DPO" + } + }, + "additionalProperties": false, + "required": [ + "beta", + "loss_type" + ], + "title": "DPOAlignmentConfig", + "description": "Configuration for Direct Preference Optimization (DPO) alignment." 
+ }, + "DPOLossType": { + "type": "string", + "enum": [ + "sigmoid", + "hinge", + "ipo", + "kto_pair" + ], + "title": "DPOLossType" + }, + "DataConfig": { + "type": "object", + "properties": { + "dataset_id": { + "type": "string", + "description": "Unique identifier for the training dataset" + }, + "batch_size": { + "type": "integer", + "description": "Number of samples per training batch" + }, + "shuffle": { + "type": "boolean", + "description": "Whether to shuffle the dataset during training" + }, + "data_format": { + "$ref": "#/components/schemas/DatasetFormat", + "description": "Format of the dataset (instruct or dialog)" + }, + "validation_dataset_id": { + "type": "string", + "description": "(Optional) Unique identifier for the validation dataset" + }, + "packed": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to pack multiple samples into a single sequence for efficiency" + }, + "train_on_input": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to compute loss on input tokens as well as output tokens" + } + }, + "additionalProperties": false, + "required": [ + "dataset_id", + "batch_size", + "shuffle", + "data_format" + ], + "title": "DataConfig", + "description": "Configuration for training data and data loading." + }, + "DatasetFormat": { + "type": "string", + "enum": [ + "instruct", + "dialog" + ], + "title": "DatasetFormat", + "description": "Format of the training dataset." 
+ }, + "EfficiencyConfig": { + "type": "object", + "properties": { + "enable_activation_checkpointing": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to use activation checkpointing to reduce memory usage" + }, + "enable_activation_offloading": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to offload activations to CPU to save GPU memory" + }, + "memory_efficient_fsdp_wrap": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to use memory-efficient FSDP wrapping" + }, + "fsdp_cpu_offload": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to offload FSDP parameters to CPU" + } + }, + "additionalProperties": false, + "title": "EfficiencyConfig", + "description": "Configuration for memory and compute efficiency optimizations." + }, + "OptimizerConfig": { + "type": "object", + "properties": { + "optimizer_type": { + "$ref": "#/components/schemas/OptimizerType", + "description": "Type of optimizer to use (adam, adamw, or sgd)" + }, + "lr": { + "type": "number", + "description": "Learning rate for the optimizer" + }, + "weight_decay": { + "type": "number", + "description": "Weight decay coefficient for regularization" + }, + "num_warmup_steps": { + "type": "integer", + "description": "Number of steps for learning rate warmup" + } + }, + "additionalProperties": false, + "required": [ + "optimizer_type", + "lr", + "weight_decay", + "num_warmup_steps" + ], + "title": "OptimizerConfig", + "description": "Configuration parameters for the optimization algorithm." + }, + "OptimizerType": { + "type": "string", + "enum": [ + "adam", + "adamw", + "sgd" + ], + "title": "OptimizerType", + "description": "Available optimizer algorithms for training." 
+ }, + "TrainingConfig": { + "type": "object", + "properties": { + "n_epochs": { + "type": "integer", + "description": "Number of training epochs to run" + }, + "max_steps_per_epoch": { + "type": "integer", + "default": 1, + "description": "Maximum number of steps to run per epoch" + }, + "gradient_accumulation_steps": { + "type": "integer", + "default": 1, + "description": "Number of steps to accumulate gradients before updating" + }, + "max_validation_steps": { + "type": "integer", + "default": 1, + "description": "(Optional) Maximum number of validation steps per epoch" + }, + "data_config": { + "$ref": "#/components/schemas/DataConfig", + "description": "(Optional) Configuration for data loading and formatting" + }, + "optimizer_config": { + "$ref": "#/components/schemas/OptimizerConfig", + "description": "(Optional) Configuration for the optimization algorithm" + }, + "efficiency_config": { + "$ref": "#/components/schemas/EfficiencyConfig", + "description": "(Optional) Configuration for memory and compute optimizations" + }, + "dtype": { + "type": "string", + "default": "bf16", + "description": "(Optional) Data type for model parameters (bf16, fp16, fp32)" + } + }, + "additionalProperties": false, + "required": [ + "n_epochs", + "max_steps_per_epoch", + "gradient_accumulation_steps" + ], + "title": "TrainingConfig", + "description": "Comprehensive configuration for the training process." + }, + "PreferenceOptimizeRequest": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string", + "description": "The UUID of the job to create." + }, + "finetuned_model": { + "type": "string", + "description": "The model to fine-tune." + }, + "algorithm_config": { + "$ref": "#/components/schemas/DPOAlignmentConfig", + "description": "The algorithm configuration." + }, + "training_config": { + "$ref": "#/components/schemas/TrainingConfig", + "description": "The training configuration." 
+ }, + "hyperparam_search_config": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The hyperparam search configuration." + }, + "logger_config": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The logger configuration." + } + }, + "additionalProperties": false, + "required": [ + "job_uuid", + "finetuned_model", + "algorithm_config", + "training_config", + "hyperparam_search_config", + "logger_config" + ], + "title": "PreferenceOptimizeRequest" + }, + "PostTrainingJob": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "job_uuid" + ], + "title": "PostTrainingJob" + }, + "DefaultRAGQueryGeneratorConfig": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "default", + "default": "default", + "description": "Type of query generator, always 'default'" + }, + "separator": { + "type": "string", + "default": " ", + "description": "String separator used to join query terms" + } + }, + "additionalProperties": false, + "required": [ + "type", + "separator" + ], + "title": "DefaultRAGQueryGeneratorConfig", + "description": "Configuration for the default RAG query generator." 
+ }, + "LLMRAGQueryGeneratorConfig": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "llm", + "default": "llm", + "description": "Type of query generator, always 'llm'" + }, + "model": { + "type": "string", + "description": "Name of the language model to use for query generation" + }, + "template": { + "type": "string", + "description": "Template string for formatting the query generation prompt" + } + }, + "additionalProperties": false, + "required": [ + "type", + "model", + "template" + ], + "title": "LLMRAGQueryGeneratorConfig", + "description": "Configuration for the LLM-based RAG query generator." + }, + "RAGQueryConfig": { + "type": "object", + "properties": { + "query_generator_config": { + "$ref": "#/components/schemas/RAGQueryGeneratorConfig", + "description": "Configuration for the query generator." + }, + "max_tokens_in_context": { + "type": "integer", + "default": 4096, + "description": "Maximum number of tokens in the context." + }, + "max_chunks": { + "type": "integer", + "default": 5, + "description": "Maximum number of chunks to retrieve." + }, + "chunk_template": { + "type": "string", + "default": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n", + "description": "Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: \"Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n\"" + }, + "mode": { + "$ref": "#/components/schemas/RAGSearchMode", + "default": "vector", + "description": "Search mode for retrieval—either \"vector\", \"keyword\", or \"hybrid\". Default \"vector\"." + }, + "ranker": { + "$ref": "#/components/schemas/Ranker", + "description": "Configuration for the ranker to use in hybrid search. Defaults to RRF ranker." 
+ } + }, + "additionalProperties": false, + "required": [ + "query_generator_config", + "max_tokens_in_context", + "max_chunks", + "chunk_template" + ], + "title": "RAGQueryConfig", + "description": "Configuration for the RAG query generation." + }, + "RAGQueryGeneratorConfig": { + "oneOf": [ + { + "$ref": "#/components/schemas/DefaultRAGQueryGeneratorConfig" + }, + { + "$ref": "#/components/schemas/LLMRAGQueryGeneratorConfig" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "default": "#/components/schemas/DefaultRAGQueryGeneratorConfig", + "llm": "#/components/schemas/LLMRAGQueryGeneratorConfig" + } + } + }, + "RAGSearchMode": { + "type": "string", + "enum": [ + "vector", + "keyword", + "hybrid" + ], + "title": "RAGSearchMode", + "description": "Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search for semantic matching - KEYWORD: Uses keyword-based search for exact matching - HYBRID: Combines both vector and keyword search for better results" + }, + "RRFRanker": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "rrf", + "default": "rrf", + "description": "The type of ranker, always \"rrf\"" + }, + "impact_factor": { + "type": "number", + "default": 60.0, + "description": "The impact factor for RRF scoring. Higher values give more weight to higher-ranked results. Must be greater than 0" + } + }, + "additionalProperties": false, + "required": [ + "type", + "impact_factor" + ], + "title": "RRFRanker", + "description": "Reciprocal Rank Fusion (RRF) ranker configuration." 
+ }, + "Ranker": { + "oneOf": [ + { + "$ref": "#/components/schemas/RRFRanker" + }, + { + "$ref": "#/components/schemas/WeightedRanker" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "rrf": "#/components/schemas/RRFRanker", + "weighted": "#/components/schemas/WeightedRanker" + } + } + }, + "WeightedRanker": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "weighted", + "default": "weighted", + "description": "The type of ranker, always \"weighted\"" + }, + "alpha": { + "type": "number", + "default": 0.5, + "description": "Weight factor between 0 and 1. 0 means only use keyword scores, 1 means only use vector scores, values in between blend both scores." + } + }, + "additionalProperties": false, + "required": [ + "type", + "alpha" + ], + "title": "WeightedRanker", + "description": "Weighted ranker configuration that combines vector and keyword scores." + }, + "QueryRequest": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "The query content to search for in the indexed documents" + }, + "vector_db_ids": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of vector database IDs to search within" + }, + "query_config": { + "$ref": "#/components/schemas/RAGQueryConfig", + "description": "(Optional) Configuration parameters for the query operation" + } + }, + "additionalProperties": false, + "required": [ + "content", + "vector_db_ids" + ], + "title": "QueryRequest" + }, + "RAGQueryResult": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "(Optional) The retrieved content from the query" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + 
] + }, + "description": "Additional metadata about the query result" + } + }, + "additionalProperties": false, + "required": [ + "metadata" + ], + "title": "RAGQueryResult", + "description": "Result of a RAG query containing retrieved content and metadata." + }, + "QueryChunksRequest": { + "type": "object", + "properties": { + "vector_db_id": { + "type": "string", + "description": "The identifier of the vector database to query." + }, + "query": { + "$ref": "#/components/schemas/InterleavedContent", + "description": "The query to search for." + }, + "params": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The parameters of the query." + } + }, + "additionalProperties": false, + "required": [ + "vector_db_id", + "query" + ], + "title": "QueryChunksRequest" + }, + "QueryChunksResponse": { + "type": "object", + "properties": { + "chunks": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Chunk" + }, + "description": "List of content chunks returned from the query" + }, + "scores": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Relevance scores corresponding to each returned chunk" + } + }, + "additionalProperties": false, + "required": [ + "chunks", + "scores" + ], + "title": "QueryChunksResponse", + "description": "Response from querying chunks in a vector database." + }, + "QueryMetricsRequest": { + "type": "object", + "properties": { + "start_time": { + "type": "integer", + "description": "The start time of the metric to query." + }, + "end_time": { + "type": "integer", + "description": "The end time of the metric to query." + }, + "granularity": { + "type": "string", + "description": "The granularity of the metric to query." 
+ }, + "query_type": { + "type": "string", + "enum": [ + "range", + "instant" + ], + "description": "The type of query to perform." + }, + "label_matchers": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the label to match" + }, + "value": { + "type": "string", + "description": "The value to match against" + }, + "operator": { + "type": "string", + "enum": [ + "=", + "!=", + "=~", + "!~" + ], + "description": "The comparison operator to use for matching", + "default": "=" + } + }, + "additionalProperties": false, + "required": [ + "name", + "value", + "operator" + ], + "title": "MetricLabelMatcher", + "description": "A matcher for filtering metrics by label values." + }, + "description": "The label matchers to apply to the metric." + } + }, + "additionalProperties": false, + "required": [ + "start_time", + "query_type" + ], + "title": "QueryMetricsRequest" + }, + "MetricDataPoint": { + "type": "object", + "properties": { + "timestamp": { + "type": "integer", + "description": "Unix timestamp when the metric value was recorded" + }, + "value": { + "type": "number", + "description": "The numeric value of the metric at this timestamp" + }, + "unit": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "timestamp", + "value", + "unit" + ], + "title": "MetricDataPoint", + "description": "A single data point in a metric time series." + }, + "MetricLabel": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the label" + }, + "value": { + "type": "string", + "description": "The value of the label" + } + }, + "additionalProperties": false, + "required": [ + "name", + "value" + ], + "title": "MetricLabel", + "description": "A label associated with a metric." 
+ }, + "MetricSeries": { + "type": "object", + "properties": { + "metric": { + "type": "string", + "description": "The name of the metric" + }, + "labels": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MetricLabel" + }, + "description": "List of labels associated with this metric series" + }, + "values": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MetricDataPoint" + }, + "description": "List of data points in chronological order" + } + }, + "additionalProperties": false, + "required": [ + "metric", + "labels", + "values" + ], + "title": "MetricSeries", + "description": "A time series of metric data points." + }, + "QueryMetricsResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MetricSeries" + }, + "description": "List of metric series matching the query criteria" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "QueryMetricsResponse", + "description": "Response containing metric time series data." + }, + "QueryCondition": { + "type": "object", + "properties": { + "key": { + "type": "string", + "description": "The attribute key to filter on" + }, + "op": { + "$ref": "#/components/schemas/QueryConditionOp", + "description": "The comparison operator to apply" + }, + "value": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ], + "description": "The value to compare against" + } + }, + "additionalProperties": false, + "required": [ + "key", + "op", + "value" + ], + "title": "QueryCondition", + "description": "A condition for filtering query results." + }, + "QueryConditionOp": { + "type": "string", + "enum": [ + "eq", + "ne", + "gt", + "lt" + ], + "title": "QueryConditionOp", + "description": "Comparison operators for query conditions." 
+ }, + "QuerySpansRequest": { + "type": "object", + "properties": { + "attribute_filters": { + "type": "array", + "items": { + "$ref": "#/components/schemas/QueryCondition" + }, + "description": "The attribute filters to apply to the spans." + }, + "attributes_to_return": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The attributes to return in the spans." + }, + "max_depth": { + "type": "integer", + "description": "The maximum depth of the tree." + } + }, + "additionalProperties": false, + "required": [ + "attribute_filters", + "attributes_to_return" + ], + "title": "QuerySpansRequest" + }, + "QuerySpansResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Span" + }, + "description": "List of spans matching the query criteria" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "QuerySpansResponse", + "description": "Response containing a list of spans." + }, + "QueryTracesRequest": { + "type": "object", + "properties": { + "attribute_filters": { + "type": "array", + "items": { + "$ref": "#/components/schemas/QueryCondition" + }, + "description": "The attribute filters to apply to the traces." + }, + "limit": { + "type": "integer", + "description": "The limit of traces to return." + }, + "offset": { + "type": "integer", + "description": "The offset of the traces to return." + }, + "order_by": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The order by of the traces to return." 
+ } + }, + "additionalProperties": false, + "title": "QueryTracesRequest" + }, + "QueryTracesResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Trace" + }, + "description": "List of traces matching the query criteria" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "QueryTracesResponse", + "description": "Response containing a list of traces." + }, + "RegisterBenchmarkRequest": { + "type": "object", + "properties": { + "benchmark_id": { + "type": "string", + "description": "The ID of the benchmark to register." + }, + "dataset_id": { + "type": "string", + "description": "The ID of the dataset to use for the benchmark." + }, + "scoring_functions": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The scoring functions to use for the benchmark." + }, + "provider_benchmark_id": { + "type": "string", + "description": "The ID of the provider benchmark to use for the benchmark." + }, + "provider_id": { + "type": "string", + "description": "The ID of the provider to use for the benchmark." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The metadata to use for the benchmark." + } + }, + "additionalProperties": false, + "required": [ + "benchmark_id", + "dataset_id", + "scoring_functions" + ], + "title": "RegisterBenchmarkRequest" + }, + "RegisterDatasetRequest": { + "type": "object", + "properties": { + "purpose": { + "type": "string", + "enum": [ + "post-training/messages", + "eval/question-answer", + "eval/messages-answer" + ], + "description": "The purpose of the dataset. One of: - \"post-training/messages\": The dataset contains a messages column with list of messages for post-training. 
{ \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } - \"eval/question-answer\": The dataset contains a question column and an answer column for evaluation. { \"question\": \"What is the capital of France?\", \"answer\": \"Paris\" } - \"eval/messages-answer\": The dataset contains a messages column with list of messages and an answer column for evaluation. { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, my name is John Doe.\"}, {\"role\": \"assistant\", \"content\": \"Hello, John Doe. How can I help you today?\"}, {\"role\": \"user\", \"content\": \"What's my name?\"}, ], \"answer\": \"John Doe\" }" + }, + "source": { + "$ref": "#/components/schemas/DataSource", + "description": "The data source of the dataset. Ensure that the data source schema is compatible with the purpose of the dataset. Examples: - { \"type\": \"uri\", \"uri\": \"https://mywebsite.com/mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"lsfs://mydata.jsonl\" } - { \"type\": \"uri\", \"uri\": \"data:csv;base64,{base64_content}\" } - { \"type\": \"uri\", \"uri\": \"huggingface://llamastack/simpleqa?split=train\" } - { \"type\": \"rows\", \"rows\": [ { \"messages\": [ {\"role\": \"user\", \"content\": \"Hello, world!\"}, {\"role\": \"assistant\", \"content\": \"Hello, world!\"}, ] } ] }" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The metadata for the dataset. - E.g. {\"description\": \"My dataset\"}." + }, + "dataset_id": { + "type": "string", + "description": "The ID of the dataset. If not provided, an ID will be generated." 
+ } + }, + "additionalProperties": false, + "required": [ + "purpose", + "source" + ], + "title": "RegisterDatasetRequest" + }, + "RegisterModelRequest": { + "type": "object", + "properties": { + "model_id": { + "type": "string", + "description": "The identifier of the model to register." + }, + "provider_model_id": { + "type": "string", + "description": "The identifier of the model in the provider." + }, + "provider_id": { + "type": "string", + "description": "The identifier of the provider." + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "Any additional metadata for this model." + }, + "model_type": { + "$ref": "#/components/schemas/ModelType", + "description": "The type of model to register." + } + }, + "additionalProperties": false, + "required": [ + "model_id" + ], + "title": "RegisterModelRequest" + }, + "RegisterScoringFunctionRequest": { + "type": "object", + "properties": { + "scoring_fn_id": { + "type": "string", + "description": "The ID of the scoring function to register." + }, + "description": { + "type": "string", + "description": "The description of the scoring function." + }, + "return_type": { + "$ref": "#/components/schemas/ParamType", + "description": "The return type of the scoring function." + }, + "provider_scoring_fn_id": { + "type": "string", + "description": "The ID of the provider scoring function to use for the scoring function." + }, + "provider_id": { + "type": "string", + "description": "The ID of the provider to use for the scoring function." + }, + "params": { + "$ref": "#/components/schemas/ScoringFnParams", + "description": "The parameters for the scoring function for benchmark eval, these can be overridden for app eval." 
+ } + }, + "additionalProperties": false, + "required": [ + "scoring_fn_id", + "description", + "return_type" + ], + "title": "RegisterScoringFunctionRequest" + }, + "RegisterShieldRequest": { + "type": "object", + "properties": { + "shield_id": { + "type": "string", + "description": "The identifier of the shield to register." + }, + "provider_shield_id": { + "type": "string", + "description": "The identifier of the shield in the provider." + }, + "provider_id": { + "type": "string", + "description": "The identifier of the provider." + }, + "params": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The parameters of the shield." + } + }, + "additionalProperties": false, + "required": [ + "shield_id" + ], + "title": "RegisterShieldRequest" + }, + "RegisterToolGroupRequest": { + "type": "object", + "properties": { + "toolgroup_id": { + "type": "string", + "description": "The ID of the tool group to register." + }, + "provider_id": { + "type": "string", + "description": "The ID of the provider to use for the tool group." + }, + "mcp_endpoint": { + "$ref": "#/components/schemas/URL", + "description": "The MCP endpoint to use for the tool group." + }, + "args": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "A dictionary of arguments to pass to the tool group." 
+ } + }, + "additionalProperties": false, + "required": [ + "toolgroup_id", + "provider_id" + ], + "title": "RegisterToolGroupRequest" + }, + "RegisterVectorDbRequest": { + "type": "object", + "properties": { + "vector_db_id": { + "type": "string", + "description": "The identifier of the vector database to register." + }, + "embedding_model": { + "type": "string", + "description": "The embedding model to use." + }, + "embedding_dimension": { + "type": "integer", + "description": "The dimension of the embedding model." + }, + "provider_id": { + "type": "string", + "description": "The identifier of the provider." + }, + "vector_db_name": { + "type": "string", + "description": "The name of the vector database." + }, + "provider_vector_db_id": { + "type": "string", + "description": "The identifier of the vector database in the provider." + } + }, + "additionalProperties": false, + "required": [ + "vector_db_id", + "embedding_model" + ], + "title": "RegisterVectorDbRequest" + }, + "RerankRequest": { + "type": "object", + "properties": { + "model": { + "type": "string", + "description": "The identifier of the reranking model to use. The model must be a reranking model registered with Llama Stack and available via the /models endpoint." + }, + "query": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam" + }, + { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam" + } + ], + "description": "The search query to rank items against. Can be a string, text content part, or image content part. The input must not exceed the model's max input token length." + }, + "items": { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam" + }, + { + "$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam" + } + ] + }, + "description": "List of items to rerank. 
Each item can be a string, text content part, or image content part. Each input must not exceed the model's max input token length." + }, + "max_num_results": { + "type": "integer", + "description": "(Optional) Maximum number of results to return. Default: returns all." + } + }, + "additionalProperties": false, + "required": [ + "model", + "query", + "items" + ], + "title": "RerankRequest" + }, + "RerankData": { + "type": "object", + "properties": { + "index": { + "type": "integer", + "description": "The original index of the document in the input list" + }, + "relevance_score": { + "type": "number", + "description": "The relevance score from the model output. Values are inverted when applicable so that higher scores indicate greater relevance." + } + }, + "additionalProperties": false, + "required": [ + "index", + "relevance_score" + ], + "title": "RerankData", + "description": "A single rerank result from a reranking response." + }, + "RerankResponse": { + "type": "object", + "properties": { + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/RerankData" + }, + "description": "List of rerank result objects, sorted by relevance score (descending)" + } + }, + "additionalProperties": false, + "required": [ + "data" + ], + "title": "RerankResponse", + "description": "Response from a reranking request." + }, + "ResumeAgentTurnRequest": { + "type": "object", + "properties": { + "tool_responses": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolResponse" + }, + "description": "The tool call responses to resume the turn with." + }, + "stream": { + "type": "boolean", + "description": "Whether to stream the response." + } + }, + "additionalProperties": false, + "required": [ + "tool_responses" + ], + "title": "ResumeAgentTurnRequest" + }, + "RunEvalRequest": { + "type": "object", + "properties": { + "benchmark_config": { + "$ref": "#/components/schemas/BenchmarkConfig", + "description": "The configuration for the benchmark." 
+ } + }, + "additionalProperties": false, + "required": [ + "benchmark_config" + ], + "title": "RunEvalRequest" + }, + "RunModerationRequest": { + "type": "object", + "properties": { + "input": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ], + "description": "Input (or inputs) to classify. Can be a single string, an array of strings, or an array of multi-modal input objects similar to other models." + }, + "model": { + "type": "string", + "description": "The content moderation model you would like to use." + } + }, + "additionalProperties": false, + "required": [ + "input", + "model" + ], + "title": "RunModerationRequest" + }, + "ModerationObject": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The unique identifier for the moderation request." + }, + "model": { + "type": "string", + "description": "The model used to generate the moderation results." + }, + "results": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ModerationObjectResults" + }, + "description": "A list of moderation objects" + } + }, + "additionalProperties": false, + "required": [ + "id", + "model", + "results" + ], + "title": "ModerationObject", + "description": "A moderation object." + }, + "ModerationObjectResults": { + "type": "object", + "properties": { + "flagged": { + "type": "boolean", + "description": "Whether any of the below categories are flagged." + }, + "categories": { + "type": "object", + "additionalProperties": { + "type": "boolean" + }, + "description": "A list of the categories, and whether they are flagged or not." + }, + "category_applied_input_types": { + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "type": "string" + } + }, + "description": "A list of the categories along with the input type(s) that the score applies to." 
+ }, + "category_scores": { + "type": "object", + "additionalProperties": { + "type": "number" + }, + "description": "A list of the categories along with their scores as predicted by model." + }, + "user_message": { + "type": "string" + }, + "metadata": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "flagged", + "metadata" + ], + "title": "ModerationObjectResults", + "description": "A moderation object." + }, + "RunShieldRequest": { + "type": "object", + "properties": { + "shield_id": { + "type": "string", + "description": "The identifier of the shield to run." + }, + "messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + }, + "description": "The messages to run the shield on." + }, + "params": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The parameters of the shield." + } + }, + "additionalProperties": false, + "required": [ + "shield_id", + "messages", + "params" + ], + "title": "RunShieldRequest" + }, + "RunShieldResponse": { + "type": "object", + "properties": { + "violation": { + "$ref": "#/components/schemas/SafetyViolation", + "description": "(Optional) Safety violation detected by the shield, if any" + } + }, + "additionalProperties": false, + "title": "RunShieldResponse", + "description": "Response from running a safety shield." 
+ }, + "SaveSpansToDatasetRequest": { + "type": "object", + "properties": { + "attribute_filters": { + "type": "array", + "items": { + "$ref": "#/components/schemas/QueryCondition" + }, + "description": "The attribute filters to apply to the spans." + }, + "attributes_to_save": { + "type": "array", + "items": { + "type": "string" + }, + "description": "The attributes to save to the dataset." + }, + "dataset_id": { + "type": "string", + "description": "The ID of the dataset to save the spans to." + }, + "max_depth": { + "type": "integer", + "description": "The maximum depth of the tree." + } + }, + "additionalProperties": false, + "required": [ + "attribute_filters", + "attributes_to_save", + "dataset_id" + ], + "title": "SaveSpansToDatasetRequest" + }, + "ScoreRequest": { + "type": "object", + "properties": { + "input_rows": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "The rows to score." + }, + "scoring_functions": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "$ref": "#/components/schemas/ScoringFnParams" + }, + { + "type": "null" + } + ] + }, + "description": "The scoring functions to use for the scoring." + } + }, + "additionalProperties": false, + "required": [ + "input_rows", + "scoring_functions" + ], + "title": "ScoreRequest" + }, + "ScoreResponse": { + "type": "object", + "properties": { + "results": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ScoringResult" + }, + "description": "A map of scoring function name to ScoringResult." + } + }, + "additionalProperties": false, + "required": [ + "results" + ], + "title": "ScoreResponse", + "description": "The response from scoring." 
+ }, + "ScoreBatchRequest": { + "type": "object", + "properties": { + "dataset_id": { + "type": "string", + "description": "The ID of the dataset to score." + }, + "scoring_functions": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "$ref": "#/components/schemas/ScoringFnParams" + }, + { + "type": "null" + } + ] + }, + "description": "The scoring functions to use for the scoring." + }, + "save_results_dataset": { + "type": "boolean", + "description": "Whether to save the results to a dataset." + } + }, + "additionalProperties": false, + "required": [ + "dataset_id", + "scoring_functions", + "save_results_dataset" + ], + "title": "ScoreBatchRequest" + }, + "ScoreBatchResponse": { + "type": "object", + "properties": { + "dataset_id": { + "type": "string", + "description": "(Optional) The identifier of the dataset that was scored" + }, + "results": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ScoringResult" + }, + "description": "A map of scoring function name to ScoringResult" + } + }, + "additionalProperties": false, + "required": [ + "results" + ], + "title": "ScoreBatchResponse", + "description": "Response from batch scoring operations on datasets." + }, + "SetDefaultVersionRequest": { + "type": "object", + "properties": { + "version": { + "type": "integer", + "description": "The version to set as default." 
+ } + }, + "additionalProperties": false, + "required": [ + "version" + ], + "title": "SetDefaultVersionRequest" + }, + "AlgorithmConfig": { + "oneOf": [ + { + "$ref": "#/components/schemas/LoraFinetuningConfig" + }, + { + "$ref": "#/components/schemas/QATFinetuningConfig" + } + ], + "discriminator": { + "propertyName": "type", + "mapping": { + "LoRA": "#/components/schemas/LoraFinetuningConfig", + "QAT": "#/components/schemas/QATFinetuningConfig" + } + } + }, + "LoraFinetuningConfig": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "LoRA", + "default": "LoRA", + "description": "Algorithm type identifier, always \"LoRA\"" + }, + "lora_attn_modules": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of attention module names to apply LoRA to" + }, + "apply_lora_to_mlp": { + "type": "boolean", + "description": "Whether to apply LoRA to MLP layers" + }, + "apply_lora_to_output": { + "type": "boolean", + "description": "Whether to apply LoRA to output projection layers" + }, + "rank": { + "type": "integer", + "description": "Rank of the LoRA adaptation (lower rank = fewer parameters)" + }, + "alpha": { + "type": "integer", + "description": "LoRA scaling parameter that controls adaptation strength" + }, + "use_dora": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation)" + }, + "quantize_base": { + "type": "boolean", + "default": false, + "description": "(Optional) Whether to quantize the base model weights" + } + }, + "additionalProperties": false, + "required": [ + "type", + "lora_attn_modules", + "apply_lora_to_mlp", + "apply_lora_to_output", + "rank", + "alpha" + ], + "title": "LoraFinetuningConfig", + "description": "Configuration for Low-Rank Adaptation (LoRA) fine-tuning." 
+ }, + "QATFinetuningConfig": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "QAT", + "default": "QAT", + "description": "Algorithm type identifier, always \"QAT\"" + }, + "quantizer_name": { + "type": "string", + "description": "Name of the quantization algorithm to use" + }, + "group_size": { + "type": "integer", + "description": "Size of groups for grouped quantization" + } + }, + "additionalProperties": false, + "required": [ + "type", + "quantizer_name", + "group_size" + ], + "title": "QATFinetuningConfig", + "description": "Configuration for Quantization-Aware Training (QAT) fine-tuning." + }, + "SupervisedFineTuneRequest": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string", + "description": "The UUID of the job to create." + }, + "training_config": { + "$ref": "#/components/schemas/TrainingConfig", + "description": "The training configuration." + }, + "hyperparam_search_config": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The hyperparam search configuration." + }, + "logger_config": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "The logger configuration." + }, + "model": { + "type": "string", + "description": "The model to fine-tune." + }, + "checkpoint_dir": { + "type": "string", + "description": "The directory to save checkpoint(s) to." + }, + "algorithm_config": { + "$ref": "#/components/schemas/AlgorithmConfig", + "description": "The algorithm configuration." 
+ } + }, + "additionalProperties": false, + "required": [ + "job_uuid", + "training_config", + "hyperparam_search_config", + "logger_config" + ], + "title": "SupervisedFineTuneRequest" + }, + "SyntheticDataGenerateRequest": { + "type": "object", + "properties": { + "dialogs": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + }, + "description": "List of conversation messages to use as input for synthetic data generation" + }, + "filtering_function": { + "type": "string", + "enum": [ + "none", + "random", + "top_k", + "top_p", + "top_k_top_p", + "sigmoid" + ], + "description": "Type of filtering to apply to generated synthetic data samples" + }, + "model": { + "type": "string", + "description": "(Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint" + } + }, + "additionalProperties": false, + "required": [ + "dialogs", + "filtering_function" + ], + "title": "SyntheticDataGenerateRequest" + }, + "SyntheticDataGenerationResponse": { + "type": "object", + "properties": { + "synthetic_data": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "description": "List of generated synthetic data samples that passed the filtering criteria" + }, + "statistics": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + }, + "description": "(Optional) Statistical information about the generation process and filtering results" + } + }, + "additionalProperties": false, + "required": [ + "synthetic_data" + ], + "title": "SyntheticDataGenerationResponse", + "description": "Response from 
the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
+      },
+      "UpdatePromptRequest": {
+        "type": "object",
+        "properties": {
+          "prompt": {
+            "type": "string",
+            "description": "The updated prompt text content."
+          },
+          "version": {
+            "type": "integer",
+            "description": "The current version of the prompt being updated."
+          },
+          "variables": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "description": "Updated list of variable names that can be used in the prompt template."
+          },
+          "set_as_default": {
+            "type": "boolean",
+            "description": "Set the new version as the default (default=True)."
+          }
+        },
+        "additionalProperties": false,
+        "required": [
+          "prompt",
+          "version",
+          "set_as_default"
+        ],
+        "title": "UpdatePromptRequest"
+      },
       "VersionInfo": {
         "type": "object",
         "properties": {
diff --git a/docs/static/llama-stack-spec.yaml b/docs/static/llama-stack-spec.yaml
index b9e03d614..ebe142557 100644
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
@@ -3634,6 +3634,2130 @@ components:
     title: OpenAIUserMessageParam
     description: >-
       A message from the user in an OpenAI-compatible chat completion request.
+  OpenAICompletionWithInputMessages:
+    type: object
+    properties:
+      id:
+        type: string
+        description: The ID of the chat completion
+      choices:
+        type: array
+        items:
+          $ref: '#/components/schemas/OpenAIChoice'
+        description: List of choices
+      object:
+        type: string
+        const: chat.completion
+        default: chat.completion
+        description: >-
+          The object type, which will be "chat.completion"
+      created:
+        type: integer
+        description: >-
+          The Unix timestamp in seconds when the chat completion was created
+      model:
+        type: string
+        description: >-
+          The model that was used to generate the chat completion
+      input_messages:
+        type: array
+        items:
+          $ref: '#/components/schemas/OpenAIMessageParam'
+    additionalProperties: false
+    required:
+      - id
+      - choices
+      - object
+      - created
+      - model
+      - input_messages
+    title: OpenAICompletionWithInputMessages
+  DataSource:
+    oneOf:
+      - $ref: '#/components/schemas/URIDataSource'
+      - $ref: '#/components/schemas/RowsDataSource'
+    discriminator:
+      propertyName: type
+      mapping:
+        uri: '#/components/schemas/URIDataSource'
+        rows: '#/components/schemas/RowsDataSource'
+  Dataset:
+    type: object
+    properties:
+      identifier:
+        type: string
+      provider_resource_id:
+        type: string
+      provider_id:
+        type: string
+      type:
+        type: string
+        enum:
+          - model
+          - shield
+          - vector_db
+          - dataset
+          - scoring_function
+          - benchmark
+          - tool
+          - tool_group
+          - prompt
+        const: dataset
+        default: dataset
+        description: >-
+          Type of resource, always 'dataset' for datasets
+      purpose:
+        type: string
+        enum:
+          - post-training/messages
+          - eval/question-answer
+          - eval/messages-answer
+        description: >-
+          Purpose of the dataset indicating its intended use
+      source:
+        $ref: '#/components/schemas/DataSource'
+        description: >-
+          Data source configuration for the dataset
+      metadata:
+        type: object
+        additionalProperties:
+          oneOf:
+            - type: 'null'
+            - type: boolean
+            - type: number
+            - type: string
+            - type: array
+            - type: object
+        description: 
Additional metadata for the dataset + additionalProperties: false + required: + - identifier + - provider_id + - type + - purpose + - source + - metadata + title: Dataset + description: >- + Dataset resource for storing and accessing training or evaluation data. + RowsDataSource: + type: object + properties: + type: + type: string + const: rows + default: rows + rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The dataset is stored in rows. E.g. - [ {"messages": [{"role": "user", + "content": "Hello, world!"}, {"role": "assistant", "content": "Hello, + world!"}]} ] + additionalProperties: false + required: + - type + - rows + title: RowsDataSource + description: A dataset stored in rows. + URIDataSource: + type: object + properties: + type: + type: string + const: uri + default: uri + uri: + type: string + description: >- + The dataset can be obtained from a URI. E.g. - "https://mywebsite.com/mydata.jsonl" + - "lsfs://mydata.jsonl" - "data:csv;base64,{base64_content}" + additionalProperties: false + required: + - type + - uri + title: URIDataSource + description: >- + A dataset that can be obtained from a URI. 
+ Model: + type: object + properties: + identifier: + type: string + description: >- + Unique identifier for this resource in llama stack + provider_resource_id: + type: string + description: >- + Unique identifier for this resource in the provider + provider_id: + type: string + description: >- + ID of the provider that owns this resource + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: model + default: model + description: >- + The resource type, always 'model' for model resources + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Any additional metadata for this model + model_type: + $ref: '#/components/schemas/ModelType' + default: llm + description: >- + The type of model (LLM or embedding model) + additionalProperties: false + required: + - identifier + - provider_id + - type + - metadata + - model_type + title: Model + description: >- + A model resource representing an AI model registered in Llama Stack. + ModelType: + type: string + enum: + - llm + - embedding + - rerank + title: ModelType + description: >- + Enumeration of supported model types in Llama Stack. + AgentTurnInputType: + type: object + properties: + type: + type: string + const: agent_turn_input + default: agent_turn_input + description: >- + Discriminator type. Always "agent_turn_input" + additionalProperties: false + required: + - type + title: AgentTurnInputType + description: Parameter type for agent turn input. + ArrayType: + type: object + properties: + type: + type: string + const: array + default: array + description: Discriminator type. Always "array" + additionalProperties: false + required: + - type + title: ArrayType + description: Parameter type for array values. 
+ BooleanType: + type: object + properties: + type: + type: string + const: boolean + default: boolean + description: Discriminator type. Always "boolean" + additionalProperties: false + required: + - type + title: BooleanType + description: Parameter type for boolean values. + ChatCompletionInputType: + type: object + properties: + type: + type: string + const: chat_completion_input + default: chat_completion_input + description: >- + Discriminator type. Always "chat_completion_input" + additionalProperties: false + required: + - type + title: ChatCompletionInputType + description: >- + Parameter type for chat completion input. + CompletionInputType: + type: object + properties: + type: + type: string + const: completion_input + default: completion_input + description: >- + Discriminator type. Always "completion_input" + additionalProperties: false + required: + - type + title: CompletionInputType + description: Parameter type for completion input. + JsonType: + type: object + properties: + type: + type: string + const: json + default: json + description: Discriminator type. Always "json" + additionalProperties: false + required: + - type + title: JsonType + description: Parameter type for JSON values. + NumberType: + type: object + properties: + type: + type: string + const: number + default: number + description: Discriminator type. Always "number" + additionalProperties: false + required: + - type + title: NumberType + description: Parameter type for numeric values. + ObjectType: + type: object + properties: + type: + type: string + const: object + default: object + description: Discriminator type. Always "object" + additionalProperties: false + required: + - type + title: ObjectType + description: Parameter type for object values. 
+ ParamType: + oneOf: + - $ref: '#/components/schemas/StringType' + - $ref: '#/components/schemas/NumberType' + - $ref: '#/components/schemas/BooleanType' + - $ref: '#/components/schemas/ArrayType' + - $ref: '#/components/schemas/ObjectType' + - $ref: '#/components/schemas/JsonType' + - $ref: '#/components/schemas/UnionType' + - $ref: '#/components/schemas/ChatCompletionInputType' + - $ref: '#/components/schemas/CompletionInputType' + - $ref: '#/components/schemas/AgentTurnInputType' + discriminator: + propertyName: type + mapping: + string: '#/components/schemas/StringType' + number: '#/components/schemas/NumberType' + boolean: '#/components/schemas/BooleanType' + array: '#/components/schemas/ArrayType' + object: '#/components/schemas/ObjectType' + json: '#/components/schemas/JsonType' + union: '#/components/schemas/UnionType' + chat_completion_input: '#/components/schemas/ChatCompletionInputType' + completion_input: '#/components/schemas/CompletionInputType' + agent_turn_input: '#/components/schemas/AgentTurnInputType' + ScoringFn: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: scoring_function + default: scoring_function + description: >- + The resource type, always scoring_function + description: + type: string + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + return_type: + $ref: '#/components/schemas/ParamType' + params: + $ref: '#/components/schemas/ScoringFnParams' + additionalProperties: false + required: + - identifier + - provider_id + - type + - metadata + - return_type + title: ScoringFn + description: >- + A scoring function resource for evaluating model outputs. 
+ StringType: + type: object + properties: + type: + type: string + const: string + default: string + description: Discriminator type. Always "string" + additionalProperties: false + required: + - type + title: StringType + description: Parameter type for string values. + UnionType: + type: object + properties: + type: + type: string + const: union + default: union + description: Discriminator type. Always "union" + additionalProperties: false + required: + - type + title: UnionType + description: Parameter type for union values. + Shield: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: shield + default: shield + description: The resource type, always shield + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Configuration parameters for the shield + additionalProperties: false + required: + - identifier + - provider_id + - type + title: Shield + description: >- + A safety shield resource that can be used to check content. 
+ Span: + type: object + properties: + span_id: + type: string + description: Unique identifier for the span + trace_id: + type: string + description: >- + Unique identifier for the trace this span belongs to + parent_span_id: + type: string + description: >- + (Optional) Unique identifier for the parent span, if this is a child span + name: + type: string + description: >- + Human-readable name describing the operation this span represents + start_time: + type: string + format: date-time + description: Timestamp when the operation began + end_time: + type: string + format: date-time + description: >- + (Optional) Timestamp when the operation finished, if completed + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Key-value pairs containing additional metadata about the span + additionalProperties: false + required: + - span_id + - trace_id + - name + - start_time + title: Span + description: >- + A span representing a single operation within a trace. + GetSpanTreeRequest: + type: object + properties: + attributes_to_return: + type: array + items: + type: string + description: The attributes to return in the tree. + max_depth: + type: integer + description: The maximum depth of the tree. + additionalProperties: false + title: GetSpanTreeRequest + SpanStatus: + type: string + enum: + - ok + - error + title: SpanStatus + description: >- + The status of a span indicating whether it completed successfully or with + an error. 
+ SpanWithStatus: + type: object + properties: + span_id: + type: string + description: Unique identifier for the span + trace_id: + type: string + description: >- + Unique identifier for the trace this span belongs to + parent_span_id: + type: string + description: >- + (Optional) Unique identifier for the parent span, if this is a child span + name: + type: string + description: >- + Human-readable name describing the operation this span represents + start_time: + type: string + format: date-time + description: Timestamp when the operation began + end_time: + type: string + format: date-time + description: >- + (Optional) Timestamp when the operation finished, if completed + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Key-value pairs containing additional metadata about the span + status: + $ref: '#/components/schemas/SpanStatus' + description: >- + (Optional) The current status of the span + additionalProperties: false + required: + - span_id + - trace_id + - name + - start_time + title: SpanWithStatus + description: A span that includes status information. + QuerySpanTreeResponse: + type: object + properties: + data: + type: object + additionalProperties: + $ref: '#/components/schemas/SpanWithStatus' + description: >- + Dictionary mapping span IDs to spans with status information + additionalProperties: false + required: + - data + title: QuerySpanTreeResponse + description: >- + Response containing a tree structure of spans. 
+ Tool: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: tool + default: tool + description: Type of resource, always 'tool' + toolgroup_id: + type: string + description: >- + ID of the tool group this tool belongs to + description: + type: string + description: >- + Human-readable description of what the tool does + parameters: + type: array + items: + $ref: '#/components/schemas/ToolParameter' + description: List of parameters this tool accepts + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional metadata about the tool + additionalProperties: false + required: + - identifier + - provider_id + - type + - toolgroup_id + - description + - parameters + title: Tool + description: A tool that can be invoked by agents. 
+ ToolGroup: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: tool_group + default: tool_group + description: Type of resource, always 'tool_group' + mcp_endpoint: + $ref: '#/components/schemas/URL' + description: >- + (Optional) Model Context Protocol endpoint for remote tools + args: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional arguments for the tool group + additionalProperties: false + required: + - identifier + - provider_id + - type + title: ToolGroup + description: >- + A group of related tools managed together. + Trace: + type: object + properties: + trace_id: + type: string + description: Unique identifier for the trace + root_span_id: + type: string + description: >- + Unique identifier for the root span that started this trace + start_time: + type: string + format: date-time + description: Timestamp when the trace began + end_time: + type: string + format: date-time + description: >- + (Optional) Timestamp when the trace finished, if completed + additionalProperties: false + required: + - trace_id + - root_span_id + - start_time + title: Trace + description: >- + A trace representing the complete execution path of a request across multiple + operations. 
+ Checkpoint: + type: object + properties: + identifier: + type: string + description: Unique identifier for the checkpoint + created_at: + type: string + format: date-time + description: >- + Timestamp when the checkpoint was created + epoch: + type: integer + description: >- + Training epoch when the checkpoint was saved + post_training_job_id: + type: string + description: >- + Identifier of the training job that created this checkpoint + path: + type: string + description: >- + File system path where the checkpoint is stored + training_metrics: + $ref: '#/components/schemas/PostTrainingMetric' + description: >- + (Optional) Training metrics associated with this checkpoint + additionalProperties: false + required: + - identifier + - created_at + - epoch + - post_training_job_id + - path + title: Checkpoint + description: Checkpoint created during training runs. + PostTrainingJobArtifactsResponse: + type: object + properties: + job_uuid: + type: string + description: Unique identifier for the training job + checkpoints: + type: array + items: + $ref: '#/components/schemas/Checkpoint' + description: >- + List of model checkpoints created during training + additionalProperties: false + required: + - job_uuid + - checkpoints + title: PostTrainingJobArtifactsResponse + description: Artifacts of a finetuning job. + PostTrainingMetric: + type: object + properties: + epoch: + type: integer + description: Training epoch number + train_loss: + type: number + description: Loss value on the training dataset + validation_loss: + type: number + description: Loss value on the validation dataset + perplexity: + type: number + description: >- + Perplexity metric indicating model confidence + additionalProperties: false + required: + - epoch + - train_loss + - validation_loss + - perplexity + title: PostTrainingMetric + description: >- + Training metrics captured during post-training jobs. 
+ PostTrainingJobStatusResponse: + type: object + properties: + job_uuid: + type: string + description: Unique identifier for the training job + status: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + description: Current status of the training job + scheduled_at: + type: string + format: date-time + description: >- + (Optional) Timestamp when the job was scheduled + started_at: + type: string + format: date-time + description: >- + (Optional) Timestamp when the job execution began + completed_at: + type: string + format: date-time + description: >- + (Optional) Timestamp when the job finished, if completed + resources_allocated: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Information about computational resources allocated to the + job + checkpoints: + type: array + items: + $ref: '#/components/schemas/Checkpoint' + description: >- + List of model checkpoints created during training + additionalProperties: false + required: + - job_uuid + - status + - checkpoints + title: PostTrainingJobStatusResponse + description: Status of a finetuning job. 
+ ListPostTrainingJobsResponse: + type: object + properties: + data: + type: array + items: + type: object + properties: + job_uuid: + type: string + additionalProperties: false + required: + - job_uuid + title: PostTrainingJob + additionalProperties: false + required: + - data + title: ListPostTrainingJobsResponse + VectorDB: + type: object + properties: + identifier: + type: string + provider_resource_id: + type: string + provider_id: + type: string + type: + type: string + enum: + - model + - shield + - vector_db + - dataset + - scoring_function + - benchmark + - tool + - tool_group + - prompt + const: vector_db + default: vector_db + description: >- + Type of resource, always 'vector_db' for vector databases + embedding_model: + type: string + description: >- + Name of the embedding model to use for vector generation + embedding_dimension: + type: integer + description: Dimension of the embedding vectors + vector_db_name: + type: string + additionalProperties: false + required: + - identifier + - provider_id + - type + - embedding_model + - embedding_dimension + title: VectorDB + description: >- + Vector database resource for storing and querying vector embeddings. + HealthInfo: + type: object + properties: + status: + type: string + enum: + - OK + - Error + - Not Implemented + description: Current health status of the service + additionalProperties: false + required: + - status + title: HealthInfo + description: >- + Health status information for the service. + RAGDocument: + type: object + properties: + document_id: + type: string + description: The unique identifier for the document. + content: + oneOf: + - type: string + - $ref: '#/components/schemas/InterleavedContentItem' + - type: array + items: + $ref: '#/components/schemas/InterleavedContentItem' + - $ref: '#/components/schemas/URL' + description: The content of the document. + mime_type: + type: string + description: The MIME type of the document. 
+ metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Additional metadata for the document. + additionalProperties: false + required: + - document_id + - content + - metadata + title: RAGDocument + description: >- + A document to be used for document ingestion in the RAG Tool. + InsertRequest: + type: object + properties: + documents: + type: array + items: + $ref: '#/components/schemas/RAGDocument' + description: >- + List of documents to index in the RAG system + vector_db_id: + type: string + description: >- + ID of the vector database to store the document embeddings + chunk_size_in_tokens: + type: integer + description: >- + (Optional) Size in tokens for document chunking during indexing + additionalProperties: false + required: + - documents + - vector_db_id + - chunk_size_in_tokens + title: InsertRequest + Chunk: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + The content of the chunk, which can be interleaved text, images, or other + types. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Metadata associated with the chunk that will be used in the model context + during inference. + embedding: + type: array + items: + type: number + description: >- + Optional embedding for the chunk. If not provided, it will be computed + later. + stored_chunk_id: + type: string + description: >- + The chunk ID that is stored in the vector database. Used for backend functionality. + chunk_metadata: + $ref: '#/components/schemas/ChunkMetadata' + description: >- + Metadata for the chunk that will NOT be used in the context during inference. + The `chunk_metadata` is required backend functionality. 
+ additionalProperties: false + required: + - content + - metadata + title: Chunk + description: >- + A chunk of content that can be inserted into a vector database. + ChunkMetadata: + type: object + properties: + chunk_id: + type: string + description: >- + The ID of the chunk. If not set, it will be generated based on the document + ID and content. + document_id: + type: string + description: >- + The ID of the document this chunk belongs to. + source: + type: string + description: >- + The source of the content, such as a URL, file path, or other identifier. + created_timestamp: + type: integer + description: >- + An optional timestamp indicating when the chunk was created. + updated_timestamp: + type: integer + description: >- + An optional timestamp indicating when the chunk was last updated. + chunk_window: + type: string + description: >- + The window of the chunk, which can be used to group related chunks together. + chunk_tokenizer: + type: string + description: >- + The tokenizer used to create the chunk. Default is Tiktoken. + chunk_embedding_model: + type: string + description: >- + The embedding model used to create the chunk's embedding. + chunk_embedding_dimension: + type: integer + description: >- + The dimension of the embedding vector for the chunk. + content_token_count: + type: integer + description: >- + The number of tokens in the content of the chunk. + metadata_token_count: + type: integer + description: >- + The number of tokens in the metadata of the chunk. + additionalProperties: false + title: ChunkMetadata + description: >- + `ChunkMetadata` is backend metadata for a `Chunk` that is used to store additional + information about the chunk that will not be used in the context during + inference, but is required for backend functionality. The `ChunkMetadata` is + set during chunk creation in `MemoryToolRuntimeImpl().insert()`and is not + expected to change after. 
Use `Chunk.metadata` for metadata that will + be used in the context during inference. + InsertChunksRequest: + type: object + properties: + vector_db_id: + type: string + description: >- + The identifier of the vector database to insert the chunks into. + chunks: + type: array + items: + $ref: '#/components/schemas/Chunk' + description: >- + The chunks to insert. Each `Chunk` should contain content which can be + interleaved text, images, or other types. `metadata`: `dict[str, Any]` + and `embedding`: `List[float]` are optional. If `metadata` is provided, + you configure how Llama Stack formats the chunk during generation. If + `embedding` is not provided, it will be computed later. + ttl_seconds: + type: integer + description: The time to live of the chunks. + additionalProperties: false + required: + - vector_db_id + - chunks + title: InsertChunksRequest + ProviderInfo: + type: object + properties: + api: + type: string + description: The API name this provider implements + provider_id: + type: string + description: Unique identifier for the provider + provider_type: + type: string + description: The type of provider implementation + config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Configuration parameters for the provider + health: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Current health status of the provider + additionalProperties: false + required: + - api + - provider_id + - provider_type + - config + - health + title: ProviderInfo + description: >- + Information about a registered provider including its configuration and health + status. + InvokeToolRequest: + type: object + properties: + tool_name: + type: string + description: The name of the tool to invoke. 
+ kwargs: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + A dictionary of arguments to pass to the tool. + additionalProperties: false + required: + - tool_name + - kwargs + title: InvokeToolRequest + ToolInvocationResult: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + (Optional) The output content from the tool execution + error_message: + type: string + description: >- + (Optional) Error message if the tool execution failed + error_code: + type: integer + description: >- + (Optional) Numeric error code if the tool execution failed + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Additional metadata about the tool execution + additionalProperties: false + title: ToolInvocationResult + description: Result of a tool invocation. + PaginatedResponse: + type: object + properties: + data: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The list of items for the current page + has_more: + type: boolean + description: >- + Whether there are more items available after this set + url: + type: string + description: The URL for accessing this list + additionalProperties: false + required: + - data + - has_more + title: PaginatedResponse + description: >- + A generic paginated response that follows a simple format. 
+ Job: + type: object + properties: + job_id: + type: string + description: Unique identifier for the job + status: + type: string + enum: + - completed + - in_progress + - failed + - scheduled + - cancelled + description: Current execution status of the job + additionalProperties: false + required: + - job_id + - status + title: Job + description: >- + A job execution instance with status tracking. + ListBenchmarksResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Benchmark' + additionalProperties: false + required: + - data + title: ListBenchmarksResponse + Order: + type: string + enum: + - asc + - desc + title: Order + description: Sort order for paginated responses. + ListOpenAIChatCompletionResponse: + type: object + properties: + data: + type: array + items: + type: object + properties: + id: + type: string + description: The ID of the chat completion + choices: + type: array + items: + $ref: '#/components/schemas/OpenAIChoice' + description: List of choices + object: + type: string + const: chat.completion + default: chat.completion + description: >- + The object type, which will be "chat.completion" + created: + type: integer + description: >- + The Unix timestamp in seconds when the chat completion was created + model: + type: string + description: >- + The model that was used to generate the chat completion + input_messages: + type: array + items: + $ref: '#/components/schemas/OpenAIMessageParam' + additionalProperties: false + required: + - id + - choices + - object + - created + - model + - input_messages + title: OpenAICompletionWithInputMessages + description: >- + List of chat completion objects with their input messages + has_more: + type: boolean + description: >- + Whether there are more completions available beyond this list + first_id: + type: string + description: ID of the first completion in this list + last_id: + type: string + description: ID of the last completion in this list + object: + 
type: string + const: list + default: list + description: >- + Must be "list" to identify this as a list response + additionalProperties: false + required: + - data + - has_more + - first_id + - last_id + - object + title: ListOpenAIChatCompletionResponse + description: >- + Response from listing OpenAI-compatible chat completions. + ListDatasetsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Dataset' + description: List of datasets + additionalProperties: false + required: + - data + title: ListDatasetsResponse + description: Response from listing datasets. + ListModelsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Model' + additionalProperties: false + required: + - data + title: ListModelsResponse + ListOpenAIResponseInputItem: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: List of input items + object: + type: string + const: list + default: list + description: Object type identifier, always "list" + additionalProperties: false + required: + - data + - object + title: ListOpenAIResponseInputItem + description: >- + List container for OpenAI response input items. 
+ ListOpenAIResponseObject: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseObjectWithInput' + description: >- + List of response objects with their input context + has_more: + type: boolean + description: >- + Whether there are more results available beyond this page + first_id: + type: string + description: >- + Identifier of the first item in this page + last_id: + type: string + description: Identifier of the last item in this page + object: + type: string + const: list + default: list + description: Object type identifier, always "list" + additionalProperties: false + required: + - data + - has_more + - first_id + - last_id + - object + title: ListOpenAIResponseObject + description: >- + Paginated list of OpenAI response objects with navigation metadata. + OpenAIResponseObjectWithInput: + type: object + properties: + created_at: + type: integer + description: >- + Unix timestamp when the response was created + error: + $ref: '#/components/schemas/OpenAIResponseError' + description: >- + (Optional) Error details if the response generation failed + id: + type: string + description: Unique identifier for this response + model: + type: string + description: Model identifier used for generation + object: + type: string + const: response + default: response + description: >- + Object type identifier, always "response" + output: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseOutput' + description: >- + List of generated output items (messages, tool calls, etc.) 
+ parallel_tool_calls: + type: boolean + default: false + description: >- + Whether tool calls can be executed in parallel + previous_response_id: + type: string + description: >- + (Optional) ID of the previous response in a conversation + status: + type: string + description: >- + Current status of the response generation + temperature: + type: number + description: >- + (Optional) Sampling temperature used for generation + text: + $ref: '#/components/schemas/OpenAIResponseText' + description: >- + Text formatting configuration for the response + top_p: + type: number + description: >- + (Optional) Nucleus sampling parameter used for generation + truncation: + type: string + description: >- + (Optional) Truncation strategy applied to the response + user: + type: string + description: >- + (Optional) User identifier associated with the request + input: + type: array + items: + $ref: '#/components/schemas/OpenAIResponseInput' + description: >- + List of input items that led to this response + additionalProperties: false + required: + - created_at + - id + - model + - object + - output + - parallel_tool_calls + - status + - text + - input + title: OpenAIResponseObjectWithInput + description: >- + OpenAI response object extended with input context information. + ListPromptsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Prompt' + additionalProperties: false + required: + - data + title: ListPromptsResponse + description: Response model to list prompts. + ListProvidersResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ProviderInfo' + description: List of provider information objects + additionalProperties: false + required: + - data + title: ListProvidersResponse + description: >- + Response containing a list of all available providers. 
+ RouteInfo: + type: object + properties: + route: + type: string + description: The API endpoint path + method: + type: string + description: HTTP method for the route + provider_types: + type: array + items: + type: string + description: >- + List of provider types that implement this route + additionalProperties: false + required: + - route + - method + - provider_types + title: RouteInfo + description: >- + Information about an API route including its path, method, and implementing + providers. + ListRoutesResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/RouteInfo' + description: >- + List of available route information objects + additionalProperties: false + required: + - data + title: ListRoutesResponse + description: >- + Response containing a list of all available API routes. + ListToolDefsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ToolDef' + description: List of tool definitions + additionalProperties: false + required: + - data + title: ListToolDefsResponse + description: >- + Response containing a list of tool definitions. + ListScoringFunctionsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ScoringFn' + additionalProperties: false + required: + - data + title: ListScoringFunctionsResponse + ListShieldsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Shield' + additionalProperties: false + required: + - data + title: ListShieldsResponse + ListToolGroupsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/ToolGroup' + description: List of tool groups + additionalProperties: false + required: + - data + title: ListToolGroupsResponse + description: >- + Response containing a list of tool groups. 
+ ListToolsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Tool' + description: List of tools + additionalProperties: false + required: + - data + title: ListToolsResponse + description: Response containing a list of tools. + ListVectorDBsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/VectorDB' + description: List of vector databases + additionalProperties: false + required: + - data + title: ListVectorDBsResponse + description: Response from listing vector databases. + Event: + oneOf: + - $ref: '#/components/schemas/UnstructuredLogEvent' + - $ref: '#/components/schemas/MetricEvent' + - $ref: '#/components/schemas/StructuredLogEvent' + discriminator: + propertyName: type + mapping: + unstructured_log: '#/components/schemas/UnstructuredLogEvent' + metric: '#/components/schemas/MetricEvent' + structured_log: '#/components/schemas/StructuredLogEvent' + EventType: + type: string + enum: + - unstructured_log + - structured_log + - metric + title: EventType + description: >- + The type of telemetry event being logged. + LogSeverity: + type: string + enum: + - verbose + - debug + - info + - warn + - error + - critical + title: LogSeverity + description: The severity level of a log message. 
+ MetricEvent: + type: object + properties: + trace_id: + type: string + description: >- + Unique identifier for the trace this event belongs to + span_id: + type: string + description: >- + Unique identifier for the span this event belongs to + timestamp: + type: string + format: date-time + description: Timestamp when the event occurred + attributes: + type: object + additionalProperties: + oneOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + description: >- + (Optional) Key-value pairs containing additional metadata about the event + type: + $ref: '#/components/schemas/EventType' + const: metric + default: metric + description: Event type identifier set to METRIC + metric: + type: string + description: The name of the metric being measured + value: + oneOf: + - type: integer + - type: number + description: >- + The numeric value of the metric measurement + unit: + type: string + description: >- + The unit of measurement for the metric value + additionalProperties: false + required: + - trace_id + - span_id + - timestamp + - type + - metric + - value + - unit + title: MetricEvent + description: >- + A metric event containing a measured value. + SpanEndPayload: + type: object + properties: + type: + $ref: '#/components/schemas/StructuredLogType' + const: span_end + default: span_end + description: Payload type identifier set to SPAN_END + status: + $ref: '#/components/schemas/SpanStatus' + description: >- + The final status of the span indicating success or failure + additionalProperties: false + required: + - type + - status + title: SpanEndPayload + description: Payload for a span end event. 
+ SpanStartPayload: + type: object + properties: + type: + $ref: '#/components/schemas/StructuredLogType' + const: span_start + default: span_start + description: >- + Payload type identifier set to SPAN_START + name: + type: string + description: >- + Human-readable name describing the operation this span represents + parent_span_id: + type: string + description: >- + (Optional) Unique identifier for the parent span, if this is a child span + additionalProperties: false + required: + - type + - name + title: SpanStartPayload + description: Payload for a span start event. + StructuredLogEvent: + type: object + properties: + trace_id: + type: string + description: >- + Unique identifier for the trace this event belongs to + span_id: + type: string + description: >- + Unique identifier for the span this event belongs to + timestamp: + type: string + format: date-time + description: Timestamp when the event occurred + attributes: + type: object + additionalProperties: + oneOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + description: >- + (Optional) Key-value pairs containing additional metadata about the event + type: + $ref: '#/components/schemas/EventType' + const: structured_log + default: structured_log + description: >- + Event type identifier set to STRUCTURED_LOG + payload: + $ref: '#/components/schemas/StructuredLogPayload' + description: >- + The structured payload data for the log event + additionalProperties: false + required: + - trace_id + - span_id + - timestamp + - type + - payload + title: StructuredLogEvent + description: >- + A structured log event containing typed payload data. 
+ StructuredLogPayload: + oneOf: + - $ref: '#/components/schemas/SpanStartPayload' + - $ref: '#/components/schemas/SpanEndPayload' + discriminator: + propertyName: type + mapping: + span_start: '#/components/schemas/SpanStartPayload' + span_end: '#/components/schemas/SpanEndPayload' + StructuredLogType: + type: string + enum: + - span_start + - span_end + title: StructuredLogType + description: >- + The type of structured log event payload. + UnstructuredLogEvent: + type: object + properties: + trace_id: + type: string + description: >- + Unique identifier for the trace this event belongs to + span_id: + type: string + description: >- + Unique identifier for the span this event belongs to + timestamp: + type: string + format: date-time + description: Timestamp when the event occurred + attributes: + type: object + additionalProperties: + oneOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + description: >- + (Optional) Key-value pairs containing additional metadata about the event + type: + $ref: '#/components/schemas/EventType' + const: unstructured_log + default: unstructured_log + description: >- + Event type identifier set to UNSTRUCTURED_LOG + message: + type: string + description: The log message text + severity: + $ref: '#/components/schemas/LogSeverity' + description: The severity level of the log message + additionalProperties: false + required: + - trace_id + - span_id + - timestamp + - type + - message + - severity + title: UnstructuredLogEvent + description: >- + An unstructured log event containing a simple text message. + LogEventRequest: + type: object + properties: + event: + $ref: '#/components/schemas/Event' + description: The event to log. + ttl_seconds: + type: integer + description: The time to live of the event. 
+ additionalProperties: false + required: + - event + - ttl_seconds + title: LogEventRequest + VectorStoreChunkingStrategy: + oneOf: + - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto' + - $ref: '#/components/schemas/VectorStoreChunkingStrategyStatic' + discriminator: + propertyName: type + mapping: + auto: '#/components/schemas/VectorStoreChunkingStrategyAuto' + static: '#/components/schemas/VectorStoreChunkingStrategyStatic' + VectorStoreChunkingStrategyAuto: + type: object + properties: + type: + type: string + const: auto + default: auto + description: >- + Strategy type, always "auto" for automatic chunking + additionalProperties: false + required: + - type + title: VectorStoreChunkingStrategyAuto + description: >- + Automatic chunking strategy for vector store files. + VectorStoreChunkingStrategyStatic: + type: object + properties: + type: + type: string + const: static + default: static + description: >- + Strategy type, always "static" for static chunking + static: + $ref: '#/components/schemas/VectorStoreChunkingStrategyStaticConfig' + description: >- + Configuration parameters for the static chunking strategy + additionalProperties: false + required: + - type + - static + title: VectorStoreChunkingStrategyStatic + description: >- + Static chunking strategy with configurable parameters. + VectorStoreChunkingStrategyStaticConfig: + type: object + properties: + chunk_overlap_tokens: + type: integer + default: 400 + description: >- + Number of tokens to overlap between adjacent chunks + max_chunk_size_tokens: + type: integer + default: 800 + description: >- + Maximum number of tokens per chunk, must be between 100 and 4096 + additionalProperties: false + required: + - chunk_overlap_tokens + - max_chunk_size_tokens + title: VectorStoreChunkingStrategyStaticConfig + description: >- + Configuration for static chunking strategy. 
+ OpenaiAttachFileToVectorStoreRequest: + type: object + properties: + file_id: + type: string + description: >- + The ID of the file to attach to the vector store. + attributes: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The key-value attributes stored with the file, which can be used for filtering. + chunking_strategy: + $ref: '#/components/schemas/VectorStoreChunkingStrategy' + description: >- + The chunking strategy to use for the file. + additionalProperties: false + required: + - file_id + title: OpenaiAttachFileToVectorStoreRequest + VectorStoreFileLastError: + type: object + properties: + code: + oneOf: + - type: string + const: server_error + - type: string + const: rate_limit_exceeded + description: >- + Error code indicating the type of failure + message: + type: string + description: >- + Human-readable error message describing the failure + additionalProperties: false + required: + - code + - message + title: VectorStoreFileLastError + description: >- + Error information for failed vector store file processing. 
+    VectorStoreFileObject:
+      type: object
+      properties:
+        id:
+          type: string
+          description: Unique identifier for the file
+        object:
+          type: string
+          default: vector_store.file
+          description: >-
+            Object type identifier, always "vector_store.file"
+        attributes:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: >-
+            Key-value attributes associated with the file
+        chunking_strategy:
+          $ref: '#/components/schemas/VectorStoreChunkingStrategy'
+          description: >-
+            Strategy used for splitting the file into chunks
+        created_at:
+          type: integer
+          description: >-
+            Timestamp when the file was added to the vector store
+        last_error:
+          $ref: '#/components/schemas/VectorStoreFileLastError'
+          description: >-
+            (Optional) Error information if file processing failed
+        status:
+          $ref: '#/components/schemas/VectorStoreFileStatus'
+          description: Current processing status of the file
+        usage_bytes:
+          type: integer
+          default: 0
+          description: Storage space used by this file in bytes
+        vector_store_id:
+          type: string
+          description: >-
+            ID of the vector store containing this file
+      additionalProperties: false
+      required:
+        - id
+        - object
+        - attributes
+        - chunking_strategy
+        - created_at
+        - status
+        - usage_bytes
+        - vector_store_id
+      title: VectorStoreFileObject
+      description: OpenAI Vector Store File object.
+    VectorStoreFileStatus:
+      oneOf:
+        - type: string
+          const: completed
+        - type: string
+          const: in_progress
+        - type: string
+          const: cancelled
+        - type: string
+          const: failed
     OpenAIJSONSchema:
       type: object
       properties:
@@ -9582,6 +11706,1606 @@ components:
     title: VectorStoreSearchResponsePage
     description: >-
       Paginated response from searching a vector store.
+    OpenaiUpdateVectorStoreRequest:
+      type: object
+      properties:
+        name:
+          type: string
+          description: The name of the vector store.
+        expires_after:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: >-
+            The expiration policy for a vector store.
+        metadata:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: >-
+            Set of 16 key-value pairs that can be attached to an object.
+      additionalProperties: false
+      title: OpenaiUpdateVectorStoreRequest
+    OpenaiUpdateVectorStoreFileRequest:
+      type: object
+      properties:
+        attributes:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+          description: >-
+            The updated key-value attributes to store with the file.
+      additionalProperties: false
+      required:
+        - attributes
+      title: OpenaiUpdateVectorStoreFileRequest
+    DPOAlignmentConfig:
+      type: object
+      properties:
+        beta:
+          type: number
+          description: Temperature parameter for the DPO loss
+        loss_type:
+          $ref: '#/components/schemas/DPOLossType'
+          default: sigmoid
+          description: The type of loss function to use for DPO
+      additionalProperties: false
+      required:
+        - beta
+        - loss_type
+      title: DPOAlignmentConfig
+      description: >-
+        Configuration for Direct Preference Optimization (DPO) alignment.
+ DPOLossType: + type: string + enum: + - sigmoid + - hinge + - ipo + - kto_pair + title: DPOLossType + DataConfig: + type: object + properties: + dataset_id: + type: string + description: >- + Unique identifier for the training dataset + batch_size: + type: integer + description: Number of samples per training batch + shuffle: + type: boolean + description: >- + Whether to shuffle the dataset during training + data_format: + $ref: '#/components/schemas/DatasetFormat' + description: >- + Format of the dataset (instruct or dialog) + validation_dataset_id: + type: string + description: >- + (Optional) Unique identifier for the validation dataset + packed: + type: boolean + default: false + description: >- + (Optional) Whether to pack multiple samples into a single sequence for + efficiency + train_on_input: + type: boolean + default: false + description: >- + (Optional) Whether to compute loss on input tokens as well as output tokens + additionalProperties: false + required: + - dataset_id + - batch_size + - shuffle + - data_format + title: DataConfig + description: >- + Configuration for training data and data loading. + DatasetFormat: + type: string + enum: + - instruct + - dialog + title: DatasetFormat + description: Format of the training dataset. 
+ EfficiencyConfig: + type: object + properties: + enable_activation_checkpointing: + type: boolean + default: false + description: >- + (Optional) Whether to use activation checkpointing to reduce memory usage + enable_activation_offloading: + type: boolean + default: false + description: >- + (Optional) Whether to offload activations to CPU to save GPU memory + memory_efficient_fsdp_wrap: + type: boolean + default: false + description: >- + (Optional) Whether to use memory-efficient FSDP wrapping + fsdp_cpu_offload: + type: boolean + default: false + description: >- + (Optional) Whether to offload FSDP parameters to CPU + additionalProperties: false + title: EfficiencyConfig + description: >- + Configuration for memory and compute efficiency optimizations. + OptimizerConfig: + type: object + properties: + optimizer_type: + $ref: '#/components/schemas/OptimizerType' + description: >- + Type of optimizer to use (adam, adamw, or sgd) + lr: + type: number + description: Learning rate for the optimizer + weight_decay: + type: number + description: >- + Weight decay coefficient for regularization + num_warmup_steps: + type: integer + description: Number of steps for learning rate warmup + additionalProperties: false + required: + - optimizer_type + - lr + - weight_decay + - num_warmup_steps + title: OptimizerConfig + description: >- + Configuration parameters for the optimization algorithm. + OptimizerType: + type: string + enum: + - adam + - adamw + - sgd + title: OptimizerType + description: >- + Available optimizer algorithms for training. 
+ TrainingConfig: + type: object + properties: + n_epochs: + type: integer + description: Number of training epochs to run + max_steps_per_epoch: + type: integer + default: 1 + description: Maximum number of steps to run per epoch + gradient_accumulation_steps: + type: integer + default: 1 + description: >- + Number of steps to accumulate gradients before updating + max_validation_steps: + type: integer + default: 1 + description: >- + (Optional) Maximum number of validation steps per epoch + data_config: + $ref: '#/components/schemas/DataConfig' + description: >- + (Optional) Configuration for data loading and formatting + optimizer_config: + $ref: '#/components/schemas/OptimizerConfig' + description: >- + (Optional) Configuration for the optimization algorithm + efficiency_config: + $ref: '#/components/schemas/EfficiencyConfig' + description: >- + (Optional) Configuration for memory and compute optimizations + dtype: + type: string + default: bf16 + description: >- + (Optional) Data type for model parameters (bf16, fp16, fp32) + additionalProperties: false + required: + - n_epochs + - max_steps_per_epoch + - gradient_accumulation_steps + title: TrainingConfig + description: >- + Comprehensive configuration for the training process. + PreferenceOptimizeRequest: + type: object + properties: + job_uuid: + type: string + description: The UUID of the job to create. + finetuned_model: + type: string + description: The model to fine-tune. + algorithm_config: + $ref: '#/components/schemas/DPOAlignmentConfig' + description: The algorithm configuration. + training_config: + $ref: '#/components/schemas/TrainingConfig' + description: The training configuration. + hyperparam_search_config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The hyperparam search configuration. 
+ logger_config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The logger configuration. + additionalProperties: false + required: + - job_uuid + - finetuned_model + - algorithm_config + - training_config + - hyperparam_search_config + - logger_config + title: PreferenceOptimizeRequest + PostTrainingJob: + type: object + properties: + job_uuid: + type: string + additionalProperties: false + required: + - job_uuid + title: PostTrainingJob + DefaultRAGQueryGeneratorConfig: + type: object + properties: + type: + type: string + const: default + default: default + description: >- + Type of query generator, always 'default' + separator: + type: string + default: ' ' + description: >- + String separator used to join query terms + additionalProperties: false + required: + - type + - separator + title: DefaultRAGQueryGeneratorConfig + description: >- + Configuration for the default RAG query generator. + LLMRAGQueryGeneratorConfig: + type: object + properties: + type: + type: string + const: llm + default: llm + description: Type of query generator, always 'llm' + model: + type: string + description: >- + Name of the language model to use for query generation + template: + type: string + description: >- + Template string for formatting the query generation prompt + additionalProperties: false + required: + - type + - model + - template + title: LLMRAGQueryGeneratorConfig + description: >- + Configuration for the LLM-based RAG query generator. + RAGQueryConfig: + type: object + properties: + query_generator_config: + $ref: '#/components/schemas/RAGQueryGeneratorConfig' + description: Configuration for the query generator. + max_tokens_in_context: + type: integer + default: 4096 + description: Maximum number of tokens in the context. + max_chunks: + type: integer + default: 5 + description: Maximum number of chunks to retrieve. 
+ chunk_template: + type: string + default: > + Result {index} + + Content: {chunk.content} + + Metadata: {metadata} + description: >- + Template for formatting each retrieved chunk in the context. Available + placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk + content string), {metadata} (chunk metadata dict). Default: "Result {index}\nContent: + {chunk.content}\nMetadata: {metadata}\n" + mode: + $ref: '#/components/schemas/RAGSearchMode' + default: vector + description: >- + Search mode for retrieval—either "vector", "keyword", or "hybrid". Default + "vector". + ranker: + $ref: '#/components/schemas/Ranker' + description: >- + Configuration for the ranker to use in hybrid search. Defaults to RRF + ranker. + additionalProperties: false + required: + - query_generator_config + - max_tokens_in_context + - max_chunks + - chunk_template + title: RAGQueryConfig + description: >- + Configuration for the RAG query generation. + RAGQueryGeneratorConfig: + oneOf: + - $ref: '#/components/schemas/DefaultRAGQueryGeneratorConfig' + - $ref: '#/components/schemas/LLMRAGQueryGeneratorConfig' + discriminator: + propertyName: type + mapping: + default: '#/components/schemas/DefaultRAGQueryGeneratorConfig' + llm: '#/components/schemas/LLMRAGQueryGeneratorConfig' + RAGSearchMode: + type: string + enum: + - vector + - keyword + - hybrid + title: RAGSearchMode + description: >- + Search modes for RAG query retrieval: - VECTOR: Uses vector similarity search + for semantic matching - KEYWORD: Uses keyword-based search for exact matching + - HYBRID: Combines both vector and keyword search for better results + RRFRanker: + type: object + properties: + type: + type: string + const: rrf + default: rrf + description: The type of ranker, always "rrf" + impact_factor: + type: number + default: 60.0 + description: >- + The impact factor for RRF scoring. Higher values give more weight to higher-ranked + results. 
Must be greater than 0 + additionalProperties: false + required: + - type + - impact_factor + title: RRFRanker + description: >- + Reciprocal Rank Fusion (RRF) ranker configuration. + Ranker: + oneOf: + - $ref: '#/components/schemas/RRFRanker' + - $ref: '#/components/schemas/WeightedRanker' + discriminator: + propertyName: type + mapping: + rrf: '#/components/schemas/RRFRanker' + weighted: '#/components/schemas/WeightedRanker' + WeightedRanker: + type: object + properties: + type: + type: string + const: weighted + default: weighted + description: The type of ranker, always "weighted" + alpha: + type: number + default: 0.5 + description: >- + Weight factor between 0 and 1. 0 means only use keyword scores, 1 means + only use vector scores, values in between blend both scores. + additionalProperties: false + required: + - type + - alpha + title: WeightedRanker + description: >- + Weighted ranker configuration that combines vector and keyword scores. + QueryRequest: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + The query content to search for in the indexed documents + vector_db_ids: + type: array + items: + type: string + description: >- + List of vector database IDs to search within + query_config: + $ref: '#/components/schemas/RAGQueryConfig' + description: >- + (Optional) Configuration parameters for the query operation + additionalProperties: false + required: + - content + - vector_db_ids + title: QueryRequest + RAGQueryResult: + type: object + properties: + content: + $ref: '#/components/schemas/InterleavedContent' + description: >- + (Optional) The retrieved content from the query + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + Additional metadata about the query result + additionalProperties: false + required: + - metadata + title: RAGQueryResult + description: >- 
+ Result of a RAG query containing retrieved content and metadata. + QueryChunksRequest: + type: object + properties: + vector_db_id: + type: string + description: >- + The identifier of the vector database to query. + query: + $ref: '#/components/schemas/InterleavedContent' + description: The query to search for. + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the query. + additionalProperties: false + required: + - vector_db_id + - query + title: QueryChunksRequest + QueryChunksResponse: + type: object + properties: + chunks: + type: array + items: + $ref: '#/components/schemas/Chunk' + description: >- + List of content chunks returned from the query + scores: + type: array + items: + type: number + description: >- + Relevance scores corresponding to each returned chunk + additionalProperties: false + required: + - chunks + - scores + title: QueryChunksResponse + description: >- + Response from querying chunks in a vector database. + QueryMetricsRequest: + type: object + properties: + start_time: + type: integer + description: The start time of the metric to query. + end_time: + type: integer + description: The end time of the metric to query. + granularity: + type: string + description: The granularity of the metric to query. + query_type: + type: string + enum: + - range + - instant + description: The type of query to perform. 
+ label_matchers: + type: array + items: + type: object + properties: + name: + type: string + description: The name of the label to match + value: + type: string + description: The value to match against + operator: + type: string + enum: + - '=' + - '!=' + - =~ + - '!~' + description: >- + The comparison operator to use for matching + default: '=' + additionalProperties: false + required: + - name + - value + - operator + title: MetricLabelMatcher + description: >- + A matcher for filtering metrics by label values. + description: >- + The label matchers to apply to the metric. + additionalProperties: false + required: + - start_time + - query_type + title: QueryMetricsRequest + MetricDataPoint: + type: object + properties: + timestamp: + type: integer + description: >- + Unix timestamp when the metric value was recorded + value: + type: number + description: >- + The numeric value of the metric at this timestamp + unit: + type: string + additionalProperties: false + required: + - timestamp + - value + - unit + title: MetricDataPoint + description: >- + A single data point in a metric time series. + MetricLabel: + type: object + properties: + name: + type: string + description: The name of the label + value: + type: string + description: The value of the label + additionalProperties: false + required: + - name + - value + title: MetricLabel + description: A label associated with a metric. + MetricSeries: + type: object + properties: + metric: + type: string + description: The name of the metric + labels: + type: array + items: + $ref: '#/components/schemas/MetricLabel' + description: >- + List of labels associated with this metric series + values: + type: array + items: + $ref: '#/components/schemas/MetricDataPoint' + description: >- + List of data points in chronological order + additionalProperties: false + required: + - metric + - labels + - values + title: MetricSeries + description: A time series of metric data points. 
+ QueryMetricsResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/MetricSeries' + description: >- + List of metric series matching the query criteria + additionalProperties: false + required: + - data + title: QueryMetricsResponse + description: >- + Response containing metric time series data. + QueryCondition: + type: object + properties: + key: + type: string + description: The attribute key to filter on + op: + $ref: '#/components/schemas/QueryConditionOp' + description: The comparison operator to apply + value: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The value to compare against + additionalProperties: false + required: + - key + - op + - value + title: QueryCondition + description: A condition for filtering query results. + QueryConditionOp: + type: string + enum: + - eq + - ne + - gt + - lt + title: QueryConditionOp + description: >- + Comparison operators for query conditions. + QuerySpansRequest: + type: object + properties: + attribute_filters: + type: array + items: + $ref: '#/components/schemas/QueryCondition' + description: >- + The attribute filters to apply to the spans. + attributes_to_return: + type: array + items: + type: string + description: The attributes to return in the spans. + max_depth: + type: integer + description: The maximum depth of the tree. + additionalProperties: false + required: + - attribute_filters + - attributes_to_return + title: QuerySpansRequest + QuerySpansResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Span' + description: >- + List of spans matching the query criteria + additionalProperties: false + required: + - data + title: QuerySpansResponse + description: Response containing a list of spans. 
+ QueryTracesRequest: + type: object + properties: + attribute_filters: + type: array + items: + $ref: '#/components/schemas/QueryCondition' + description: >- + The attribute filters to apply to the traces. + limit: + type: integer + description: The limit of traces to return. + offset: + type: integer + description: The offset of the traces to return. + order_by: + type: array + items: + type: string + description: The order by of the traces to return. + additionalProperties: false + title: QueryTracesRequest + QueryTracesResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/Trace' + description: >- + List of traces matching the query criteria + additionalProperties: false + required: + - data + title: QueryTracesResponse + description: Response containing a list of traces. + RegisterBenchmarkRequest: + type: object + properties: + benchmark_id: + type: string + description: The ID of the benchmark to register. + dataset_id: + type: string + description: >- + The ID of the dataset to use for the benchmark. + scoring_functions: + type: array + items: + type: string + description: >- + The scoring functions to use for the benchmark. + provider_benchmark_id: + type: string + description: >- + The ID of the provider benchmark to use for the benchmark. + provider_id: + type: string + description: >- + The ID of the provider to use for the benchmark. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The metadata to use for the benchmark. + additionalProperties: false + required: + - benchmark_id + - dataset_id + - scoring_functions + title: RegisterBenchmarkRequest + RegisterDatasetRequest: + type: object + properties: + purpose: + type: string + enum: + - post-training/messages + - eval/question-answer + - eval/messages-answer + description: >- + The purpose of the dataset. 
One of: - "post-training/messages": The dataset + contains a messages column with list of messages for post-training. { + "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": "assistant", + "content": "Hello, world!"}, ] } - "eval/question-answer": The dataset + contains a question column and an answer column for evaluation. { "question": + "What is the capital of France?", "answer": "Paris" } - "eval/messages-answer": + The dataset contains a messages column with list of messages and an answer + column for evaluation. { "messages": [ {"role": "user", "content": "Hello, + my name is John Doe."}, {"role": "assistant", "content": "Hello, John + Doe. How can I help you today?"}, {"role": "user", "content": "What's + my name?"}, ], "answer": "John Doe" } + source: + $ref: '#/components/schemas/DataSource' + description: >- + The data source of the dataset. Ensure that the data source schema is + compatible with the purpose of the dataset. Examples: - { "type": "uri", + "uri": "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": + "lsfs://mydata.jsonl" } - { "type": "uri", "uri": "data:csv;base64,{base64_content}" + } - { "type": "uri", "uri": "huggingface://llamastack/simpleqa?split=train" + } - { "type": "rows", "rows": [ { "messages": [ {"role": "user", "content": + "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] + } ] } + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + The metadata for the dataset. - E.g. {"description": "My dataset"}. + dataset_id: + type: string + description: >- + The ID of the dataset. If not provided, an ID will be generated. + additionalProperties: false + required: + - purpose + - source + title: RegisterDatasetRequest + RegisterModelRequest: + type: object + properties: + model_id: + type: string + description: The identifier of the model to register. 
+ provider_model_id: + type: string + description: >- + The identifier of the model in the provider. + provider_id: + type: string + description: The identifier of the provider. + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: Any additional metadata for this model. + model_type: + $ref: '#/components/schemas/ModelType' + description: The type of model to register. + additionalProperties: false + required: + - model_id + title: RegisterModelRequest + RegisterScoringFunctionRequest: + type: object + properties: + scoring_fn_id: + type: string + description: >- + The ID of the scoring function to register. + description: + type: string + description: The description of the scoring function. + return_type: + $ref: '#/components/schemas/ParamType' + description: The return type of the scoring function. + provider_scoring_fn_id: + type: string + description: >- + The ID of the provider scoring function to use for the scoring function. + provider_id: + type: string + description: >- + The ID of the provider to use for the scoring function. + params: + $ref: '#/components/schemas/ScoringFnParams' + description: >- + The parameters for the scoring function for benchmark eval, these can + be overridden for app eval. + additionalProperties: false + required: + - scoring_fn_id + - description + - return_type + title: RegisterScoringFunctionRequest + RegisterShieldRequest: + type: object + properties: + shield_id: + type: string + description: >- + The identifier of the shield to register. + provider_shield_id: + type: string + description: >- + The identifier of the shield in the provider. + provider_id: + type: string + description: The identifier of the provider. 
+ params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the shield. + additionalProperties: false + required: + - shield_id + title: RegisterShieldRequest + RegisterToolGroupRequest: + type: object + properties: + toolgroup_id: + type: string + description: The ID of the tool group to register. + provider_id: + type: string + description: >- + The ID of the provider to use for the tool group. + mcp_endpoint: + $ref: '#/components/schemas/URL' + description: >- + The MCP endpoint to use for the tool group. + args: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + A dictionary of arguments to pass to the tool group. + additionalProperties: false + required: + - toolgroup_id + - provider_id + title: RegisterToolGroupRequest + RegisterVectorDbRequest: + type: object + properties: + vector_db_id: + type: string + description: >- + The identifier of the vector database to register. + embedding_model: + type: string + description: The embedding model to use. + embedding_dimension: + type: integer + description: The dimension of the embedding model. + provider_id: + type: string + description: The identifier of the provider. + vector_db_name: + type: string + description: The name of the vector database. + provider_vector_db_id: + type: string + description: >- + The identifier of the vector database in the provider. + additionalProperties: false + required: + - vector_db_id + - embedding_model + title: RegisterVectorDbRequest + RerankRequest: + type: object + properties: + model: + type: string + description: >- + The identifier of the reranking model to use. The model must be a reranking + model registered with Llama Stack and available via the /models endpoint. 
+ query: + oneOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + description: >- + The search query to rank items against. Can be a string, text content + part, or image content part. The input must not exceed the model's max + input token length. + items: + type: array + items: + oneOf: + - type: string + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' + - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam' + description: >- + List of items to rerank. Each item can be a string, text content part, + or image content part. Each input must not exceed the model's max input + token length. + max_num_results: + type: integer + description: >- + (Optional) Maximum number of results to return. Default: returns all. + additionalProperties: false + required: + - model + - query + - items + title: RerankRequest + RerankData: + type: object + properties: + index: + type: integer + description: >- + The original index of the document in the input list + relevance_score: + type: number + description: >- + The relevance score from the model output. Values are inverted when applicable + so that higher scores indicate greater relevance. + additionalProperties: false + required: + - index + - relevance_score + title: RerankData + description: >- + A single rerank result from a reranking response. + RerankResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/RerankData' + description: >- + List of rerank result objects, sorted by relevance score (descending) + additionalProperties: false + required: + - data + title: RerankResponse + description: Response from a reranking request. 
+ ResumeAgentTurnRequest: + type: object + properties: + tool_responses: + type: array + items: + $ref: '#/components/schemas/ToolResponse' + description: >- + The tool call responses to resume the turn with. + stream: + type: boolean + description: Whether to stream the response. + additionalProperties: false + required: + - tool_responses + title: ResumeAgentTurnRequest + RunEvalRequest: + type: object + properties: + benchmark_config: + $ref: '#/components/schemas/BenchmarkConfig' + description: The configuration for the benchmark. + additionalProperties: false + required: + - benchmark_config + title: RunEvalRequest + RunModerationRequest: + type: object + properties: + input: + oneOf: + - type: string + - type: array + items: + type: string + description: >- + Input (or inputs) to classify. Can be a single string, an array of strings, + or an array of multi-modal input objects similar to other models. + model: + type: string + description: >- + The content moderation model you would like to use. + additionalProperties: false + required: + - input + - model + title: RunModerationRequest + ModerationObject: + type: object + properties: + id: + type: string + description: >- + The unique identifier for the moderation request. + model: + type: string + description: >- + The model used to generate the moderation results. + results: + type: array + items: + $ref: '#/components/schemas/ModerationObjectResults' + description: A list of moderation objects + additionalProperties: false + required: + - id + - model + - results + title: ModerationObject + description: A moderation object. + ModerationObjectResults: + type: object + properties: + flagged: + type: boolean + description: >- + Whether any of the below categories are flagged. + categories: + type: object + additionalProperties: + type: boolean + description: >- + A list of the categories, and whether they are flagged or not. 
+ category_applied_input_types: + type: object + additionalProperties: + type: array + items: + type: string + description: >- + A list of the categories along with the input type(s) that the score applies + to. + category_scores: + type: object + additionalProperties: + type: number + description: >- + A list of the categories along with their scores as predicted by model. + user_message: + type: string + metadata: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + additionalProperties: false + required: + - flagged + - metadata + title: ModerationObjectResults + description: A moderation object. + RunShieldRequest: + type: object + properties: + shield_id: + type: string + description: The identifier of the shield to run. + messages: + type: array + items: + $ref: '#/components/schemas/Message' + description: The messages to run the shield on. + params: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The parameters of the shield. + additionalProperties: false + required: + - shield_id + - messages + - params + title: RunShieldRequest + RunShieldResponse: + type: object + properties: + violation: + $ref: '#/components/schemas/SafetyViolation' + description: >- + (Optional) Safety violation detected by the shield, if any + additionalProperties: false + title: RunShieldResponse + description: Response from running a safety shield. + SaveSpansToDatasetRequest: + type: object + properties: + attribute_filters: + type: array + items: + $ref: '#/components/schemas/QueryCondition' + description: >- + The attribute filters to apply to the spans. + attributes_to_save: + type: array + items: + type: string + description: The attributes to save to the dataset. + dataset_id: + type: string + description: >- + The ID of the dataset to save the spans to. 
+ max_depth: + type: integer + description: The maximum depth of the tree. + additionalProperties: false + required: + - attribute_filters + - attributes_to_save + - dataset_id + title: SaveSpansToDatasetRequest + ScoreRequest: + type: object + properties: + input_rows: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The rows to score. + scoring_functions: + type: object + additionalProperties: + oneOf: + - $ref: '#/components/schemas/ScoringFnParams' + - type: 'null' + description: >- + The scoring functions to use for the scoring. + additionalProperties: false + required: + - input_rows + - scoring_functions + title: ScoreRequest + ScoreResponse: + type: object + properties: + results: + type: object + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + description: >- + A map of scoring function name to ScoringResult. + additionalProperties: false + required: + - results + title: ScoreResponse + description: The response from scoring. + ScoreBatchRequest: + type: object + properties: + dataset_id: + type: string + description: The ID of the dataset to score. + scoring_functions: + type: object + additionalProperties: + oneOf: + - $ref: '#/components/schemas/ScoringFnParams' + - type: 'null' + description: >- + The scoring functions to use for the scoring. + save_results_dataset: + type: boolean + description: >- + Whether to save the results to a dataset. 
+ additionalProperties: false + required: + - dataset_id + - scoring_functions + - save_results_dataset + title: ScoreBatchRequest + ScoreBatchResponse: + type: object + properties: + dataset_id: + type: string + description: >- + (Optional) The identifier of the dataset that was scored + results: + type: object + additionalProperties: + $ref: '#/components/schemas/ScoringResult' + description: >- + A map of scoring function name to ScoringResult + additionalProperties: false + required: + - results + title: ScoreBatchResponse + description: >- + Response from batch scoring operations on datasets. + SetDefaultVersionRequest: + type: object + properties: + version: + type: integer + description: The version to set as default. + additionalProperties: false + required: + - version + title: SetDefaultVersionRequest + AlgorithmConfig: + oneOf: + - $ref: '#/components/schemas/LoraFinetuningConfig' + - $ref: '#/components/schemas/QATFinetuningConfig' + discriminator: + propertyName: type + mapping: + LoRA: '#/components/schemas/LoraFinetuningConfig' + QAT: '#/components/schemas/QATFinetuningConfig' + LoraFinetuningConfig: + type: object + properties: + type: + type: string + const: LoRA + default: LoRA + description: Algorithm type identifier, always "LoRA" + lora_attn_modules: + type: array + items: + type: string + description: >- + List of attention module names to apply LoRA to + apply_lora_to_mlp: + type: boolean + description: Whether to apply LoRA to MLP layers + apply_lora_to_output: + type: boolean + description: >- + Whether to apply LoRA to output projection layers + rank: + type: integer + description: >- + Rank of the LoRA adaptation (lower rank = fewer parameters) + alpha: + type: integer + description: >- + LoRA scaling parameter that controls adaptation strength + use_dora: + type: boolean + default: false + description: >- + (Optional) Whether to use DoRA (Weight-Decomposed Low-Rank Adaptation) + quantize_base: + type: boolean + default: false + 
description: >- + (Optional) Whether to quantize the base model weights + additionalProperties: false + required: + - type + - lora_attn_modules + - apply_lora_to_mlp + - apply_lora_to_output + - rank + - alpha + title: LoraFinetuningConfig + description: >- + Configuration for Low-Rank Adaptation (LoRA) fine-tuning. + QATFinetuningConfig: + type: object + properties: + type: + type: string + const: QAT + default: QAT + description: Algorithm type identifier, always "QAT" + quantizer_name: + type: string + description: >- + Name of the quantization algorithm to use + group_size: + type: integer + description: Size of groups for grouped quantization + additionalProperties: false + required: + - type + - quantizer_name + - group_size + title: QATFinetuningConfig + description: >- + Configuration for Quantization-Aware Training (QAT) fine-tuning. + SupervisedFineTuneRequest: + type: object + properties: + job_uuid: + type: string + description: The UUID of the job to create. + training_config: + $ref: '#/components/schemas/TrainingConfig' + description: The training configuration. + hyperparam_search_config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The hyperparam search configuration. + logger_config: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: The logger configuration. + model: + type: string + description: The model to fine-tune. + checkpoint_dir: + type: string + description: The directory to save checkpoint(s) to. + algorithm_config: + $ref: '#/components/schemas/AlgorithmConfig' + description: The algorithm configuration. 
+ additionalProperties: false + required: + - job_uuid + - training_config + - hyperparam_search_config + - logger_config + title: SupervisedFineTuneRequest + SyntheticDataGenerateRequest: + type: object + properties: + dialogs: + type: array + items: + $ref: '#/components/schemas/Message' + description: >- + List of conversation messages to use as input for synthetic data generation + filtering_function: + type: string + enum: + - none + - random + - top_k + - top_p + - top_k_top_p + - sigmoid + description: >- + Type of filtering to apply to generated synthetic data samples + model: + type: string + description: >- + (Optional) The identifier of the model to use. The model must be registered + with Llama Stack and available via the /models endpoint + additionalProperties: false + required: + - dialogs + - filtering_function + title: SyntheticDataGenerateRequest + SyntheticDataGenerationResponse: + type: object + properties: + synthetic_data: + type: array + items: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + List of generated synthetic data samples that passed the filtering criteria + statistics: + type: object + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + description: >- + (Optional) Statistical information about the generation process and filtering + results + additionalProperties: false + required: + - synthetic_data + title: SyntheticDataGenerationResponse + description: >- + Response from the synthetic data generation. Batch of (prompt, response, score) + tuples that pass the threshold. + UpdatePromptRequest: + type: object + properties: + prompt: + type: string + description: The updated prompt text content. + version: + type: integer + description: >- + The current version of the prompt being updated. 
+      variables:
+        type: array
+        items:
+          type: string
+        description: >-
+          Updated list of variable names that can be used in the prompt template.
+      set_as_default:
+        type: boolean
+        description: >-
+          Set the new version as the default (default=True).
+    additionalProperties: false
+    required:
+      - prompt
+      - version
+      - set_as_default
+    title: UpdatePromptRequest
   VersionInfo:
     type: object
     properties:
diff --git a/example.py b/example.py
new file mode 100644
index 000000000..7e968e24a
--- /dev/null
+++ b/example.py
@@ -0,0 +1,257 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import os
+
+# NOTE(review): never commit real API keys — read from the environment instead.
+os.environ["NVIDIA_API_KEY"] = "nvapi-REDACTED"
+# Option 1: Use default NIM URL (will auto-switch to ai.api.nvidia.com for rerank)
+# os.environ["NVIDIA_BASE_URL"] = "https://ai.api.nvidia.com"
+# Option 2: Use AI Foundation URL directly for rerank models
+# os.environ["NVIDIA_BASE_URL"] = "https://ai.api.nvidia.com/v1"
+os.environ["NVIDIA_BASE_URL"] = "https://integrate.api.nvidia.com"
+
+import base64
+import io
+from PIL import Image
+
+from llama_stack.core.library_client import LlamaStackAsLibraryClient
+
+client = LlamaStackAsLibraryClient("nvidia")
+client.initialize()
+
+# # response = client.inference.completion(
+# #     model_id="meta/llama-3.1-8b-instruct",
+# #     content="Complete the sentence using one word: Roses are red, violets are :",
+# #     stream=False,
+# #     sampling_params={
+# #         "max_tokens": 50,
+# #     },
+# # )
+# # print(f"Response: {response.content}")
+
+
+# response = client.inference.chat_completion(
+#     model_id="nvidia/nvidia-nemotron-nano-9b-v2",
+#     messages=[
+#         {
+#             "role": "system",
+#             "content": "/think",
+#         },
+#         {
+#             "role": "user",
+#             "content": "How are you?",
+#         },
+#     ],
+# 
stream=False, +# sampling_params={ +# "max_tokens": 1024, +# }, +# ) +# print(f"Response: {response}") + + +print(client.models.list()) +rerank_response = client.inference.rerank( + model="nvidia/llama-3.2-nv-rerankqa-1b-v2", + query="query", + items=[ + "item_1", + "item_2", + "item_3", + ] +) + +print(rerank_response) +for i, result in enumerate(rerank_response): + print(f"{i+1}. [Index: {result.index}, " + f"Score: {(result.relevance_score):.3f}]") + +# # from llama_stack.models.llama.datatypes import ToolDefinition, ToolParamDefinition + +# # tool_definition = ToolDefinition( +# # tool_name="get_weather", +# # description="Get current weather information for a location", +# # parameters={ +# # "location": ToolParamDefinition( +# # param_type="string", +# # description="The city and state, e.g. San Francisco, CA", +# # required=True +# # ), +# # "unit": ToolParamDefinition( +# # param_type="string", +# # description="Temperature unit (celsius or fahrenheit)", +# # required=False, +# # default="celsius" +# # ) +# # } +# # ) + +# # # tool_response = client.inference.chat_completion( +# # # model_id="meta-llama/Llama-3.1-8B-Instruct", +# # # messages=[ +# # # {"role": "user", "content": "What's the weather like in San Francisco?"} +# # # ], +# # # tools=[tool_definition], +# # # ) + +# # # print(f"Tool Response: {tool_response.completion_message.content}") +# # # if tool_response.completion_message.tool_calls: +# # # for tool_call in tool_response.completion_message.tool_calls: +# # # print(f"Tool Called: {tool_call.tool_name}") +# # # print(f"Arguments: {tool_call.arguments}") + + +# # # from llama_stack.apis.inference import JsonSchemaResponseFormat, ResponseFormatType + +# # # person_schema = { +# # # "type": "object", +# # # "properties": { +# # # "name": {"type": "string"}, +# # # "age": {"type": "integer"}, +# # # "occupation": {"type": "string"}, +# # # }, +# # # "required": ["name", "age", "occupation"] +# # # } + +# # # response_format = 
JsonSchemaResponseFormat( +# # # type=ResponseFormatType.json_schema, +# # # json_schema=person_schema +# # # ) + +# # # structured_response = client.inference.chat_completion( +# # # model_id="meta-llama/Llama-3.1-8B-Instruct", +# # # messages=[ +# # # { +# # # "role": "user", +# # # "content": "Create a profile for a fictional person named Alice who is 30 years old and is a software engineer. " +# # # } +# # # ], +# # # response_format=response_format, +# # # ) + +# # # print(f"Structured Response: {structured_response.completion_message.content}") + +# # # print("\n" + "="*50) +# # # print("VISION LANGUAGE MODEL (VLM) EXAMPLE") +# # # print("="*50) + +# # def load_image_as_base64(image_path): +# # with open(image_path, "rb") as image_file: +# # img_bytes = image_file.read() +# # return base64.b64encode(img_bytes).decode("utf-8") + +# # image_path = "/home/jiayin/llama-stack/docs/dog.jpg" +# # demo_image_b64 = load_image_as_base64(image_path) + +# # vlm_response = client.inference.chat_completion( +# # model_id="nvidia/vila", +# # messages=[ +# # { +# # "role": "user", +# # "content": [ +# # { +# # "type": "image", +# # "image": { +# # "data": demo_image_b64, +# # }, +# # }, +# # { +# # "type": "text", +# # "text": "Please describe what you see in this image in detail.", +# # }, +# # ], +# # } +# # ], +# # ) + +# # print(f"VLM Response: {vlm_response.completion_message.content}") + +# # # print("\n" + "="*50) +# # # print("EMBEDDING EXAMPLE") +# # # print("="*50) + +# # # # Embedding example +# # # embedding_response = client.inference.embeddings( +# # # model_id="nvidia/llama-3.2-nv-embedqa-1b-v2", +# # # contents=["Hello world", "How are you today?"], +# # # task_type="query" +# # # ) + +# # # print(f"Number of embeddings: {len(embedding_response.embeddings)}") +# # # print(f"Embedding dimension: {len(embedding_response.embeddings[0])}") +# # # print(f"First few values: {embedding_response.embeddings[0][:5]}") + +# # # # from openai import OpenAI + +# # # # 
client = OpenAI( +# # # # base_url = "http://10.176.230.61:8000/v1", +# # # # api_key = "nvapi-YOUR-API-KEY-HERE" +# # # # ) + +# # # # # completion = client.completions.create( +# # # # # model="meta/llama-3.1-405b-instruct", +# # # # # prompt="How are you?", +# # # # # temperature=0.2, +# # # # # top_p=0.7, +# # # # # max_tokens=1024, +# # # # # stream=False +# # # # # ) + +# # # # # # completion = client.chat.completions.create( +# # # # # # model="meta/llama-3.1-8b-instruct", +# # # # # # messages=[{"role":"user","content":"hi"}], +# # # # # # temperature=0.2, +# # # # # # top_p=0.7, +# # # # # # max_tokens=1024, +# # # # # # stream=True +# # # # # # ) + +# # # # # for chunk in completion: +# # # # # if chunk.choices[0].delta.content is not None: +# # # # # print(chunk.choices[0].delta.content, end="") + + +# # # # # response = client.inference.completion( +# # # # # model_id="meta/llama-3.1-8b-instruct", +# # # # # content="Complete the sentence using one word: Roses are red, violets are :", +# # # # # stream=False, +# # # # # sampling_params={ +# # # # # "max_tokens": 50, +# # # # # }, +# # # # # ) +# # # # # print(f"Response: {response.content}") + + + + +# from openai import OpenAI + +# client = OpenAI( +# base_url = "https://integrate.api.nvidia.com/v1", +# api_key = "nvapi-YOUR-API-KEY-HERE" +# ) + +# completion = client.chat.completions.create( +# model="nvidia/nvidia-nemotron-nano-9b-v2", +# messages=[{"role":"system","content":"/think"}], +# temperature=0.6, +# top_p=0.95, +# max_tokens=2048, +# frequency_penalty=0, +# presence_penalty=0, +# stream=True, +# extra_body={ +# "min_thinking_tokens": 1024, +# "max_thinking_tokens": 2048 +# } +# ) + +# for chunk in completion: +# reasoning = getattr(chunk.choices[0].delta, "reasoning_content", None) +# if reasoning: +# print(reasoning, end="") +# if chunk.choices[0].delta.content is not None: +# 
print(chunk.choices[0].delta.content, end="") diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index e88a16315..e452d8157 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -1016,7 +1016,7 @@ class InferenceProvider(Protocol): ) -> RerankResponse: """Rerank a list of documents based on their relevance to a query. - :param model: The identifier of the reranking model to use. + :param model: The identifier of the reranking model to use. The model must be a reranking model registered with Llama Stack and available via the /models endpoint. :param query: The search query to rank items against. Can be a string, text content part, or image content part. The input must not exceed the model's max input token length. :param items: List of items to rerank. Each item can be a string, text content part, or image content part. Each input must not exceed the model's max input token length. :param max_num_results: (Optional) Maximum number of results to return. Default: returns all. diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py index 210ed9246..359f5bf0c 100644 --- a/llama_stack/apis/models/models.py +++ b/llama_stack/apis/models/models.py @@ -27,10 +27,12 @@ class ModelType(StrEnum): """Enumeration of supported model types in Llama Stack. 
:cvar llm: Large language model for text generation and completion :cvar embedding: Embedding model for converting text to vector representations + :cvar rerank: Reranking model for reordering documents by relevance """ llm = "llm" embedding = "embedding" + rerank = "rerank" @json_schema_type diff --git a/llama_stack/core/routers/inference.py b/llama_stack/core/routers/inference.py index c4338e614..e5826685e 100644 --- a/llama_stack/core/routers/inference.py +++ b/llama_stack/core/routers/inference.py @@ -41,9 +41,14 @@ from llama_stack.apis.inference import ( OpenAIMessageParam, OpenAIResponseFormatParam, Order, + RerankResponse, StopReason, ToolPromptFormat, ) +from llama_stack.apis.inference.inference import ( + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartTextParam, +) from llama_stack.apis.models import Model, ModelType from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry from llama_stack.log import get_logger @@ -179,6 +184,25 @@ class InferenceRouter(Inference): raise ModelTypeError(model_id, model.model_type, expected_model_type) return model + async def rerank( + self, + model: str, + query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam, + items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam], + max_num_results: int | None = None, + ) -> RerankResponse: + """Route rerank requests to the appropriate provider based on the model.""" + logger.debug(f"InferenceRouter.rerank: {model}") + model_obj = await self._get_model(model, ModelType.rerank) + provider = await self.routing_table.get_provider_impl(model_obj.identifier) + return await provider.rerank( + model=model_obj.identifier, + query=query, + items=items, + max_num_results=max_num_results, + ) + + async def openai_completion( self, model: str, diff --git a/llama_stack/providers/remote/inference/nvidia/models.py 
b/llama_stack/providers/remote/inference/nvidia/models.py new file mode 100644 index 000000000..a79a1c6aa --- /dev/null +++ b/llama_stack/providers/remote/inference/nvidia/models.py @@ -0,0 +1,131 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_stack.apis.models import ModelType +from llama_stack.models.llama.sku_types import CoreModelId +from llama_stack.providers.utils.inference.model_registry import ( + ProviderModelEntry, + build_hf_repo_model_entry, +) + +SAFETY_MODELS_ENTRIES = [] + +# https://docs.nvidia.com/nim/large-language-models/latest/supported-llm-agnostic-architectures.html +MODEL_ENTRIES = [ + build_hf_repo_model_entry( + "meta/llama3-8b-instruct", + CoreModelId.llama3_8b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama3-70b-instruct", + CoreModelId.llama3_70b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.1-8b-instruct", + CoreModelId.llama3_1_8b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.1-70b-instruct", + CoreModelId.llama3_1_70b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.1-405b-instruct", + CoreModelId.llama3_1_405b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.2-1b-instruct", + CoreModelId.llama3_2_1b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.2-3b-instruct", + CoreModelId.llama3_2_3b_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.2-11b-vision-instruct", + CoreModelId.llama3_2_11b_vision_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.2-90b-vision-instruct", + CoreModelId.llama3_2_90b_vision_instruct.value, + ), + build_hf_repo_model_entry( + "meta/llama-3.3-70b-instruct", + CoreModelId.llama3_3_70b_instruct.value, + ), + ProviderModelEntry( + provider_model_id="nvidia/vila", + model_type=ModelType.llm, + ), + # NeMo 
Retriever Text Embedding models - + # + # https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html + # + # +-----------------------------------+--------+-----------+-----------+------------+ + # | Model ID | Max | Publisher | Embedding | Dynamic | + # | | Tokens | | Dimension | Embeddings | + # +-----------------------------------+--------+-----------+-----------+------------+ + # | nvidia/llama-3.2-nv-embedqa-1b-v2 | 8192 | NVIDIA | 2048 | Yes | + # | nvidia/nv-embedqa-e5-v5 | 512 | NVIDIA | 1024 | No | + # | nvidia/nv-embedqa-mistral-7b-v2 | 512 | NVIDIA | 4096 | No | + # | snowflake/arctic-embed-l | 512 | Snowflake | 1024 | No | + # +-----------------------------------+--------+-----------+-----------+------------+ + ProviderModelEntry( + provider_model_id="nvidia/llama-3.2-nv-embedqa-1b-v2", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 2048, + "context_length": 8192, + }, + ), + ProviderModelEntry( + provider_model_id="nvidia/nv-embedqa-e5-v5", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 1024, + "context_length": 512, + }, + ), + ProviderModelEntry( + provider_model_id="nvidia/nv-embedqa-mistral-7b-v2", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 4096, + "context_length": 512, + }, + ), + ProviderModelEntry( + provider_model_id="snowflake/arctic-embed-l", + model_type=ModelType.embedding, + metadata={ + "embedding_dimension": 1024, + "context_length": 512, + }, + ), + # NVIDIA Reranking models + ProviderModelEntry( + provider_model_id="nv-rerank-qa-mistral-4b:1", + model_type=ModelType.rerank, + metadata={ + "endpoint": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking", + }, + ), + ProviderModelEntry( + provider_model_id="nvidia/nv-rerankqa-mistral-4b-v3", + model_type=ModelType.rerank, + metadata={ + "endpoint": "https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking", + }, + ), + ProviderModelEntry( + 
provider_model_id="nvidia/llama-3.2-nv-rerankqa-1b-v2", + model_type=ModelType.rerank, + metadata={ + "endpoint": "https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking", + }, + ), + # TODO(mf): how do we handle Nemotron models? + # "Llama3.1-Nemotron-51B-Instruct" -> "meta/llama-3.1-nemotron-51b-instruct", +] + SAFETY_MODELS_ENTRIES diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 2e6c3d769..b2fdec61f 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -12,6 +12,12 @@ from llama_stack.apis.inference import ( OpenAIEmbeddingData, OpenAIEmbeddingsResponse, OpenAIEmbeddingUsage, + RerankData, + RerankResponse, +) +from llama_stack.apis.inference.inference import ( + OpenAIChatCompletionContentPartImageParam, + OpenAIChatCompletionContentPartTextParam, ) from llama_stack.log import get_logger from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin @@ -80,6 +86,80 @@ class NVIDIAInferenceAdapter(OpenAIMixin, Inference): """ return f"{self._config.url}/v1" if self._config.append_api_version else self._config.url + async def rerank( + self, + model: str, + query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam, + items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam], + max_num_results: int | None = None, + ) -> RerankResponse: + provider_model_id = await self._get_provider_model_id(model) + + ranking_url = self.get_base_url() + model_obj = await self.model_store.get_model(model) + + if _is_nvidia_hosted(self._config) and "endpoint" in model_obj.metadata: + ranking_url = model_obj.metadata["endpoint"] + + logger.debug(f"Using rerank endpoint: {ranking_url} for model: {provider_model_id}") + + # Convert query to text format + if isinstance(query, str): + query_text = 
query + elif hasattr(query, "text"): + query_text = query.text + else: + raise ValueError("Query must be a string or text content part") + + # Convert items to text format + passages = [] + for item in items: + if isinstance(item, str): + passages.append({"text": item}) + elif hasattr(item, "text"): + passages.append({"text": item.text}) + else: + raise ValueError("Items must be strings or text content parts") + + payload = { + "model": provider_model_id, + "query": {"text": query_text}, + "passages": passages, + } + + headers = { + "Authorization": f"Bearer {self.get_api_key()}", + "Content-Type": "application/json", + } + + import aiohttp + + try: + async with aiohttp.ClientSession() as session: + async with session.post(ranking_url, headers=headers, json=payload) as response: + if response.status != 200: + response_text = await response.text() + raise ConnectionError( + f"NVIDIA rerank API request failed with status {response.status}: {response_text}" + ) + + result = await response.json() + rankings = result.get("rankings", []) + + # Convert to RerankData format + rerank_data = [] + for ranking in rankings: + rerank_data.append(RerankData(index=ranking["index"], relevance_score=ranking["logit"])) + + # Apply max_num_results limit if specified + if max_num_results is not None: + rerank_data = rerank_data[:max_num_results] + + return RerankResponse(data=rerank_data) + + except aiohttp.ClientError as e: + raise ConnectionError(f"Failed to connect to NVIDIA rerank API at {ranking_url}: {e}") from e + async def openai_embeddings( self, model: str,