Merge remote-tracking branch 'origin/main' into TamiTakamiya/tool-param-definition-update

Ashwin Bharambe 2025-09-27 10:47:08 -07:00
commit c1818350c8
479 changed files with 74743 additions and 8997 deletions

View file

@ -27,6 +27,7 @@ from llama_stack.apis.inference import (
)
from llama_stack.apis.safety import SafetyViolation
from llama_stack.apis.tools import ToolDef
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
from .openai_responses import (
@ -481,7 +482,7 @@ class Agents(Protocol):
- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.
"""
@webmethod(route="/agents", method="POST", descriptive_name="create_agent")
@webmethod(route="/agents", method="POST", descriptive_name="create_agent", level=LLAMA_STACK_API_V1)
async def create_agent(
self,
agent_config: AgentConfig,
@ -494,7 +495,10 @@ class Agents(Protocol):
...
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn", method="POST", descriptive_name="create_agent_turn"
route="/agents/{agent_id}/session/{session_id}/turn",
method="POST",
descriptive_name="create_agent_turn",
level=LLAMA_STACK_API_V1,
)
async def create_agent_turn(
self,
@ -524,6 +528,7 @@ class Agents(Protocol):
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
method="POST",
descriptive_name="resume_agent_turn",
level=LLAMA_STACK_API_V1,
)
async def resume_agent_turn(
self,
@ -549,6 +554,7 @@ class Agents(Protocol):
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
method="GET",
level=LLAMA_STACK_API_V1,
)
async def get_agents_turn(
self,
@ -568,6 +574,7 @@ class Agents(Protocol):
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
method="GET",
level=LLAMA_STACK_API_V1,
)
async def get_agents_step(
self,
@ -586,7 +593,12 @@ class Agents(Protocol):
"""
...
@webmethod(route="/agents/{agent_id}/session", method="POST", descriptive_name="create_agent_session")
@webmethod(
route="/agents/{agent_id}/session",
method="POST",
descriptive_name="create_agent_session",
level=LLAMA_STACK_API_V1,
)
async def create_agent_session(
self,
agent_id: str,
@ -600,7 +612,7 @@ class Agents(Protocol):
"""
...
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET")
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_agents_session(
self,
session_id: str,
@ -616,7 +628,7 @@ class Agents(Protocol):
"""
...
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE")
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def delete_agents_session(
self,
session_id: str,
@ -629,7 +641,7 @@ class Agents(Protocol):
"""
...
@webmethod(route="/agents/{agent_id}", method="DELETE")
@webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def delete_agent(
self,
agent_id: str,
@ -640,7 +652,7 @@ class Agents(Protocol):
"""
...
@webmethod(route="/agents", method="GET")
@webmethod(route="/agents", method="GET", level=LLAMA_STACK_API_V1)
async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse:
"""List all agents.
@ -650,7 +662,7 @@ class Agents(Protocol):
"""
...
@webmethod(route="/agents/{agent_id}", method="GET")
@webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_agent(self, agent_id: str) -> Agent:
"""Describe an agent by its ID.
@ -659,7 +671,7 @@ class Agents(Protocol):
"""
...
@webmethod(route="/agents/{agent_id}/sessions", method="GET")
@webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1)
async def list_agent_sessions(
self,
agent_id: str,
@ -682,7 +694,7 @@ class Agents(Protocol):
#
# Both of these APIs are inherently stateful.
@webmethod(route="/openai/v1/responses/{response_id}", method="GET")
@webmethod(route="/openai/v1/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_openai_response(
self,
response_id: str,
@ -694,7 +706,7 @@ class Agents(Protocol):
"""
...
@webmethod(route="/openai/v1/responses", method="POST")
@webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1)
async def create_openai_response(
self,
input: str | list[OpenAIResponseInput],
@ -719,7 +731,7 @@ class Agents(Protocol):
"""
...
@webmethod(route="/openai/v1/responses", method="GET")
@webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1)
async def list_openai_responses(
self,
after: str | None = None,
@ -737,7 +749,7 @@ class Agents(Protocol):
"""
...
@webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET")
@webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
async def list_openai_response_input_items(
self,
response_id: str,
@ -759,7 +771,7 @@ class Agents(Protocol):
"""
...
@webmethod(route="/openai/v1/responses/{response_id}", method="DELETE")
@webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
"""Delete an OpenAI response by its ID.

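The recurring change in this file is the new level= argument on @webmethod. A minimal sketch of how such a decorator could record the version level as metadata on the function, assuming a WebMethod container type (the real llama_stack.schema_utils implementation may differ):

from dataclasses import dataclass
from typing import Callable

@dataclass
class WebMethod:
    route: str | None = None
    method: str | None = None
    level: str | None = None              # e.g. "v1" or "v1alpha"
    descriptive_name: str | None = None
    deprecated: bool = False

def webmethod(route=None, method=None, level=None, descriptive_name=None, deprecated=False) -> Callable:
    def wrapper(func):
        # Attach routing metadata without altering the function itself;
        # a server can later collect it to build versioned routes.
        func.__webmethod__ = WebMethod(route, method, level, descriptive_name, deprecated)
        return func
    return wrapper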
View file

@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .batch_inference import *

View file

@ -1,78 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from typing import Protocol, runtime_checkable
from llama_stack.apis.common.job_types import Job
from llama_stack.apis.inference import (
InterleavedContent,
LogProbConfig,
Message,
ResponseFormat,
SamplingParams,
ToolChoice,
ToolDefinition,
ToolPromptFormat,
)
from llama_stack.schema_utils import webmethod
@runtime_checkable
class BatchInference(Protocol):
"""Batch inference API for generating completions and chat completions.
This is an asynchronous API. If the request is successful, the response will be a job which can be polled for completion.
NOTE: This API is not yet implemented and is subject to change in concert with other asynchronous APIs
including (post-training, evals, etc).
"""
@webmethod(route="/batch-inference/completion", method="POST")
async def completion(
self,
model: str,
content_batch: list[InterleavedContent],
sampling_params: SamplingParams | None = None,
response_format: ResponseFormat | None = None,
logprobs: LogProbConfig | None = None,
) -> Job:
"""Generate completions for a batch of content.
:param model: The model to use for the completion.
:param content_batch: The content to complete.
:param sampling_params: The sampling parameters to use for the completion.
:param response_format: The response format to use for the completion.
:param logprobs: The logprobs to use for the completion.
:returns: A job for the completion.
"""
...
@webmethod(route="/batch-inference/chat-completion", method="POST")
async def chat_completion(
self,
model: str,
messages_batch: list[list[Message]],
sampling_params: SamplingParams | None = None,
# zero-shot tool definitions as input to the model
tools: list[ToolDefinition] | None = None,
tool_choice: ToolChoice | None = ToolChoice.auto,
tool_prompt_format: ToolPromptFormat | None = None,
response_format: ResponseFormat | None = None,
logprobs: LogProbConfig | None = None,
) -> Job:
"""Generate chat completions for a batch of messages.
:param model: The model to use for the chat completion.
:param messages_batch: The messages to complete.
:param sampling_params: The sampling parameters to use for the completion.
:param tools: The tools to use for the chat completion.
:param tool_choice: The tool choice to use for the chat completion.
:param tool_prompt_format: The tool prompt format to use for the chat completion.
:param response_format: The response format to use for the chat completion.
:param logprobs: The logprobs to use for the chat completion.
:returns: A job for the chat completion.
"""
...

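The deleted protocol's docstring says a successful request returns a job which can be polled for completion. A hypothetical polling loop for such a flow (the client object, its get_job method, and the status strings are assumptions, not part of the removed API):

import asyncio

async def wait_for_job(client, job_id: str, interval: float = 5.0):
    # Poll until the batch job reaches a terminal state; get_job and the
    # status values are hypothetical stand-ins for a concrete client.
    while True:
        job = await client.get_job(job_id)
        if job.status in ("completed", "failed", "cancelled"):
            return job
        await asyncio.sleep(interval)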
View file

@ -8,6 +8,7 @@ from typing import Literal, Protocol, runtime_checkable
from pydantic import BaseModel, Field
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import json_schema_type, webmethod
try:
@ -42,7 +43,7 @@ class Batches(Protocol):
Note: This API is currently under active development and may undergo changes.
"""
@webmethod(route="/openai/v1/batches", method="POST")
@webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1)
async def create_batch(
self,
input_file_id: str,
@ -62,7 +63,7 @@ class Batches(Protocol):
"""
...
@webmethod(route="/openai/v1/batches/{batch_id}", method="GET")
@webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
async def retrieve_batch(self, batch_id: str) -> BatchObject:
"""Retrieve information about a specific batch.
@ -71,7 +72,7 @@ class Batches(Protocol):
"""
...
@webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST")
@webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
async def cancel_batch(self, batch_id: str) -> BatchObject:
"""Cancel a batch that is in progress.
@ -80,7 +81,7 @@ class Batches(Protocol):
"""
...
@webmethod(route="/openai/v1/batches", method="GET")
@webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1)
async def list_batches(
self,
after: str | None = None,

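Because these routes mirror the OpenAI Batch API under the /openai/v1 prefix, the stock openai client should be able to drive them directly. A sketch under that assumption (base URL, port, and the file id are placeholders, not values from this diff):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/openai/v1", api_key="none")

batch = client.batches.create(
    input_file_id="file-abc123",        # hypothetical uploaded file id
    endpoint="/v1/chat/completions",
    completion_window="24h",
)
# Maps to GET /openai/v1/batches/{batch_id}
print(client.batches.retrieve(batch.id).status)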
View file

@ -8,6 +8,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
from pydantic import BaseModel, Field
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import json_schema_type, webmethod
@ -53,7 +54,8 @@ class ListBenchmarksResponse(BaseModel):
@runtime_checkable
class Benchmarks(Protocol):
@webmethod(route="/eval/benchmarks", method="GET")
@webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def list_benchmarks(self) -> ListBenchmarksResponse:
"""List all benchmarks.
@ -61,7 +63,8 @@ class Benchmarks(Protocol):
"""
...
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET")
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_benchmark(
self,
benchmark_id: str,
@ -73,7 +76,8 @@ class Benchmarks(Protocol):
"""
...
@webmethod(route="/eval/benchmarks", method="POST")
@webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def register_benchmark(
self,
benchmark_id: str,
@ -94,7 +98,8 @@ class Benchmarks(Protocol):
"""
...
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE")
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
async def unregister_benchmark(self, benchmark_id: str) -> None:
"""Unregister a benchmark.

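Each benchmark method now carries two registrations: the old /v1 route flagged deprecated=True plus a fresh v1alpha route. For stacking to work, the decorator has to accumulate entries rather than overwrite a single attribute; a sketch of that pattern (the __webmethods__ attribute name is an assumption):

def webmethod(route, method=None, level="v1", deprecated=False, **extra):
    def wrapper(func):
        # Append one record per decorator application, so a single method
        # can be served at /v1/... (deprecated) and /v1alpha/... at once.
        regs = list(getattr(func, "__webmethods__", []))
        regs.append({"route": route, "method": method, "level": level,
                     "deprecated": deprecated, **extra})
        func.__webmethods__ = regs
        return func
    return wrapper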
View file

@ -8,6 +8,7 @@ from typing import Any, Protocol, runtime_checkable
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.apis.datasets import Dataset
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import webmethod
@ -20,7 +21,7 @@ class DatasetIO(Protocol):
# keeping for aligning with inference/safety, but this is not used
dataset_store: DatasetStore
@webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET")
@webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
async def iterrows(
self,
dataset_id: str,
@ -44,7 +45,7 @@ class DatasetIO(Protocol):
"""
...
@webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST")
@webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1)
async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
"""Append rows to a dataset.

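A hypothetical pagination loop over the iterrows route above (the client object, query parameter names, and response keys are assumptions based on the signature shown, not confirmed by this diff):

def iter_all_rows(client, dataset_id: str, chunk: int = 100):
    # Walk GET /v1/datasetio/iterrows/{dataset_id} page by page.
    start = 0
    while True:
        page = client.get(f"/v1/datasetio/iterrows/{dataset_id}",
                          params={"start_index": start, "limit": chunk}).json()
        yield from page["data"]
        start = page.get("next_start_index")
        if start is None:
            break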
View file

@ -10,6 +10,7 @@ from typing import Annotated, Any, Literal, Protocol
from pydantic import BaseModel, Field
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -145,7 +146,7 @@ class ListDatasetsResponse(BaseModel):
class Datasets(Protocol):
@webmethod(route="/datasets", method="POST")
@webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1)
async def register_dataset(
self,
purpose: DatasetPurpose,
@ -214,7 +215,7 @@ class Datasets(Protocol):
"""
...
@webmethod(route="/datasets/{dataset_id:path}", method="GET")
@webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_dataset(
self,
dataset_id: str,
@ -226,7 +227,7 @@ class Datasets(Protocol):
"""
...
@webmethod(route="/datasets", method="GET")
@webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1)
async def list_datasets(self) -> ListDatasetsResponse:
"""List all datasets.
@ -234,7 +235,7 @@ class Datasets(Protocol):
"""
...
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE")
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
async def unregister_dataset(
self,
dataset_id: str,

View file

@ -13,6 +13,7 @@ from llama_stack.apis.common.job_types import Job
from llama_stack.apis.inference import SamplingParams, SystemMessage
from llama_stack.apis.scoring import ScoringResult
from llama_stack.apis.scoring_functions import ScoringFnParams
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -83,7 +84,8 @@ class EvaluateResponse(BaseModel):
class Eval(Protocol):
"""Llama Stack Evaluation API for running evaluations on model and agent candidates."""
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST")
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def run_eval(
self,
benchmark_id: str,
@ -97,7 +99,10 @@ class Eval(Protocol):
"""
...
@webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST")
@webmethod(
route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def evaluate_rows(
self,
benchmark_id: str,
@ -115,7 +120,10 @@ class Eval(Protocol):
"""
...
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
@webmethod(
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def job_status(self, benchmark_id: str, job_id: str) -> Job:
"""Get the status of a job.
@ -125,7 +133,13 @@ class Eval(Protocol):
"""
...
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE")
@webmethod(
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
method="DELETE",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
"""Cancel a job.
@ -134,7 +148,15 @@ class Eval(Protocol):
"""
...
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
@webmethod(
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
method="GET",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod(
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET", level=LLAMA_STACK_API_V1ALPHA
)
async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
"""Get the result of a job.

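Taken together, the routes above form a submit/poll/fetch job lifecycle. A hypothetical client-side flow against the new v1alpha registrations (the /v1alpha prefix, payload shape, and status values are assumptions):

import time

def run_and_wait(client, benchmark_id: str, benchmark_config: dict) -> dict:
    base = f"/v1alpha/eval/benchmarks/{benchmark_id}/jobs"
    job = client.post(base, json=benchmark_config).json()     # run_eval
    while True:
        status = client.get(f"{base}/{job['job_id']}").json() # job_status
        if status.get("status") in ("completed", "failed"):
            break
        time.sleep(2)
    return client.get(f"{base}/{job['job_id']}/result").json()  # job_result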
View file

@ -11,6 +11,7 @@ from fastapi import File, Form, Response, UploadFile
from pydantic import BaseModel, Field
from llama_stack.apis.common.responses import Order
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
@ -104,7 +105,7 @@ class OpenAIFileDeleteResponse(BaseModel):
@trace_protocol
class Files(Protocol):
# OpenAI Files API Endpoints
@webmethod(route="/openai/v1/files", method="POST")
@webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1)
async def openai_upload_file(
self,
file: Annotated[UploadFile, File()],
@ -119,7 +120,7 @@ class Files(Protocol):
The file upload should be a multipart form request with:
- file: The File object (not file name) to be uploaded.
- purpose: The intended purpose of the uploaded file.
- expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = "created_at", expires_after[seconds] = <int>. Seconds must be between 3600 and 2592000 (1 hour to 30 days).
- expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = "created_at", expires_after[seconds] = {integer}. Seconds must be between 3600 and 2592000 (1 hour to 30 days).
:param file: The uploaded file object containing content and metadata (filename, content_type, etc.).
:param purpose: The intended purpose of the uploaded file (e.g., "assistants", "fine-tune").
@ -127,7 +128,7 @@ class Files(Protocol):
"""
...
@webmethod(route="/openai/v1/files", method="GET")
@webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_files(
self,
after: str | None = None,
@ -146,7 +147,7 @@ class Files(Protocol):
"""
...
@webmethod(route="/openai/v1/files/{file_id}", method="GET")
@webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
async def openai_retrieve_file(
self,
file_id: str,
@ -159,7 +160,7 @@ class Files(Protocol):
"""
...
@webmethod(route="/openai/v1/files/{file_id}", method="DELETE")
@webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def openai_delete_file(
self,
file_id: str,
@ -172,7 +173,7 @@ class Files(Protocol):
"""
...
@webmethod(route="/openai/v1/files/{file_id}/content", method="GET")
@webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
async def openai_retrieve_file_content(
self,
file_id: str,

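The upload docstring spells out the multipart form layout, including the bracketed expires_after fields. A sketch with requests (the server address is an assumption; the field names and the allowed seconds range come from the docstring above):

import requests

resp = requests.post(
    "http://localhost:8321/openai/v1/files",       # assumed server address
    files={"file": ("notes.txt", b"hello world", "text/plain")},
    data={
        "purpose": "assistants",
        "expires_after[anchor]": "created_at",
        "expires_after[seconds]": "86400",         # one day; must be within 3600..2592000
    },
)
print(resp.json())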
View file

@ -21,6 +21,7 @@ from llama_stack.apis.common.content_types import ContentDelta, InterleavedConte
from llama_stack.apis.common.responses import Order
from llama_stack.apis.models import Model
from llama_stack.apis.telemetry import MetricResponseMixin
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.models.llama.datatypes import (
BuiltinTool,
StopReason,
@ -913,6 +914,7 @@ class OpenAIEmbeddingData(BaseModel):
"""
object: Literal["embedding"] = "embedding"
# TODO: consider dropping str and using openai.types.embeddings.Embedding instead of OpenAIEmbeddingData
embedding: list[float] | str
index: int
@ -973,26 +975,6 @@ class EmbeddingTaskType(Enum):
document = "document"
@json_schema_type
class BatchCompletionResponse(BaseModel):
"""Response from a batch completion request.
:param batch: List of completion responses, one for each input in the batch
"""
batch: list[CompletionResponse]
@json_schema_type
class BatchChatCompletionResponse(BaseModel):
"""Response from a batch chat completion request.
:param batch: List of chat completion responses, one for each conversation in the batch
"""
batch: list[ChatCompletionResponse]
class OpenAICompletionWithInputMessages(OpenAIChatCompletion):
input_messages: list[OpenAIMessageParam]
@ -1026,7 +1008,7 @@ class InferenceProvider(Protocol):
model_store: ModelStore | None = None
@webmethod(route="/inference/completion", method="POST")
@webmethod(route="/inference/completion", method="POST", level=LLAMA_STACK_API_V1)
async def completion(
self,
model_id: str,
@ -1049,28 +1031,7 @@ class InferenceProvider(Protocol):
"""
...
@webmethod(route="/inference/batch-completion", method="POST", experimental=True)
async def batch_completion(
self,
model_id: str,
content_batch: list[InterleavedContent],
sampling_params: SamplingParams | None = None,
response_format: ResponseFormat | None = None,
logprobs: LogProbConfig | None = None,
) -> BatchCompletionResponse:
"""Generate completions for a batch of content using the specified model.
:param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
:param content_batch: The content to generate completions for.
:param sampling_params: (Optional) Parameters to control the sampling strategy.
:param response_format: (Optional) Grammar specification for guided (structured) decoding.
:param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
:returns: A BatchCompletionResponse with the full completions.
"""
raise NotImplementedError("Batch completion is not implemented")
return # this is so mypy's safe-super rule will consider the method concrete
@webmethod(route="/inference/chat-completion", method="POST")
@webmethod(route="/inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
async def chat_completion(
self,
model_id: str,
@ -1110,32 +1071,7 @@ class InferenceProvider(Protocol):
"""
...
@webmethod(route="/inference/batch-chat-completion", method="POST", experimental=True)
async def batch_chat_completion(
self,
model_id: str,
messages_batch: list[list[Message]],
sampling_params: SamplingParams | None = None,
tools: list[ToolDefinition] | None = None,
tool_config: ToolConfig | None = None,
response_format: ResponseFormat | None = None,
logprobs: LogProbConfig | None = None,
) -> BatchChatCompletionResponse:
"""Generate chat completions for a batch of messages using the specified model.
:param model_id: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
:param messages_batch: The messages to generate completions for.
:param sampling_params: (Optional) Parameters to control the sampling strategy.
:param tools: (Optional) List of tool definitions available to the model.
:param tool_config: (Optional) Configuration for tool use.
:param response_format: (Optional) Grammar specification for guided (structured) decoding.
:param logprobs: (Optional) If specified, log probabilities for each token position will be returned.
:returns: A BatchChatCompletionResponse with the full completions.
"""
raise NotImplementedError("Batch chat completion is not implemented")
return # this is so mypy's safe-super rule will consider the method concrete
@webmethod(route="/inference/embeddings", method="POST")
@webmethod(route="/inference/embeddings", method="POST", level=LLAMA_STACK_API_V1)
async def embeddings(
self,
model_id: str,
@ -1155,7 +1091,7 @@ class InferenceProvider(Protocol):
"""
...
@webmethod(route="/inference/rerank", method="POST", experimental=True)
@webmethod(route="/inference/rerank", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
async def rerank(
self,
model: str,
@ -1174,7 +1110,7 @@ class InferenceProvider(Protocol):
raise NotImplementedError("Reranking is not implemented")
return # this is so mypy's safe-super rule will consider the method concrete
@webmethod(route="/openai/v1/completions", method="POST")
@webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1)
async def openai_completion(
self,
# Standard OpenAI completion parameters
@ -1225,7 +1161,7 @@ class InferenceProvider(Protocol):
"""
...
@webmethod(route="/openai/v1/chat/completions", method="POST")
@webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
async def openai_chat_completion(
self,
model: str,
@ -1281,7 +1217,7 @@ class InferenceProvider(Protocol):
"""
...
@webmethod(route="/openai/v1/embeddings", method="POST")
@webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1)
async def openai_embeddings(
self,
model: str,
@ -1310,7 +1246,7 @@ class Inference(InferenceProvider):
- Embedding models: these models generate embeddings to be used for semantic search.
"""
@webmethod(route="/openai/v1/chat/completions", method="GET")
@webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
async def list_chat_completions(
self,
after: str | None = None,
@ -1328,7 +1264,7 @@ class Inference(InferenceProvider):
"""
raise NotImplementedError("List chat completions is not implemented")
@webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET")
@webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
"""Describe a chat completion by its ID.

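With the experimental batch_completion and batch_chat_completion methods removed, the OpenAI-compatible routes carry the chat surface here; since they live under /openai/v1, the stock openai client should work against them. A sketch (base URL and model id are assumptions):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/openai/v1", api_key="none")
reply = client.chat.completions.create(
    model="llama3.2:3b",                  # hypothetical registered model id
    messages=[{"role": "user", "content": "Say hello."}],
)
print(reply.choices[0].message.content)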
View file

@ -8,6 +8,7 @@ from typing import Protocol, runtime_checkable
from pydantic import BaseModel
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.datatypes import HealthStatus
from llama_stack.schema_utils import json_schema_type, webmethod
@ -57,7 +58,7 @@ class ListRoutesResponse(BaseModel):
@runtime_checkable
class Inspect(Protocol):
@webmethod(route="/inspect/routes", method="GET")
@webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1)
async def list_routes(self) -> ListRoutesResponse:
"""List all available API routes with their methods and implementing providers.
@ -65,7 +66,7 @@ class Inspect(Protocol):
"""
...
@webmethod(route="/health", method="GET")
@webmethod(route="/health", method="GET", level=LLAMA_STACK_API_V1)
async def health(self) -> HealthInfo:
"""Get the current health status of the service.
@ -73,7 +74,7 @@ class Inspect(Protocol):
"""
...
@webmethod(route="/version", method="GET")
@webmethod(route="/version", method="GET", level=LLAMA_STACK_API_V1)
async def version(self) -> VersionInfo:
"""Get the version of the service.

View file

@ -10,6 +10,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
from pydantic import BaseModel, ConfigDict, Field, field_validator
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
@ -102,7 +103,7 @@ class OpenAIListModelsResponse(BaseModel):
@runtime_checkable
@trace_protocol
class Models(Protocol):
@webmethod(route="/models", method="GET")
@webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
async def list_models(self) -> ListModelsResponse:
"""List all models.
@ -110,7 +111,7 @@ class Models(Protocol):
"""
...
@webmethod(route="/openai/v1/models", method="GET")
@webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_models(self) -> OpenAIListModelsResponse:
"""List models using the OpenAI API.
@ -118,7 +119,7 @@ class Models(Protocol):
"""
...
@webmethod(route="/models/{model_id:path}", method="GET")
@webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_model(
self,
model_id: str,
@ -130,7 +131,7 @@ class Models(Protocol):
"""
...
@webmethod(route="/models", method="POST")
@webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1)
async def register_model(
self,
model_id: str,
@ -150,7 +151,7 @@ class Models(Protocol):
"""
...
@webmethod(route="/models/{model_id:path}", method="DELETE")
@webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
async def unregister_model(
self,
model_id: str,

View file

@ -13,6 +13,7 @@ from pydantic import BaseModel, Field
from llama_stack.apis.common.content_types import URL
from llama_stack.apis.common.job_types import JobStatus
from llama_stack.apis.common.training_types import Checkpoint
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -283,7 +284,8 @@ class PostTrainingJobArtifactsResponse(BaseModel):
class PostTraining(Protocol):
@webmethod(route="/post-training/supervised-fine-tune", method="POST")
@webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def supervised_fine_tune(
self,
job_uuid: str,
@ -310,7 +312,8 @@ class PostTraining(Protocol):
"""
...
@webmethod(route="/post-training/preference-optimize", method="POST")
@webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def preference_optimize(
self,
job_uuid: str,
@ -332,7 +335,8 @@ class PostTraining(Protocol):
"""
...
@webmethod(route="/post-training/jobs", method="GET")
@webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_training_jobs(self) -> ListPostTrainingJobsResponse:
"""Get all training jobs.
@ -340,7 +344,8 @@ class PostTraining(Protocol):
"""
...
@webmethod(route="/post-training/job/status", method="GET")
@webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse:
"""Get the status of a training job.
@ -349,7 +354,8 @@ class PostTraining(Protocol):
"""
...
@webmethod(route="/post-training/job/cancel", method="POST")
@webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def cancel_training_job(self, job_uuid: str) -> None:
"""Cancel a training job.
@ -357,7 +363,8 @@ class PostTraining(Protocol):
"""
...
@webmethod(route="/post-training/job/artifacts", method="GET")
@webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
"""Get the artifacts of a training job.

View file

@ -10,6 +10,7 @@ from typing import Protocol, runtime_checkable
from pydantic import BaseModel, Field, field_validator, model_validator
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
@ -95,7 +96,7 @@ class ListPromptsResponse(BaseModel):
class Prompts(Protocol):
"""Protocol for prompt management operations."""
@webmethod(route="/prompts", method="GET")
@webmethod(route="/prompts", method="GET", level=LLAMA_STACK_API_V1)
async def list_prompts(self) -> ListPromptsResponse:
"""List all prompts.
@ -103,7 +104,7 @@ class Prompts(Protocol):
"""
...
@webmethod(route="/prompts/{prompt_id}/versions", method="GET")
@webmethod(route="/prompts/{prompt_id}/versions", method="GET", level=LLAMA_STACK_API_V1)
async def list_prompt_versions(
self,
prompt_id: str,
@ -115,7 +116,7 @@ class Prompts(Protocol):
"""
...
@webmethod(route="/prompts/{prompt_id}", method="GET")
@webmethod(route="/prompts/{prompt_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_prompt(
self,
prompt_id: str,
@ -129,7 +130,7 @@ class Prompts(Protocol):
"""
...
@webmethod(route="/prompts", method="POST")
@webmethod(route="/prompts", method="POST", level=LLAMA_STACK_API_V1)
async def create_prompt(
self,
prompt: str,
@ -143,7 +144,7 @@ class Prompts(Protocol):
"""
...
@webmethod(route="/prompts/{prompt_id}", method="PUT")
@webmethod(route="/prompts/{prompt_id}", method="PUT", level=LLAMA_STACK_API_V1)
async def update_prompt(
self,
prompt_id: str,
@ -163,7 +164,7 @@ class Prompts(Protocol):
"""
...
@webmethod(route="/prompts/{prompt_id}", method="DELETE")
@webmethod(route="/prompts/{prompt_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def delete_prompt(
self,
prompt_id: str,
@ -174,7 +175,7 @@ class Prompts(Protocol):
"""
...
@webmethod(route="/prompts/{prompt_id}/set-default-version", method="PUT")
@webmethod(route="/prompts/{prompt_id}/set-default-version", method="PUT", level=LLAMA_STACK_API_V1)
async def set_default_version(
self,
prompt_id: str,

View file

@ -8,6 +8,7 @@ from typing import Any, Protocol, runtime_checkable
from pydantic import BaseModel
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.datatypes import HealthResponse
from llama_stack.schema_utils import json_schema_type, webmethod
@ -45,7 +46,7 @@ class Providers(Protocol):
Providers API for inspecting, listing, and modifying providers and their configurations.
"""
@webmethod(route="/providers", method="GET")
@webmethod(route="/providers", method="GET", level=LLAMA_STACK_API_V1)
async def list_providers(self) -> ListProvidersResponse:
"""List all available providers.
@ -53,7 +54,7 @@ class Providers(Protocol):
"""
...
@webmethod(route="/providers/{provider_id}", method="GET")
@webmethod(route="/providers/{provider_id}", method="GET", level=LLAMA_STACK_API_V1)
async def inspect_provider(self, provider_id: str) -> ProviderInfo:
"""Get detailed information about a specific provider.

View file

@ -11,6 +11,7 @@ from pydantic import BaseModel, Field
from llama_stack.apis.inference import Message
from llama_stack.apis.shields import Shield
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
@ -97,7 +98,7 @@ class ShieldStore(Protocol):
class Safety(Protocol):
shield_store: ShieldStore
@webmethod(route="/safety/run-shield", method="POST")
@webmethod(route="/safety/run-shield", method="POST", level=LLAMA_STACK_API_V1)
async def run_shield(
self,
shield_id: str,
@ -113,7 +114,7 @@ class Safety(Protocol):
"""
...
@webmethod(route="/openai/v1/moderations", method="POST")
@webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1)
async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
"""Classifies if text and/or image inputs are potentially harmful.
:param input: Input (or inputs) to classify.

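run_moderation is likewise exposed at an OpenAI-compatible route (/openai/v1/moderations), so the same client pattern should apply. A sketch (base URL and model id are assumptions):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/openai/v1", api_key="none")
result = client.moderations.create(
    model="llama-guard3:1b",              # hypothetical shield-backed model
    input="How do I pick a lock?",
)
print(result.results[0].flagged)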
View file

@ -9,6 +9,7 @@ from typing import Any, Protocol, runtime_checkable
from pydantic import BaseModel
from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import json_schema_type, webmethod
# mapping of metric to value
@ -61,7 +62,7 @@ class ScoringFunctionStore(Protocol):
class Scoring(Protocol):
scoring_function_store: ScoringFunctionStore
@webmethod(route="/scoring/score-batch", method="POST")
@webmethod(route="/scoring/score-batch", method="POST", level=LLAMA_STACK_API_V1)
async def score_batch(
self,
dataset_id: str,
@ -77,7 +78,7 @@ class Scoring(Protocol):
"""
...
@webmethod(route="/scoring/score", method="POST")
@webmethod(route="/scoring/score", method="POST", level=LLAMA_STACK_API_V1)
async def score(
self,
input_rows: list[dict[str, Any]],

View file

@ -18,6 +18,7 @@ from pydantic import BaseModel, Field
from llama_stack.apis.common.type_system import ParamType
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -160,7 +161,7 @@ class ListScoringFunctionsResponse(BaseModel):
@runtime_checkable
class ScoringFunctions(Protocol):
@webmethod(route="/scoring-functions", method="GET")
@webmethod(route="/scoring-functions", method="GET", level=LLAMA_STACK_API_V1)
async def list_scoring_functions(self) -> ListScoringFunctionsResponse:
"""List all scoring functions.
@ -168,7 +169,7 @@ class ScoringFunctions(Protocol):
"""
...
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET")
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_scoring_function(self, scoring_fn_id: str, /) -> ScoringFn:
"""Get a scoring function by its ID.
@ -177,7 +178,7 @@ class ScoringFunctions(Protocol):
"""
...
@webmethod(route="/scoring-functions", method="POST")
@webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1)
async def register_scoring_function(
self,
scoring_fn_id: str,
@ -198,7 +199,7 @@ class ScoringFunctions(Protocol):
"""
...
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE")
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
"""Unregister a scoring function.

View file

@ -9,6 +9,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
from pydantic import BaseModel
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
@ -49,7 +50,7 @@ class ListShieldsResponse(BaseModel):
@runtime_checkable
@trace_protocol
class Shields(Protocol):
@webmethod(route="/shields", method="GET")
@webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1)
async def list_shields(self) -> ListShieldsResponse:
"""List all shields.
@ -57,7 +58,7 @@ class Shields(Protocol):
"""
...
@webmethod(route="/shields/{identifier:path}", method="GET")
@webmethod(route="/shields/{identifier:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_shield(self, identifier: str) -> Shield:
"""Get a shield by its identifier.
@ -66,7 +67,7 @@ class Shields(Protocol):
"""
...
@webmethod(route="/shields", method="POST")
@webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1)
async def register_shield(
self,
shield_id: str,
@ -84,7 +85,7 @@ class Shields(Protocol):
"""
...
@webmethod(route="/shields/{identifier:path}", method="DELETE")
@webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1)
async def unregister_shield(self, identifier: str) -> None:
"""Unregister a shield.

View file

@ -10,6 +10,7 @@ from typing import Any, Protocol
from pydantic import BaseModel
from llama_stack.apis.inference import Message
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import json_schema_type, webmethod
@ -59,7 +60,7 @@ class SyntheticDataGenerationResponse(BaseModel):
class SyntheticDataGeneration(Protocol):
@webmethod(route="/synthetic-data-generation/generate")
@webmethod(route="/synthetic-data-generation/generate", level=LLAMA_STACK_API_V1)
def synthetic_data_generate(
self,
dialogs: list[Message],

View file

@ -16,6 +16,7 @@ from typing import (
from pydantic import BaseModel, Field
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.models.llama.datatypes import Primitive
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -412,7 +413,7 @@ class QueryMetricsResponse(BaseModel):
@runtime_checkable
class Telemetry(Protocol):
@webmethod(route="/telemetry/events", method="POST")
@webmethod(route="/telemetry/events", method="POST", level=LLAMA_STACK_API_V1)
async def log_event(
self,
event: Event,
@ -425,7 +426,7 @@ class Telemetry(Protocol):
"""
...
@webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE)
@webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1)
async def query_traces(
self,
attribute_filters: list[QueryCondition] | None = None,
@ -443,7 +444,9 @@ class Telemetry(Protocol):
"""
...
@webmethod(route="/telemetry/traces/{trace_id:path}", method="GET", required_scope=REQUIRED_SCOPE)
@webmethod(
route="/telemetry/traces/{trace_id:path}", method="GET", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1
)
async def get_trace(self, trace_id: str) -> Trace:
"""Get a trace by its ID.
@ -453,7 +456,10 @@ class Telemetry(Protocol):
...
@webmethod(
route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}", method="GET", required_scope=REQUIRED_SCOPE
route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
method="GET",
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1,
)
async def get_span(self, trace_id: str, span_id: str) -> Span:
"""Get a span by its ID.
@ -464,7 +470,12 @@ class Telemetry(Protocol):
"""
...
@webmethod(route="/telemetry/spans/{span_id:path}/tree", method="POST", required_scope=REQUIRED_SCOPE)
@webmethod(
route="/telemetry/spans/{span_id:path}/tree",
method="POST",
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1,
)
async def get_span_tree(
self,
span_id: str,
@ -480,7 +491,7 @@ class Telemetry(Protocol):
"""
...
@webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE)
@webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1)
async def query_spans(
self,
attribute_filters: list[QueryCondition],
@ -496,7 +507,7 @@ class Telemetry(Protocol):
"""
...
@webmethod(route="/telemetry/spans/export", method="POST")
@webmethod(route="/telemetry/spans/export", method="POST", level=LLAMA_STACK_API_V1)
async def save_spans_to_dataset(
self,
attribute_filters: list[QueryCondition],
@ -513,7 +524,9 @@ class Telemetry(Protocol):
"""
...
@webmethod(route="/telemetry/metrics/{metric_name}", method="POST", required_scope=REQUIRED_SCOPE)
@webmethod(
route="/telemetry/metrics/{metric_name}", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1
)
async def query_metrics(
self,
metric_name: str,

View file

@ -11,6 +11,7 @@ from pydantic import BaseModel, Field, field_validator
from typing_extensions import runtime_checkable
from llama_stack.apis.common.content_types import URL, InterleavedContent
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -185,7 +186,7 @@ class RAGQueryConfig(BaseModel):
@runtime_checkable
@trace_protocol
class RAGToolRuntime(Protocol):
@webmethod(route="/tool-runtime/rag-tool/insert", method="POST")
@webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1)
async def insert(
self,
documents: list[RAGDocument],
@ -200,7 +201,7 @@ class RAGToolRuntime(Protocol):
"""
...
@webmethod(route="/tool-runtime/rag-tool/query", method="POST")
@webmethod(route="/tool-runtime/rag-tool/query", method="POST", level=LLAMA_STACK_API_V1)
async def query(
self,
content: InterleavedContent,

View file

@ -12,6 +12,7 @@ from typing_extensions import runtime_checkable
from llama_stack.apis.common.content_types import URL, InterleavedContent
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
@ -155,7 +156,7 @@ class ListToolDefsResponse(BaseModel):
@runtime_checkable
@trace_protocol
class ToolGroups(Protocol):
@webmethod(route="/toolgroups", method="POST")
@webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1)
async def register_tool_group(
self,
toolgroup_id: str,
@ -172,7 +173,7 @@ class ToolGroups(Protocol):
"""
...
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET")
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_tool_group(
self,
toolgroup_id: str,
@ -184,7 +185,7 @@ class ToolGroups(Protocol):
"""
...
@webmethod(route="/toolgroups", method="GET")
@webmethod(route="/toolgroups", method="GET", level=LLAMA_STACK_API_V1)
async def list_tool_groups(self) -> ListToolGroupsResponse:
"""List tool groups with optional provider.
@ -192,7 +193,7 @@ class ToolGroups(Protocol):
"""
...
@webmethod(route="/tools", method="GET")
@webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1)
async def list_tools(self, toolgroup_id: str | None = None) -> ListToolsResponse:
"""List tools with optional tool group.
@ -201,7 +202,7 @@ class ToolGroups(Protocol):
"""
...
@webmethod(route="/tools/{tool_name:path}", method="GET")
@webmethod(route="/tools/{tool_name:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_tool(
self,
tool_name: str,
@ -213,7 +214,7 @@ class ToolGroups(Protocol):
"""
...
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE")
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
async def unregister_toolgroup(
self,
toolgroup_id: str,
@ -242,7 +243,7 @@ class ToolRuntime(Protocol):
rag_tool: RAGToolRuntime | None = None
# TODO: This needs to be renamed once OPEN API generator name conflict issue is fixed.
@webmethod(route="/tool-runtime/list-tools", method="GET")
@webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1)
async def list_runtime_tools(
self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
) -> ListToolDefsResponse:
@ -254,7 +255,7 @@ class ToolRuntime(Protocol):
"""
...
@webmethod(route="/tool-runtime/invoke", method="POST")
@webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1)
async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
"""Run a tool with the given arguments.

View file

@ -9,6 +9,7 @@ from typing import Literal, Protocol, runtime_checkable
from pydantic import BaseModel
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
@ -65,7 +66,7 @@ class ListVectorDBsResponse(BaseModel):
@runtime_checkable
@trace_protocol
class VectorDBs(Protocol):
@webmethod(route="/vector-dbs", method="GET")
@webmethod(route="/vector-dbs", method="GET", level=LLAMA_STACK_API_V1)
async def list_vector_dbs(self) -> ListVectorDBsResponse:
"""List all vector databases.
@ -73,7 +74,7 @@ class VectorDBs(Protocol):
"""
...
@webmethod(route="/vector-dbs/{vector_db_id:path}", method="GET")
@webmethod(route="/vector-dbs/{vector_db_id:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_vector_db(
self,
vector_db_id: str,
@ -85,7 +86,7 @@ class VectorDBs(Protocol):
"""
...
@webmethod(route="/vector-dbs", method="POST")
@webmethod(route="/vector-dbs", method="POST", level=LLAMA_STACK_API_V1)
async def register_vector_db(
self,
vector_db_id: str,
@ -107,7 +108,7 @@ class VectorDBs(Protocol):
"""
...
@webmethod(route="/vector-dbs/{vector_db_id:path}", method="DELETE")
@webmethod(route="/vector-dbs/{vector_db_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
async def unregister_vector_db(self, vector_db_id: str) -> None:
"""Unregister a vector database.

View file

@ -15,6 +15,7 @@ from pydantic import BaseModel, Field
from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
from llama_stack.schema_utils import json_schema_type, webmethod
@ -437,7 +438,7 @@ class VectorIO(Protocol):
# this will just block now until chunks are inserted, but it should
# probably return a Job instance which can be polled for completion
@webmethod(route="/vector-io/insert", method="POST")
@webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
async def insert_chunks(
self,
vector_db_id: str,
@ -455,7 +456,7 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/vector-io/query", method="POST")
@webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
async def query_chunks(
self,
vector_db_id: str,
@ -472,7 +473,7 @@ class VectorIO(Protocol):
...
# OpenAI Vector Stores API endpoints
@webmethod(route="/openai/v1/vector_stores", method="POST")
@webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
async def openai_create_vector_store(
self,
name: str | None = None,
@ -498,7 +499,7 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores", method="GET")
@webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_vector_stores(
self,
limit: int | None = 20,
@ -516,7 +517,7 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET")
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
async def openai_retrieve_vector_store(
self,
vector_store_id: str,
@ -528,7 +529,7 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST")
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1)
async def openai_update_vector_store(
self,
vector_store_id: str,
@ -546,7 +547,7 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE")
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def openai_delete_vector_store(
self,
vector_store_id: str,
@ -558,7 +559,7 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST")
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1)
async def openai_search_vector_store(
self,
vector_store_id: str,
@ -584,7 +585,7 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST")
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1)
async def openai_attach_file_to_vector_store(
self,
vector_store_id: str,
@ -602,7 +603,7 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET")
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_files_in_vector_store(
self,
vector_store_id: str,
@ -624,7 +625,9 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET")
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1
)
async def openai_retrieve_vector_store_file(
self,
vector_store_id: str,
@ -638,7 +641,11 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content", method="GET")
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
method="GET",
level=LLAMA_STACK_API_V1,
)
async def openai_retrieve_vector_store_file_contents(
self,
vector_store_id: str,
@ -652,7 +659,9 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST")
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1
)
async def openai_update_vector_store_file(
self,
vector_store_id: str,
@ -668,7 +677,9 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE")
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1
)
async def openai_delete_vector_store_file(
self,
vector_store_id: str,

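The vector-store routes above trace the OpenAI vector stores surface: create a store, attach a file, then inspect the files. A sketch with the openai client (base URL and file id are assumptions; recent openai versions expose these under client.vector_stores, older ones under client.beta.vector_stores):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/openai/v1", api_key="none")
store = client.vector_stores.create(name="docs")        # POST /openai/v1/vector_stores
client.vector_stores.files.create(vector_store_id=store.id,
                                  file_id="file-abc123")  # hypothetical file id
for f in client.vector_stores.files.list(vector_store_id=store.id):
    print(f.id, f.status)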
View file

@ -4,4 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
LLAMA_STACK_API_VERSION = "v1"
LLAMA_STACK_API_V1 = "v1"
LLAMA_STACK_API_V1BETA = "v1beta"
LLAMA_STACK_API_V1ALPHA = "v1alpha"
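A minimal sketch of how a router might combine these constants with a webmethod's level to produce the final path (versioned_route is a hypothetical helper, not part of this file):

def versioned_route(level: str, route: str) -> str:
    # Prefix an un-versioned webmethod route with its API level.
    return f"/{level}{route}"

assert versioned_route(LLAMA_STACK_API_V1, "/models") == "/v1/models"
assert versioned_route(LLAMA_STACK_API_V1ALPHA, "/eval/benchmarks") == "/v1alpha/eval/benchmarks"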