Merge branch 'main' into hide-non-openai-inference-apis

Commit cb534281c8 by Matthew Farrellee, 2025-09-26 10:48:34 -04:00
714 changed files with 123,149 additions and 54,618 deletions

View file

@ -27,6 +27,7 @@ from llama_stack.apis.inference import (
)
from llama_stack.apis.safety import SafetyViolation
from llama_stack.apis.tools import ToolDef
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
from .openai_responses import (
@ -481,7 +482,7 @@ class Agents(Protocol):
- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details.
"""
@webmethod(route="/agents", method="POST", descriptive_name="create_agent")
@webmethod(route="/agents", method="POST", descriptive_name="create_agent", level=LLAMA_STACK_API_V1)
async def create_agent(
self,
agent_config: AgentConfig,
@ -494,7 +495,10 @@ class Agents(Protocol):
...
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn", method="POST", descriptive_name="create_agent_turn"
route="/agents/{agent_id}/session/{session_id}/turn",
method="POST",
descriptive_name="create_agent_turn",
level=LLAMA_STACK_API_V1,
)
async def create_agent_turn(
self,
@ -524,6 +528,7 @@ class Agents(Protocol):
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
method="POST",
descriptive_name="resume_agent_turn",
level=LLAMA_STACK_API_V1,
)
async def resume_agent_turn(
self,
@ -549,6 +554,7 @@ class Agents(Protocol):
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
method="GET",
level=LLAMA_STACK_API_V1,
)
async def get_agents_turn(
self,
@ -568,6 +574,7 @@ class Agents(Protocol):
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
method="GET",
level=LLAMA_STACK_API_V1,
)
async def get_agents_step(
self,
@ -586,7 +593,12 @@ class Agents(Protocol):
"""
...
@webmethod(route="/agents/{agent_id}/session", method="POST", descriptive_name="create_agent_session")
@webmethod(
route="/agents/{agent_id}/session",
method="POST",
descriptive_name="create_agent_session",
level=LLAMA_STACK_API_V1,
)
async def create_agent_session(
self,
agent_id: str,
@ -600,7 +612,7 @@ class Agents(Protocol):
"""
...
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET")
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_agents_session(
self,
session_id: str,
@ -616,7 +628,7 @@ class Agents(Protocol):
"""
...
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE")
@webmethod(route="/agents/{agent_id}/session/{session_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def delete_agents_session(
self,
session_id: str,
@ -629,7 +641,7 @@ class Agents(Protocol):
"""
...
@webmethod(route="/agents/{agent_id}", method="DELETE")
@webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def delete_agent(
self,
agent_id: str,
@ -640,7 +652,7 @@ class Agents(Protocol):
"""
...
@webmethod(route="/agents", method="GET")
@webmethod(route="/agents", method="GET", level=LLAMA_STACK_API_V1)
async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse:
"""List all agents.
@ -650,7 +662,7 @@ class Agents(Protocol):
"""
...
@webmethod(route="/agents/{agent_id}", method="GET")
@webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_agent(self, agent_id: str) -> Agent:
"""Describe an agent by its ID.
@ -659,7 +671,7 @@ class Agents(Protocol):
"""
...
@webmethod(route="/agents/{agent_id}/sessions", method="GET")
@webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1)
async def list_agent_sessions(
self,
agent_id: str,
@ -682,7 +694,7 @@ class Agents(Protocol):
#
# Both of these APIs are inherently stateful.
@webmethod(route="/openai/v1/responses/{response_id}", method="GET")
@webmethod(route="/openai/v1/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_openai_response(
self,
response_id: str,
@ -694,7 +706,7 @@ class Agents(Protocol):
"""
...
@webmethod(route="/openai/v1/responses", method="POST")
@webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1)
async def create_openai_response(
self,
input: str | list[OpenAIResponseInput],
@ -719,7 +731,7 @@ class Agents(Protocol):
"""
...
@webmethod(route="/openai/v1/responses", method="GET")
@webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1)
async def list_openai_responses(
self,
after: str | None = None,
@ -737,7 +749,7 @@ class Agents(Protocol):
"""
...
@webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET")
@webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
async def list_openai_response_input_items(
self,
response_id: str,
@ -759,7 +771,7 @@ class Agents(Protocol):
"""
...
@webmethod(route="/openai/v1/responses/{response_id}", method="DELETE")
@webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
"""Delete an OpenAI response by its ID.

View file

@ -17,6 +17,7 @@ from llama_stack.apis.inference import (
ToolDefinition,
ToolPromptFormat,
)
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import webmethod
@ -30,7 +31,7 @@ class BatchInference(Protocol):
including post-training, evals, etc.
"""
@webmethod(route="/batch-inference/completion", method="POST")
@webmethod(route="/batch-inference/completion", method="POST", level=LLAMA_STACK_API_V1)
async def completion(
self,
model: str,
@ -50,7 +51,7 @@ class BatchInference(Protocol):
"""
...
@webmethod(route="/batch-inference/chat-completion", method="POST")
@webmethod(route="/batch-inference/chat-completion", method="POST", level=LLAMA_STACK_API_V1)
async def chat_completion(
self,
model: str,

View file

@ -8,6 +8,7 @@ from typing import Literal, Protocol, runtime_checkable
from pydantic import BaseModel, Field
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import json_schema_type, webmethod
try:
@ -42,7 +43,7 @@ class Batches(Protocol):
Note: This API is currently under active development and may undergo changes.
"""
@webmethod(route="/openai/v1/batches", method="POST")
@webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1)
async def create_batch(
self,
input_file_id: str,
@ -62,7 +63,7 @@ class Batches(Protocol):
"""
...
@webmethod(route="/openai/v1/batches/{batch_id}", method="GET")
@webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
async def retrieve_batch(self, batch_id: str) -> BatchObject:
"""Retrieve information about a specific batch.
@ -71,7 +72,7 @@ class Batches(Protocol):
"""
...
@webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST")
@webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
async def cancel_batch(self, batch_id: str) -> BatchObject:
"""Cancel a batch that is in progress.
@ -80,7 +81,7 @@ class Batches(Protocol):
"""
...
@webmethod(route="/openai/v1/batches", method="GET")
@webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1)
async def list_batches(
self,
after: str | None = None,
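
Because these routes mirror OpenAI's Batches API under `/openai/v1`, the official `openai` client pointed at a Llama Stack server should be able to drive them. A hedged usage sketch — the server address, `api_key` handling, and the file ID are assumptions, not confirmed by this diff:

```python
# Hedged sketch: drive the /openai/v1/batches routes with the stock
# openai client by overriding base_url. All identifiers are illustrative.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/openai/v1", api_key="none")

batch = client.batches.create(
    input_file_id="file-abc123",      # hypothetical: ID of an uploaded .jsonl file
    endpoint="/v1/chat/completions",  # which API the batched requests target
    completion_window="24h",
)
print(client.batches.retrieve(batch.id).status)  # e.g. "validating" / "in_progress"
```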

View file

@ -8,6 +8,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
from pydantic import BaseModel, Field
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import json_schema_type, webmethod
@ -53,7 +54,8 @@ class ListBenchmarksResponse(BaseModel):
@runtime_checkable
class Benchmarks(Protocol):
@webmethod(route="/eval/benchmarks", method="GET")
@webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def list_benchmarks(self) -> ListBenchmarksResponse:
"""List all benchmarks.
@ -61,7 +63,8 @@ class Benchmarks(Protocol):
"""
...
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET")
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_benchmark(
self,
benchmark_id: str,
@ -73,7 +76,8 @@ class Benchmarks(Protocol):
"""
...
@webmethod(route="/eval/benchmarks", method="POST")
@webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def register_benchmark(
self,
benchmark_id: str,
@ -93,3 +97,12 @@ class Benchmarks(Protocol):
:param metadata: The metadata to use for the benchmark.
"""
...
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
async def unregister_benchmark(self, benchmark_id: str) -> None:
"""Unregister a benchmark.
:param benchmark_id: The ID of the benchmark to unregister.
"""
...
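
Note the pattern introduced here and repeated in the eval and post-training diffs below: each route is registered twice, once at `LLAMA_STACK_API_V1` with `deprecated=True` and once at `LLAMA_STACK_API_V1ALPHA`. For this stacking to work, the decorator must accumulate registrations rather than overwrite them. A minimal sketch of such a stackable decorator, assuming the real `webmethod` in `llama_stack.schema_utils` behaves similarly (its actual implementation may differ):

```python
# Sketch only: records each decoration on the function so stacked
# decorators can expose one handler at several API levels.
from typing import Any, Callable

def webmethod(
    route: str,
    method: str = "GET",
    level: str | None = None,
    deprecated: bool = False,
    **kwargs: Any,
) -> Callable:
    def wrapper(func: Callable) -> Callable:
        routes = getattr(func, "__webmethods__", [])
        # Decorators apply bottom-up, so the v1alpha registration
        # (written last in the source) lands in the list first.
        routes.append(
            {"route": route, "method": method, "level": level, "deprecated": deprecated, **kwargs}
        )
        func.__webmethods__ = routes
        return func
    return wrapper
```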

View file

@ -79,3 +79,10 @@ class ConflictError(ValueError):
def __init__(self, message: str) -> None:
super().__init__(message)
class TokenValidationError(ValueError):
"""raised when token validation fails during authentication"""
def __init__(self, message: str) -> None:
super().__init__(message)

View file

@ -8,6 +8,7 @@ from typing import Any, Protocol, runtime_checkable
from llama_stack.apis.common.responses import PaginatedResponse
from llama_stack.apis.datasets import Dataset
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import webmethod
@ -20,7 +21,7 @@ class DatasetIO(Protocol):
# kept for alignment with inference/safety, but this is not used
dataset_store: DatasetStore
@webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET")
@webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
async def iterrows(
self,
dataset_id: str,
@ -44,7 +45,7 @@ class DatasetIO(Protocol):
"""
...
@webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST")
@webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1)
async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
"""Append rows to a dataset.

View file

@ -10,6 +10,7 @@ from typing import Annotated, Any, Literal, Protocol
from pydantic import BaseModel, Field
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -145,7 +146,7 @@ class ListDatasetsResponse(BaseModel):
class Datasets(Protocol):
@webmethod(route="/datasets", method="POST")
@webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1)
async def register_dataset(
self,
purpose: DatasetPurpose,
@ -214,7 +215,7 @@ class Datasets(Protocol):
"""
...
@webmethod(route="/datasets/{dataset_id:path}", method="GET")
@webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_dataset(
self,
dataset_id: str,
@ -226,7 +227,7 @@ class Datasets(Protocol):
"""
...
@webmethod(route="/datasets", method="GET")
@webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1)
async def list_datasets(self) -> ListDatasetsResponse:
"""List all datasets.
@ -234,7 +235,7 @@ class Datasets(Protocol):
"""
...
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE")
@webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
async def unregister_dataset(
self,
dataset_id: str,

View file

@ -102,6 +102,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
:cvar benchmarks: Benchmark suite management
:cvar tool_groups: Tool group organization
:cvar files: File storage and management
:cvar prompts: Prompt versioning and management
:cvar inspect: Built-in system inspection and introspection
"""
@ -127,6 +128,7 @@ class Api(Enum, metaclass=DynamicApiMeta):
benchmarks = "benchmarks"
tool_groups = "tool_groups"
files = "files"
prompts = "prompts"
# built-in API
inspect = "inspect"

View file

@ -13,6 +13,7 @@ from llama_stack.apis.common.job_types import Job
from llama_stack.apis.inference import SamplingParams, SystemMessage
from llama_stack.apis.scoring import ScoringResult
from llama_stack.apis.scoring_functions import ScoringFnParams
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -83,7 +84,8 @@ class EvaluateResponse(BaseModel):
class Eval(Protocol):
"""Llama Stack Evaluation API for running evaluations on model and agent candidates."""
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST")
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def run_eval(
self,
benchmark_id: str,
@ -97,7 +99,10 @@ class Eval(Protocol):
"""
...
@webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST")
@webmethod(
route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def evaluate_rows(
self,
benchmark_id: str,
@ -115,7 +120,10 @@ class Eval(Protocol):
"""
...
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET")
@webmethod(
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def job_status(self, benchmark_id: str, job_id: str) -> Job:
"""Get the status of a job.
@ -125,7 +133,13 @@ class Eval(Protocol):
"""
...
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE")
@webmethod(
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
method="DELETE",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
"""Cancel a job.
@ -134,7 +148,15 @@ class Eval(Protocol):
"""
...
@webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET")
@webmethod(
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
method="GET",
level=LLAMA_STACK_API_V1,
deprecated=True,
)
@webmethod(
route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET", level=LLAMA_STACK_API_V1ALPHA
)
async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
"""Get the result of a job.

View file

@ -11,6 +11,7 @@ from fastapi import File, Form, Response, UploadFile
from pydantic import BaseModel, Field
from llama_stack.apis.common.responses import Order
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
@ -104,7 +105,7 @@ class OpenAIFileDeleteResponse(BaseModel):
@trace_protocol
class Files(Protocol):
# OpenAI Files API Endpoints
@webmethod(route="/openai/v1/files", method="POST")
@webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1)
async def openai_upload_file(
self,
file: Annotated[UploadFile, File()],
@ -119,7 +120,7 @@ class Files(Protocol):
The file upload should be a multipart form request with:
- file: The File object (not file name) to be uploaded.
- purpose: The intended purpose of the uploaded file.
- expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = "created_at", expires_after[seconds] = <int>. Seconds must be between 3600 and 2592000 (1 hour to 30 days).
- expires_after: Optional form values describing expiration for the file. Expected expires_after[anchor] = "created_at", expires_after[seconds] = {integer}. Seconds must be between 3600 and 2592000 (1 hour to 30 days).
:param file: The uploaded file object containing content and metadata (filename, content_type, etc.).
:param purpose: The intended purpose of the uploaded file (e.g., "assistants", "fine-tune").
@ -127,7 +128,7 @@ class Files(Protocol):
"""
...
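
The `expires_after` form encoding described in the docstring can be exercised with a plain multipart request. A hedged sketch using `httpx`; the base URL is an assumption, and the bracketed field names follow the `expires_after[anchor]` / `expires_after[seconds]` convention stated above:

```python
# Hedged sketch of the multipart upload described in the docstring.
import httpx

with open("notes.txt", "rb") as f:
    resp = httpx.post(
        "http://localhost:8321/openai/v1/files",  # assumed server address
        files={"file": ("notes.txt", f, "text/plain")},
        data={
            "purpose": "assistants",
            "expires_after[anchor]": "created_at",
            "expires_after[seconds]": "86400",  # one day; must be 3600..2592000
        },
    )
resp.raise_for_status()
print(resp.json())
```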
@webmethod(route="/openai/v1/files", method="GET")
@webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_files(
self,
after: str | None = None,
@ -146,7 +147,7 @@ class Files(Protocol):
"""
...
@webmethod(route="/openai/v1/files/{file_id}", method="GET")
@webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
async def openai_retrieve_file(
self,
file_id: str,
@ -159,7 +160,7 @@ class Files(Protocol):
"""
...
@webmethod(route="/openai/v1/files/{file_id}", method="DELETE")
@webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def openai_delete_file(
self,
file_id: str,
@ -172,7 +173,7 @@ class Files(Protocol):
"""
...
@webmethod(route="/openai/v1/files/{file_id}/content", method="GET")
@webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
async def openai_retrieve_file_content(
self,
file_id: str,

View file

@ -21,6 +21,7 @@ from llama_stack.apis.common.content_types import ContentDelta, InterleavedConte
from llama_stack.apis.common.responses import Order
from llama_stack.apis.models import Model
from llama_stack.apis.telemetry import MetricResponseMixin
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.models.llama.datatypes import (
BuiltinTool,
StopReason,
@ -1150,7 +1151,7 @@ class InferenceProvider(Protocol):
"""
...
@webmethod(route="/inference/rerank", method="POST", experimental=True)
@webmethod(route="/inference/rerank", method="POST", experimental=True, level=LLAMA_STACK_API_V1)
async def rerank(
self,
model: str,
@ -1169,7 +1170,7 @@ class InferenceProvider(Protocol):
raise NotImplementedError("Reranking is not implemented")
return # this is so mypy's safe-super rule will consider the method concrete
@webmethod(route="/openai/v1/completions", method="POST")
@webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1)
async def openai_completion(
self,
# Standard OpenAI completion parameters
@ -1220,7 +1221,7 @@ class InferenceProvider(Protocol):
"""
...
@webmethod(route="/openai/v1/chat/completions", method="POST")
@webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
async def openai_chat_completion(
self,
model: str,
@ -1276,7 +1277,7 @@ class InferenceProvider(Protocol):
"""
...
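
Since this route is OpenAI-compatible, the standard client should work against it unchanged. A hedged sketch; the server address and model ID are assumptions:

```python
# Hedged sketch: call /openai/v1/chat/completions via the stock client.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/openai/v1", api_key="none")
reply = client.chat.completions.create(
    model="llama3.2:3b",  # hypothetical model identifier
    messages=[{"role": "user", "content": "Say hello in one word."}],
)
print(reply.choices[0].message.content)
```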
@webmethod(route="/openai/v1/embeddings", method="POST")
@webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1)
async def openai_embeddings(
self,
model: str,
@ -1305,7 +1306,7 @@ class Inference(InferenceProvider):
- Embedding models: these models generate embeddings to be used for semantic search.
"""
@webmethod(route="/openai/v1/chat/completions", method="GET")
@webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
async def list_chat_completions(
self,
after: str | None = None,
@ -1323,7 +1324,7 @@ class Inference(InferenceProvider):
"""
raise NotImplementedError("List chat completions is not implemented")
@webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET")
@webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
"""Describe a chat completion by its ID.

View file

@ -8,6 +8,7 @@ from typing import Protocol, runtime_checkable
from pydantic import BaseModel
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.datatypes import HealthStatus
from llama_stack.schema_utils import json_schema_type, webmethod
@ -57,7 +58,7 @@ class ListRoutesResponse(BaseModel):
@runtime_checkable
class Inspect(Protocol):
@webmethod(route="/inspect/routes", method="GET")
@webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1)
async def list_routes(self) -> ListRoutesResponse:
"""List all available API routes with their methods and implementing providers.
@ -65,7 +66,7 @@ class Inspect(Protocol):
"""
...
@webmethod(route="/health", method="GET")
@webmethod(route="/health", method="GET", level=LLAMA_STACK_API_V1)
async def health(self) -> HealthInfo:
"""Get the current health status of the service.
@ -73,7 +74,7 @@ class Inspect(Protocol):
"""
...
@webmethod(route="/version", method="GET")
@webmethod(route="/version", method="GET", level=LLAMA_STACK_API_V1)
async def version(self) -> VersionInfo:
"""Get the version of the service.

View file

@ -10,6 +10,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
from pydantic import BaseModel, ConfigDict, Field, field_validator
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
@ -102,7 +103,7 @@ class OpenAIListModelsResponse(BaseModel):
@runtime_checkable
@trace_protocol
class Models(Protocol):
@webmethod(route="/models", method="GET")
@webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
async def list_models(self) -> ListModelsResponse:
"""List all models.
@ -110,7 +111,7 @@ class Models(Protocol):
"""
...
@webmethod(route="/openai/v1/models", method="GET")
@webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_models(self) -> OpenAIListModelsResponse:
"""List models using the OpenAI API.
@ -118,7 +119,7 @@ class Models(Protocol):
"""
...
@webmethod(route="/models/{model_id:path}", method="GET")
@webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_model(
self,
model_id: str,
@ -130,7 +131,7 @@ class Models(Protocol):
"""
...
@webmethod(route="/models", method="POST")
@webmethod(route="/models", method="POST", level=LLAMA_STACK_API_V1)
async def register_model(
self,
model_id: str,
@ -150,7 +151,7 @@ class Models(Protocol):
"""
...
@webmethod(route="/models/{model_id:path}", method="DELETE")
@webmethod(route="/models/{model_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
async def unregister_model(
self,
model_id: str,

View file

@ -13,6 +13,7 @@ from pydantic import BaseModel, Field
from llama_stack.apis.common.content_types import URL
from llama_stack.apis.common.job_types import JobStatus
from llama_stack.apis.common.training_types import Checkpoint
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -283,7 +284,8 @@ class PostTrainingJobArtifactsResponse(BaseModel):
class PostTraining(Protocol):
@webmethod(route="/post-training/supervised-fine-tune", method="POST")
@webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def supervised_fine_tune(
self,
job_uuid: str,
@ -310,7 +312,8 @@ class PostTraining(Protocol):
"""
...
@webmethod(route="/post-training/preference-optimize", method="POST")
@webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def preference_optimize(
self,
job_uuid: str,
@ -332,7 +335,8 @@ class PostTraining(Protocol):
"""
...
@webmethod(route="/post-training/jobs", method="GET")
@webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_training_jobs(self) -> ListPostTrainingJobsResponse:
"""Get all training jobs.
@ -340,7 +344,8 @@ class PostTraining(Protocol):
"""
...
@webmethod(route="/post-training/job/status", method="GET")
@webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse:
"""Get the status of a training job.
@ -349,7 +354,8 @@ class PostTraining(Protocol):
"""
...
@webmethod(route="/post-training/job/cancel", method="POST")
@webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def cancel_training_job(self, job_uuid: str) -> None:
"""Cancel a training job.
@ -357,7 +363,8 @@ class PostTraining(Protocol):
"""
...
@webmethod(route="/post-training/job/artifacts", method="GET")
@webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
"""Get the artifacts of a training job.

View file

@ -0,0 +1,9 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .prompts import ListPromptsResponse, Prompt, Prompts
__all__ = ["Prompt", "Prompts", "ListPromptsResponse"]

View file

@ -0,0 +1,190 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import re
import secrets
from typing import Protocol, runtime_checkable
from pydantic import BaseModel, Field, field_validator, model_validator
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
@json_schema_type
class Prompt(BaseModel):
"""A prompt resource representing a stored OpenAI Compatible prompt template in Llama Stack.
:param prompt: The system prompt text with variable placeholders. Variables are only supported when using the Responses API.
:param version: Version (integer starting at 1, incremented on save)
:param prompt_id: Unique identifier formatted as 'pmpt_<48-digit-hash>'
:param variables: List of prompt variable names that can be used in the prompt template
:param is_default: Boolean indicating whether this version is the default version for this prompt
"""
prompt: str | None = Field(default=None, description="The system prompt with variable placeholders")
version: int = Field(description="Version (integer starting at 1, incremented on save)", ge=1)
prompt_id: str = Field(description="Unique identifier in format 'pmpt_<48-digit-hash>'")
variables: list[str] = Field(
default_factory=list, description="List of variable names that can be used in the prompt template"
)
is_default: bool = Field(
default=False, description="Boolean indicating whether this version is the default version"
)
@field_validator("prompt_id")
@classmethod
def validate_prompt_id(cls, prompt_id: str) -> str:
if not isinstance(prompt_id, str):
raise TypeError("prompt_id must be a string in format 'pmpt_<48-digit-hash>'")
if not prompt_id.startswith("pmpt_"):
raise ValueError("prompt_id must start with 'pmpt_' prefix")
hex_part = prompt_id[5:]
if len(hex_part) != 48:
raise ValueError("prompt_id must be in format 'pmpt_<48-digit-hash>' (48 lowercase hex chars)")
for char in hex_part:
if char not in "0123456789abcdef":
raise ValueError("prompt_id hex part must contain only lowercase hex characters [0-9a-f]")
return prompt_id
@field_validator("version")
@classmethod
def validate_version(cls, prompt_version: int) -> int:
if prompt_version < 1:
raise ValueError("version must be >= 1")
return prompt_version
@model_validator(mode="after")
def validate_prompt_variables(self):
"""Validate that all variables used in the prompt are declared in the variables list."""
if not self.prompt:
return self
prompt_variables = set(re.findall(r"{{\s*(\w+)\s*}}", self.prompt))
declared_variables = set(self.variables)
undeclared = prompt_variables - declared_variables
if undeclared:
raise ValueError(f"Prompt contains undeclared variables: {sorted(undeclared)}")
return self
@classmethod
def generate_prompt_id(cls) -> str:
# Generate 48 hex characters (24 bytes)
random_bytes = secrets.token_bytes(24)
hex_string = random_bytes.hex()
return f"pmpt_{hex_string}"
class ListPromptsResponse(BaseModel):
"""Response model to list prompts."""
data: list[Prompt]
@runtime_checkable
@trace_protocol
class Prompts(Protocol):
"""Protocol for prompt management operations."""
@webmethod(route="/prompts", method="GET", level=LLAMA_STACK_API_V1)
async def list_prompts(self) -> ListPromptsResponse:
"""List all prompts.
:returns: A ListPromptsResponse containing all prompts.
"""
...
@webmethod(route="/prompts/{prompt_id}/versions", method="GET", level=LLAMA_STACK_API_V1)
async def list_prompt_versions(
self,
prompt_id: str,
) -> ListPromptsResponse:
"""List all versions of a specific prompt.
:param prompt_id: The identifier of the prompt to list versions for.
:returns: A ListPromptsResponse containing all versions of the prompt.
"""
...
@webmethod(route="/prompts/{prompt_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_prompt(
self,
prompt_id: str,
version: int | None = None,
) -> Prompt:
"""Get a prompt by its identifier and optional version.
:param prompt_id: The identifier of the prompt to get.
:param version: The version of the prompt to get (defaults to latest).
:returns: A Prompt resource.
"""
...
@webmethod(route="/prompts", method="POST", level=LLAMA_STACK_API_V1)
async def create_prompt(
self,
prompt: str,
variables: list[str] | None = None,
) -> Prompt:
"""Create a new prompt.
:param prompt: The prompt text content with variable placeholders.
:param variables: List of variable names that can be used in the prompt template.
:returns: The created Prompt resource.
"""
...
@webmethod(route="/prompts/{prompt_id}", method="PUT", level=LLAMA_STACK_API_V1)
async def update_prompt(
self,
prompt_id: str,
prompt: str,
version: int,
variables: list[str] | None = None,
set_as_default: bool = True,
) -> Prompt:
"""Update an existing prompt (increments version).
:param prompt_id: The identifier of the prompt to update.
:param prompt: The updated prompt text content.
:param version: The current version of the prompt being updated.
:param variables: Updated list of variable names that can be used in the prompt template.
:param set_as_default: Set the new version as the default (default=True).
:returns: The updated Prompt resource with incremented version.
"""
...
@webmethod(route="/prompts/{prompt_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def delete_prompt(
self,
prompt_id: str,
) -> None:
"""Delete a prompt.
:param prompt_id: The identifier of the prompt to delete.
"""
...
@webmethod(route="/prompts/{prompt_id}/set-default-version", method="PUT", level=LLAMA_STACK_API_V1)
async def set_default_version(
self,
prompt_id: str,
version: int,
) -> Prompt:
"""Set which version of a prompt should be the default in get_prompt (latest).
:param prompt_id: The identifier of the prompt.
:param version: The version to set as default.
:returns: The prompt with the specified version now set as default.
"""
...
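
A hedged end-to-end sketch of the prompt lifecycle over these routes. It assumes the `level=LLAMA_STACK_API_V1` routes are served under a `/v1` prefix and that parameters travel as a JSON body; neither is confirmed by this diff:

```python
# Hedged lifecycle sketch: create, update (bumps version), set default.
import httpx

base = "http://localhost:8321/v1"  # assumed prefix for v1-level routes

created = httpx.post(f"{base}/prompts", json={
    "prompt": "Summarize {{ topic }} in one paragraph.",
    "variables": ["topic"],
}).json()

updated = httpx.put(f"{base}/prompts/{created['prompt_id']}", json={
    "prompt": "Summarize {{ topic }} in two sentences.",
    "version": created["version"],  # the version being updated
    "variables": ["topic"],
    "set_as_default": True,
}).json()

# Pin the original version back as the default returned by get_prompt.
httpx.put(f"{base}/prompts/{created['prompt_id']}/set-default-version",
          json={"version": created["version"]})
```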

View file

@ -8,6 +8,7 @@ from typing import Any, Protocol, runtime_checkable
from pydantic import BaseModel
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.datatypes import HealthResponse
from llama_stack.schema_utils import json_schema_type, webmethod
@ -45,7 +46,7 @@ class Providers(Protocol):
Providers API for inspecting, listing, and modifying providers and their configurations.
"""
@webmethod(route="/providers", method="GET")
@webmethod(route="/providers", method="GET", level=LLAMA_STACK_API_V1)
async def list_providers(self) -> ListProvidersResponse:
"""List all available providers.
@ -53,7 +54,7 @@ class Providers(Protocol):
"""
...
@webmethod(route="/providers/{provider_id}", method="GET")
@webmethod(route="/providers/{provider_id}", method="GET", level=LLAMA_STACK_API_V1)
async def inspect_provider(self, provider_id: str) -> ProviderInfo:
"""Get detailed information about a specific provider.

View file

@ -19,6 +19,7 @@ class ResourceType(StrEnum):
benchmark = "benchmark"
tool = "tool"
tool_group = "tool_group"
prompt = "prompt"
class Resource(BaseModel):

View file

@ -11,6 +11,7 @@ from pydantic import BaseModel, Field
from llama_stack.apis.inference import Message
from llama_stack.apis.shields import Shield
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
@ -97,7 +98,7 @@ class ShieldStore(Protocol):
class Safety(Protocol):
shield_store: ShieldStore
@webmethod(route="/safety/run-shield", method="POST")
@webmethod(route="/safety/run-shield", method="POST", level=LLAMA_STACK_API_V1)
async def run_shield(
self,
shield_id: str,
@ -113,7 +114,7 @@ class Safety(Protocol):
"""
...
@webmethod(route="/openai/v1/moderations", method="POST")
@webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1)
async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
"""Classifies if text and/or image inputs are potentially harmful.
:param input: Input (or inputs) to classify.

View file

@ -9,6 +9,7 @@ from typing import Any, Protocol, runtime_checkable
from pydantic import BaseModel
from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import json_schema_type, webmethod
# mapping of metric to value
@ -61,7 +62,7 @@ class ScoringFunctionStore(Protocol):
class Scoring(Protocol):
scoring_function_store: ScoringFunctionStore
@webmethod(route="/scoring/score-batch", method="POST")
@webmethod(route="/scoring/score-batch", method="POST", level=LLAMA_STACK_API_V1)
async def score_batch(
self,
dataset_id: str,
@ -77,7 +78,7 @@ class Scoring(Protocol):
"""
...
@webmethod(route="/scoring/score", method="POST")
@webmethod(route="/scoring/score", method="POST", level=LLAMA_STACK_API_V1)
async def score(
self,
input_rows: list[dict[str, Any]],

View file

@ -18,6 +18,7 @@ from pydantic import BaseModel, Field
from llama_stack.apis.common.type_system import ParamType
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -160,7 +161,7 @@ class ListScoringFunctionsResponse(BaseModel):
@runtime_checkable
class ScoringFunctions(Protocol):
@webmethod(route="/scoring-functions", method="GET")
@webmethod(route="/scoring-functions", method="GET", level=LLAMA_STACK_API_V1)
async def list_scoring_functions(self) -> ListScoringFunctionsResponse:
"""List all scoring functions.
@ -168,7 +169,7 @@ class ScoringFunctions(Protocol):
"""
...
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET")
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_scoring_function(self, scoring_fn_id: str, /) -> ScoringFn:
"""Get a scoring function by its ID.
@ -177,7 +178,7 @@ class ScoringFunctions(Protocol):
"""
...
@webmethod(route="/scoring-functions", method="POST")
@webmethod(route="/scoring-functions", method="POST", level=LLAMA_STACK_API_V1)
async def register_scoring_function(
self,
scoring_fn_id: str,
@ -197,3 +198,11 @@ class ScoringFunctions(Protocol):
:param params: The parameters for the scoring function for benchmark eval; these can be overridden for app eval.
"""
...
@webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
async def unregister_scoring_function(self, scoring_fn_id: str) -> None:
"""Unregister a scoring function.
:param scoring_fn_id: The ID of the scoring function to unregister.
"""
...

View file

@ -9,6 +9,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
from pydantic import BaseModel
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
@ -49,7 +50,7 @@ class ListShieldsResponse(BaseModel):
@runtime_checkable
@trace_protocol
class Shields(Protocol):
@webmethod(route="/shields", method="GET")
@webmethod(route="/shields", method="GET", level=LLAMA_STACK_API_V1)
async def list_shields(self) -> ListShieldsResponse:
"""List all shields.
@ -57,7 +58,7 @@ class Shields(Protocol):
"""
...
@webmethod(route="/shields/{identifier:path}", method="GET")
@webmethod(route="/shields/{identifier:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_shield(self, identifier: str) -> Shield:
"""Get a shield by its identifier.
@ -66,7 +67,7 @@ class Shields(Protocol):
"""
...
@webmethod(route="/shields", method="POST")
@webmethod(route="/shields", method="POST", level=LLAMA_STACK_API_V1)
async def register_shield(
self,
shield_id: str,
@ -84,7 +85,7 @@ class Shields(Protocol):
"""
...
@webmethod(route="/shields/{identifier:path}", method="DELETE")
@webmethod(route="/shields/{identifier:path}", method="DELETE", level=LLAMA_STACK_API_V1)
async def unregister_shield(self, identifier: str) -> None:
"""Unregister a shield.

View file

@ -10,6 +10,7 @@ from typing import Any, Protocol
from pydantic import BaseModel
from llama_stack.apis.inference import Message
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.schema_utils import json_schema_type, webmethod
@ -59,7 +60,7 @@ class SyntheticDataGenerationResponse(BaseModel):
class SyntheticDataGeneration(Protocol):
@webmethod(route="/synthetic-data-generation/generate")
@webmethod(route="/synthetic-data-generation/generate", level=LLAMA_STACK_API_V1)
def synthetic_data_generate(
self,
dialogs: list[Message],

View file

@ -16,6 +16,7 @@ from typing import (
from pydantic import BaseModel, Field
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.models.llama.datatypes import Primitive
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -412,7 +413,7 @@ class QueryMetricsResponse(BaseModel):
@runtime_checkable
class Telemetry(Protocol):
@webmethod(route="/telemetry/events", method="POST")
@webmethod(route="/telemetry/events", method="POST", level=LLAMA_STACK_API_V1)
async def log_event(
self,
event: Event,
@ -425,7 +426,7 @@ class Telemetry(Protocol):
"""
...
@webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE)
@webmethod(route="/telemetry/traces", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1)
async def query_traces(
self,
attribute_filters: list[QueryCondition] | None = None,
@ -443,7 +444,9 @@ class Telemetry(Protocol):
"""
...
@webmethod(route="/telemetry/traces/{trace_id:path}", method="GET", required_scope=REQUIRED_SCOPE)
@webmethod(
route="/telemetry/traces/{trace_id:path}", method="GET", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1
)
async def get_trace(self, trace_id: str) -> Trace:
"""Get a trace by its ID.
@ -453,7 +456,10 @@ class Telemetry(Protocol):
...
@webmethod(
route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}", method="GET", required_scope=REQUIRED_SCOPE
route="/telemetry/traces/{trace_id:path}/spans/{span_id:path}",
method="GET",
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1,
)
async def get_span(self, trace_id: str, span_id: str) -> Span:
"""Get a span by its ID.
@ -464,7 +470,12 @@ class Telemetry(Protocol):
"""
...
@webmethod(route="/telemetry/spans/{span_id:path}/tree", method="POST", required_scope=REQUIRED_SCOPE)
@webmethod(
route="/telemetry/spans/{span_id:path}/tree",
method="POST",
required_scope=REQUIRED_SCOPE,
level=LLAMA_STACK_API_V1,
)
async def get_span_tree(
self,
span_id: str,
@ -480,7 +491,7 @@ class Telemetry(Protocol):
"""
...
@webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE)
@webmethod(route="/telemetry/spans", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1)
async def query_spans(
self,
attribute_filters: list[QueryCondition],
@ -496,7 +507,7 @@ class Telemetry(Protocol):
"""
...
@webmethod(route="/telemetry/spans/export", method="POST")
@webmethod(route="/telemetry/spans/export", method="POST", level=LLAMA_STACK_API_V1)
async def save_spans_to_dataset(
self,
attribute_filters: list[QueryCondition],
@ -513,7 +524,9 @@ class Telemetry(Protocol):
"""
...
@webmethod(route="/telemetry/metrics/{metric_name}", method="POST", required_scope=REQUIRED_SCOPE)
@webmethod(
route="/telemetry/metrics/{metric_name}", method="POST", required_scope=REQUIRED_SCOPE, level=LLAMA_STACK_API_V1
)
async def query_metrics(
self,
metric_name: str,

View file

@ -11,6 +11,7 @@ from pydantic import BaseModel, Field, field_validator
from typing_extensions import runtime_checkable
from llama_stack.apis.common.content_types import URL, InterleavedContent
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -185,7 +186,7 @@ class RAGQueryConfig(BaseModel):
@runtime_checkable
@trace_protocol
class RAGToolRuntime(Protocol):
@webmethod(route="/tool-runtime/rag-tool/insert", method="POST")
@webmethod(route="/tool-runtime/rag-tool/insert", method="POST", level=LLAMA_STACK_API_V1)
async def insert(
self,
documents: list[RAGDocument],
@ -200,7 +201,7 @@ class RAGToolRuntime(Protocol):
"""
...
@webmethod(route="/tool-runtime/rag-tool/query", method="POST")
@webmethod(route="/tool-runtime/rag-tool/query", method="POST", level=LLAMA_STACK_API_V1)
async def query(
self,
content: InterleavedContent,

View file

@ -12,6 +12,7 @@ from typing_extensions import runtime_checkable
from llama_stack.apis.common.content_types import URL, InterleavedContent
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
@ -151,7 +152,7 @@ class ListToolDefsResponse(BaseModel):
@runtime_checkable
@trace_protocol
class ToolGroups(Protocol):
@webmethod(route="/toolgroups", method="POST")
@webmethod(route="/toolgroups", method="POST", level=LLAMA_STACK_API_V1)
async def register_tool_group(
self,
toolgroup_id: str,
@ -168,7 +169,7 @@ class ToolGroups(Protocol):
"""
...
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET")
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_tool_group(
self,
toolgroup_id: str,
@ -180,7 +181,7 @@ class ToolGroups(Protocol):
"""
...
@webmethod(route="/toolgroups", method="GET")
@webmethod(route="/toolgroups", method="GET", level=LLAMA_STACK_API_V1)
async def list_tool_groups(self) -> ListToolGroupsResponse:
"""List tool groups with optional provider.
@ -188,7 +189,7 @@ class ToolGroups(Protocol):
"""
...
@webmethod(route="/tools", method="GET")
@webmethod(route="/tools", method="GET", level=LLAMA_STACK_API_V1)
async def list_tools(self, toolgroup_id: str | None = None) -> ListToolsResponse:
"""List tools with optional tool group.
@ -197,7 +198,7 @@ class ToolGroups(Protocol):
"""
...
@webmethod(route="/tools/{tool_name:path}", method="GET")
@webmethod(route="/tools/{tool_name:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_tool(
self,
tool_name: str,
@ -209,7 +210,7 @@ class ToolGroups(Protocol):
"""
...
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE")
@webmethod(route="/toolgroups/{toolgroup_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
async def unregister_toolgroup(
self,
toolgroup_id: str,
@ -238,7 +239,7 @@ class ToolRuntime(Protocol):
rag_tool: RAGToolRuntime | None = None
# TODO: This needs to be renamed once the OpenAPI generator name-conflict issue is fixed.
@webmethod(route="/tool-runtime/list-tools", method="GET")
@webmethod(route="/tool-runtime/list-tools", method="GET", level=LLAMA_STACK_API_V1)
async def list_runtime_tools(
self, tool_group_id: str | None = None, mcp_endpoint: URL | None = None
) -> ListToolDefsResponse:
@ -250,7 +251,7 @@ class ToolRuntime(Protocol):
"""
...
@webmethod(route="/tool-runtime/invoke", method="POST")
@webmethod(route="/tool-runtime/invoke", method="POST", level=LLAMA_STACK_API_V1)
async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult:
"""Run a tool with the given arguments.

View file

@ -9,6 +9,7 @@ from typing import Literal, Protocol, runtime_checkable
from pydantic import BaseModel
from llama_stack.apis.resource import Resource, ResourceType
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.schema_utils import json_schema_type, webmethod
@ -65,7 +66,7 @@ class ListVectorDBsResponse(BaseModel):
@runtime_checkable
@trace_protocol
class VectorDBs(Protocol):
@webmethod(route="/vector-dbs", method="GET")
@webmethod(route="/vector-dbs", method="GET", level=LLAMA_STACK_API_V1)
async def list_vector_dbs(self) -> ListVectorDBsResponse:
"""List all vector databases.
@ -73,7 +74,7 @@ class VectorDBs(Protocol):
"""
...
@webmethod(route="/vector-dbs/{vector_db_id:path}", method="GET")
@webmethod(route="/vector-dbs/{vector_db_id:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_vector_db(
self,
vector_db_id: str,
@ -85,7 +86,7 @@ class VectorDBs(Protocol):
"""
...
@webmethod(route="/vector-dbs", method="POST")
@webmethod(route="/vector-dbs", method="POST", level=LLAMA_STACK_API_V1)
async def register_vector_db(
self,
vector_db_id: str,
@ -107,7 +108,7 @@ class VectorDBs(Protocol):
"""
...
@webmethod(route="/vector-dbs/{vector_db_id:path}", method="DELETE")
@webmethod(route="/vector-dbs/{vector_db_id:path}", method="DELETE", level=LLAMA_STACK_API_V1)
async def unregister_vector_db(self, vector_db_id: str) -> None:
"""Unregister a vector database.

View file

@ -15,6 +15,7 @@ from pydantic import BaseModel, Field
from llama_stack.apis.inference import InterleavedContent
from llama_stack.apis.vector_dbs import VectorDB
from llama_stack.apis.version import LLAMA_STACK_API_V1
from llama_stack.providers.utils.telemetry.trace_protocol import trace_protocol
from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
from llama_stack.schema_utils import json_schema_type, webmethod
@ -437,7 +438,7 @@ class VectorIO(Protocol):
# this will just block now until chunks are inserted, but it should
# probably return a Job instance which can be polled for completion
@webmethod(route="/vector-io/insert", method="POST")
@webmethod(route="/vector-io/insert", method="POST", level=LLAMA_STACK_API_V1)
async def insert_chunks(
self,
vector_db_id: str,
@ -455,7 +456,7 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/vector-io/query", method="POST")
@webmethod(route="/vector-io/query", method="POST", level=LLAMA_STACK_API_V1)
async def query_chunks(
self,
vector_db_id: str,
@ -472,7 +473,7 @@ class VectorIO(Protocol):
...
# OpenAI Vector Stores API endpoints
@webmethod(route="/openai/v1/vector_stores", method="POST")
@webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
async def openai_create_vector_store(
self,
name: str | None = None,
@ -498,7 +499,7 @@ class VectorIO(Protocol):
"""
...
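
A hedged sketch of creating and then searching a vector store over the routes above with plain HTTP. Field names beyond `name` and `query` are assumptions based on the OpenAI-compatible shape:

```python
# Hedged sketch: create a vector store, then search it.
import httpx

base = "http://localhost:8321/openai/v1"  # assumed server address
store = httpx.post(f"{base}/vector_stores", json={"name": "docs"}).json()
hits = httpx.post(
    f"{base}/vector_stores/{store['id']}/search",
    json={"query": "how do I register a model?"},
).json()
print(hits)
```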
@webmethod(route="/openai/v1/vector_stores", method="GET")
@webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_vector_stores(
self,
limit: int | None = 20,
@ -516,7 +517,7 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET")
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
async def openai_retrieve_vector_store(
self,
vector_store_id: str,
@ -528,7 +529,7 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST")
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1)
async def openai_update_vector_store(
self,
vector_store_id: str,
@ -546,7 +547,7 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE")
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def openai_delete_vector_store(
self,
vector_store_id: str,
@ -558,7 +559,7 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST")
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1)
async def openai_search_vector_store(
self,
vector_store_id: str,
@ -584,7 +585,7 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST")
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1)
async def openai_attach_file_to_vector_store(
self,
vector_store_id: str,
@ -602,7 +603,7 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET")
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_files_in_vector_store(
self,
vector_store_id: str,
@ -624,7 +625,9 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET")
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1
)
async def openai_retrieve_vector_store_file(
self,
vector_store_id: str,
@ -638,7 +641,11 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content", method="GET")
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
method="GET",
level=LLAMA_STACK_API_V1,
)
async def openai_retrieve_vector_store_file_contents(
self,
vector_store_id: str,
@ -652,7 +659,9 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST")
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1
)
async def openai_update_vector_store_file(
self,
vector_store_id: str,
@ -668,7 +677,9 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE")
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1
)
async def openai_delete_vector_store_file(
self,
vector_store_id: str,

View file

@ -4,4 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
LLAMA_STACK_API_VERSION = "v1"
LLAMA_STACK_API_V1 = "v1"
LLAMA_STACK_API_V1BETA = "v1beta"
LLAMA_STACK_API_V1ALPHA = "v1alpha"
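
These constants are what the new `level=` arguments refer to. Presumably the server joins the level onto the route to build the served path (so `/agents` at `LLAMA_STACK_API_V1` becomes `/v1/agents`); a hypothetical helper illustrating that assumption — the stack's actual routing logic may differ:

```python
# Hypothetical helper: combine an API level with a route to form the
# served path. Illustration only; not the stack's real routing code.
LLAMA_STACK_API_V1 = "v1"
LLAMA_STACK_API_V1BETA = "v1beta"
LLAMA_STACK_API_V1ALPHA = "v1alpha"

def served_path(level: str, route: str) -> str:
    return f"/{level}{route}"

assert served_path(LLAMA_STACK_API_V1, "/agents") == "/v1/agents"
assert served_path(LLAMA_STACK_API_V1ALPHA, "/eval/benchmarks") == "/v1alpha/eval/benchmarks"
```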