Add version to REST API URL (#478)

# What does this PR do? 

Adds a `/alpha/` prefix to all the REST API URLs.

Also makes them all use hyphens instead of underscores, as is more
standard practice.

(This is based on feedback from our partners.)

## Test Plan 

The Stack itself does not need updating. However, client SDKs and
documentation will need to be updated.
This commit is contained in:
Ashwin Bharambe 2024-11-18 22:44:14 -08:00 committed by GitHub
parent 05e93bd2f7
commit 0dc7f5fa89
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 32842 additions and 6032 deletions

View file

@ -31,7 +31,12 @@ from .strong_typing.schema import json_schema_type
schema_utils.json_schema_type = json_schema_type
from llama_stack.distribution.stack import LlamaStack
# this line needs to be here to ensure json_schema_type has been altered before
# the imports use the annotation
from llama_stack.distribution.stack import ( # noqa: E402
LLAMA_STACK_API_VERSION,
LlamaStack,
)
def main(output_dir: str):
@ -50,7 +55,7 @@ def main(output_dir: str):
server=Server(url="http://any-hosted-llama-stack.com"),
info=Info(
title="[DRAFT] Llama Stack Specification",
version="0.0.1",
version=LLAMA_STACK_API_VERSION,
description="""This is the specification of the llama stack that provides
a set of endpoints and their corresponding interfaces that are tailored to
best leverage Llama Models. The specification is still in draft and subject to change.

View file

@ -202,7 +202,9 @@ class ContentBuilder:
) -> MediaType:
schema = self.schema_builder.classdef_to_ref(item_type)
if self.schema_transformer:
schema_transformer: Callable[[SchemaOrRef], SchemaOrRef] = self.schema_transformer # type: ignore
schema_transformer: Callable[[SchemaOrRef], SchemaOrRef] = (
self.schema_transformer
) # type: ignore
schema = schema_transformer(schema)
if not examples:
@ -630,6 +632,7 @@ class Generator:
raise NotImplementedError(f"unknown HTTP method: {op.http_method}")
route = op.get_route()
print(f"route: {route}")
if route in paths:
paths[route].update(pathItem)
else:

View file

@ -12,6 +12,8 @@ import uuid
from dataclasses import dataclass
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union
from llama_stack.distribution.stack import LLAMA_STACK_API_VERSION
from termcolor import colored
from ..strong_typing.inspection import (
@ -111,9 +113,12 @@ class EndpointOperation:
def get_route(self) -> str:
if self.route is not None:
return self.route
assert (
"_" not in self.route
), f"route should not contain underscores: {self.route}"
return "/".join(["", LLAMA_STACK_API_VERSION, self.route.lstrip("/")])
route_parts = ["", self.name]
route_parts = ["", LLAMA_STACK_API_VERSION, self.name]
for param_name, _ in self.path_params:
route_parts.append("{" + param_name + "}")
return "/".join(route_parts)

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -49,7 +49,7 @@ class BatchChatCompletionResponse(BaseModel):
@runtime_checkable
class BatchInference(Protocol):
@webmethod(route="/batch_inference/completion")
@webmethod(route="/batch-inference/completion")
async def batch_completion(
self,
model: str,
@ -58,7 +58,7 @@ class BatchInference(Protocol):
logprobs: Optional[LogProbConfig] = None,
) -> BatchCompletionResponse: ...
@webmethod(route="/batch_inference/chat_completion")
@webmethod(route="/batch-inference/chat-completion")
async def batch_chat_completion(
self,
model: str,

View file

@ -29,7 +29,7 @@ class DatasetIO(Protocol):
# keeping for aligning with inference/safety, but this is not used
dataset_store: DatasetStore
@webmethod(route="/datasetio/get_rows_paginated", method="GET")
@webmethod(route="/datasetio/get-rows-paginated", method="GET")
async def get_rows_paginated(
self,
dataset_id: str,

View file

@ -74,14 +74,14 @@ class EvaluateResponse(BaseModel):
class Eval(Protocol):
@webmethod(route="/eval/run_eval", method="POST")
@webmethod(route="/eval/run-eval", method="POST")
async def run_eval(
self,
task_id: str,
task_config: EvalTaskConfig,
) -> Job: ...
@webmethod(route="/eval/evaluate_rows", method="POST")
@webmethod(route="/eval/evaluate-rows", method="POST")
async def evaluate_rows(
self,
task_id: str,

View file

@ -42,13 +42,13 @@ class EvalTaskInput(CommonEvalTaskFields, BaseModel):
@runtime_checkable
class EvalTasks(Protocol):
@webmethod(route="/eval_tasks/list", method="GET")
@webmethod(route="/eval-tasks/list", method="GET")
async def list_eval_tasks(self) -> List[EvalTask]: ...
@webmethod(route="/eval_tasks/get", method="GET")
@webmethod(route="/eval-tasks/get", method="GET")
async def get_eval_task(self, name: str) -> Optional[EvalTask]: ...
@webmethod(route="/eval_tasks/register", method="POST")
@webmethod(route="/eval-tasks/register", method="POST")
async def register_eval_task(
self,
eval_task_id: str,

View file

@ -234,7 +234,7 @@ class Inference(Protocol):
logprobs: Optional[LogProbConfig] = None,
) -> Union[CompletionResponse, AsyncIterator[CompletionResponseStreamChunk]]: ...
@webmethod(route="/inference/chat_completion")
@webmethod(route="/inference/chat-completion")
async def chat_completion(
self,
model_id: str,

View file

@ -130,13 +130,13 @@ class MemoryBankInput(BaseModel):
@runtime_checkable
class MemoryBanks(Protocol):
@webmethod(route="/memory_banks/list", method="GET")
@webmethod(route="/memory-banks/list", method="GET")
async def list_memory_banks(self) -> List[MemoryBank]: ...
@webmethod(route="/memory_banks/get", method="GET")
@webmethod(route="/memory-banks/get", method="GET")
async def get_memory_bank(self, memory_bank_id: str) -> Optional[MemoryBank]: ...
@webmethod(route="/memory_banks/register", method="POST")
@webmethod(route="/memory-banks/register", method="POST")
async def register_memory_bank(
self,
memory_bank_id: str,
@ -145,5 +145,5 @@ class MemoryBanks(Protocol):
provider_memory_bank_id: Optional[str] = None,
) -> MemoryBank: ...
@webmethod(route="/memory_banks/unregister", method="POST")
@webmethod(route="/memory-banks/unregister", method="POST")
async def unregister_memory_bank(self, memory_bank_id: str) -> None: ...

View file

@ -176,7 +176,7 @@ class PostTrainingJobArtifactsResponse(BaseModel):
class PostTraining(Protocol):
@webmethod(route="/post_training/supervised_fine_tune")
@webmethod(route="/post-training/supervised-fine-tune")
def supervised_fine_tune(
self,
job_uuid: str,
@ -193,7 +193,7 @@ class PostTraining(Protocol):
logger_config: Dict[str, Any],
) -> PostTrainingJob: ...
@webmethod(route="/post_training/preference_optimize")
@webmethod(route="/post-training/preference-optimize")
def preference_optimize(
self,
job_uuid: str,
@ -208,22 +208,22 @@ class PostTraining(Protocol):
logger_config: Dict[str, Any],
) -> PostTrainingJob: ...
@webmethod(route="/post_training/jobs")
@webmethod(route="/post-training/jobs")
def get_training_jobs(self) -> List[PostTrainingJob]: ...
# sends SSE stream of logs
@webmethod(route="/post_training/job/logs")
@webmethod(route="/post-training/job/logs")
def get_training_job_logstream(self, job_uuid: str) -> PostTrainingJobLogStream: ...
@webmethod(route="/post_training/job/status")
@webmethod(route="/post-training/job/status")
def get_training_job_status(
self, job_uuid: str
) -> PostTrainingJobStatusResponse: ...
@webmethod(route="/post_training/job/cancel")
@webmethod(route="/post-training/job/cancel")
def cancel_training_job(self, job_uuid: str) -> None: ...
@webmethod(route="/post_training/job/artifacts")
@webmethod(route="/post-training/job/artifacts")
def get_training_job_artifacts(
self, job_uuid: str
) -> PostTrainingJobArtifactsResponse: ...

View file

@ -46,7 +46,7 @@ class ShieldStore(Protocol):
class Safety(Protocol):
shield_store: ShieldStore
@webmethod(route="/safety/run_shield")
@webmethod(route="/safety/run-shield")
async def run_shield(
self,
shield_id: str,

View file

@ -44,7 +44,7 @@ class ScoringFunctionStore(Protocol):
class Scoring(Protocol):
scoring_function_store: ScoringFunctionStore
@webmethod(route="/scoring/score_batch")
@webmethod(route="/scoring/score-batch")
async def score_batch(
self,
dataset_id: str,

View file

@ -104,13 +104,13 @@ class ScoringFnInput(CommonScoringFnFields, BaseModel):
@runtime_checkable
class ScoringFunctions(Protocol):
@webmethod(route="/scoring_functions/list", method="GET")
@webmethod(route="/scoring-functions/list", method="GET")
async def list_scoring_functions(self) -> List[ScoringFn]: ...
@webmethod(route="/scoring_functions/get", method="GET")
@webmethod(route="/scoring-functions/get", method="GET")
async def get_scoring_function(self, scoring_fn_id: str) -> Optional[ScoringFn]: ...
@webmethod(route="/scoring_functions/register", method="POST")
@webmethod(route="/scoring-functions/register", method="POST")
async def register_scoring_function(
self,
scoring_fn_id: str,

View file

@ -44,7 +44,7 @@ class SyntheticDataGenerationResponse(BaseModel):
class SyntheticDataGeneration(Protocol):
@webmethod(route="/synthetic_data_generation/generate")
@webmethod(route="/synthetic-data-generation/generate")
def synthetic_data_generate(
self,
dialogs: List[Message],

View file

@ -125,8 +125,8 @@ Event = Annotated[
@runtime_checkable
class Telemetry(Protocol):
@webmethod(route="/telemetry/log_event")
@webmethod(route="/telemetry/log-event")
async def log_event(self, event: Event) -> None: ...
@webmethod(route="/telemetry/get_trace", method="GET")
@webmethod(route="/telemetry/get-trace", method="GET")
async def get_trace(self, trace_id: str) -> Trace: ...

View file

@ -40,6 +40,9 @@ from llama_stack.distribution.store.registry import create_dist_registry
from llama_stack.providers.datatypes import Api
LLAMA_STACK_API_VERSION = "alpha"
class LlamaStack(
MemoryBanks,
Inference,