feat(openai_movement): Change URL structures to kill /openai/v1 (part 1) (#3587)

The `/v1/openai/v1` prefix is annoying and now unnecessary given our clearer focus on how to think about the API surface. Let's kill it for the 0.3.0 update. To make client-side changes feasible, we will do this in two parts. This part adds a new route (sans `/openai/v1`) so the existing client continues to work since the server supports both. The next PR will be client-side (Stainless) changes which I will be making shortly. The final PR will remove the `/openai/v1` routes. Note that all these changes will happen rapidly within this release cycle. The entire set _will be backwards incompatible_.
2025-12-03 09:53:45 +00:00 · 2025-09-29 16:14:35 -07:00 · 2025-09-29 16:14:35 -07:00 · 5e7fed8bbb
commit 5e7fed8bbb
parent ddf3f1735a
11 changed files with 2715 additions and 5 deletions
--- a/.github/workflows/conformance.yml
+++ b/.github/workflows/conformance.yml
@ -66,6 +66,4 @@ jobs:
      # This step will fail if incompatible changes are detected, preventing breaking changes from being merged
      - name: Run OpenAPI Breaking Change Diff
        run: |
-          oasdiff breaking --fail-on ERR base/docs/static/llama-stack-spec.yaml docs/static/llama-stack-spec.yaml --match-path '^/v1/openai/v1' \
-          --match-path '^/v1/vector-io' \
-          --match-path '^/v1/vector-dbs'
+          oasdiff breaking --fail-on ERR base/docs/static/llama-stack-spec.yaml docs/static/llama-stack-spec.yaml --match-path '^/v1/'
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@ -695,6 +695,7 @@ class Agents(Protocol):
    # Both of these APIs are inherently stateful.

    @webmethod(route="/openai/v1/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_openai_response(
        self,
        response_id: str,
@ -707,6 +708,7 @@ class Agents(Protocol):
        ...

    @webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
    async def create_openai_response(
        self,
        input: str | list[OpenAIResponseInput],
@ -732,6 +734,7 @@ class Agents(Protocol):
        ...

    @webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
    async def list_openai_responses(
        self,
        after: str | None = None,
@ -750,6 +753,7 @@ class Agents(Protocol):
        ...

    @webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
    async def list_openai_response_input_items(
        self,
        response_id: str,
@ -772,6 +776,7 @@ class Agents(Protocol):
        ...

    @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
        """Delete an OpenAI response by its ID.

--- a/llama_stack/apis/batches/batches.py
+++ b/llama_stack/apis/batches/batches.py
@ -44,6 +44,7 @@ class Batches(Protocol):
    """

    @webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/batches", method="POST", level=LLAMA_STACK_API_V1)
    async def create_batch(
        self,
        input_file_id: str,
@ -64,6 +65,7 @@ class Batches(Protocol):
        ...

    @webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def retrieve_batch(self, batch_id: str) -> BatchObject:
        """Retrieve information about a specific batch.

@ -73,6 +75,7 @@ class Batches(Protocol):
        ...

    @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
    async def cancel_batch(self, batch_id: str) -> BatchObject:
        """Cancel a batch that is in progress.

@ -82,6 +85,7 @@ class Batches(Protocol):
        ...

    @webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/batches", method="GET", level=LLAMA_STACK_API_V1)
    async def list_batches(
        self,
        after: str | None = None,
--- a/llama_stack/apis/files/files.py
+++ b/llama_stack/apis/files/files.py
@ -106,6 +106,7 @@ class OpenAIFileDeleteResponse(BaseModel):
 class Files(Protocol):
    # OpenAI Files API Endpoints
    @webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_upload_file(
        self,
        file: Annotated[UploadFile, File()],
@ -129,6 +130,7 @@ class Files(Protocol):
        ...

    @webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/files", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_list_files(
        self,
        after: str | None = None,
@ -148,6 +150,7 @@ class Files(Protocol):
        ...

    @webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_retrieve_file(
        self,
        file_id: str,
@ -161,6 +164,7 @@ class Files(Protocol):
        ...

    @webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    async def openai_delete_file(
        self,
        file_id: str,
@ -174,6 +178,7 @@ class Files(Protocol):
        ...

    @webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_retrieve_file_content(
        self,
        file_id: str,
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@ -1090,6 +1090,7 @@ class InferenceProvider(Protocol):
        return  # this is so mypy's safe-super rule will consider the method concrete

    @webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_completion(
        self,
        # Standard OpenAI completion parameters
@ -1141,6 +1142,7 @@ class InferenceProvider(Protocol):
        ...

    @webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_chat_completion(
        self,
        model: str,
@ -1197,6 +1199,7 @@ class InferenceProvider(Protocol):
        ...

    @webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_embeddings(
        self,
        model: str,
@ -1226,6 +1229,7 @@ class Inference(InferenceProvider):
    """

    @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
    async def list_chat_completions(
        self,
        after: str | None = None,
@ -1244,6 +1248,7 @@ class Inference(InferenceProvider):
        raise NotImplementedError("List chat completions is not implemented")

    @webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
        """Describe a chat completion by its ID.

--- a/llama_stack/apis/safety/safety.py
+++ b/llama_stack/apis/safety/safety.py
@ -115,6 +115,7 @@ class Safety(Protocol):
        ...

    @webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
    async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
        """Classifies if text and/or image inputs are potentially harmful.
        :param input: Input (or inputs) to classify.
--- a/llama_stack/apis/vector_io/vector_io.py
+++ b/llama_stack/apis/vector_io/vector_io.py
@ -474,6 +474,7 @@ class VectorIO(Protocol):

    # OpenAI Vector Stores API endpoints
    @webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_create_vector_store(
        self,
        name: str | None = None,
@ -500,6 +501,7 @@ class VectorIO(Protocol):
        ...

    @webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_list_vector_stores(
        self,
        limit: int | None = 20,
@ -518,6 +520,7 @@ class VectorIO(Protocol):
        ...

    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_retrieve_vector_store(
        self,
        vector_store_id: str,
@ -530,6 +533,7 @@ class VectorIO(Protocol):
        ...

    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_update_vector_store(
        self,
        vector_store_id: str,
@ -548,6 +552,7 @@ class VectorIO(Protocol):
        ...

    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    async def openai_delete_vector_store(
        self,
        vector_store_id: str,
@ -560,6 +565,7 @@ class VectorIO(Protocol):
        ...

    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_search_vector_store(
        self,
        vector_store_id: str,
@ -586,6 +592,7 @@ class VectorIO(Protocol):
        ...

    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_attach_file_to_vector_store(
        self,
        vector_store_id: str,
@ -604,6 +611,7 @@ class VectorIO(Protocol):
        ...

    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_list_files_in_vector_store(
        self,
        vector_store_id: str,
@ -628,6 +636,7 @@ class VectorIO(Protocol):
    @webmethod(
        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1
    )
+    @webmethod(route="/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_retrieve_vector_store_file(
        self,
        vector_store_id: str,
@ -646,6 +655,11 @@ class VectorIO(Protocol):
        method="GET",
        level=LLAMA_STACK_API_V1,
    )
+    @webmethod(
+        route="/vector_stores/{vector_store_id}/files/{file_id}/content",
+        method="GET",
+        level=LLAMA_STACK_API_V1,
+    )
    async def openai_retrieve_vector_store_file_contents(
        self,
        vector_store_id: str,
@ -662,6 +676,7 @@ class VectorIO(Protocol):
    @webmethod(
        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1
    )
+    @webmethod(route="/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_update_vector_store_file(
        self,
        vector_store_id: str,
@ -680,6 +695,7 @@ class VectorIO(Protocol):
    @webmethod(
        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1
    )
+    @webmethod(route="/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    async def openai_delete_vector_store_file(
        self,
        vector_store_id: str,
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@ -274,7 +274,7 @@ def require_server(llama_stack_client):

@pytest.fixture(scope="session")
 def openai_client(llama_stack_client, require_server):
-    base_url = f"{llama_stack_client.base_url}/v1/openai/v1"
+    base_url = f"{llama_stack_client.base_url}/v1"
    return OpenAI(base_url=base_url, api_key="fake")


--- a/tests/integration/inference/test_openai_embeddings.py
+++ b/tests/integration/inference/test_openai_embeddings.py
@ -87,7 +87,7 @@ def skip_if_model_doesnt_support_openai_embeddings(client, model_id):

@pytest.fixture
 def openai_client(client_with_models):
-    base_url = f"{client_with_models.base_url}/v1/openai/v1"
+    base_url = f"{client_with_models.base_url}/v1"
    return OpenAI(base_url=base_url, api_key="fake")