feat(openai_movement)!: Change URL structures to kill /openai/v1 (part 2) (#3605)

2025-12-03 09:53:45 +00:00 · 2025-09-29 22:57:37 -07:00 · 2025-09-29 22:57:37 -07:00 · 56b625d18a
commit 56b625d18a
parent 3a09f00cdb
10 changed files with 3 additions and 2834 deletions
--- a/docs/docs/providers/openai.mdx
+++ b/docs/docs/providers/openai.mdx
@ -7,7 +7,7 @@ sidebar_position: 1

 ### Server path

-Llama Stack exposes an OpenAI-compatible API endpoint at `/v1/openai/v1`. So, for a Llama Stack server running locally on port `8321`, the full url to the OpenAI-compatible API endpoint is `http://localhost:8321/v1/openai/v1`.
+Llama Stack exposes OpenAI-compatible API endpoints under `/v1`. So, for a Llama Stack server running locally on port `8321`, the base URL for the OpenAI-compatible API is `http://localhost:8321/v1`.

 ### Clients

@ -25,12 +25,12 @@ client = LlamaStackClient(base_url="http://localhost:8321")

 #### OpenAI Client

-When using an OpenAI client, set the `base_url` to the `/v1/openai/v1` path on your Llama Stack server.
+When using an OpenAI client, set the `base_url` to the `/v1` path on your Llama Stack server.

 ```python
 from openai import OpenAI

-client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")
+client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")
 ```

 Regardless of the client you choose, the following code examples should all work the same.
--- a/docs/static/llama-stack-spec.html
+++ b/docs/static/llama-stack-spec.html
--- a/docs/static/llama-stack-spec.yaml
+++ b/docs/static/llama-stack-spec.yaml
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@ -694,7 +694,6 @@ class Agents(Protocol):
    #
    # Both of these APIs are inherently stateful.

-    @webmethod(route="/openai/v1/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
    @webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_openai_response(
        self,
@ -707,7 +706,6 @@ class Agents(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1)
    @webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
    async def create_openai_response(
        self,
@ -733,7 +731,6 @@ class Agents(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1)
    @webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
    async def list_openai_responses(
        self,
@ -752,7 +749,6 @@ class Agents(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
    @webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
    async def list_openai_response_input_items(
        self,
@ -775,7 +771,6 @@ class Agents(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    @webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
        """Delete an OpenAI response by its ID.
--- a/llama_stack/apis/batches/batches.py
+++ b/llama_stack/apis/batches/batches.py
@ -43,7 +43,6 @@ class Batches(Protocol):
    Note: This API is currently under active development and may undergo changes.
    """

-    @webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1)
    @webmethod(route="/batches", method="POST", level=LLAMA_STACK_API_V1)
    async def create_batch(
        self,
@ -64,7 +63,6 @@ class Batches(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
    @webmethod(route="/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def retrieve_batch(self, batch_id: str) -> BatchObject:
        """Retrieve information about a specific batch.
@ -74,7 +72,6 @@ class Batches(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
    @webmethod(route="/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
    async def cancel_batch(self, batch_id: str) -> BatchObject:
        """Cancel a batch that is in progress.
@ -84,7 +81,6 @@ class Batches(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1)
    @webmethod(route="/batches", method="GET", level=LLAMA_STACK_API_V1)
    async def list_batches(
        self,
--- a/llama_stack/apis/files/files.py
+++ b/llama_stack/apis/files/files.py
@ -105,7 +105,6 @@ class OpenAIFileDeleteResponse(BaseModel):
@trace_protocol
 class Files(Protocol):
    # OpenAI Files API Endpoints
-    @webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1)
    @webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_upload_file(
        self,
@ -128,7 +127,6 @@ class Files(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1)
    @webmethod(route="/files", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_list_files(
        self,
@ -148,7 +146,6 @@ class Files(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
    @webmethod(route="/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_retrieve_file(
        self,
@ -162,7 +159,6 @@ class Files(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    @webmethod(route="/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    async def openai_delete_file(
        self,
@ -176,7 +172,6 @@ class Files(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
    @webmethod(route="/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_retrieve_file_content(
        self,
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@ -1089,7 +1089,6 @@ class InferenceProvider(Protocol):
        raise NotImplementedError("Reranking is not implemented")
        return  # this is so mypy's safe-super rule will consider the method concrete

-    @webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1)
    @webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_completion(
        self,
@ -1141,7 +1140,6 @@ class InferenceProvider(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
    @webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_chat_completion(
        self,
@ -1198,7 +1196,6 @@ class InferenceProvider(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1)
    @webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_embeddings(
        self,
@ -1228,7 +1225,6 @@ class Inference(InferenceProvider):
    - Embedding models: these models generate embeddings to be used for semantic search.
    """

-    @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
    @webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
    async def list_chat_completions(
        self,
@ -1247,7 +1243,6 @@ class Inference(InferenceProvider):
        """
        raise NotImplementedError("List chat completions is not implemented")

-    @webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
    @webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
        """Describe a chat completion by its ID.
--- a/llama_stack/apis/models/models.py
+++ b/llama_stack/apis/models/models.py
@ -111,14 +111,6 @@ class Models(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1)
-    async def openai_list_models(self) -> OpenAIListModelsResponse:
-        """List models using the OpenAI API.
-
-        :returns: A OpenAIListModelsResponse.
-        """
-        ...
-
    @webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_model(
        self,
--- a/llama_stack/apis/safety/safety.py
+++ b/llama_stack/apis/safety/safety.py
@ -114,7 +114,6 @@ class Safety(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1)
    @webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
    async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
        """Classifies if text and/or image inputs are potentially harmful.
--- a/llama_stack/apis/vector_io/vector_io.py
+++ b/llama_stack/apis/vector_io/vector_io.py
@ -473,7 +473,6 @@ class VectorIO(Protocol):
        ...

    # OpenAI Vector Stores API endpoints
-    @webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
    @webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_create_vector_store(
        self,
@ -500,7 +499,6 @@ class VectorIO(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
    @webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_list_vector_stores(
        self,
@ -519,7 +517,6 @@ class VectorIO(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
    @webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_retrieve_vector_store(
        self,
@ -532,7 +529,6 @@ class VectorIO(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1)
    @webmethod(route="/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_update_vector_store(
        self,
@ -551,7 +547,6 @@ class VectorIO(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    @webmethod(route="/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    async def openai_delete_vector_store(
        self,
@ -564,7 +559,6 @@ class VectorIO(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1)
    @webmethod(route="/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_search_vector_store(
        self,
@ -591,7 +585,6 @@ class VectorIO(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1)
    @webmethod(route="/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_attach_file_to_vector_store(
        self,
@ -610,7 +603,6 @@ class VectorIO(Protocol):
        """
        ...

-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1)
    @webmethod(route="/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_list_files_in_vector_store(
        self,
@ -633,9 +625,6 @@ class VectorIO(Protocol):
        """
        ...

-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1
-    )
    @webmethod(route="/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def openai_retrieve_vector_store_file(
        self,
@ -650,11 +639,6 @@ class VectorIO(Protocol):
        """
        ...

-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
-        method="GET",
-        level=LLAMA_STACK_API_V1,
-    )
    @webmethod(
        route="/vector_stores/{vector_store_id}/files/{file_id}/content",
        method="GET",
@ -673,9 +657,6 @@ class VectorIO(Protocol):
        """
        ...

-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1
-    )
    @webmethod(route="/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1)
    async def openai_update_vector_store_file(
        self,
@ -692,9 +673,6 @@ class VectorIO(Protocol):
        """
        ...

-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1
-    )
    @webmethod(route="/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
    async def openai_delete_vector_store_file(
        self,