From bcbe5f32a5046bb72c01d6aa7e23f8b5905358a1 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Sat, 27 Sep 2025 13:06:41 -0700
Subject: [PATCH] feat(openai_movement)!: Change URL structures to kill
 /openai/v1

---
 llama_stack/apis/agents/agents.py       | 10 ++++----
 llama_stack/apis/batches/batches.py     |  8 +++----
 llama_stack/apis/files/files.py         | 10 ++++----
 llama_stack/apis/inference/inference.py | 10 ++++----
 llama_stack/apis/models/models.py       |  2 +-
 llama_stack/apis/safety/safety.py       |  2 +-
 llama_stack/apis/vector_io/vector_io.py | 24 +++++++++----------
 tests/integration/fixtures/common.py    |  2 +-
 .../inference/test_openai_embeddings.py |  2 +-
 9 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py
index e53ca82e2..e8d0c467a 100644
--- a/llama_stack/apis/agents/agents.py
+++ b/llama_stack/apis/agents/agents.py
@@ -694,7 +694,7 @@ class Agents(Protocol):
     #
     # Both of these APIs are inherently stateful.

-    @webmethod(route="/openai/v1/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_openai_response(
         self,
         response_id: str,
@@ -706,7 +706,7 @@
         """
         ...

-    @webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
     async def create_openai_response(
         self,
         input: str | list[OpenAIResponseInput],
@@ -731,7 +731,7 @@
         """
         ...

-    @webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
     async def list_openai_responses(
         self,
         after: str | None = None,
@@ -749,7 +749,7 @@
         """
         ...

-    @webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
     async def list_openai_response_input_items(
         self,
         response_id: str,
@@ -771,7 +771,7 @@
         """
         ...

-    @webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
         """Delete an OpenAI response by its ID.

diff --git a/llama_stack/apis/batches/batches.py b/llama_stack/apis/batches/batches.py
index 5890cbe04..1ee9fdb15 100644
--- a/llama_stack/apis/batches/batches.py
+++ b/llama_stack/apis/batches/batches.py
@@ -43,7 +43,7 @@ class Batches(Protocol):
     Note: This API is currently under active development and may undergo changes.
     """

-    @webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/batches", method="POST", level=LLAMA_STACK_API_V1)
     async def create_batch(
         self,
         input_file_id: str,
@@ -63,7 +63,7 @@
         """
         ...

-    @webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def retrieve_batch(self, batch_id: str) -> BatchObject:
         """Retrieve information about a specific batch.

@@ -72,7 +72,7 @@
         """
         ...

-    @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
     async def cancel_batch(self, batch_id: str) -> BatchObject:
         """Cancel a batch that is in progress.

@@ -81,7 +81,7 @@
         """
         ...

-    @webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/batches", method="GET", level=LLAMA_STACK_API_V1)
     async def list_batches(
         self,
         after: str | None = None,
diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py
index 7e45b55ee..a0f7a5ef2 100644
--- a/llama_stack/apis/files/files.py
+++ b/llama_stack/apis/files/files.py
@@ -105,7 +105,7 @@ class OpenAIFileDeleteResponse(BaseModel):
 @trace_protocol
 class Files(Protocol):
     # OpenAI Files API Endpoints
-    @webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_upload_file(
         self,
         file: Annotated[UploadFile, File()],
@@ -128,7 +128,7 @@
         """
         ...

-    @webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/files", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_files(
         self,
         after: str | None = None,
@@ -147,7 +147,7 @@
         """
         ...

-    @webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_file(
         self,
         file_id: str,
@@ -160,7 +160,7 @@
         """
         ...

-    @webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def openai_delete_file(
         self,
         file_id: str,
@@ -173,7 +173,7 @@
         """
         ...

-    @webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_file_content(
         self,
         file_id: str,
diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py
index 756896796..6d266fe25 100644
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@@ -1109,7 +1109,7 @@ class InferenceProvider(Protocol):
         raise NotImplementedError("Reranking is not implemented")
         return  # this is so mypy's safe-super rule will consider the method concrete

-    @webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_completion(
         self,
         # Standard OpenAI completion parameters
@@ -1160,7 +1160,7 @@
         """
         ...

-    @webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_chat_completion(
         self,
         model: str,
@@ -1216,7 +1216,7 @@
         """
         ...

-    @webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_embeddings(
         self,
         model: str,
@@ -1245,7 +1245,7 @@ class Inference(InferenceProvider):
     - Embedding models: these models generate embeddings to be used for semantic search.
     """

-    @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
     async def list_chat_completions(
         self,
         after: str | None = None,
@@ -1263,7 +1263,7 @@
         """
         raise NotImplementedError("List chat completions is not implemented")

-    @webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
         """Describe a chat completion by its ID.

diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py
index a4f6a888b..17cf2fc58 100644
--- a/llama_stack/apis/models/models.py
+++ b/llama_stack/apis/models/models.py
@@ -111,7 +111,7 @@ class Models(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_models(self) -> OpenAIListModelsResponse:
         """List models using the OpenAI API.

diff --git a/llama_stack/apis/safety/safety.py b/llama_stack/apis/safety/safety.py
index 98367e9b0..bf37b496a 100644
--- a/llama_stack/apis/safety/safety.py
+++ b/llama_stack/apis/safety/safety.py
@@ -114,7 +114,7 @@ class Safety(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
     async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
         """Classifies if text and/or image inputs are potentially harmful.
         :param input: Input (or inputs) to classify.
diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py
index 2850863c4..6371880ed 100644
--- a/llama_stack/apis/vector_io/vector_io.py
+++ b/llama_stack/apis/vector_io/vector_io.py
@@ -473,7 +473,7 @@ class VectorIO(Protocol):
         ...

     # OpenAI Vector Stores API endpoints
-    @webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_create_vector_store(
         self,
         name: str | None = None,
@@ -499,7 +499,7 @@
         """
         ...

-    @webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_vector_stores(
         self,
         limit: int | None = 20,
@@ -517,7 +517,7 @@
         """
         ...

-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_vector_store(
         self,
         vector_store_id: str,
@@ -529,7 +529,7 @@
         """
         ...

-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_update_vector_store(
         self,
         vector_store_id: str,
@@ -547,7 +547,7 @@
         """
         ...

-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def openai_delete_vector_store(
         self,
         vector_store_id: str,
@@ -559,7 +559,7 @@
         """
         ...

-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_search_vector_store(
         self,
         vector_store_id: str,
@@ -585,7 +585,7 @@
         """
         ...

-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_attach_file_to_vector_store(
         self,
         vector_store_id: str,
@@ -603,7 +603,7 @@
         """
         ...

-    @webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1)
+    @webmethod(route="/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_files_in_vector_store(
         self,
         vector_store_id: str,
@@ -626,7 +626,7 @@
         ...

     @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1
+        route="/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1
     )
     async def openai_retrieve_vector_store_file(
         self,
@@ -642,7 +642,7 @@
         ...

     @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
+        route="/vector_stores/{vector_store_id}/files/{file_id}/content",
         method="GET",
         level=LLAMA_STACK_API_V1,
     )
@@ -660,7 +660,7 @@
         ...

     @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1
+        route="/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1
     )
     async def openai_update_vector_store_file(
         self,
@@ -678,7 +678,7 @@
         ...

     @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1
+        route="/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1
     )
     async def openai_delete_vector_store_file(
         self,
diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py
index ee4c5755a..a56da83c3 100644
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@@ -274,7 +274,7 @@ def require_server(llama_stack_client):

 @pytest.fixture(scope="session")
 def openai_client(llama_stack_client, require_server):
-    base_url = f"{llama_stack_client.base_url}/v1/openai/v1"
+    base_url = f"{llama_stack_client.base_url}/v1"
     return OpenAI(base_url=base_url, api_key="fake")

diff --git a/tests/integration/inference/test_openai_embeddings.py b/tests/integration/inference/test_openai_embeddings.py
index 92064b651..84e92706a 100644
--- a/tests/integration/inference/test_openai_embeddings.py
+++ b/tests/integration/inference/test_openai_embeddings.py
@@ -87,7 +87,7 @@ def skip_if_model_doesnt_support_openai_embeddings(client, model_id):

 @pytest.fixture
 def openai_client(client_with_models):
-    base_url = f"{client_with_models.base_url}/v1/openai/v1"
+    base_url = f"{client_with_models.base_url}/v1"
     return OpenAI(base_url=base_url, api_key="fake")
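
Migration note: with these route changes, the OpenAI-compatible endpoints are
served directly under the stack's /v1 prefix instead of the nested
/v1/openai/v1. A minimal client-side sketch of what this means for OpenAI SDK
users, mirroring the fixture change in tests/integration/fixtures/common.py;
the server address and model id below are illustrative placeholders, not
values taken from this patch:

    from openai import OpenAI

    # Before this patch, the OpenAI-compatible routes were double-prefixed:
    #   client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="fake")
    # After this patch, they sit directly under the versioned root, so e.g.
    # client.chat.completions.create(...) issues POST {base_url}/chat/completions,
    # matching @webmethod(route="/chat/completions", method="POST", ...).
    client = OpenAI(base_url="http://localhost:8321/v1", api_key="fake")

    response = client.chat.completions.create(
        model="my-model",  # placeholder model id
        messages=[{"role": "user", "content": "Hello"}],
    )
    print(response.choices[0].message.content)

The same base_url applies to the other relocated route families (/batches,
/files, /models, /moderations, /responses, /vector_stores), since only the
/openai/v1 segment was removed; the paths below the prefix are unchanged.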