diff --git a/source/api_definitions.py b/source/api_definitions.py
index 84e0954f7..626fa0bdd 100644
--- a/source/api_definitions.py
+++ b/source/api_definitions.py
@@ -143,22 +143,26 @@ class BatchChatCompletionRequest:
 
 
 class Inference(Protocol):
-    """Set of methods that can be called on the inference service."""
+
+    @webmethod(route="/inference/completion")
     def post_completion(
         self,
         request: CompletionRequest,
     ) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ...
 
+    @webmethod(route="/inference/chat_completion")
     def post_chat_completion(
         self,
         request: ChatCompletionRequest,
     ) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...
 
+    @webmethod(route="/inference/batch_completion")
     def post_batch_completion(
         self,
         request: BatchCompletionRequest,
     ) -> List[CompletionResponse]: ...
 
+    @webmethod(route="/inference/batch_chat_completion")
     def post_batch_chat_completion(
         self,
         request: BatchChatCompletionRequest,
diff --git a/source/openapi.html b/source/openapi.html
index a4b8e7ca7..ceb92c59f 100644
--- a/source/openapi.html
+++ b/source/openapi.html
@@ -386,7 +386,7 @@
                 ]
             }
         },
-        "/batch_chat_completion": {
+        "/inference/batch_chat_completion": {
             "post": {
                 "responses": {
                     "200": {
@@ -416,7 +416,7 @@
                 }
             }
         },
-        "/batch_completion": {
+        "/inference/batch_completion": {
             "post": {
                 "responses": {
                     "200": {
@@ -446,7 +446,7 @@
                 }
             }
         },
-        "/chat_completion": {
+        "/inference/chat_completion": {
             "post": {
                 "responses": {
                     "200": {
@@ -483,7 +483,7 @@
                 }
             }
         },
-        "/completion": {
+        "/inference/completion": {
             "post": {
                 "responses": {
                     "200": {
@@ -3317,27 +3317,26 @@
         }
     ],
     "tags": [
+        {
+            "name": "RewardScoring"
+        },
         {
             "name": "PostTraining"
         },
-        {
-            "name": "Inference",
-            "x-displayName": "Set of methods that can be called on the inference service."
-        },
-        {
-            "name": "MemoryBanks"
-        },
-        {
-            "name": "Datasets"
-        },
         {
             "name": "AgenticSystem"
         },
+        {
+            "name": "Datasets"
+        },
         {
             "name": "SyntheticDataGeneration"
         },
         {
-            "name": "RewardScoring"
+            "name": "Inference"
+        },
+        {
+            "name": "MemoryBanks"
         },
         {
             "name": "ShieldConfig",
diff --git a/source/openapi.yaml b/source/openapi.yaml
index e14d457e2..dc25460bf 100644
--- a/source/openapi.yaml
+++ b/source/openapi.yaml
@@ -1563,83 +1563,6 @@ paths:
             agent execution response.
       tags:
       - AgenticSystem
-  /batch_chat_completion:
-    post:
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/BatchChatCompletionRequest'
-        required: true
-      responses:
-        '200':
-          content:
-            application/jsonl:
-              schema:
-                $ref: '#/components/schemas/ChatCompletionResponse'
-          description: OK
-      tags:
-      - Inference
-  /batch_completion:
-    post:
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/BatchCompletionRequest'
-        required: true
-      responses:
-        '200':
-          content:
-            application/jsonl:
-              schema:
-                $ref: '#/components/schemas/CompletionResponse'
-          description: OK
-      tags:
-      - Inference
-  /chat_completion:
-    post:
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/ChatCompletionRequest'
-        required: true
-      responses:
-        '200':
-          content:
-            application/json:
-              schema:
-                oneOf:
-                - $ref: '#/components/schemas/ChatCompletionResponse'
-                - $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
-          description: Normal chat completion response. **OR** Streamed chat completion
-            response. The actual response is a series of such objects.
-      tags:
-      - Inference
-  /completion:
-    post:
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/CompletionRequest'
-        required: true
-      responses:
-        '200':
-          content:
-            application/json:
-              schema:
-                oneOf:
-                - $ref: '#/components/schemas/CompletionResponse'
-                - $ref: '#/components/schemas/CompletionResponseStreamChunk'
-          description: Normal completion response. **OR** streamed completion response.
-      tags:
-      - Inference
   /datasets/create:
     post:
       parameters: []
@@ -1684,6 +1607,83 @@ paths:
           description: OK
       tags:
       - Datasets
+  /inference/batch_chat_completion:
+    post:
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/BatchChatCompletionRequest'
+        required: true
+      responses:
+        '200':
+          content:
+            application/jsonl:
+              schema:
+                $ref: '#/components/schemas/ChatCompletionResponse'
+          description: OK
+      tags:
+      - Inference
+  /inference/batch_completion:
+    post:
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/BatchCompletionRequest'
+        required: true
+      responses:
+        '200':
+          content:
+            application/jsonl:
+              schema:
+                $ref: '#/components/schemas/CompletionResponse'
+          description: OK
+      tags:
+      - Inference
+  /inference/chat_completion:
+    post:
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/ChatCompletionRequest'
+        required: true
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                oneOf:
+                - $ref: '#/components/schemas/ChatCompletionResponse'
+                - $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
+          description: Normal chat completion response. **OR** Streamed chat completion
+            response. The actual response is a series of such objects.
+      tags:
+      - Inference
+  /inference/completion:
+    post:
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CompletionRequest'
+        required: true
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                oneOf:
+                - $ref: '#/components/schemas/CompletionResponse'
+                - $ref: '#/components/schemas/CompletionResponseStreamChunk'
+          description: Normal completion response. **OR** streamed completion response.
+      tags:
+      - Inference
   /memory_bank/delete:
     post:
       parameters:
@@ -2015,14 +2015,13 @@ security:
 servers:
 - url: http://any-hosted-llama-stack.com
 tags:
-- name: PostTraining
-- name: Inference
-  x-displayName: Set of methods that can be called on the inference service.
-- name: MemoryBanks
-- name: Datasets
-- name: AgenticSystem
-- name: SyntheticDataGeneration
 - name: RewardScoring
+- name: PostTraining
+- name: AgenticSystem
+- name: Datasets
+- name: SyntheticDataGeneration
+- name: Inference
+- name: MemoryBanks
 - description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" />
   name: ShieldConfig
 - description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"