From 67f0510edd909a2394b429be1ff8d639ea178a49 Mon Sep 17 00:00:00 2001
From: rsm <rsm@devgpu008.pci3.facebook.com>
Date: Sun, 21 Jul 2024 12:19:52 -0700
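Subject: [PATCH] rename ModelInference to Inference

Rename the ModelInference protocol and its Impl, Client, Config and
HydraConfig classes to their Inference* equivalents, and rename the
`model_inference_config` YAML key (and ConfigStore entry) to
`inference_config`. The inference server now reads the
`inference_config` key, so existing config files that still use the
old key must be updated.

Also fix the typo in `convert_to_model_inferene_config`, which becomes
`convert_to_inference_config`, and regenerate
toolchain/spec/openapi.{html,yaml}. The regenerated spec differs by
more than the rename (for example, `quantization_type` is now `type`
in the quantization config schemas), so review the spec diff on its
own.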

---
 create_config.sh                        |    2 +-
 toolchain/cli/inference/configure.py    |    2 +-
 toolchain/configs/ashwin.yaml           |    4 +-
 toolchain/configs/chrisluc.yaml         |    2 +-
 toolchain/configs/cyni.yaml             |    2 +-
 toolchain/configs/default.yaml          |    2 +-
 toolchain/configs/hjshah.yaml           |    2 +-
 toolchain/configs/long_seqlen.yaml      |    2 +-
 toolchain/inference/api/config.py       |   12 +-
 toolchain/inference/api/endpoints.py    |    2 +-
 toolchain/inference/api_instance.py     |   12 +-
 toolchain/inference/client.py           |    6 +-
 toolchain/inference/inference.py        |    4 +-
 toolchain/inference/server.py           |   10 +-
 toolchain/spec/generate.py              |    2 +-
 toolchain/spec/openapi.html             | 1444 ++++++-----------------
 toolchain/spec/openapi.yaml             |  592 ++--------
 toolchain/spec/run_openapi_generator.sh |    2 +-
 18 files changed, 468 insertions(+), 1636 deletions(-)
diff --git a/create_config.sh b/create_config.sh
index bf3a6b28e..2b500217e 100644
--- a/create_config.sh
+++ b/create_config.sh
@@ -30,7 +30,7 @@ create_parent_dir() {
 # Function to output the YAML configuration
 output_yaml() {
     cat <<EOL > ${yaml_output_path}
-model_inference_config:
+inference_config:
   impl_type: "inline"
   inline_config:
     checkpoint_type: "pytorch"
diff --git a/toolchain/cli/inference/configure.py b/toolchain/cli/inference/configure.py
index df97ebf04..0c0ae61fe 100644
--- a/toolchain/cli/inference/configure.py
+++ b/toolchain/cli/inference/configure.py
@@ -47,7 +47,7 @@ class InferenceConfigure(Subcommand):
         yaml_output_path
     ):
         yaml_content = textwrap.dedent(f"""
-            model_inference_config:
+            inference_config:
                 impl_type: "inline"
                 inline_config:
                     checkpoint_type: "pytorch"
diff --git a/toolchain/configs/ashwin.yaml b/toolchain/configs/ashwin.yaml
index 8eec6d923..21ab6b880 100644
--- a/toolchain/configs/ashwin.yaml
+++ b/toolchain/configs/ashwin.yaml
@@ -1,4 +1,4 @@
-model_inference_config:
+inference_config:
   impl_type: "inline"
   inline_config:
     checkpoint_type: "pytorch"
@@ -7,5 +7,5 @@ model_inference_config:
     model_parallel_size: 8
     max_seq_len: 2048
     max_batch_size: 1
-    quantization: 
+    quantization:
       type: "fp8"
diff --git a/toolchain/configs/chrisluc.yaml b/toolchain/configs/chrisluc.yaml
index be51a534c..c44f9524e 100644
--- a/toolchain/configs/chrisluc.yaml
+++ b/toolchain/configs/chrisluc.yaml
@@ -1,4 +1,4 @@
-model_inference_config:
+inference_config:
   impl_type: "inline"
   inline_config:
     checkpoint_type: "pytorch"
diff --git a/toolchain/configs/cyni.yaml b/toolchain/configs/cyni.yaml
index d0c47b397..e8edbf036 100644
--- a/toolchain/configs/cyni.yaml
+++ b/toolchain/configs/cyni.yaml
@@ -1,4 +1,4 @@
-model_inference_config:
+inference_config:
   impl_type: "inline"
   inline_config:
     checkpoint_type: "pytorch"
diff --git a/toolchain/configs/default.yaml b/toolchain/configs/default.yaml
index 642a55f22..d13f37226 100644
--- a/toolchain/configs/default.yaml
+++ b/toolchain/configs/default.yaml
@@ -1,4 +1,4 @@
-model_inference_config:
+inference_config:
   impl_type: "inline"
   inline_config:
     checkpoint_type: "pytorch"
diff --git a/toolchain/configs/hjshah.yaml b/toolchain/configs/hjshah.yaml
index 98e2660ea..089ab1b5a 100644
--- a/toolchain/configs/hjshah.yaml
+++ b/toolchain/configs/hjshah.yaml
@@ -1,4 +1,4 @@
-model_inference_config:
+inference_config:
   impl_type: "inline"
   inline_config:
     checkpoint_type: "pytorch"
diff --git a/toolchain/configs/long_seqlen.yaml b/toolchain/configs/long_seqlen.yaml
index e137d0273..9eaeab1bd 100644
--- a/toolchain/configs/long_seqlen.yaml
+++ b/toolchain/configs/long_seqlen.yaml
@@ -1,4 +1,4 @@
-model_inference_config:
+inference_config:
   impl_type: "inline"
   inline_config:
     checkpoint_type: "pytorch"
diff --git a/toolchain/inference/api/config.py b/toolchain/inference/api/config.py
index 4a6c5145f..5994e805b 100644
--- a/toolchain/inference/api/config.py
+++ b/toolchain/inference/api/config.py
@@ -75,7 +75,7 @@ class RemoteImplConfig(BaseModel):
     url: str = Field(..., description="The URL of the remote module")
 
 
-class ModelInferenceConfig(BaseModel):
+class InferenceConfig(BaseModel):
     impl_config: Annotated[
         Union[InlineImplConfig, RemoteImplConfig],
         Field(discriminator="impl_type"),
@@ -130,7 +130,7 @@ class RemoteImplHydraConfig:
 
 
 @dataclass
-class ModelInferenceHydraConfig:
+class InferenceHydraConfig:
     impl_type: str
     inline_config: Optional[InlineImplHydraConfig] = None
     remote_config: Optional[RemoteImplHydraConfig] = None
@@ -142,18 +142,18 @@ class ModelInferenceHydraConfig:
         if self.impl_type == "remote":
             assert self.remote_config is not None
 
-    def convert_to_model_inferene_config(self):
+    def convert_to_inference_config(self):
         if self.impl_type == "inline":
             inline_config = InlineImplHydraConfig(**self.inline_config)
-            return ModelInferenceConfig(
+            return InferenceConfig(
                 impl_config=inline_config.convert_to_inline_impl_config()
             )
         elif self.impl_type == "remote":
             remote_config = RemoteImplHydraConfig(**self.remote_config)
-            return ModelInferenceConfig(
+            return InferenceConfig(
                 impl_config=remote_config.convert_to_remote_impl_config()
             )
 
 
 cs = ConfigStore.instance()
-cs.store(name="model_inference_config", node=ModelInferenceHydraConfig)
+cs.store(name="inference_config", node=InferenceHydraConfig)
diff --git a/toolchain/inference/api/endpoints.py b/toolchain/inference/api/endpoints.py
index 5b262a99c..20efbd111 100644
--- a/toolchain/inference/api/endpoints.py
+++ b/toolchain/inference/api/endpoints.py
@@ -90,7 +90,7 @@ class BatchChatCompletionResponse(BaseModel):
     completion_message_batch: List[CompletionMessage]
 
 
-class ModelInference(Protocol):
+class Inference(Protocol):
 
     @webmethod(route="/inference/completion")
     async def completion(
diff --git a/toolchain/inference/api_instance.py b/toolchain/inference/api_instance.py
index 6110fd257..d39d642be 100644
--- a/toolchain/inference/api_instance.py
+++ b/toolchain/inference/api_instance.py
@@ -1,12 +1,12 @@
-from .api.config import ImplType, ModelInferenceConfig
+from .api.config import ImplType, InferenceConfig
 
 
-async def get_inference_api_instance(config: ModelInferenceConfig):
+async def get_inference_api_instance(config: InferenceConfig):
     if config.impl_config.impl_type == ImplType.inline.value:
-        from .inference import ModelInferenceImpl
+        from .inference import InferenceImpl
 
-        return ModelInferenceImpl(config.impl_config)
+        return InferenceImpl(config.impl_config)
 
-    from .client import ModelInferenceClient
+    from .client import InferenceClient
 
-    return ModelInferenceClient(config.impl_config.url)
+    return InferenceClient(config.impl_config.url)
diff --git a/toolchain/inference/client.py b/toolchain/inference/client.py
index a4d2b641f..317637efa 100644
--- a/toolchain/inference/client.py
+++ b/toolchain/inference/client.py
@@ -10,12 +10,12 @@ from .api import (
     ChatCompletionResponseStreamChunk,
     CompletionRequest,
     InstructModel,
-    ModelInference,
+    Inference,
     UserMessage,
 )
 
 
-class ModelInferenceClient(ModelInference):
+class InferenceClient(Inference):
     def __init__(self, base_url: str):
         self.base_url = base_url
 
@@ -48,7 +48,7 @@ class ModelInferenceClient(ModelInference):
 
 
 async def run_main(host: str, port: int):
-    client = ModelInferenceClient(f"http://{host}:{port}")
+    client = InferenceClient(f"http://{host}:{port}")
 
     message = UserMessage(content="hello world, help me out here")
     req = ChatCompletionRequest(
diff --git a/toolchain/inference/inference.py b/toolchain/inference/inference.py
index 5ec1c897d..94228ac7b 100644
--- a/toolchain/inference/inference.py
+++ b/toolchain/inference/inference.py
@@ -18,12 +18,12 @@ from .api.endpoints import (
     ChatCompletionRequest,
     ChatCompletionResponseStreamChunk,
     CompletionRequest,
-    ModelInference,
+    Inference,
 )
 from .model_parallel import LlamaModelParallelGenerator
 
 
-class ModelInferenceImpl(ModelInference):
+class InferenceImpl(Inference):
 
     def __init__(self, config: InlineImplConfig) -> None:
         self.config = config
diff --git a/toolchain/inference/server.py b/toolchain/inference/server.py
index a2846f136..01a905b2a 100644
--- a/toolchain/inference/server.py
+++ b/toolchain/inference/server.py
@@ -11,7 +11,7 @@ from fastapi.responses import StreamingResponse
 from omegaconf import OmegaConf
 
 from toolchain.utils import get_default_config_dir, parse_config
-from .api.config import ModelInferenceHydraConfig
+from .api.config import InferenceHydraConfig
 from .api.endpoints import ChatCompletionRequest, ChatCompletionResponseStreamChunk
 
 from .api_instance import get_inference_api_instance
@@ -43,13 +43,13 @@ async def startup():
     global InferenceApiInstance
 
     config = get_config()
-    hydra_config = ModelInferenceHydraConfig(
-        **OmegaConf.to_container(config["model_inference_config"], resolve=True)
+    hydra_config = InferenceHydraConfig(
+        **OmegaConf.to_container(config["inference_config"], resolve=True)
     )
-    model_inference_config = hydra_config.convert_to_model_inferene_config()
+    inference_config = hydra_config.convert_to_inference_config()
 
     InferenceApiInstance = await get_inference_api_instance(
-        model_inference_config,
+        inference_config,
     )
     await InferenceApiInstance.initialize()
 
diff --git a/toolchain/spec/generate.py b/toolchain/spec/generate.py
index 5b4bd9e04..974885b2b 100644
--- a/toolchain/spec/generate.py
+++ b/toolchain/spec/generate.py
@@ -16,7 +16,7 @@ from agentic_system.api import *  # noqa: F403
 
 
 class LlamaStackEndpoints(
-    ModelInference,
+    Inference,
     AgenticSystem,
     RewardScoring,
     SyntheticDataGeneration,
diff --git a/toolchain/spec/openapi.html b/toolchain/spec/openapi.html
index b09bf6c48..81d720fcb 100644
--- a/toolchain/spec/openapi.html
+++ b/toolchain/spec/openapi.html
@@ -21,7 +21,7 @@
     "info": {
         "title": "[DRAFT] Llama Stack Specification",
         "version": "0.0.1",
-        "description": "This is the specification of the llama stack that provides\n                a set of endpoints and their corresponding interfaces that are tailored to\n                best leverage Llama Models. The specification is still in draft and subject to change.\n                Generated at 2024-07-19 11:49:56.794897"
+        "description": "This is the specification of the llama stack that provides\n                a set of endpoints and their corresponding interfaces that are tailored to\n                best leverage Llama Models. The specification is still in draft and subject to change.\n                Generated at 2024-07-21 12:19:33.327857"
     },
     "servers": [
         {
@@ -29,213 +29,6 @@
         }
     ],
     "paths": {
-        "/agentic_system/memory_bank/attach": {
-            "post": {
-                "responses": {
-                    "200": {
-                        "description": "OK"
-                    }
-                },
-                "tags": [
-                    "AgenticSystem"
-                ],
-                "parameters": [
-                    {
-                        "name": "agent_id",
-                        "in": "query",
-                        "required": true,
-                        "schema": {
-                            "type": "string"
-                        }
-                    },
-                    {
-                        "name": "session_id",
-                        "in": "query",
-                        "required": true,
-                        "schema": {
-                            "type": "string"
-                        }
-                    }
-                ],
-                "requestBody": {
-                    "content": {
-                        "application/json": {
-                            "schema": {
-                                "type": "array",
-                                "items": {
-                                    "type": "string"
-                                }
-                            }
-                        }
-                    },
-                    "required": true
-                }
-            }
-        },
-        "/inference/batch_chat_completion": {
-            "post": {
-                "responses": {
-                    "200": {
-                        "description": "OK",
-                        "content": {
-                            "application/jsonl": {
-                                "schema": {
-                                    "$ref": "#/components/schemas/ChatCompletionResponse"
-                                }
-                            }
-                        }
-                    }
-                },
-                "tags": [
-                    "ModelInference"
-                ],
-                "parameters": [],
-                "requestBody": {
-                    "content": {
-                        "application/json": {
-                            "schema": {
-                                "$ref": "#/components/schemas/BatchChatCompletionRequest"
-                            }
-                        }
-                    },
-                    "required": true
-                }
-            }
-        },
-        "/inference/batch_completion": {
-            "post": {
-                "responses": {
-                    "200": {
-                        "description": "OK",
-                        "content": {
-                            "application/jsonl": {
-                                "schema": {
-                                    "$ref": "#/components/schemas/CompletionResponse"
-                                }
-                            }
-                        }
-                    }
-                },
-                "tags": [
-                    "ModelInference"
-                ],
-                "parameters": [],
-                "requestBody": {
-                    "content": {
-                        "application/json": {
-                            "schema": {
-                                "$ref": "#/components/schemas/BatchCompletionRequest"
-                            }
-                        }
-                    },
-                    "required": true
-                }
-            }
-        },
-        "/evaluate/job/cancel": {
-            "get": {
-                "responses": {
-                    "200": {
-                        "description": "OK"
-                    }
-                },
-                "tags": [
-                    "Evaluations"
-                ],
-                "parameters": [
-                    {
-                        "name": "job_uuid",
-                        "in": "query",
-                        "required": true,
-                        "schema": {
-                            "type": "string"
-                        }
-                    }
-                ]
-            }
-        },
-        "/post_training/job/cancel": {
-            "get": {
-                "responses": {
-                    "200": {
-                        "description": "OK"
-                    }
-                },
-                "tags": [
-                    "PostTraining"
-                ],
-                "parameters": [
-                    {
-                        "name": "job_uuid",
-                        "in": "query",
-                        "required": true,
-                        "schema": {
-                            "type": "string"
-                        }
-                    }
-                ]
-            }
-        },
-        "/inference/chat_completion": {
-            "post": {
-                "responses": {
-                    "200": {
-                        "description": "SSE-stream of these events.",
-                        "content": {
-                            "application/json": {
-                                "schema": {
-                                    "$ref": "#/components/schemas/ChatCompletionResponseStreamChunk"
-                                }
-                            }
-                        }
-                    }
-                },
-                "tags": [
-                    "ModelInference"
-                ],
-                "parameters": [],
-                "requestBody": {
-                    "content": {
-                        "application/json": {
-                            "schema": {
-                                "$ref": "#/components/schemas/ChatCompletionRequest"
-                            }
-                        }
-                    },
-                    "required": true
-                }
-            }
-        },
-        "/inference/completion": {
-            "post": {
-                "responses": {
-                    "200": {
-                        "description": "streamed completion response.",
-                        "content": {
-                            "application/json": {
-                                "schema": {
-                                    "$ref": "#/components/schemas/CompletionResponseStreamChunk"
-                                }
-                            }
-                        }
-                    }
-                },
-                "tags": [
-                    "ModelInference"
-                ],
-                "parameters": [],
-                "requestBody": {
-                    "content": {
-                        "application/json": {
-                            "schema": {
-                                "$ref": "#/components/schemas/CompletionRequest"
-                            }
-                        }
-                    },
-                    "required": true
-                }
-            }
-        },
         "/agentic_system/create": {
             "post": {
                 "responses": {
@@ -464,49 +257,6 @@
                 }
             }
         },
-        "/agentic_system/memory_bank/detach": {
-            "post": {
-                "responses": {
-                    "200": {
-                        "description": "OK"
-                    }
-                },
-                "tags": [
-                    "AgenticSystem"
-                ],
-                "parameters": [
-                    {
-                        "name": "agent_id",
-                        "in": "query",
-                        "required": true,
-                        "schema": {
-                            "type": "string"
-                        }
-                    },
-                    {
-                        "name": "session_id",
-                        "in": "query",
-                        "required": true,
-                        "schema": {
-                            "type": "string"
-                        }
-                    }
-                ],
-                "requestBody": {
-                    "content": {
-                        "application/json": {
-                            "schema": {
-                                "type": "array",
-                                "items": {
-                                    "type": "string"
-                                }
-                            }
-                        }
-                    },
-                    "required": true
-                }
-            }
-        },
         "/agentic_system/session/get": {
             "post": {
                 "responses": {
@@ -1262,6 +1012,133 @@
     "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
     "components": {
         "schemas": {
+            "AgenticSystemCreateRequest": {
+                "type": "object",
+                "properties": {
+                    "model": {
+                        "$ref": "#/components/schemas/InstructModel"
+                    },
+                    "instance_config": {
+                        "$ref": "#/components/schemas/AgenticSystemInstanceConfig"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "model",
+                    "instance_config"
+                ]
+            },
+            "AgenticSystemInstanceConfig": {
+                "type": "object",
+                "properties": {
+                    "instructions": {
+                        "type": "string"
+                    },
+                    "sampling_params": {
+                        "$ref": "#/components/schemas/SamplingParams"
+                    },
+                    "available_tools": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/AgenticSystemToolDefinition"
+                        }
+                    },
+                    "input_shields": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/ShieldDefinition"
+                        }
+                    },
+                    "output_shields": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/ShieldDefinition"
+                        }
+                    },
+                    "quantization_config": {
+                        "oneOf": [
+                            {
+                                "$ref": "#/components/schemas/Bf16QuantizationConfig"
+                            },
+                            {
+                                "$ref": "#/components/schemas/Fp8QuantizationConfig"
+                            }
+                        ]
+                    },
+                    "debug_prefix_messages": {
+                        "type": "array",
+                        "items": {
+                            "oneOf": [
+                                {
+                                    "$ref": "#/components/schemas/UserMessage"
+                                },
+                                {
+                                    "$ref": "#/components/schemas/SystemMessage"
+                                },
+                                {
+                                    "$ref": "#/components/schemas/ToolResponseMessage"
+                                },
+                                {
+                                    "$ref": "#/components/schemas/CompletionMessage"
+                                }
+                            ]
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "instructions"
+                ]
+            },
+            "AgenticSystemToolDefinition": {
+                "type": "object",
+                "properties": {
+                    "tool_name": {
+                        "oneOf": [
+                            {
+                                "type": "string",
+                                "enum": [
+                                    "brave_search",
+                                    "wolfram_alpha",
+                                    "photogen",
+                                    "code_interpreter"
+                                ]
+                            },
+                            {
+                                "type": "string"
+                            }
+                        ]
+                    },
+                    "description": {
+                        "type": "string"
+                    },
+                    "parameters": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "$ref": "#/components/schemas/ToolParamDefinition"
+                        }
+                    },
+                    "execution_config": {
+                        "$ref": "#/components/schemas/RestAPIExecutionConfig"
+                    },
+                    "input_shields": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/ShieldDefinition"
+                        }
+                    },
+                    "output_shields": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/ShieldDefinition"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "tool_name"
+                ]
+            },
             "Attachment": {
                 "type": "object",
                 "properties": {
@@ -1278,80 +1155,27 @@
                     "mime_type"
                 ]
             },
-            "BatchChatCompletionRequest": {
-                "type": "object",
-                "properties": {
-                    "model": {
-                        "$ref": "#/components/schemas/InstructModel"
-                    },
-                    "messages_batch": {
-                        "type": "array",
-                        "items": {
-                            "type": "array",
-                            "items": {
-                                "oneOf": [
-                                    {
-                                        "$ref": "#/components/schemas/UserMessage"
-                                    },
-                                    {
-                                        "$ref": "#/components/schemas/SystemMessage"
-                                    },
-                                    {
-                                        "$ref": "#/components/schemas/ToolResponseMessage"
-                                    },
-                                    {
-                                        "$ref": "#/components/schemas/CompletionMessage"
-                                    }
-                                ]
-                            }
-                        }
-                    },
-                    "sampling_params": {
-                        "$ref": "#/components/schemas/SamplingParams"
-                    },
-                    "available_tools": {
-                        "type": "array",
-                        "items": {
-                            "$ref": "#/components/schemas/ToolDefinition"
-                        }
-                    },
-                    "logprobs": {
-                        "type": "object",
-                        "properties": {
-                            "top_k": {
-                                "type": "integer"
-                            }
-                        },
-                        "additionalProperties": false
-                    },
-                    "quantization_config": {
-                        "oneOf": [
-                            {
-                                "$ref": "#/components/schemas/Bf16QuantizationConfig"
-                            },
-                            {
-                                "$ref": "#/components/schemas/Fp8QuantizationConfig"
-                            }
-                        ]
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "model",
-                    "messages_batch"
-                ]
-            },
             "Bf16QuantizationConfig": {
                 "type": "object",
                 "properties": {
-                    "quantization_type": {
+                    "type": {
                         "type": "string",
                         "const": "bf16"
                     }
                 },
                 "additionalProperties": false,
                 "required": [
-                    "quantization_type"
+                    "type"
+                ]
+            },
+            "BuiltinShield": {
+                "type": "string",
+                "enum": [
+                    "llama_guard",
+                    "code_scanner_guard",
+                    "third_party_shield",
+                    "injection_shield",
+                    "jailbreak_shield"
                 ]
             },
             "CompletionMessage": {
@@ -1405,14 +1229,14 @@
             "Fp8QuantizationConfig": {
                 "type": "object",
                 "properties": {
-                    "quantization_type": {
+                    "type": {
                         "type": "string",
                         "const": "fp8"
                     }
                 },
                 "additionalProperties": false,
                 "required": [
-                    "quantization_type"
+                    "type"
                 ]
             },
             "InstructModel": {
@@ -1422,6 +1246,57 @@
                     "llama3_70b_chat"
                 ]
             },
+            "OnViolationAction": {
+                "type": "integer",
+                "enum": [
+                    0,
+                    1,
+                    2
+                ]
+            },
+            "RestAPIExecutionConfig": {
+                "type": "object",
+                "properties": {
+                    "url": {
+                        "$ref": "#/components/schemas/URL"
+                    },
+                    "method": {
+                        "$ref": "#/components/schemas/RestAPIMethod"
+                    },
+                    "params": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "string"
+                        }
+                    },
+                    "headers": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "string"
+                        }
+                    },
+                    "body": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "string"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "url",
+                    "method"
+                ]
+            },
+            "RestAPIMethod": {
+                "type": "string",
+                "enum": [
+                    "GET",
+                    "POST",
+                    "PUT",
+                    "DELETE"
+                ]
+            },
             "SamplingParams": {
                 "type": "object",
                 "properties": {
@@ -1457,6 +1332,41 @@
                     "top_k"
                 ]
             },
+            "ShieldDefinition": {
+                "type": "object",
+                "properties": {
+                    "shield_type": {
+                        "oneOf": [
+                            {
+                                "$ref": "#/components/schemas/BuiltinShield"
+                            },
+                            {
+                                "type": "string"
+                            }
+                        ]
+                    },
+                    "description": {
+                        "type": "string"
+                    },
+                    "parameters": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "$ref": "#/components/schemas/ToolParamDefinition"
+                        }
+                    },
+                    "on_violation_action": {
+                        "$ref": "#/components/schemas/OnViolationAction"
+                    },
+                    "execution_config": {
+                        "$ref": "#/components/schemas/RestAPIExecutionConfig"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "shield_type",
+                    "on_violation_action"
+                ]
+            },
             "StopReason": {
                 "type": "string",
                 "enum": [
@@ -1598,40 +1508,6 @@
                     "arguments"
                 ]
             },
-            "ToolDefinition": {
-                "type": "object",
-                "properties": {
-                    "tool_name": {
-                        "oneOf": [
-                            {
-                                "type": "string",
-                                "enum": [
-                                    "brave_search",
-                                    "wolfram_alpha",
-                                    "photogen",
-                                    "code_interpreter"
-                                ]
-                            },
-                            {
-                                "type": "string"
-                            }
-                        ]
-                    },
-                    "description": {
-                        "type": "string"
-                    },
-                    "parameters": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "$ref": "#/components/schemas/ToolParamDefinition"
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "tool_name"
-                ]
-            },
             "ToolParamDefinition": {
                 "type": "object",
                 "properties": {
@@ -1750,579 +1626,6 @@
                     "content"
                 ]
             },
-            "ChatCompletionResponse": {
-                "type": "object",
-                "properties": {
-                    "completion_message": {
-                        "$ref": "#/components/schemas/CompletionMessage"
-                    },
-                    "logprobs": {
-                        "type": "array",
-                        "items": {
-                            "$ref": "#/components/schemas/TokenLogProbs"
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "completion_message"
-                ]
-            },
-            "TokenLogProbs": {
-                "type": "object",
-                "properties": {
-                    "logprobs_by_token": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "type": "number"
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "logprobs_by_token"
-                ]
-            },
-            "BatchCompletionRequest": {
-                "type": "object",
-                "properties": {
-                    "model": {
-                        "$ref": "#/components/schemas/PretrainedModel"
-                    },
-                    "content_batch": {
-                        "type": "array",
-                        "items": {
-                            "oneOf": [
-                                {
-                                    "type": "string"
-                                },
-                                {
-                                    "$ref": "#/components/schemas/Attachment"
-                                },
-                                {
-                                    "type": "array",
-                                    "items": {
-                                        "oneOf": [
-                                            {
-                                                "type": "string"
-                                            },
-                                            {
-                                                "$ref": "#/components/schemas/Attachment"
-                                            }
-                                        ]
-                                    }
-                                }
-                            ]
-                        }
-                    },
-                    "sampling_params": {
-                        "$ref": "#/components/schemas/SamplingParams"
-                    },
-                    "logprobs": {
-                        "type": "object",
-                        "properties": {
-                            "top_k": {
-                                "type": "integer"
-                            }
-                        },
-                        "additionalProperties": false
-                    },
-                    "quantization_config": {
-                        "oneOf": [
-                            {
-                                "$ref": "#/components/schemas/Bf16QuantizationConfig"
-                            },
-                            {
-                                "$ref": "#/components/schemas/Fp8QuantizationConfig"
-                            }
-                        ]
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "model",
-                    "content_batch"
-                ]
-            },
-            "PretrainedModel": {
-                "type": "string",
-                "enum": [
-                    "llama3_8b",
-                    "llama3_70b"
-                ]
-            },
-            "CompletionResponse": {
-                "type": "object",
-                "properties": {
-                    "completion_message": {
-                        "$ref": "#/components/schemas/CompletionMessage"
-                    },
-                    "logprobs": {
-                        "type": "array",
-                        "items": {
-                            "$ref": "#/components/schemas/TokenLogProbs"
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "completion_message"
-                ]
-            },
-            "ChatCompletionRequest": {
-                "type": "object",
-                "properties": {
-                    "model": {
-                        "$ref": "#/components/schemas/InstructModel"
-                    },
-                    "messages": {
-                        "type": "array",
-                        "items": {
-                            "oneOf": [
-                                {
-                                    "$ref": "#/components/schemas/UserMessage"
-                                },
-                                {
-                                    "$ref": "#/components/schemas/SystemMessage"
-                                },
-                                {
-                                    "$ref": "#/components/schemas/ToolResponseMessage"
-                                },
-                                {
-                                    "$ref": "#/components/schemas/CompletionMessage"
-                                }
-                            ]
-                        }
-                    },
-                    "sampling_params": {
-                        "$ref": "#/components/schemas/SamplingParams"
-                    },
-                    "available_tools": {
-                        "type": "array",
-                        "items": {
-                            "$ref": "#/components/schemas/ToolDefinition"
-                        }
-                    },
-                    "stream": {
-                        "type": "boolean"
-                    },
-                    "logprobs": {
-                        "type": "object",
-                        "properties": {
-                            "top_k": {
-                                "type": "integer"
-                            }
-                        },
-                        "additionalProperties": false
-                    },
-                    "quantization_config": {
-                        "oneOf": [
-                            {
-                                "$ref": "#/components/schemas/Bf16QuantizationConfig"
-                            },
-                            {
-                                "$ref": "#/components/schemas/Fp8QuantizationConfig"
-                            }
-                        ]
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "model",
-                    "messages"
-                ]
-            },
-            "ChatCompletionResponseEvent": {
-                "type": "object",
-                "properties": {
-                    "event_type": {
-                        "$ref": "#/components/schemas/ChatCompletionResponseEventType"
-                    },
-                    "delta": {
-                        "oneOf": [
-                            {
-                                "type": "string"
-                            },
-                            {
-                                "$ref": "#/components/schemas/ToolCallDelta"
-                            }
-                        ]
-                    },
-                    "logprobs": {
-                        "type": "array",
-                        "items": {
-                            "$ref": "#/components/schemas/TokenLogProbs"
-                        }
-                    },
-                    "stop_reason": {
-                        "$ref": "#/components/schemas/StopReason"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "event_type",
-                    "delta"
-                ],
-                "title": "Chat completion response event."
-            },
-            "ChatCompletionResponseEventType": {
-                "type": "string",
-                "enum": [
-                    "start",
-                    "complete",
-                    "progress"
-                ]
-            },
-            "ChatCompletionResponseStreamChunk": {
-                "type": "object",
-                "properties": {
-                    "event": {
-                        "$ref": "#/components/schemas/ChatCompletionResponseEvent"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "event"
-                ],
-                "title": "SSE-stream of these events."
-            },
-            "ToolCallDelta": {
-                "type": "object",
-                "properties": {
-                    "content": {
-                        "oneOf": [
-                            {
-                                "type": "string"
-                            },
-                            {
-                                "$ref": "#/components/schemas/ToolCall"
-                            }
-                        ]
-                    },
-                    "parse_status": {
-                        "$ref": "#/components/schemas/ToolCallParseStatus"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "content",
-                    "parse_status"
-                ]
-            },
-            "ToolCallParseStatus": {
-                "type": "string",
-                "enum": [
-                    "start",
-                    "in_progress",
-                    "failure",
-                    "success"
-                ]
-            },
-            "CompletionRequest": {
-                "type": "object",
-                "properties": {
-                    "model": {
-                        "$ref": "#/components/schemas/PretrainedModel"
-                    },
-                    "content": {
-                        "oneOf": [
-                            {
-                                "type": "string"
-                            },
-                            {
-                                "$ref": "#/components/schemas/Attachment"
-                            },
-                            {
-                                "type": "array",
-                                "items": {
-                                    "oneOf": [
-                                        {
-                                            "type": "string"
-                                        },
-                                        {
-                                            "$ref": "#/components/schemas/Attachment"
-                                        }
-                                    ]
-                                }
-                            }
-                        ]
-                    },
-                    "sampling_params": {
-                        "$ref": "#/components/schemas/SamplingParams"
-                    },
-                    "stream": {
-                        "type": "boolean"
-                    },
-                    "logprobs": {
-                        "type": "object",
-                        "properties": {
-                            "top_k": {
-                                "type": "integer"
-                            }
-                        },
-                        "additionalProperties": false
-                    },
-                    "quantization_config": {
-                        "oneOf": [
-                            {
-                                "$ref": "#/components/schemas/Bf16QuantizationConfig"
-                            },
-                            {
-                                "$ref": "#/components/schemas/Fp8QuantizationConfig"
-                            }
-                        ]
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "model",
-                    "content"
-                ]
-            },
-            "CompletionResponseStreamChunk": {
-                "type": "object",
-                "properties": {
-                    "delta": {
-                        "type": "string"
-                    },
-                    "stop_reason": {
-                        "$ref": "#/components/schemas/StopReason"
-                    },
-                    "logprobs": {
-                        "type": "array",
-                        "items": {
-                            "$ref": "#/components/schemas/TokenLogProbs"
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "delta"
-                ],
-                "title": "streamed completion response."
-            },
-            "AgenticSystemCreateRequest": {
-                "type": "object",
-                "properties": {
-                    "model": {
-                        "$ref": "#/components/schemas/InstructModel"
-                    },
-                    "instance_config": {
-                        "$ref": "#/components/schemas/AgenticSystemInstanceConfig"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "model",
-                    "instance_config"
-                ]
-            },
-            "AgenticSystemInstanceConfig": {
-                "type": "object",
-                "properties": {
-                    "instructions": {
-                        "type": "string"
-                    },
-                    "sampling_params": {
-                        "$ref": "#/components/schemas/SamplingParams"
-                    },
-                    "available_tools": {
-                        "type": "array",
-                        "items": {
-                            "$ref": "#/components/schemas/AgenticSystemToolDefinition"
-                        }
-                    },
-                    "input_shields": {
-                        "type": "array",
-                        "items": {
-                            "$ref": "#/components/schemas/ShieldDefinition"
-                        }
-                    },
-                    "output_shields": {
-                        "type": "array",
-                        "items": {
-                            "$ref": "#/components/schemas/ShieldDefinition"
-                        }
-                    },
-                    "quantization_config": {
-                        "oneOf": [
-                            {
-                                "$ref": "#/components/schemas/Bf16QuantizationConfig"
-                            },
-                            {
-                                "$ref": "#/components/schemas/Fp8QuantizationConfig"
-                            }
-                        ]
-                    },
-                    "debug_prefix_messages": {
-                        "type": "array",
-                        "items": {
-                            "oneOf": [
-                                {
-                                    "$ref": "#/components/schemas/UserMessage"
-                                },
-                                {
-                                    "$ref": "#/components/schemas/SystemMessage"
-                                },
-                                {
-                                    "$ref": "#/components/schemas/ToolResponseMessage"
-                                },
-                                {
-                                    "$ref": "#/components/schemas/CompletionMessage"
-                                }
-                            ]
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "instructions"
-                ]
-            },
-            "AgenticSystemToolDefinition": {
-                "type": "object",
-                "properties": {
-                    "tool_name": {
-                        "oneOf": [
-                            {
-                                "type": "string",
-                                "enum": [
-                                    "brave_search",
-                                    "wolfram_alpha",
-                                    "photogen",
-                                    "code_interpreter"
-                                ]
-                            },
-                            {
-                                "type": "string"
-                            }
-                        ]
-                    },
-                    "description": {
-                        "type": "string"
-                    },
-                    "parameters": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "$ref": "#/components/schemas/ToolParamDefinition"
-                        }
-                    },
-                    "execution_config": {
-                        "$ref": "#/components/schemas/RestAPIExecutionConfig"
-                    },
-                    "input_shields": {
-                        "type": "array",
-                        "items": {
-                            "$ref": "#/components/schemas/ShieldDefinition"
-                        }
-                    },
-                    "output_shields": {
-                        "type": "array",
-                        "items": {
-                            "$ref": "#/components/schemas/ShieldDefinition"
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "tool_name"
-                ]
-            },
-            "BuiltinShield": {
-                "type": "string",
-                "enum": [
-                    "llama_guard",
-                    "prompt_guard",
-                    "code_scanner_guard",
-                    "third_party_shield"
-                ]
-            },
-            "OnViolationAction": {
-                "type": "integer",
-                "enum": [
-                    0,
-                    1,
-                    2
-                ]
-            },
-            "RestAPIExecutionConfig": {
-                "type": "object",
-                "properties": {
-                    "url": {
-                        "$ref": "#/components/schemas/URL"
-                    },
-                    "method": {
-                        "$ref": "#/components/schemas/RestAPIMethod"
-                    },
-                    "params": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "type": "string"
-                        }
-                    },
-                    "headers": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "type": "string"
-                        }
-                    },
-                    "body": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "type": "string"
-                        }
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "url",
-                    "method"
-                ]
-            },
-            "RestAPIMethod": {
-                "type": "string",
-                "enum": [
-                    "GET",
-                    "POST",
-                    "PUT",
-                    "DELETE"
-                ]
-            },
-            "ShieldDefinition": {
-                "type": "object",
-                "properties": {
-                    "shield_type": {
-                        "oneOf": [
-                            {
-                                "$ref": "#/components/schemas/BuiltinShield"
-                            },
-                            {
-                                "type": "string"
-                            }
-                        ]
-                    },
-                    "description": {
-                        "type": "string"
-                    },
-                    "parameters": {
-                        "type": "object",
-                        "additionalProperties": {
-                            "$ref": "#/components/schemas/ToolParamDefinition"
-                        }
-                    },
-                    "on_violation_action": {
-                        "$ref": "#/components/schemas/OnViolationAction"
-                    },
-                    "execution_config": {
-                        "$ref": "#/components/schemas/RestAPIExecutionConfig"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "shield_type",
-                    "on_violation_action"
-                ]
-            },
             "AgenticSystemCreateResponse": {
                 "type": "object",
                 "properties": {
@@ -2438,7 +1741,7 @@
                     "step_type": {
                         "type": "string",
                         "enum": [
-                            "model_inference",
+                            "inference",
                             "tool_execution",
                             "shield_call",
                             "memory_retrieval"
@@ -2447,7 +1750,7 @@
                     "step_details": {
                         "oneOf": [
                             {
-                                "$ref": "#/components/schemas/ModelInferenceStep"
+                                "$ref": "#/components/schemas/InferenceStep"
                             },
                             {
                                 "$ref": "#/components/schemas/ToolExecutionStep"
@@ -2478,7 +1781,7 @@
                     "step_type": {
                         "type": "string",
                         "enum": [
-                            "model_inference",
+                            "inference",
                             "tool_execution",
                             "shield_call",
                             "memory_retrieval"
@@ -2514,7 +1817,7 @@
                     "step_type": {
                         "type": "string",
                         "enum": [
-                            "model_inference",
+                            "inference",
                             "tool_execution",
                             "shield_call",
                             "memory_retrieval"
@@ -2602,6 +1905,39 @@
                     "turn_id"
                 ]
             },
+            "InferenceStep": {
+                "type": "object",
+                "properties": {
+                    "turn_id": {
+                        "type": "string"
+                    },
+                    "step_id": {
+                        "type": "string"
+                    },
+                    "started_at": {
+                        "type": "string",
+                        "format": "date-time"
+                    },
+                    "completed_at": {
+                        "type": "string",
+                        "format": "date-time"
+                    },
+                    "step_type": {
+                        "type": "string",
+                        "const": "inference"
+                    },
+                    "model_response": {
+                        "$ref": "#/components/schemas/CompletionMessage"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "turn_id",
+                    "step_id",
+                    "step_type",
+                    "model_response"
+                ]
+            },
             "MemoryBankDocument": {
                 "type": "object",
                 "properties": {
@@ -2699,39 +2035,6 @@
                     "scores"
                 ]
             },
-            "ModelInferenceStep": {
-                "type": "object",
-                "properties": {
-                    "turn_id": {
-                        "type": "string"
-                    },
-                    "step_id": {
-                        "type": "string"
-                    },
-                    "started_at": {
-                        "type": "string",
-                        "format": "date-time"
-                    },
-                    "completed_at": {
-                        "type": "string",
-                        "format": "date-time"
-                    },
-                    "step_type": {
-                        "type": "string",
-                        "const": "model_inference"
-                    },
-                    "model_response": {
-                        "$ref": "#/components/schemas/CompletionMessage"
-                    }
-                },
-                "additionalProperties": false,
-                "required": [
-                    "turn_id",
-                    "step_id",
-                    "step_type",
-                    "model_response"
-                ]
-            },
             "ShieldCallStep": {
                 "type": "object",
                 "properties": {
@@ -2794,6 +2097,38 @@
                     "is_violation"
                 ]
             },
+            "ToolCallDelta": {
+                "type": "object",
+                "properties": {
+                    "content": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "$ref": "#/components/schemas/ToolCall"
+                            }
+                        ]
+                    },
+                    "parse_status": {
+                        "$ref": "#/components/schemas/ToolCallParseStatus"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "content",
+                    "parse_status"
+                ]
+            },
+            "ToolCallParseStatus": {
+                "type": "string",
+                "enum": [
+                    "started",
+                    "in_progress",
+                    "failure",
+                    "success"
+                ]
+            },
             "ToolExecutionStep": {
                 "type": "object",
                 "properties": {
@@ -2917,7 +2252,7 @@
                         "items": {
                             "oneOf": [
                                 {
-                                    "$ref": "#/components/schemas/ModelInferenceStep"
+                                    "$ref": "#/components/schemas/InferenceStep"
                                 },
                                 {
                                     "$ref": "#/components/schemas/ToolExecutionStep"
@@ -4019,6 +3354,13 @@
                 ],
                 "title": "Request to finetune a model."
             },
+            "PretrainedModel": {
+                "type": "string",
+                "enum": [
+                    "llama3_8b",
+                    "llama3_70b"
+                ]
+            },
             "QLoraFinetuningConfig": {
                 "type": "object",
                 "properties": {
@@ -4059,42 +3401,51 @@
         }
     ],
     "tags": [
+        {
+            "name": "RewardScoring"
+        },
+        {
+            "name": "PostTraining"
+        },
         {
             "name": "AgenticSystem"
         },
         {
             "name": "Datasets"
         },
-        {
-            "name": "ModelInference"
-        },
-        {
-            "name": "SyntheticDataGeneration"
-        },
         {
             "name": "MemoryBanks"
         },
-        {
-            "name": "PostTraining"
-        },
         {
             "name": "Evaluations"
         },
         {
-            "name": "RewardScoring"
+            "name": "SyntheticDataGeneration"
+        },
+        {
+            "name": "AgenticSystemCreateRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemCreateRequest\" />"
+        },
+        {
+            "name": "AgenticSystemInstanceConfig",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemInstanceConfig\" />"
+        },
+        {
+            "name": "AgenticSystemToolDefinition",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemToolDefinition\" />"
         },
         {
             "name": "Attachment",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/Attachment\" />"
         },
-        {
-            "name": "BatchChatCompletionRequest",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/BatchChatCompletionRequest\" />"
-        },
         {
             "name": "Bf16QuantizationConfig",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/Bf16QuantizationConfig\" />"
         },
+        {
+            "name": "BuiltinShield",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/BuiltinShield\" />"
+        },
         {
             "name": "CompletionMessage",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/CompletionMessage\" />"
@@ -4107,6 +3458,18 @@
             "name": "InstructModel",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/InstructModel\" />"
         },
+        {
+            "name": "OnViolationAction",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/OnViolationAction\" />"
+        },
+        {
+            "name": "RestAPIExecutionConfig",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RestAPIExecutionConfig\" />"
+        },
+        {
+            "name": "RestAPIMethod",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RestAPIMethod\" />"
+        },
         {
             "name": "SamplingParams",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/SamplingParams\" />"
@@ -4115,6 +3478,10 @@
             "name": "SamplingStrategy",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/SamplingStrategy\" />"
         },
+        {
+            "name": "ShieldDefinition",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ShieldDefinition\" />"
+        },
         {
             "name": "StopReason",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/StopReason\" />"
@@ -4127,10 +3494,6 @@
             "name": "ToolCall",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ToolCall\" />"
         },
-        {
-            "name": "ToolDefinition",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ToolDefinition\" />"
-        },
         {
             "name": "ToolParamDefinition",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ToolParamDefinition\" />"
@@ -4147,90 +3510,6 @@
             "name": "UserMessage",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/UserMessage\" />"
         },
-        {
-            "name": "ChatCompletionResponse",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionResponse\" />"
-        },
-        {
-            "name": "TokenLogProbs",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/TokenLogProbs\" />"
-        },
-        {
-            "name": "BatchCompletionRequest",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/BatchCompletionRequest\" />"
-        },
-        {
-            "name": "PretrainedModel",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/PretrainedModel\" />"
-        },
-        {
-            "name": "CompletionResponse",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/CompletionResponse\" />"
-        },
-        {
-            "name": "ChatCompletionRequest",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionRequest\" />"
-        },
-        {
-            "name": "ChatCompletionResponseEvent",
-            "description": "Chat completion response event.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionResponseEvent\" />"
-        },
-        {
-            "name": "ChatCompletionResponseEventType",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionResponseEventType\" />"
-        },
-        {
-            "name": "ChatCompletionResponseStreamChunk",
-            "description": "SSE-stream of these events.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionResponseStreamChunk\" />"
-        },
-        {
-            "name": "ToolCallDelta",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ToolCallDelta\" />"
-        },
-        {
-            "name": "ToolCallParseStatus",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ToolCallParseStatus\" />"
-        },
-        {
-            "name": "CompletionRequest",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/CompletionRequest\" />"
-        },
-        {
-            "name": "CompletionResponseStreamChunk",
-            "description": "streamed completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/CompletionResponseStreamChunk\" />"
-        },
-        {
-            "name": "AgenticSystemCreateRequest",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemCreateRequest\" />"
-        },
-        {
-            "name": "AgenticSystemInstanceConfig",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemInstanceConfig\" />"
-        },
-        {
-            "name": "AgenticSystemToolDefinition",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemToolDefinition\" />"
-        },
-        {
-            "name": "BuiltinShield",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/BuiltinShield\" />"
-        },
-        {
-            "name": "OnViolationAction",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/OnViolationAction\" />"
-        },
-        {
-            "name": "RestAPIExecutionConfig",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RestAPIExecutionConfig\" />"
-        },
-        {
-            "name": "RestAPIMethod",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/RestAPIMethod\" />"
-        },
-        {
-            "name": "ShieldDefinition",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ShieldDefinition\" />"
-        },
         {
             "name": "AgenticSystemCreateResponse",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemCreateResponse\" />"
@@ -4275,6 +3554,10 @@
             "name": "AgenticSystemTurnResponseTurnStartPayload",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/AgenticSystemTurnResponseTurnStartPayload\" />"
         },
+        {
+            "name": "InferenceStep",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/InferenceStep\" />"
+        },
         {
             "name": "MemoryBankDocument",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/MemoryBankDocument\" />"
@@ -4283,10 +3566,6 @@
             "name": "MemoryRetrievalStep",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/MemoryRetrievalStep\" />"
         },
-        {
-            "name": "ModelInferenceStep",
-            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ModelInferenceStep\" />"
-        },
         {
             "name": "ShieldCallStep",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ShieldCallStep\" />"
@@ -4295,6 +3574,14 @@
             "name": "ShieldResponse",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ShieldResponse\" />"
         },
+        {
+            "name": "ToolCallDelta",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ToolCallDelta\" />"
+        },
+        {
+            "name": "ToolCallParseStatus",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ToolCallParseStatus\" />"
+        },
         {
             "name": "ToolExecutionStep",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ToolExecutionStep\" />"
@@ -4443,6 +3730,10 @@
             "name": "PostTrainingSFTRequest",
             "description": "Request to finetune a model.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/PostTrainingSFTRequest\" />"
         },
+        {
+            "name": "PretrainedModel",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/PretrainedModel\" />"
+        },
         {
             "name": "QLoraFinetuningConfig",
             "description": "<SchemaDefinition schemaRef=\"#/components/schemas/QLoraFinetuningConfig\" />"
@@ -4456,7 +3747,6 @@
                 "Datasets",
                 "Evaluations",
                 "MemoryBanks",
-                "ModelInference",
                 "PostTraining",
                 "RewardScoring",
                 "SyntheticDataGeneration"
@@ -4480,19 +3770,9 @@
                 "AgenticSystemTurnResponseTurnCompletePayload",
                 "AgenticSystemTurnResponseTurnStartPayload",
                 "Attachment",
-                "BatchChatCompletionRequest",
-                "BatchCompletionRequest",
                 "Bf16QuantizationConfig",
                 "BuiltinShield",
-                "ChatCompletionRequest",
-                "ChatCompletionResponse",
-                "ChatCompletionResponseEvent",
-                "ChatCompletionResponseEventType",
-                "ChatCompletionResponseStreamChunk",
                 "CompletionMessage",
-                "CompletionRequest",
-                "CompletionResponse",
-                "CompletionResponseStreamChunk",
                 "CreateDatasetRequest",
                 "DPOAlignmentConfig",
                 "DialogGenerations",
@@ -4506,12 +3786,12 @@
                 "EvaluationJobStatusResponse",
                 "FinetuningAlgorithm",
                 "Fp8QuantizationConfig",
+                "InferenceStep",
                 "InstructModel",
                 "LoraFinetuningConfig",
                 "MemoryBank",
                 "MemoryBankDocument",
                 "MemoryRetrievalStep",
-                "ModelInferenceStep",
                 "OnViolationAction",
                 "OptimizerConfig",
                 "PostTrainingJob",
@@ -4541,11 +3821,9 @@
                 "SyntheticDataGenerationRequest",
                 "SyntheticDataGenerationResponse",
                 "SystemMessage",
-                "TokenLogProbs",
                 "ToolCall",
                 "ToolCallDelta",
                 "ToolCallParseStatus",
-                "ToolDefinition",
                 "ToolExecutionStep",
                 "ToolParamDefinition",
                 "ToolResponse",
diff --git a/toolchain/spec/openapi.yaml b/toolchain/spec/openapi.yaml
index 06f735cc5..6c73f6175 100644
--- a/toolchain/spec/openapi.yaml
+++ b/toolchain/spec/openapi.yaml
@@ -148,13 +148,13 @@ components:
           type: string
         step_details:
           oneOf:
-          - $ref: '#/components/schemas/ModelInferenceStep'
+          - $ref: '#/components/schemas/InferenceStep'
           - $ref: '#/components/schemas/ToolExecutionStep'
           - $ref: '#/components/schemas/ShieldCallStep'
           - $ref: '#/components/schemas/MemoryRetrievalStep'
         step_type:
           enum:
-          - model_inference
+          - inference
           - tool_execution
           - shield_call
           - memory_retrieval
@@ -176,7 +176,7 @@ components:
           type: string
         step_type:
           enum:
-          - model_inference
+          - inference
           - tool_execution
           - shield_call
           - memory_retrieval
@@ -210,7 +210,7 @@ components:
           type: string
         step_type:
           enum:
-          - model_inference
+          - inference
           - tool_execution
           - shield_call
           - memory_retrieval
@@ -263,171 +263,23 @@ components:
       - url
       - mime_type
       type: object
-    BatchChatCompletionRequest:
-      additionalProperties: false
-      properties:
-        available_tools:
-          items:
-            $ref: '#/components/schemas/ToolDefinition'
-          type: array
-        logprobs:
-          additionalProperties: false
-          properties:
-            top_k:
-              type: integer
-          type: object
-        messages_batch:
-          items:
-            items:
-              oneOf:
-              - $ref: '#/components/schemas/UserMessage'
-              - $ref: '#/components/schemas/SystemMessage'
-              - $ref: '#/components/schemas/ToolResponseMessage'
-              - $ref: '#/components/schemas/CompletionMessage'
-            type: array
-          type: array
-        model:
-          $ref: '#/components/schemas/InstructModel'
-        quantization_config:
-          oneOf:
-          - $ref: '#/components/schemas/Bf16QuantizationConfig'
-          - $ref: '#/components/schemas/Fp8QuantizationConfig'
-        sampling_params:
-          $ref: '#/components/schemas/SamplingParams'
-      required:
-      - model
-      - messages_batch
-      type: object
-    BatchCompletionRequest:
-      additionalProperties: false
-      properties:
-        content_batch:
-          items:
-            oneOf:
-            - type: string
-            - $ref: '#/components/schemas/Attachment'
-            - items:
-                oneOf:
-                - type: string
-                - $ref: '#/components/schemas/Attachment'
-              type: array
-          type: array
-        logprobs:
-          additionalProperties: false
-          properties:
-            top_k:
-              type: integer
-          type: object
-        model:
-          $ref: '#/components/schemas/PretrainedModel'
-        quantization_config:
-          oneOf:
-          - $ref: '#/components/schemas/Bf16QuantizationConfig'
-          - $ref: '#/components/schemas/Fp8QuantizationConfig'
-        sampling_params:
-          $ref: '#/components/schemas/SamplingParams'
-      required:
-      - model
-      - content_batch
-      type: object
     Bf16QuantizationConfig:
       additionalProperties: false
       properties:
-        quantization_type:
+        type:
           const: bf16
           type: string
       required:
-      - quantization_type
+      - type
       type: object
     BuiltinShield:
       enum:
       - llama_guard
-      - prompt_guard
       - code_scanner_guard
       - third_party_shield
+      - injection_shield
+      - jailbreak_shield
       type: string
-    ChatCompletionRequest:
-      additionalProperties: false
-      properties:
-        available_tools:
-          items:
-            $ref: '#/components/schemas/ToolDefinition'
-          type: array
-        logprobs:
-          additionalProperties: false
-          properties:
-            top_k:
-              type: integer
-          type: object
-        messages:
-          items:
-            oneOf:
-            - $ref: '#/components/schemas/UserMessage'
-            - $ref: '#/components/schemas/SystemMessage'
-            - $ref: '#/components/schemas/ToolResponseMessage'
-            - $ref: '#/components/schemas/CompletionMessage'
-          type: array
-        model:
-          $ref: '#/components/schemas/InstructModel'
-        quantization_config:
-          oneOf:
-          - $ref: '#/components/schemas/Bf16QuantizationConfig'
-          - $ref: '#/components/schemas/Fp8QuantizationConfig'
-        sampling_params:
-          $ref: '#/components/schemas/SamplingParams'
-        stream:
-          type: boolean
-      required:
-      - model
-      - messages
-      type: object
-    ChatCompletionResponse:
-      additionalProperties: false
-      properties:
-        completion_message:
-          $ref: '#/components/schemas/CompletionMessage'
-        logprobs:
-          items:
-            $ref: '#/components/schemas/TokenLogProbs'
-          type: array
-      required:
-      - completion_message
-      type: object
-    ChatCompletionResponseEvent:
-      additionalProperties: false
-      properties:
-        delta:
-          oneOf:
-          - type: string
-          - $ref: '#/components/schemas/ToolCallDelta'
-        event_type:
-          $ref: '#/components/schemas/ChatCompletionResponseEventType'
-        logprobs:
-          items:
-            $ref: '#/components/schemas/TokenLogProbs'
-          type: array
-        stop_reason:
-          $ref: '#/components/schemas/StopReason'
-      required:
-      - event_type
-      - delta
-      title: Chat completion response event.
-      type: object
-    ChatCompletionResponseEventType:
-      enum:
-      - start
-      - complete
-      - progress
-      type: string
-    ChatCompletionResponseStreamChunk:
-      additionalProperties: false
-      properties:
-        event:
-          $ref: '#/components/schemas/ChatCompletionResponseEvent'
-      required:
-      - event
-      title: SSE-stream of these events.
-      type: object
     CompletionMessage:
       additionalProperties: false
       properties:
@@ -455,65 +307,6 @@ components:
       - stop_reason
       - tool_calls
       type: object
-    CompletionRequest:
-      additionalProperties: false
-      properties:
-        content:
-          oneOf:
-          - type: string
-          - $ref: '#/components/schemas/Attachment'
-          - items:
-              oneOf:
-              - type: string
-              - $ref: '#/components/schemas/Attachment'
-            type: array
-        logprobs:
-          additionalProperties: false
-          properties:
-            top_k:
-              type: integer
-          type: object
-        model:
-          $ref: '#/components/schemas/PretrainedModel'
-        quantization_config:
-          oneOf:
-          - $ref: '#/components/schemas/Bf16QuantizationConfig'
-          - $ref: '#/components/schemas/Fp8QuantizationConfig'
-        sampling_params:
-          $ref: '#/components/schemas/SamplingParams'
-        stream:
-          type: boolean
-      required:
-      - model
-      - content
-      type: object
-    CompletionResponse:
-      additionalProperties: false
-      properties:
-        completion_message:
-          $ref: '#/components/schemas/CompletionMessage'
-        logprobs:
-          items:
-            $ref: '#/components/schemas/TokenLogProbs'
-          type: array
-      required:
-      - completion_message
-      type: object
-    CompletionResponseStreamChunk:
-      additionalProperties: false
-      properties:
-        delta:
-          type: string
-        logprobs:
-          items:
-            $ref: '#/components/schemas/TokenLogProbs'
-          type: array
-        stop_reason:
-          $ref: '#/components/schemas/StopReason'
-      required:
-      - delta
-      title: streamed completion response.
-      type: object
     CreateDatasetRequest:
       additionalProperties: false
       properties:
@@ -737,11 +530,35 @@ components:
     Fp8QuantizationConfig:
       additionalProperties: false
       properties:
-        quantization_type:
+        type:
           const: fp8
           type: string
       required:
-      - quantization_type
+      - type
+      type: object
+    InferenceStep:
+      additionalProperties: false
+      properties:
+        completed_at:
+          format: date-time
+          type: string
+        model_response:
+          $ref: '#/components/schemas/CompletionMessage'
+        started_at:
+          format: date-time
+          type: string
+        step_id:
+          type: string
+        step_type:
+          const: inference
+          type: string
+        turn_id:
+          type: string
+      required:
+      - turn_id
+      - step_id
+      - step_type
+      - model_response
       type: object
     InstructModel:
       enum:
@@ -843,30 +660,6 @@ components:
       - documents
       - scores
       type: object
-    ModelInferenceStep:
-      additionalProperties: false
-      properties:
-        completed_at:
-          format: date-time
-          type: string
-        model_response:
-          $ref: '#/components/schemas/CompletionMessage'
-        started_at:
-          format: date-time
-          type: string
-        step_id:
-          type: string
-        step_type:
-          const: model_inference
-          type: string
-        turn_id:
-          type: string
-      required:
-      - turn_id
-      - step_id
-      - step_type
-      - model_response
-      type: object
     OnViolationAction:
       enum:
       - 0
@@ -1408,16 +1201,6 @@ components:
       - role
       - content
       type: object
-    TokenLogProbs:
-      additionalProperties: false
-      properties:
-        logprobs_by_token:
-          additionalProperties:
-            type: number
-          type: object
-      required:
-      - logprobs_by_token
-      type: object
     ToolCall:
       additionalProperties: false
       properties:
@@ -1477,32 +1260,11 @@ components:
       type: object
     ToolCallParseStatus:
       enum:
-      - start
+      - started
       - in_progress
       - failure
       - success
       type: string
-    ToolDefinition:
-      additionalProperties: false
-      properties:
-        description:
-          type: string
-        parameters:
-          additionalProperties:
-            $ref: '#/components/schemas/ToolParamDefinition'
-          type: object
-        tool_name:
-          oneOf:
-          - enum:
-            - brave_search
-            - wolfram_alpha
-            - photogen
-            - code_interpreter
-            type: string
-          - type: string
-      required:
-      - tool_name
-      type: object
     ToolExecutionStep:
       additionalProperties: false
       properties:
@@ -1686,7 +1448,7 @@ components:
         steps:
           items:
             oneOf:
-            - $ref: '#/components/schemas/ModelInferenceStep'
+            - $ref: '#/components/schemas/InferenceStep'
             - $ref: '#/components/schemas/ToolExecutionStep'
             - $ref: '#/components/schemas/ShieldCallStep'
             - $ref: '#/components/schemas/MemoryRetrievalStep'
@@ -1729,7 +1491,7 @@ info:
   description: "This is the specification of the llama stack that provides\n     \
     \           a set of endpoints and their corresponding interfaces that are tailored\
     \ to\n                best leverage Llama Models. The specification is still in\
-    \ draft and subject to change.\n                Generated at 2024-07-19 11:49:56.794897"
+    \ draft and subject to change.\n                Generated at 2024-07-21 12:19:33.327857"
   title: '[DRAFT] Llama Stack Specification'
   version: 0.0.1
 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -1766,58 +1528,6 @@ paths:
           description: OK
       tags:
       - AgenticSystem
-  /agentic_system/memory_bank/attach:
-    post:
-      parameters:
-      - in: query
-        name: agent_id
-        required: true
-        schema:
-          type: string
-      - in: query
-        name: session_id
-        required: true
-        schema:
-          type: string
-      requestBody:
-        content:
-          application/json:
-            schema:
-              items:
-                type: string
-              type: array
-        required: true
-      responses:
-        '200':
-          description: OK
-      tags:
-      - AgenticSystem
-  /agentic_system/memory_bank/detach:
-    post:
-      parameters:
-      - in: query
-        name: agent_id
-        required: true
-        schema:
-          type: string
-      - in: query
-        name: session_id
-        required: true
-        schema:
-          type: string
-      requestBody:
-        content:
-          application/json:
-            schema:
-              items:
-                type: string
-              type: array
-        required: true
-      responses:
-        '200':
-          description: OK
-      tags:
-      - AgenticSystem
   /agentic_system/session/create:
     post:
       parameters: []
@@ -1969,19 +1679,6 @@ paths:
           description: OK
       tags:
       - Evaluations
-  /evaluate/job/cancel:
-    get:
-      parameters:
-      - in: query
-        name: job_uuid
-        required: true
-        schema:
-          type: string
-      responses:
-        '200':
-          description: OK
-      tags:
-      - Evaluations
   /evaluate/job/logs:
     get:
       parameters:
@@ -2082,78 +1779,6 @@ paths:
           description: OK
       tags:
       - Evaluations
-  /inference/batch_chat_completion:
-    post:
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/BatchChatCompletionRequest'
-        required: true
-      responses:
-        '200':
-          content:
-            application/jsonl:
-              schema:
-                $ref: '#/components/schemas/ChatCompletionResponse'
-          description: OK
-      tags:
-      - ModelInference
-  /inference/batch_completion:
-    post:
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/BatchCompletionRequest'
-        required: true
-      responses:
-        '200':
-          content:
-            application/jsonl:
-              schema:
-                $ref: '#/components/schemas/CompletionResponse'
-          description: OK
-      tags:
-      - ModelInference
-  /inference/chat_completion:
-    post:
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/ChatCompletionRequest'
-        required: true
-      responses:
-        '200':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
-          description: SSE-stream of these events.
-      tags:
-      - ModelInference
-  /inference/completion:
-    post:
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/CompletionRequest'
-        required: true
-      responses:
-        '200':
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/CompletionResponseStreamChunk'
-          description: streamed completion response.
-      tags:
-      - ModelInference
   /memory_bank/delete:
     post:
       parameters:
@@ -2335,19 +1960,6 @@ paths:
           description: OK
       tags:
       - PostTraining
-  /post_training/job/cancel:
-    get:
-      parameters:
-      - in: query
-        name: job_uuid
-        required: true
-        schema:
-          type: string
-      responses:
-        '200':
-          description: OK
-      tags:
-      - PostTraining
   /post_training/job/logs:
     get:
       parameters:
@@ -2471,22 +2083,29 @@ security:
 servers:
 - url: http://any-hosted-llama-stack.com
 tags:
+- name: RewardScoring
+- name: PostTraining
 - name: AgenticSystem
 - name: Datasets
-- name: ModelInference
-- name: SyntheticDataGeneration
 - name: MemoryBanks
-- name: PostTraining
 - name: Evaluations
-- name: RewardScoring
+- name: SyntheticDataGeneration
+- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
+    />
+  name: AgenticSystemCreateRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemInstanceConfig"
+    />
+  name: AgenticSystemInstanceConfig
+- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemToolDefinition"
+    />
+  name: AgenticSystemToolDefinition
 - description: <SchemaDefinition schemaRef="#/components/schemas/Attachment" />
   name: Attachment
-- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
-    />
-  name: BatchChatCompletionRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/Bf16QuantizationConfig"
     />
   name: Bf16QuantizationConfig
+- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinShield" />
+  name: BuiltinShield
 - description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
     />
   name: CompletionMessage
@@ -2495,19 +2114,28 @@ tags:
   name: Fp8QuantizationConfig
 - description: <SchemaDefinition schemaRef="#/components/schemas/InstructModel" />
   name: InstructModel
+- description: <SchemaDefinition schemaRef="#/components/schemas/OnViolationAction"
+    />
+  name: OnViolationAction
+- description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIExecutionConfig"
+    />
+  name: RestAPIExecutionConfig
+- description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIMethod" />
+  name: RestAPIMethod
 - description: <SchemaDefinition schemaRef="#/components/schemas/SamplingParams" />
   name: SamplingParams
 - description: <SchemaDefinition schemaRef="#/components/schemas/SamplingStrategy"
     />
   name: SamplingStrategy
+- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldDefinition"
+    />
+  name: ShieldDefinition
 - description: <SchemaDefinition schemaRef="#/components/schemas/StopReason" />
   name: StopReason
 - description: <SchemaDefinition schemaRef="#/components/schemas/SystemMessage" />
   name: SystemMessage
 - description: <SchemaDefinition schemaRef="#/components/schemas/ToolCall" />
   name: ToolCall
-- description: <SchemaDefinition schemaRef="#/components/schemas/ToolDefinition" />
-  name: ToolDefinition
 - description: <SchemaDefinition schemaRef="#/components/schemas/ToolParamDefinition"
     />
   name: ToolParamDefinition
@@ -2518,74 +2146,6 @@ tags:
   name: URL
 - description: <SchemaDefinition schemaRef="#/components/schemas/UserMessage" />
   name: UserMessage
-- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponse"
-    />
-  name: ChatCompletionResponse
-- description: <SchemaDefinition schemaRef="#/components/schemas/TokenLogProbs" />
-  name: TokenLogProbs
-- description: <SchemaDefinition schemaRef="#/components/schemas/BatchCompletionRequest"
-    />
-  name: BatchCompletionRequest
-- description: <SchemaDefinition schemaRef="#/components/schemas/PretrainedModel"
-    />
-  name: PretrainedModel
-- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionResponse"
-    />
-  name: CompletionResponse
-- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest"
-    />
-  name: ChatCompletionRequest
-- description: 'Chat completion response event.
-
-
-    <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseEvent"
-    />'
-  name: ChatCompletionResponseEvent
-- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseEventType"
-    />
-  name: ChatCompletionResponseEventType
-- description: 'SSE-stream of these events.
-
-
-    <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseStreamChunk"
-    />'
-  name: ChatCompletionResponseStreamChunk
-- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallDelta" />
-  name: ToolCallDelta
-- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallParseStatus"
-    />
-  name: ToolCallParseStatus
-- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionRequest"
-    />
-  name: CompletionRequest
-- description: 'streamed completion response.
-
-
-    <SchemaDefinition schemaRef="#/components/schemas/CompletionResponseStreamChunk"
-    />'
-  name: CompletionResponseStreamChunk
-- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
-    />
-  name: AgenticSystemCreateRequest
-- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemInstanceConfig"
-    />
-  name: AgenticSystemInstanceConfig
-- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemToolDefinition"
-    />
-  name: AgenticSystemToolDefinition
-- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinShield" />
-  name: BuiltinShield
-- description: <SchemaDefinition schemaRef="#/components/schemas/OnViolationAction"
-    />
-  name: OnViolationAction
-- description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIExecutionConfig"
-    />
-  name: RestAPIExecutionConfig
-- description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIMethod" />
-  name: RestAPIMethod
-- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldDefinition"
-    />
-  name: ShieldDefinition
 - description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateResponse"
     />
   name: AgenticSystemCreateResponse
@@ -2622,19 +2182,23 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemTurnResponseTurnStartPayload"
     />
   name: AgenticSystemTurnResponseTurnStartPayload
+- description: <SchemaDefinition schemaRef="#/components/schemas/InferenceStep" />
+  name: InferenceStep
 - description: <SchemaDefinition schemaRef="#/components/schemas/MemoryBankDocument"
     />
   name: MemoryBankDocument
 - description: <SchemaDefinition schemaRef="#/components/schemas/MemoryRetrievalStep"
     />
   name: MemoryRetrievalStep
-- description: <SchemaDefinition schemaRef="#/components/schemas/ModelInferenceStep"
-    />
-  name: ModelInferenceStep
 - description: <SchemaDefinition schemaRef="#/components/schemas/ShieldCallStep" />
   name: ShieldCallStep
 - description: <SchemaDefinition schemaRef="#/components/schemas/ShieldResponse" />
   name: ShieldResponse
+- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallDelta" />
+  name: ToolCallDelta
+- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallParseStatus"
+    />
+  name: ToolCallParseStatus
 - description: <SchemaDefinition schemaRef="#/components/schemas/ToolExecutionStep"
     />
   name: ToolExecutionStep
@@ -2785,6 +2349,9 @@ tags:
 
     <SchemaDefinition schemaRef="#/components/schemas/PostTrainingSFTRequest" />'
   name: PostTrainingSFTRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/PretrainedModel"
+    />
+  name: PretrainedModel
 - description: <SchemaDefinition schemaRef="#/components/schemas/QLoraFinetuningConfig"
     />
   name: QLoraFinetuningConfig
@@ -2795,7 +2362,6 @@ x-tagGroups:
   - Datasets
   - Evaluations
   - MemoryBanks
-  - ModelInference
   - PostTraining
   - RewardScoring
   - SyntheticDataGeneration
@@ -2816,19 +2382,9 @@ x-tagGroups:
   - AgenticSystemTurnResponseTurnCompletePayload
   - AgenticSystemTurnResponseTurnStartPayload
   - Attachment
-  - BatchChatCompletionRequest
-  - BatchCompletionRequest
   - Bf16QuantizationConfig
   - BuiltinShield
-  - ChatCompletionRequest
-  - ChatCompletionResponse
-  - ChatCompletionResponseEvent
-  - ChatCompletionResponseEventType
-  - ChatCompletionResponseStreamChunk
   - CompletionMessage
-  - CompletionRequest
-  - CompletionResponse
-  - CompletionResponseStreamChunk
   - CreateDatasetRequest
   - DPOAlignmentConfig
   - DialogGenerations
@@ -2842,12 +2398,12 @@ x-tagGroups:
   - EvaluationJobStatusResponse
   - FinetuningAlgorithm
   - Fp8QuantizationConfig
+  - InferenceStep
   - InstructModel
   - LoraFinetuningConfig
   - MemoryBank
   - MemoryBankDocument
   - MemoryRetrievalStep
-  - ModelInferenceStep
   - OnViolationAction
   - OptimizerConfig
   - PostTrainingJob
@@ -2877,11 +2433,9 @@ x-tagGroups:
   - SyntheticDataGenerationRequest
   - SyntheticDataGenerationResponse
   - SystemMessage
-  - TokenLogProbs
   - ToolCall
   - ToolCallDelta
   - ToolCallParseStatus
-  - ToolDefinition
   - ToolExecutionStep
   - ToolParamDefinition
   - ToolResponse
diff --git a/toolchain/spec/run_openapi_generator.sh b/toolchain/spec/run_openapi_generator.sh
index 5e74f4a8e..bb0171fa3 100644
--- a/toolchain/spec/run_openapi_generator.sh
+++ b/toolchain/spec/run_openapi_generator.sh
@@ -2,4 +2,4 @@
 
 set -x
 
-PYTHONPATH=../../../oss-ops:../.. python3 -m toolchain.spec.generate
+PYTHONPATH=${PYTHONPATH:+$PYTHONPATH:}../../../oss-ops:../.. python3 -m toolchain.spec.generate  # extra checkouts come from the caller's PYTHONPATH, not hard-coded per-user paths