rename ModelInference to Inference

rsm 2024-07-21 12:19:52 -07:00
parent 245461620d
commit 67f0510edd
18 changed files with 468 additions and 1636 deletions
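
The rename is mechanical but cuts across every layer: the YAML key model_inference_config becomes inference_config, the config classes drop the Model prefix (ModelInferenceConfig to InferenceConfig, ModelInferenceHydraConfig to InferenceHydraConfig), the protocol and its implementations follow (ModelInference to Inference, ModelInferenceImpl to InferenceImpl, ModelInferenceClient to InferenceClient), and the regenerated OpenAPI spec renames ModelInferenceStep to InferenceStep and the model_inference step_type to inference. A minimal sketch of the renamed call path, pieced together from the hunks below; the toolchain.inference package path is an assumption, as is loading the YAML directly with OmegaConf:

    from omegaconf import OmegaConf

    # Renamed classes/functions from this commit; the module path is assumed.
    from toolchain.inference.api.config import InferenceHydraConfig        # was ModelInferenceHydraConfig
    from toolchain.inference.api_instance import get_inference_api_instance

    async def build_inference(yaml_path: str):
        cfg = OmegaConf.load(yaml_path)
        hydra_config = InferenceHydraConfig(
            # top-level YAML key renamed from model_inference_config
            **OmegaConf.to_container(cfg["inference_config"], resolve=True)
        )
        inference_config = hydra_config.convert_to_inference_config()
        impl = await get_inference_api_instance(inference_config)          # InferenceImpl or InferenceClient
        await impl.initialize()
        return impl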


@@ -30,7 +30,7 @@ create_parent_dir() {
# Function to output the YAML configuration
output_yaml() {
cat <<EOL > ${yaml_output_path}
model_inference_config:
inference_config:
impl_type: "inline"
inline_config:
checkpoint_type: "pytorch"


@@ -47,7 +47,7 @@ class InferenceConfigure(Subcommand):
yaml_output_path
):
yaml_content = textwrap.dedent(f"""
model_inference_config:
inference_config:
impl_type: "inline"
inline_config:
checkpoint_type: "pytorch"


@@ -1,4 +1,4 @@
model_inference_config:
inference_config:
impl_type: "inline"
inline_config:
checkpoint_type: "pytorch"


@@ -1,4 +1,4 @@
model_inference_config:
inference_config:
impl_type: "inline"
inline_config:
checkpoint_type: "pytorch"


@@ -1,4 +1,4 @@
model_inference_config:
inference_config:
impl_type: "inline"
inline_config:
checkpoint_type: "pytorch"


@@ -1,4 +1,4 @@
model_inference_config:
inference_config:
impl_type: "inline"
inline_config:
checkpoint_type: "pytorch"


@@ -1,4 +1,4 @@
model_inference_config:
inference_config:
impl_type: "inline"
inline_config:
checkpoint_type: "pytorch"


@@ -1,4 +1,4 @@
model_inference_config:
inference_config:
impl_type: "inline"
inline_config:
checkpoint_type: "pytorch"


@@ -75,7 +75,7 @@ class RemoteImplConfig(BaseModel):
url: str = Field(..., description="The URL of the remote module")
class ModelInferenceConfig(BaseModel):
class InferenceConfig(BaseModel):
impl_config: Annotated[
Union[InlineImplConfig, RemoteImplConfig],
Field(discriminator="impl_type"),
@@ -130,7 +130,7 @@ class RemoteImplHydraConfig:
@dataclass
class ModelInferenceHydraConfig:
class InferenceHydraConfig:
impl_type: str
inline_config: Optional[InlineImplHydraConfig] = None
remote_config: Optional[RemoteImplHydraConfig] = None
@@ -142,18 +142,18 @@ class ModelInferenceHydraConfig:
if self.impl_type == "remote":
assert self.remote_config is not None
def convert_to_model_inferene_config(self):
def convert_to_inference_config(self):
if self.impl_type == "inline":
inline_config = InlineImplHydraConfig(**self.inline_config)
return ModelInferenceConfig(
return InferenceConfig(
impl_config=inline_config.convert_to_inline_impl_config()
)
elif self.impl_type == "remote":
remote_config = RemoteImplHydraConfig(**self.remote_config)
return ModelInferenceConfig(
return InferenceConfig(
impl_config=remote_config.convert_to_remote_impl_config()
)
cs = ConfigStore.instance()
cs.store(name="model_inference_config", node=ModelInferenceHydraConfig)
cs.store(name="inference_config", node=InferenceHydraConfig)
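
For a quick smoke test, the renamed dataclass can also be built directly and converted without going through Hydra. A rough sketch; the assumption that RemoteImplHydraConfig takes only the url field shown on RemoteImplConfig is mine and not confirmed by this diff:

    from toolchain.inference.api.config import InferenceHydraConfig  # module path assumed

    # Hypothetical direct construction; the exact field set of RemoteImplHydraConfig is assumed.
    hydra_config = InferenceHydraConfig(
        impl_type="remote",
        remote_config={"url": "http://localhost:5000"},  # convert_to_inference_config calls RemoteImplHydraConfig(**remote_config)
    )
    inference_config = hydra_config.convert_to_inference_config()
    print(inference_config.impl_config.url)  # RemoteImplConfig carries the remote URL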


@@ -90,7 +90,7 @@ class BatchChatCompletionResponse(BaseModel):
completion_message_batch: List[CompletionMessage]
class ModelInference(Protocol):
class Inference(Protocol):
@webmethod(route="/inference/completion")
async def completion(


@@ -1,12 +1,12 @@
from .api.config import ImplType, ModelInferenceConfig
from .api.config import ImplType, InferenceConfig
async def get_inference_api_instance(config: ModelInferenceConfig):
async def get_inference_api_instance(config: InferenceConfig):
if config.impl_config.impl_type == ImplType.inline.value:
from .inference import ModelInferenceImpl
from .inference import InferenceImpl
return ModelInferenceImpl(config.impl_config)
return InferenceImpl(config.impl_config)
from .client import ModelInferenceClient
from .client import InferenceClient
return ModelInferenceClient(config.impl_config.url)
return InferenceClient(config.impl_config.url)
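
Reconstructed from the hunk above, the whole factory after this commit reads roughly as follows (blank-line placement is guessed):

    from .api.config import ImplType, InferenceConfig

    async def get_inference_api_instance(config: InferenceConfig):
        # Inline configs get the local implementation, anything else the remote client.
        if config.impl_config.impl_type == ImplType.inline.value:
            from .inference import InferenceImpl

            return InferenceImpl(config.impl_config)

        from .client import InferenceClient

        return InferenceClient(config.impl_config.url)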


@@ -10,12 +10,12 @@ from .api import (
ChatCompletionResponseStreamChunk,
CompletionRequest,
InstructModel,
ModelInference,
Inference,
UserMessage,
)
class ModelInferenceClient(ModelInference):
class InferenceClient(Inference):
def __init__(self, base_url: str):
self.base_url = base_url
@@ -48,7 +48,7 @@ class ModelInferenceClient(ModelInference):
async def run_main(host: str, port: int):
client = ModelInferenceClient(f"http://{host}:{port}")
client = InferenceClient(f"http://{host}:{port}")
message = UserMessage(content="hello world, help me out here")
req = ChatCompletionRequest(


@@ -18,12 +18,12 @@ from .api.endpoints import (
ChatCompletionRequest,
ChatCompletionResponseStreamChunk,
CompletionRequest,
ModelInference,
Inference,
)
from .model_parallel import LlamaModelParallelGenerator
class ModelInferenceImpl(ModelInference):
class InferenceImpl(Inference):
def __init__(self, config: InlineImplConfig) -> None:
self.config = config


@@ -11,7 +11,7 @@ from fastapi.responses import StreamingResponse
from omegaconf import OmegaConf
from toolchain.utils import get_default_config_dir, parse_config
from .api.config import ModelInferenceHydraConfig
from .api.config import InferenceHydraConfig
from .api.endpoints import ChatCompletionRequest, ChatCompletionResponseStreamChunk
from .api_instance import get_inference_api_instance
@@ -43,13 +43,13 @@ async def startup():
global InferenceApiInstance
config = get_config()
hydra_config = ModelInferenceHydraConfig(
**OmegaConf.to_container(config["model_inference_config"], resolve=True)
hydra_config = InferenceHydraConfig(
**OmegaConf.to_container(config["inference_config"], resolve=True)
)
model_inference_config = hydra_config.convert_to_model_inferene_config()
inference_config = hydra_config.convert_to_inference_config()
InferenceApiInstance = await get_inference_api_instance(
model_inference_config,
inference_config,
)
await InferenceApiInstance.initialize()


@@ -16,7 +16,7 @@ from agentic_system.api import * # noqa: F403
class LlamaStackEndpoints(
ModelInference,
Inference,
AgenticSystem,
RewardScoring,
SyntheticDataGeneration,

File diff suppressed because it is too large.


@@ -148,13 +148,13 @@ components:
type: string
step_details:
oneOf:
- $ref: '#/components/schemas/ModelInferenceStep'
- $ref: '#/components/schemas/InferenceStep'
- $ref: '#/components/schemas/ToolExecutionStep'
- $ref: '#/components/schemas/ShieldCallStep'
- $ref: '#/components/schemas/MemoryRetrievalStep'
step_type:
enum:
- model_inference
- inference
- tool_execution
- shield_call
- memory_retrieval
@@ -176,7 +176,7 @@ components:
type: string
step_type:
enum:
- model_inference
- inference
- tool_execution
- shield_call
- memory_retrieval
@@ -210,7 +210,7 @@ components:
type: string
step_type:
enum:
- model_inference
- inference
- tool_execution
- shield_call
- memory_retrieval
@@ -263,171 +263,23 @@ components:
- url
- mime_type
type: object
BatchChatCompletionRequest:
additionalProperties: false
properties:
available_tools:
items:
$ref: '#/components/schemas/ToolDefinition'
type: array
logprobs:
additionalProperties: false
properties:
top_k:
type: integer
type: object
messages_batch:
items:
items:
oneOf:
- $ref: '#/components/schemas/UserMessage'
- $ref: '#/components/schemas/SystemMessage'
- $ref: '#/components/schemas/ToolResponseMessage'
- $ref: '#/components/schemas/CompletionMessage'
type: array
type: array
model:
$ref: '#/components/schemas/InstructModel'
quantization_config:
oneOf:
- $ref: '#/components/schemas/Bf16QuantizationConfig'
- $ref: '#/components/schemas/Fp8QuantizationConfig'
sampling_params:
$ref: '#/components/schemas/SamplingParams'
required:
- model
- messages_batch
type: object
BatchCompletionRequest:
additionalProperties: false
properties:
content_batch:
items:
oneOf:
- type: string
- $ref: '#/components/schemas/Attachment'
- items:
oneOf:
- type: string
- $ref: '#/components/schemas/Attachment'
type: array
type: array
logprobs:
additionalProperties: false
properties:
top_k:
type: integer
type: object
model:
$ref: '#/components/schemas/PretrainedModel'
quantization_config:
oneOf:
- $ref: '#/components/schemas/Bf16QuantizationConfig'
- $ref: '#/components/schemas/Fp8QuantizationConfig'
sampling_params:
$ref: '#/components/schemas/SamplingParams'
required:
- model
- content_batch
type: object
Bf16QuantizationConfig:
additionalProperties: false
properties:
quantization_type:
type:
const: bf16
type: string
required:
- quantization_type
- type
type: object
BuiltinShield:
enum:
- llama_guard
- prompt_guard
- code_scanner_guard
- third_party_shield
- injection_shield
- jailbreak_shield
type: string
ChatCompletionRequest:
additionalProperties: false
properties:
available_tools:
items:
$ref: '#/components/schemas/ToolDefinition'
type: array
logprobs:
additionalProperties: false
properties:
top_k:
type: integer
type: object
messages:
items:
oneOf:
- $ref: '#/components/schemas/UserMessage'
- $ref: '#/components/schemas/SystemMessage'
- $ref: '#/components/schemas/ToolResponseMessage'
- $ref: '#/components/schemas/CompletionMessage'
type: array
model:
$ref: '#/components/schemas/InstructModel'
quantization_config:
oneOf:
- $ref: '#/components/schemas/Bf16QuantizationConfig'
- $ref: '#/components/schemas/Fp8QuantizationConfig'
sampling_params:
$ref: '#/components/schemas/SamplingParams'
stream:
type: boolean
required:
- model
- messages
type: object
ChatCompletionResponse:
additionalProperties: false
properties:
completion_message:
$ref: '#/components/schemas/CompletionMessage'
logprobs:
items:
$ref: '#/components/schemas/TokenLogProbs'
type: array
required:
- completion_message
type: object
ChatCompletionResponseEvent:
additionalProperties: false
properties:
delta:
oneOf:
- type: string
- $ref: '#/components/schemas/ToolCallDelta'
event_type:
$ref: '#/components/schemas/ChatCompletionResponseEventType'
logprobs:
items:
$ref: '#/components/schemas/TokenLogProbs'
type: array
stop_reason:
$ref: '#/components/schemas/StopReason'
required:
- event_type
- delta
title: Chat completion response event.
type: object
ChatCompletionResponseEventType:
enum:
- start
- complete
- progress
type: string
ChatCompletionResponseStreamChunk:
additionalProperties: false
properties:
event:
$ref: '#/components/schemas/ChatCompletionResponseEvent'
required:
- event
title: SSE-stream of these events.
type: object
CompletionMessage:
additionalProperties: false
properties:
@@ -455,65 +307,6 @@ components:
- stop_reason
- tool_calls
type: object
CompletionRequest:
additionalProperties: false
properties:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/Attachment'
- items:
oneOf:
- type: string
- $ref: '#/components/schemas/Attachment'
type: array
logprobs:
additionalProperties: false
properties:
top_k:
type: integer
type: object
model:
$ref: '#/components/schemas/PretrainedModel'
quantization_config:
oneOf:
- $ref: '#/components/schemas/Bf16QuantizationConfig'
- $ref: '#/components/schemas/Fp8QuantizationConfig'
sampling_params:
$ref: '#/components/schemas/SamplingParams'
stream:
type: boolean
required:
- model
- content
type: object
CompletionResponse:
additionalProperties: false
properties:
completion_message:
$ref: '#/components/schemas/CompletionMessage'
logprobs:
items:
$ref: '#/components/schemas/TokenLogProbs'
type: array
required:
- completion_message
type: object
CompletionResponseStreamChunk:
additionalProperties: false
properties:
delta:
type: string
logprobs:
items:
$ref: '#/components/schemas/TokenLogProbs'
type: array
stop_reason:
$ref: '#/components/schemas/StopReason'
required:
- delta
title: streamed completion response.
type: object
CreateDatasetRequest:
additionalProperties: false
properties:
@@ -737,11 +530,35 @@ components:
Fp8QuantizationConfig:
additionalProperties: false
properties:
quantization_type:
type:
const: fp8
type: string
required:
- quantization_type
- type
type: object
InferenceStep:
additionalProperties: false
properties:
completed_at:
format: date-time
type: string
model_response:
$ref: '#/components/schemas/CompletionMessage'
started_at:
format: date-time
type: string
step_id:
type: string
step_type:
const: inference
type: string
turn_id:
type: string
required:
- turn_id
- step_id
- step_type
- model_response
type: object
InstructModel:
enum:
@@ -843,30 +660,6 @@ components:
- documents
- scores
type: object
ModelInferenceStep:
additionalProperties: false
properties:
completed_at:
format: date-time
type: string
model_response:
$ref: '#/components/schemas/CompletionMessage'
started_at:
format: date-time
type: string
step_id:
type: string
step_type:
const: model_inference
type: string
turn_id:
type: string
required:
- turn_id
- step_id
- step_type
- model_response
type: object
OnViolationAction:
enum:
- 0
@@ -1408,16 +1201,6 @@ components:
- role
- content
type: object
TokenLogProbs:
additionalProperties: false
properties:
logprobs_by_token:
additionalProperties:
type: number
type: object
required:
- logprobs_by_token
type: object
ToolCall:
additionalProperties: false
properties:
@@ -1477,32 +1260,11 @@ components:
type: object
ToolCallParseStatus:
enum:
- start
- started
- in_progress
- failure
- success
type: string
ToolDefinition:
additionalProperties: false
properties:
description:
type: string
parameters:
additionalProperties:
$ref: '#/components/schemas/ToolParamDefinition'
type: object
tool_name:
oneOf:
- enum:
- brave_search
- wolfram_alpha
- photogen
- code_interpreter
type: string
- type: string
required:
- tool_name
type: object
ToolExecutionStep:
additionalProperties: false
properties:
@@ -1686,7 +1448,7 @@ components:
steps:
items:
oneOf:
- $ref: '#/components/schemas/ModelInferenceStep'
- $ref: '#/components/schemas/InferenceStep'
- $ref: '#/components/schemas/ToolExecutionStep'
- $ref: '#/components/schemas/ShieldCallStep'
- $ref: '#/components/schemas/MemoryRetrievalStep'
@@ -1729,7 +1491,7 @@ info:
description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\
\ draft and subject to change.\n Generated at 2024-07-19 11:49:56.794897"
\ draft and subject to change.\n Generated at 2024-07-21 12:19:33.327857"
title: '[DRAFT] Llama Stack Specification'
version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -1766,58 +1528,6 @@ paths:
description: OK
tags:
- AgenticSystem
/agentic_system/memory_bank/attach:
post:
parameters:
- in: query
name: agent_id
required: true
schema:
type: string
- in: query
name: session_id
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
items:
type: string
type: array
required: true
responses:
'200':
description: OK
tags:
- AgenticSystem
/agentic_system/memory_bank/detach:
post:
parameters:
- in: query
name: agent_id
required: true
schema:
type: string
- in: query
name: session_id
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
items:
type: string
type: array
required: true
responses:
'200':
description: OK
tags:
- AgenticSystem
/agentic_system/session/create:
post:
parameters: []
@@ -1969,19 +1679,6 @@ paths:
description: OK
tags:
- Evaluations
/evaluate/job/cancel:
get:
parameters:
- in: query
name: job_uuid
required: true
schema:
type: string
responses:
'200':
description: OK
tags:
- Evaluations
/evaluate/job/logs:
get:
parameters:
@@ -2082,78 +1779,6 @@ paths:
description: OK
tags:
- Evaluations
/inference/batch_chat_completion:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/BatchChatCompletionRequest'
required: true
responses:
'200':
content:
application/jsonl:
schema:
$ref: '#/components/schemas/ChatCompletionResponse'
description: OK
tags:
- ModelInference
/inference/batch_completion:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/BatchCompletionRequest'
required: true
responses:
'200':
content:
application/jsonl:
schema:
$ref: '#/components/schemas/CompletionResponse'
description: OK
tags:
- ModelInference
/inference/chat_completion:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/ChatCompletionRequest'
required: true
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
description: SSE-stream of these events.
tags:
- ModelInference
/inference/completion:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CompletionRequest'
required: true
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/CompletionResponseStreamChunk'
description: streamed completion response.
tags:
- ModelInference
/memory_bank/delete:
post:
parameters:
@ -2335,19 +1960,6 @@ paths:
description: OK
tags:
- PostTraining
/post_training/job/cancel:
get:
parameters:
- in: query
name: job_uuid
required: true
schema:
type: string
responses:
'200':
description: OK
tags:
- PostTraining
/post_training/job/logs:
get:
parameters:
@@ -2471,22 +2083,29 @@ security:
servers:
- url: http://any-hosted-llama-stack.com
tags:
- name: RewardScoring
- name: PostTraining
- name: AgenticSystem
- name: Datasets
- name: ModelInference
- name: SyntheticDataGeneration
- name: MemoryBanks
- name: PostTraining
- name: Evaluations
- name: RewardScoring
- name: SyntheticDataGeneration
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
/>
name: AgenticSystemCreateRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemInstanceConfig"
/>
name: AgenticSystemInstanceConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemToolDefinition"
/>
name: AgenticSystemToolDefinition
- description: <SchemaDefinition schemaRef="#/components/schemas/Attachment" />
name: Attachment
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
/>
name: BatchChatCompletionRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/Bf16QuantizationConfig"
/>
name: Bf16QuantizationConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinShield" />
name: BuiltinShield
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
/>
name: CompletionMessage
@@ -2495,19 +2114,28 @@ tags:
name: Fp8QuantizationConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/InstructModel" />
name: InstructModel
- description: <SchemaDefinition schemaRef="#/components/schemas/OnViolationAction"
/>
name: OnViolationAction
- description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIExecutionConfig"
/>
name: RestAPIExecutionConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIMethod" />
name: RestAPIMethod
- description: <SchemaDefinition schemaRef="#/components/schemas/SamplingParams" />
name: SamplingParams
- description: <SchemaDefinition schemaRef="#/components/schemas/SamplingStrategy"
/>
name: SamplingStrategy
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldDefinition"
/>
name: ShieldDefinition
- description: <SchemaDefinition schemaRef="#/components/schemas/StopReason" />
name: StopReason
- description: <SchemaDefinition schemaRef="#/components/schemas/SystemMessage" />
name: SystemMessage
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCall" />
name: ToolCall
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolDefinition" />
name: ToolDefinition
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolParamDefinition"
/>
name: ToolParamDefinition
@@ -2518,74 +2146,6 @@ tags:
name: URL
- description: <SchemaDefinition schemaRef="#/components/schemas/UserMessage" />
name: UserMessage
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponse"
/>
name: ChatCompletionResponse
- description: <SchemaDefinition schemaRef="#/components/schemas/TokenLogProbs" />
name: TokenLogProbs
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchCompletionRequest"
/>
name: BatchCompletionRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/PretrainedModel"
/>
name: PretrainedModel
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionResponse"
/>
name: CompletionResponse
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest"
/>
name: ChatCompletionRequest
- description: 'Chat completion response event.
<SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseEvent"
/>'
name: ChatCompletionResponseEvent
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseEventType"
/>
name: ChatCompletionResponseEventType
- description: 'SSE-stream of these events.
<SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseStreamChunk"
/>'
name: ChatCompletionResponseStreamChunk
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallDelta" />
name: ToolCallDelta
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallParseStatus"
/>
name: ToolCallParseStatus
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionRequest"
/>
name: CompletionRequest
- description: 'streamed completion response.
<SchemaDefinition schemaRef="#/components/schemas/CompletionResponseStreamChunk"
/>'
name: CompletionResponseStreamChunk
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
/>
name: AgenticSystemCreateRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemInstanceConfig"
/>
name: AgenticSystemInstanceConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemToolDefinition"
/>
name: AgenticSystemToolDefinition
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinShield" />
name: BuiltinShield
- description: <SchemaDefinition schemaRef="#/components/schemas/OnViolationAction"
/>
name: OnViolationAction
- description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIExecutionConfig"
/>
name: RestAPIExecutionConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIMethod" />
name: RestAPIMethod
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldDefinition"
/>
name: ShieldDefinition
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateResponse"
/>
name: AgenticSystemCreateResponse
@@ -2622,19 +2182,23 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemTurnResponseTurnStartPayload"
/>
name: AgenticSystemTurnResponseTurnStartPayload
- description: <SchemaDefinition schemaRef="#/components/schemas/InferenceStep" />
name: InferenceStep
- description: <SchemaDefinition schemaRef="#/components/schemas/MemoryBankDocument"
/>
name: MemoryBankDocument
- description: <SchemaDefinition schemaRef="#/components/schemas/MemoryRetrievalStep"
/>
name: MemoryRetrievalStep
- description: <SchemaDefinition schemaRef="#/components/schemas/ModelInferenceStep"
/>
name: ModelInferenceStep
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldCallStep" />
name: ShieldCallStep
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldResponse" />
name: ShieldResponse
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallDelta" />
name: ToolCallDelta
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallParseStatus"
/>
name: ToolCallParseStatus
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolExecutionStep"
/>
name: ToolExecutionStep
@@ -2785,6 +2349,9 @@ tags:
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingSFTRequest" />'
name: PostTrainingSFTRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/PretrainedModel"
/>
name: PretrainedModel
- description: <SchemaDefinition schemaRef="#/components/schemas/QLoraFinetuningConfig"
/>
name: QLoraFinetuningConfig
@@ -2795,7 +2362,6 @@ x-tagGroups:
- Datasets
- Evaluations
- MemoryBanks
- ModelInference
- PostTraining
- RewardScoring
- SyntheticDataGeneration
@@ -2816,19 +2382,9 @@ x-tagGroups:
- AgenticSystemTurnResponseTurnCompletePayload
- AgenticSystemTurnResponseTurnStartPayload
- Attachment
- BatchChatCompletionRequest
- BatchCompletionRequest
- Bf16QuantizationConfig
- BuiltinShield
- ChatCompletionRequest
- ChatCompletionResponse
- ChatCompletionResponseEvent
- ChatCompletionResponseEventType
- ChatCompletionResponseStreamChunk
- CompletionMessage
- CompletionRequest
- CompletionResponse
- CompletionResponseStreamChunk
- CreateDatasetRequest
- DPOAlignmentConfig
- DialogGenerations
@@ -2842,12 +2398,12 @@ x-tagGroups:
- EvaluationJobStatusResponse
- FinetuningAlgorithm
- Fp8QuantizationConfig
- InferenceStep
- InstructModel
- LoraFinetuningConfig
- MemoryBank
- MemoryBankDocument
- MemoryRetrievalStep
- ModelInferenceStep
- OnViolationAction
- OptimizerConfig
- PostTrainingJob
@@ -2877,11 +2433,9 @@ x-tagGroups:
- SyntheticDataGenerationRequest
- SyntheticDataGenerationResponse
- SystemMessage
- TokenLogProbs
- ToolCall
- ToolCallDelta
- ToolCallParseStatus
- ToolDefinition
- ToolExecutionStep
- ToolParamDefinition
- ToolResponse


@@ -2,4 +2,4 @@
set -x
PYTHONPATH=../../../oss-ops:../.. python3 -m toolchain.spec.generate
PYTHONPATH=/data/users/rsm/llama-models:/data/users/rsm/llama-toolchain:/data/users/rsm/llama-agentic-system:../../../oss-ops:../.. python -m toolchain.spec.generate