rename ModelInference to Inference

2025-12-03 18:00:36 +00:00 · 2024-07-21 12:19:52 -07:00 · 2024-07-21 12:19:52 -07:00 · 67f0510edd
commit 67f0510edd
parent 245461620d
18 changed files with 468 additions and 1636 deletions
--- a/create_config.sh
+++ b/create_config.sh
@@ -30,7 +30,7 @@ create_parent_dir() {
 # Function to output the YAML configuration
 output_yaml() {
    cat <<EOL > ${yaml_output_path}
-model_inference_config:
+inference_config:
  impl_type: "inline"
  inline_config:
    checkpoint_type: "pytorch"
--- a/toolchain/cli/inference/configure.py
+++ b/toolchain/cli/inference/configure.py
@@ -47,7 +47,7 @@ class InferenceConfigure(Subcommand):
        yaml_output_path
    ):
        yaml_content = textwrap.dedent(f"""
-            model_inference_config:
+            inference_config:
                impl_type: "inline"
                inline_config:
                    checkpoint_type: "pytorch"
--- a/toolchain/configs/ashwin.yaml
+++ b/toolchain/configs/ashwin.yaml
@@ -1,4 +1,4 @@
-model_inference_config:
+inference_config:
  impl_type: "inline"
  inline_config:
    checkpoint_type: "pytorch"
@@ -7,5 +7,5 @@ model_inference_config:
    model_parallel_size: 8
    max_seq_len: 2048
    max_batch_size: 1
-    quantization: 
+    quantization:
      type: "fp8"
--- a/toolchain/configs/chrisluc.yaml
+++ b/toolchain/configs/chrisluc.yaml
@@ -1,4 +1,4 @@
-model_inference_config:
+inference_config:
  impl_type: "inline"
  inline_config:
    checkpoint_type: "pytorch"
--- a/toolchain/configs/cyni.yaml
+++ b/toolchain/configs/cyni.yaml
@@ -1,4 +1,4 @@
-model_inference_config:
+inference_config:
  impl_type: "inline"
  inline_config:
    checkpoint_type: "pytorch"
--- a/toolchain/configs/default.yaml
+++ b/toolchain/configs/default.yaml
@@ -1,4 +1,4 @@
-model_inference_config:
+inference_config:
  impl_type: "inline"
  inline_config:
    checkpoint_type: "pytorch"
--- a/toolchain/configs/hjshah.yaml
+++ b/toolchain/configs/hjshah.yaml
@@ -1,4 +1,4 @@
-model_inference_config:
+inference_config:
  impl_type: "inline"
  inline_config:
    checkpoint_type: "pytorch"
--- a/toolchain/configs/long_seqlen.yaml
+++ b/toolchain/configs/long_seqlen.yaml
@@ -1,4 +1,4 @@
-model_inference_config:
+inference_config:
  impl_type: "inline"
  inline_config:
    checkpoint_type: "pytorch"
--- a/toolchain/inference/api/config.py
+++ b/toolchain/inference/api/config.py
@@ -75,7 +75,7 @@ class RemoteImplConfig(BaseModel):
    url: str = Field(..., description="The URL of the remote module")
-class ModelInferenceConfig(BaseModel):
+class InferenceConfig(BaseModel):
    impl_config: Annotated[
        Union[InlineImplConfig, RemoteImplConfig],
        Field(discriminator="impl_type"),
@@ -130,7 +130,7 @@ class RemoteImplHydraConfig:
@dataclass
-class ModelInferenceHydraConfig:
+class InferenceHydraConfig:
    impl_type: str
    inline_config: Optional[InlineImplHydraConfig] = None
    remote_config: Optional[RemoteImplHydraConfig] = None
@@ -142,18 +142,18 @@ class ModelInferenceHydraConfig:
        if self.impl_type == "remote":
            assert self.remote_config is not None
-    def convert_to_model_inferene_config(self):
+    def convert_to_inference_config(self):
        if self.impl_type == "inline":
            inline_config = InlineImplHydraConfig(**self.inline_config)
-            return ModelInferenceConfig(
+            return InferenceConfig(
                impl_config=inline_config.convert_to_inline_impl_config()
            )
        elif self.impl_type == "remote":
            remote_config = RemoteImplHydraConfig(**self.remote_config)
-            return ModelInferenceConfig(
+            return InferenceConfig(
                impl_config=remote_config.convert_to_remote_impl_config()
            )
 cs = ConfigStore.instance()
-cs.store(name="model_inference_config", node=ModelInferenceHydraConfig)
+cs.store(name="inference_config", node=InferenceHydraConfig)
--- a/toolchain/inference/api/endpoints.py
+++ b/toolchain/inference/api/endpoints.py
@@ -90,7 +90,7 @@ class BatchChatCompletionResponse(BaseModel):
    completion_message_batch: List[CompletionMessage]
-class ModelInference(Protocol):
+class Inference(Protocol):
    @webmethod(route="/inference/completion")
    async def completion(
--- a/toolchain/inference/api_instance.py
+++ b/toolchain/inference/api_instance.py
@@ -1,12 +1,12 @@
-from .api.config import ImplType, ModelInferenceConfig
+from .api.config import ImplType, InferenceConfig
-async def get_inference_api_instance(config: ModelInferenceConfig):
+async def get_inference_api_instance(config: InferenceConfig):
    if config.impl_config.impl_type == ImplType.inline.value:
-        from .inference import ModelInferenceImpl
+        from .inference import InferenceImpl
-        return ModelInferenceImpl(config.impl_config)
+        return InferenceImpl(config.impl_config)
-    from .client import ModelInferenceClient
+    from .client import InferenceClient
-    return ModelInferenceClient(config.impl_config.url)
+    return InferenceClient(config.impl_config.url)
--- a/toolchain/inference/client.py
+++ b/toolchain/inference/client.py
@@ -10,12 +10,12 @@ from .api import (
    ChatCompletionResponseStreamChunk,
    CompletionRequest,
    InstructModel,
-    ModelInference,
+    Inference,
    UserMessage,
 )
-class ModelInferenceClient(ModelInference):
+class InferenceClient(Inference):
    def __init__(self, base_url: str):
        self.base_url = base_url
@@ -48,7 +48,7 @@ class ModelInferenceClient(ModelInference):
 async def run_main(host: str, port: int):
-    client = ModelInferenceClient(f"http://{host}:{port}")
+    client = InferenceClient(f"http://{host}:{port}")
    message = UserMessage(content="hello world, help me out here")
    req = ChatCompletionRequest(
--- a/toolchain/inference/inference.py
+++ b/toolchain/inference/inference.py
@@ -18,12 +18,12 @@ from .api.endpoints import (
    ChatCompletionRequest,
    ChatCompletionResponseStreamChunk,
    CompletionRequest,
-    ModelInference,
+    Inference,
 )
 from .model_parallel import LlamaModelParallelGenerator
-class ModelInferenceImpl(ModelInference):
+class InferenceImpl(Inference):
    def __init__(self, config: InlineImplConfig) -> None:
        self.config = config
--- a/toolchain/inference/server.py
+++ b/toolchain/inference/server.py
@@ -11,7 +11,7 @@ from fastapi.responses import StreamingResponse
 from omegaconf import OmegaConf
 from toolchain.utils import get_default_config_dir, parse_config
-from .api.config import ModelInferenceHydraConfig
+from .api.config import InferenceHydraConfig
 from .api.endpoints import ChatCompletionRequest, ChatCompletionResponseStreamChunk
 from .api_instance import get_inference_api_instance
@@ -43,13 +43,13 @@ async def startup():
    global InferenceApiInstance
    config = get_config()
-    hydra_config = ModelInferenceHydraConfig(
+    hydra_config = InferenceHydraConfig(
-        **OmegaConf.to_container(config["model_inference_config"], resolve=True)
+        **OmegaConf.to_container(config["inference_config"], resolve=True)
    )
-    model_inference_config = hydra_config.convert_to_model_inferene_config()
+    inference_config = hydra_config.convert_to_inference_config()
    InferenceApiInstance = await get_inference_api_instance(
-        model_inference_config,
+        inference_config,
    )
    await InferenceApiInstance.initialize()
--- a/toolchain/spec/generate.py
+++ b/toolchain/spec/generate.py
@@ -16,7 +16,7 @@ from agentic_system.api import *  # noqa: F403
 class LlamaStackEndpoints(
-    ModelInference,
+    Inference,
    AgenticSystem,
    RewardScoring,
    SyntheticDataGeneration,
--- a/toolchain/spec/openapi.html
+++ b/toolchain/spec/openapi.html
--- a/toolchain/spec/openapi.yaml
+++ b/toolchain/spec/openapi.yaml
@@ -148,13 +148,13 @@ components:
          type: string
        step_details:
          oneOf:
-          - $ref: '#/components/schemas/ModelInferenceStep'
+          - $ref: '#/components/schemas/InferenceStep'
          - $ref: '#/components/schemas/ToolExecutionStep'
          - $ref: '#/components/schemas/ShieldCallStep'
          - $ref: '#/components/schemas/MemoryRetrievalStep'
        step_type:
          enum:
-          - model_inference
+          - inference
          - tool_execution
          - shield_call
          - memory_retrieval
@@ -176,7 +176,7 @@ components:
          type: string
        step_type:
          enum:
-          - model_inference
+          - inference
          - tool_execution
          - shield_call
          - memory_retrieval
@@ -210,7 +210,7 @@ components:
          type: string
        step_type:
          enum:
-          - model_inference
+          - inference
          - tool_execution
          - shield_call
          - memory_retrieval
@@ -263,171 +263,23 @@ components:
      - url
      - mime_type
      type: object
    BatchChatCompletionRequest:
      additionalProperties: false
      properties:
        available_tools:
          items:
            $ref: '#/components/schemas/ToolDefinition'
          type: array
        logprobs:
          additionalProperties: false
          properties:
            top_k:
              type: integer
          type: object
        messages_batch:
          items:
            items:
              oneOf:
              - $ref: '#/components/schemas/UserMessage'
              - $ref: '#/components/schemas/SystemMessage'
              - $ref: '#/components/schemas/ToolResponseMessage'
              - $ref: '#/components/schemas/CompletionMessage'
            type: array
          type: array
        model:
          $ref: '#/components/schemas/InstructModel'
        quantization_config:
          oneOf:
          - $ref: '#/components/schemas/Bf16QuantizationConfig'
          - $ref: '#/components/schemas/Fp8QuantizationConfig'
        sampling_params:
          $ref: '#/components/schemas/SamplingParams'
      required:
      - model
      - messages_batch
      type: object
    BatchCompletionRequest:
      additionalProperties: false
      properties:
        content_batch:
          items:
            oneOf:
            - type: string
            - $ref: '#/components/schemas/Attachment'
            - items:
                oneOf:
                - type: string
                - $ref: '#/components/schemas/Attachment'
              type: array
          type: array
        logprobs:
          additionalProperties: false
          properties:
            top_k:
              type: integer
          type: object
        model:
          $ref: '#/components/schemas/PretrainedModel'
        quantization_config:
          oneOf:
          - $ref: '#/components/schemas/Bf16QuantizationConfig'
          - $ref: '#/components/schemas/Fp8QuantizationConfig'
        sampling_params:
          $ref: '#/components/schemas/SamplingParams'
      required:
      - model
      - content_batch
      type: object
    Bf16QuantizationConfig:
      additionalProperties: false
      properties:
-        quantization_type:
+        type:
          const: bf16
          type: string
      required:
-      - quantization_type
+      - type
      type: object
    BuiltinShield:
      enum:
      - llama_guard
      - prompt_guard
      - code_scanner_guard
      - third_party_shield
      - injection_shield
      - jailbreak_shield
      type: string
    ChatCompletionRequest:
      additionalProperties: false
      properties:
        available_tools:
          items:
            $ref: '#/components/schemas/ToolDefinition'
          type: array
        logprobs:
          additionalProperties: false
          properties:
            top_k:
              type: integer
          type: object
        messages:
          items:
            oneOf:
            - $ref: '#/components/schemas/UserMessage'
            - $ref: '#/components/schemas/SystemMessage'
            - $ref: '#/components/schemas/ToolResponseMessage'
            - $ref: '#/components/schemas/CompletionMessage'
          type: array
        model:
          $ref: '#/components/schemas/InstructModel'
        quantization_config:
          oneOf:
          - $ref: '#/components/schemas/Bf16QuantizationConfig'
          - $ref: '#/components/schemas/Fp8QuantizationConfig'
        sampling_params:
          $ref: '#/components/schemas/SamplingParams'
        stream:
          type: boolean
      required:
      - model
      - messages
      type: object
    ChatCompletionResponse:
      additionalProperties: false
      properties:
        completion_message:
          $ref: '#/components/schemas/CompletionMessage'
        logprobs:
          items:
            $ref: '#/components/schemas/TokenLogProbs'
          type: array
      required:
      - completion_message
      type: object
    ChatCompletionResponseEvent:
      additionalProperties: false
      properties:
        delta:
          oneOf:
          - type: string
          - $ref: '#/components/schemas/ToolCallDelta'
        event_type:
          $ref: '#/components/schemas/ChatCompletionResponseEventType'
        logprobs:
          items:
            $ref: '#/components/schemas/TokenLogProbs'
          type: array
        stop_reason:
          $ref: '#/components/schemas/StopReason'
      required:
      - event_type
      - delta
      title: Chat completion response event.
      type: object
    ChatCompletionResponseEventType:
      enum:
      - start
      - complete
      - progress
      type: string
    ChatCompletionResponseStreamChunk:
      additionalProperties: false
      properties:
        event:
          $ref: '#/components/schemas/ChatCompletionResponseEvent'
      required:
      - event
      title: SSE-stream of these events.
      type: object
    CompletionMessage:
      additionalProperties: false
      properties:
@@ -455,65 +307,6 @@ components:
      - stop_reason
      - tool_calls
      type: object
    CompletionRequest:
      additionalProperties: false
      properties:
        content:
          oneOf:
          - type: string
          - $ref: '#/components/schemas/Attachment'
          - items:
              oneOf:
              - type: string
              - $ref: '#/components/schemas/Attachment'
            type: array
        logprobs:
          additionalProperties: false
          properties:
            top_k:
              type: integer
          type: object
        model:
          $ref: '#/components/schemas/PretrainedModel'
        quantization_config:
          oneOf:
          - $ref: '#/components/schemas/Bf16QuantizationConfig'
          - $ref: '#/components/schemas/Fp8QuantizationConfig'
        sampling_params:
          $ref: '#/components/schemas/SamplingParams'
        stream:
          type: boolean
      required:
      - model
      - content
      type: object
    CompletionResponse:
      additionalProperties: false
      properties:
        completion_message:
          $ref: '#/components/schemas/CompletionMessage'
        logprobs:
          items:
            $ref: '#/components/schemas/TokenLogProbs'
          type: array
      required:
      - completion_message
      type: object
    CompletionResponseStreamChunk:
      additionalProperties: false
      properties:
        delta:
          type: string
        logprobs:
          items:
            $ref: '#/components/schemas/TokenLogProbs'
          type: array
        stop_reason:
          $ref: '#/components/schemas/StopReason'
      required:
      - delta
      title: streamed completion response.
      type: object
    CreateDatasetRequest:
      additionalProperties: false
      properties:
@@ -737,11 +530,35 @@ components:
    Fp8QuantizationConfig:
      additionalProperties: false
      properties:
-        quantization_type:
+        type:
          const: fp8
          type: string
      required:
-      - quantization_type
+      - type
      type: object
    InferenceStep:
      additionalProperties: false
      properties:
        completed_at:
          format: date-time
          type: string
        model_response:
          $ref: '#/components/schemas/CompletionMessage'
        started_at:
          format: date-time
          type: string
        step_id:
          type: string
        step_type:
          const: inference
          type: string
        turn_id:
          type: string
      required:
      - turn_id
      - step_id
      - step_type
      - model_response
      type: object
    InstructModel:
      enum:
@@ -843,30 +660,6 @@ components:
      - documents
      - scores
      type: object
    ModelInferenceStep:
      additionalProperties: false
      properties:
        completed_at:
          format: date-time
          type: string
        model_response:
          $ref: '#/components/schemas/CompletionMessage'
        started_at:
          format: date-time
          type: string
        step_id:
          type: string
        step_type:
          const: model_inference
          type: string
        turn_id:
          type: string
      required:
      - turn_id
      - step_id
      - step_type
      - model_response
      type: object
    OnViolationAction:
      enum:
      - 0
@@ -1408,16 +1201,6 @@ components:
      - role
      - content
      type: object
    TokenLogProbs:
      additionalProperties: false
      properties:
        logprobs_by_token:
          additionalProperties:
            type: number
          type: object
      required:
      - logprobs_by_token
      type: object
    ToolCall:
      additionalProperties: false
      properties:
@@ -1477,32 +1260,11 @@ components:
      type: object
    ToolCallParseStatus:
      enum:
-      - start
+      - started
      - in_progress
      - failure
      - success
      type: string
    ToolDefinition:
      additionalProperties: false
      properties:
        description:
          type: string
        parameters:
          additionalProperties:
            $ref: '#/components/schemas/ToolParamDefinition'
          type: object
        tool_name:
          oneOf:
          - enum:
            - brave_search
            - wolfram_alpha
            - photogen
            - code_interpreter
            type: string
          - type: string
      required:
      - tool_name
      type: object
    ToolExecutionStep:
      additionalProperties: false
      properties:
@@ -1686,7 +1448,7 @@ components:
        steps:
          items:
            oneOf:
-            - $ref: '#/components/schemas/ModelInferenceStep'
+            - $ref: '#/components/schemas/InferenceStep'
            - $ref: '#/components/schemas/ToolExecutionStep'
            - $ref: '#/components/schemas/ShieldCallStep'
            - $ref: '#/components/schemas/MemoryRetrievalStep'
@@ -1729,7 +1491,7 @@ info:
  description: "This is the specification of the llama stack that provides\n     \
    \           a set of endpoints and their corresponding interfaces that are tailored\
    \ to\n                best leverage Llama Models. The specification is still in\
-    \ draft and subject to change.\n                Generated at 2024-07-19 11:49:56.794897"
+    \ draft and subject to change.\n                Generated at 2024-07-21 12:19:33.327857"
  title: '[DRAFT] Llama Stack Specification'
  version: 0.0.1
 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -1766,58 +1528,6 @@ paths:
          description: OK
      tags:
      - AgenticSystem
  /agentic_system/memory_bank/attach:
    post:
      parameters:
      - in: query
        name: agent_id
        required: true
        schema:
          type: string
      - in: query
        name: session_id
        required: true
        schema:
          type: string
      requestBody:
        content:
          application/json:
            schema:
              items:
                type: string
              type: array
        required: true
      responses:
        '200':
          description: OK
      tags:
      - AgenticSystem
  /agentic_system/memory_bank/detach:
    post:
      parameters:
      - in: query
        name: agent_id
        required: true
        schema:
          type: string
      - in: query
        name: session_id
        required: true
        schema:
          type: string
      requestBody:
        content:
          application/json:
            schema:
              items:
                type: string
              type: array
        required: true
      responses:
        '200':
          description: OK
      tags:
      - AgenticSystem
  /agentic_system/session/create:
    post:
      parameters: []
@@ -1969,19 +1679,6 @@ paths:
          description: OK
      tags:
      - Evaluations
  /evaluate/job/cancel:
    get:
      parameters:
      - in: query
        name: job_uuid
        required: true
        schema:
          type: string
      responses:
        '200':
          description: OK
      tags:
      - Evaluations
  /evaluate/job/logs:
    get:
      parameters:
@@ -2082,78 +1779,6 @@ paths:
          description: OK
      tags:
      - Evaluations
  /inference/batch_chat_completion:
    post:
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/BatchChatCompletionRequest'
        required: true
      responses:
        '200':
          content:
            application/jsonl:
              schema:
                $ref: '#/components/schemas/ChatCompletionResponse'
          description: OK
      tags:
      - ModelInference
  /inference/batch_completion:
    post:
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/BatchCompletionRequest'
        required: true
      responses:
        '200':
          content:
            application/jsonl:
              schema:
                $ref: '#/components/schemas/CompletionResponse'
          description: OK
      tags:
      - ModelInference
  /inference/chat_completion:
    post:
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ChatCompletionRequest'
        required: true
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
          description: SSE-stream of these events.
      tags:
      - ModelInference
  /inference/completion:
    post:
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CompletionRequest'
        required: true
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/CompletionResponseStreamChunk'
          description: streamed completion response.
      tags:
      - ModelInference
  /memory_bank/delete:
    post:
      parameters:
@@ -2335,19 +1960,6 @@ paths:
          description: OK
      tags:
      - PostTraining
  /post_training/job/cancel:
    get:
      parameters:
      - in: query
        name: job_uuid
        required: true
        schema:
          type: string
      responses:
        '200':
          description: OK
      tags:
      - PostTraining
  /post_training/job/logs:
    get:
      parameters:
@@ -2471,22 +2083,29 @@ security:
 servers:
 - url: http://any-hosted-llama-stack.com
 tags:
 - name: RewardScoring
 - name: PostTraining
 - name: AgenticSystem
 - name: Datasets
 - name: ModelInference
 - name: SyntheticDataGeneration
 - name: MemoryBanks
 - name: PostTraining
 - name: Evaluations
- name: RewardScoring
+- name: SyntheticDataGeneration
 - description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
    />
  name: AgenticSystemCreateRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemInstanceConfig"
    />
  name: AgenticSystemInstanceConfig
 - description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemToolDefinition"
    />
  name: AgenticSystemToolDefinition
 - description: <SchemaDefinition schemaRef="#/components/schemas/Attachment" />
  name: Attachment
 - description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
    />
  name: BatchChatCompletionRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/Bf16QuantizationConfig"
    />
  name: Bf16QuantizationConfig
 - description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinShield" />
  name: BuiltinShield
 - description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
    />
  name: CompletionMessage
@@ -2495,19 +2114,28 @@ tags:
  name: Fp8QuantizationConfig
 - description: <SchemaDefinition schemaRef="#/components/schemas/InstructModel" />
  name: InstructModel
 - description: <SchemaDefinition schemaRef="#/components/schemas/OnViolationAction"
    />
  name: OnViolationAction
 - description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIExecutionConfig"
    />
  name: RestAPIExecutionConfig
 - description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIMethod" />
  name: RestAPIMethod
 - description: <SchemaDefinition schemaRef="#/components/schemas/SamplingParams" />
  name: SamplingParams
 - description: <SchemaDefinition schemaRef="#/components/schemas/SamplingStrategy"
    />
  name: SamplingStrategy
 - description: <SchemaDefinition schemaRef="#/components/schemas/ShieldDefinition"
    />
  name: ShieldDefinition
 - description: <SchemaDefinition schemaRef="#/components/schemas/StopReason" />
  name: StopReason
 - description: <SchemaDefinition schemaRef="#/components/schemas/SystemMessage" />
  name: SystemMessage
 - description: <SchemaDefinition schemaRef="#/components/schemas/ToolCall" />
  name: ToolCall
 - description: <SchemaDefinition schemaRef="#/components/schemas/ToolDefinition" />
  name: ToolDefinition
 - description: <SchemaDefinition schemaRef="#/components/schemas/ToolParamDefinition"
    />
  name: ToolParamDefinition
@@ -2518,74 +2146,6 @@ tags:
  name: URL
 - description: <SchemaDefinition schemaRef="#/components/schemas/UserMessage" />
  name: UserMessage
 - description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponse"
    />
  name: ChatCompletionResponse
 - description: <SchemaDefinition schemaRef="#/components/schemas/TokenLogProbs" />
  name: TokenLogProbs
 - description: <SchemaDefinition schemaRef="#/components/schemas/BatchCompletionRequest"
    />
  name: BatchCompletionRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/PretrainedModel"
    />
  name: PretrainedModel
 - description: <SchemaDefinition schemaRef="#/components/schemas/CompletionResponse"
    />
  name: CompletionResponse
 - description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest"
    />
  name: ChatCompletionRequest
 - description: 'Chat completion response event.
    <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseEvent"
    />'
  name: ChatCompletionResponseEvent
 - description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseEventType"
    />
  name: ChatCompletionResponseEventType
 - description: 'SSE-stream of these events.
    <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseStreamChunk"
    />'
  name: ChatCompletionResponseStreamChunk
 - description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallDelta" />
  name: ToolCallDelta
 - description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallParseStatus"
    />
  name: ToolCallParseStatus
 - description: <SchemaDefinition schemaRef="#/components/schemas/CompletionRequest"
    />
  name: CompletionRequest
 - description: 'streamed completion response.
    <SchemaDefinition schemaRef="#/components/schemas/CompletionResponseStreamChunk"
    />'
  name: CompletionResponseStreamChunk
 - description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
    />
  name: AgenticSystemCreateRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemInstanceConfig"
    />
  name: AgenticSystemInstanceConfig
 - description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemToolDefinition"
    />
  name: AgenticSystemToolDefinition
 - description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinShield" />
  name: BuiltinShield
 - description: <SchemaDefinition schemaRef="#/components/schemas/OnViolationAction"
    />
  name: OnViolationAction
 - description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIExecutionConfig"
    />
  name: RestAPIExecutionConfig
 - description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIMethod" />
  name: RestAPIMethod
 - description: <SchemaDefinition schemaRef="#/components/schemas/ShieldDefinition"
    />
  name: ShieldDefinition
 - description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateResponse"
    />
  name: AgenticSystemCreateResponse
@@ -2622,19 +2182,23 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemTurnResponseTurnStartPayload"
    />
  name: AgenticSystemTurnResponseTurnStartPayload
 - description: <SchemaDefinition schemaRef="#/components/schemas/InferenceStep" />
  name: InferenceStep
 - description: <SchemaDefinition schemaRef="#/components/schemas/MemoryBankDocument"
    />
  name: MemoryBankDocument
 - description: <SchemaDefinition schemaRef="#/components/schemas/MemoryRetrievalStep"
    />
  name: MemoryRetrievalStep
 - description: <SchemaDefinition schemaRef="#/components/schemas/ModelInferenceStep"
    />
  name: ModelInferenceStep
 - description: <SchemaDefinition schemaRef="#/components/schemas/ShieldCallStep" />
  name: ShieldCallStep
 - description: <SchemaDefinition schemaRef="#/components/schemas/ShieldResponse" />
  name: ShieldResponse
 - description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallDelta" />
  name: ToolCallDelta
 - description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallParseStatus"
    />
  name: ToolCallParseStatus
 - description: <SchemaDefinition schemaRef="#/components/schemas/ToolExecutionStep"
    />
  name: ToolExecutionStep
@@ -2785,6 +2349,9 @@ tags:
    <SchemaDefinition schemaRef="#/components/schemas/PostTrainingSFTRequest" />'
  name: PostTrainingSFTRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/PretrainedModel"
    />
  name: PretrainedModel
 - description: <SchemaDefinition schemaRef="#/components/schemas/QLoraFinetuningConfig"
    />
  name: QLoraFinetuningConfig
@@ -2795,7 +2362,6 @@ x-tagGroups:
  - Datasets
  - Evaluations
  - MemoryBanks
  - ModelInference
  - PostTraining
  - RewardScoring
  - SyntheticDataGeneration
@@ -2816,19 +2382,9 @@ x-tagGroups:
  - AgenticSystemTurnResponseTurnCompletePayload
  - AgenticSystemTurnResponseTurnStartPayload
  - Attachment
  - BatchChatCompletionRequest
  - BatchCompletionRequest
  - Bf16QuantizationConfig
  - BuiltinShield
  - ChatCompletionRequest
  - ChatCompletionResponse
  - ChatCompletionResponseEvent
  - ChatCompletionResponseEventType
  - ChatCompletionResponseStreamChunk
  - CompletionMessage
  - CompletionRequest
  - CompletionResponse
  - CompletionResponseStreamChunk
  - CreateDatasetRequest
  - DPOAlignmentConfig
  - DialogGenerations
@@ -2842,12 +2398,12 @@ x-tagGroups:
  - EvaluationJobStatusResponse
  - FinetuningAlgorithm
  - Fp8QuantizationConfig
  - InferenceStep
  - InstructModel
  - LoraFinetuningConfig
  - MemoryBank
  - MemoryBankDocument
  - MemoryRetrievalStep
  - ModelInferenceStep
  - OnViolationAction
  - OptimizerConfig
  - PostTrainingJob
@@ -2877,11 +2433,9 @@ x-tagGroups:
  - SyntheticDataGenerationRequest
  - SyntheticDataGenerationResponse
  - SystemMessage
  - TokenLogProbs
  - ToolCall
  - ToolCallDelta
  - ToolCallParseStatus
  - ToolDefinition
  - ToolExecutionStep
  - ToolParamDefinition
  - ToolResponse
--- a/toolchain/spec/run_openapi_generator.sh
+++ b/toolchain/spec/run_openapi_generator.sh
@@ -2,4 +2,4 @@
 set -x
-PYTHONPATH=../../../oss-ops:../.. python3 -m toolchain.spec.generate
+PYTHONPATH=/data/users/rsm/llama-models:/data/users/rsm/llama-toolchain:/data/users/rsm/llama-agentic-system:../../../oss-ops:../.. python -m toolchain.spec.generate
`@@ -2,4 +2,4 @@`

	`set -x`	`set -x`

	`PYTHONPATH=../../../oss-ops:../.. python3 -m toolchain.spec.generate`	`PYTHONPATH=/data/users/rsm/llama-models:/data/users/rsm/llama-toolchain:/data/users/rsm/llama-agentic-system:../../../oss-ops:../.. python -m toolchain.spec.generate`