rename ModelInference to Inference

This commit is contained in:
rsm 2024-07-21 12:19:52 -07:00
parent 245461620d
commit 67f0510edd
18 changed files with 468 additions and 1636 deletions

View file

@ -30,7 +30,7 @@ create_parent_dir() {
# Function to output the YAML configuration # Function to output the YAML configuration
output_yaml() { output_yaml() {
cat <<EOL > ${yaml_output_path} cat <<EOL > ${yaml_output_path}
model_inference_config: inference_config:
impl_type: "inline" impl_type: "inline"
inline_config: inline_config:
checkpoint_type: "pytorch" checkpoint_type: "pytorch"

View file

@ -47,7 +47,7 @@ class InferenceConfigure(Subcommand):
yaml_output_path yaml_output_path
): ):
yaml_content = textwrap.dedent(f""" yaml_content = textwrap.dedent(f"""
model_inference_config: inference_config:
impl_type: "inline" impl_type: "inline"
inline_config: inline_config:
checkpoint_type: "pytorch" checkpoint_type: "pytorch"

View file

@ -1,4 +1,4 @@
model_inference_config: inference_config:
impl_type: "inline" impl_type: "inline"
inline_config: inline_config:
checkpoint_type: "pytorch" checkpoint_type: "pytorch"
@ -7,5 +7,5 @@ model_inference_config:
model_parallel_size: 8 model_parallel_size: 8
max_seq_len: 2048 max_seq_len: 2048
max_batch_size: 1 max_batch_size: 1
quantization: quantization:
type: "fp8" type: "fp8"

View file

@ -1,4 +1,4 @@
model_inference_config: inference_config:
impl_type: "inline" impl_type: "inline"
inline_config: inline_config:
checkpoint_type: "pytorch" checkpoint_type: "pytorch"

View file

@ -1,4 +1,4 @@
model_inference_config: inference_config:
impl_type: "inline" impl_type: "inline"
inline_config: inline_config:
checkpoint_type: "pytorch" checkpoint_type: "pytorch"

View file

@ -1,4 +1,4 @@
model_inference_config: inference_config:
impl_type: "inline" impl_type: "inline"
inline_config: inline_config:
checkpoint_type: "pytorch" checkpoint_type: "pytorch"

View file

@ -1,4 +1,4 @@
model_inference_config: inference_config:
impl_type: "inline" impl_type: "inline"
inline_config: inline_config:
checkpoint_type: "pytorch" checkpoint_type: "pytorch"

View file

@ -1,4 +1,4 @@
model_inference_config: inference_config:
impl_type: "inline" impl_type: "inline"
inline_config: inline_config:
checkpoint_type: "pytorch" checkpoint_type: "pytorch"

View file

@ -75,7 +75,7 @@ class RemoteImplConfig(BaseModel):
url: str = Field(..., description="The URL of the remote module") url: str = Field(..., description="The URL of the remote module")
class ModelInferenceConfig(BaseModel): class InferenceConfig(BaseModel):
impl_config: Annotated[ impl_config: Annotated[
Union[InlineImplConfig, RemoteImplConfig], Union[InlineImplConfig, RemoteImplConfig],
Field(discriminator="impl_type"), Field(discriminator="impl_type"),
@ -130,7 +130,7 @@ class RemoteImplHydraConfig:
@dataclass @dataclass
class ModelInferenceHydraConfig: class InferenceHydraConfig:
impl_type: str impl_type: str
inline_config: Optional[InlineImplHydraConfig] = None inline_config: Optional[InlineImplHydraConfig] = None
remote_config: Optional[RemoteImplHydraConfig] = None remote_config: Optional[RemoteImplHydraConfig] = None
@ -142,18 +142,18 @@ class ModelInferenceHydraConfig:
if self.impl_type == "remote": if self.impl_type == "remote":
assert self.remote_config is not None assert self.remote_config is not None
def convert_to_model_inferene_config(self): def convert_to_inference_config(self):
if self.impl_type == "inline": if self.impl_type == "inline":
inline_config = InlineImplHydraConfig(**self.inline_config) inline_config = InlineImplHydraConfig(**self.inline_config)
return ModelInferenceConfig( return InferenceConfig(
impl_config=inline_config.convert_to_inline_impl_config() impl_config=inline_config.convert_to_inline_impl_config()
) )
elif self.impl_type == "remote": elif self.impl_type == "remote":
remote_config = RemoteImplHydraConfig(**self.remote_config) remote_config = RemoteImplHydraConfig(**self.remote_config)
return ModelInferenceConfig( return InferenceConfig(
impl_config=remote_config.convert_to_remote_impl_config() impl_config=remote_config.convert_to_remote_impl_config()
) )
cs = ConfigStore.instance() cs = ConfigStore.instance()
cs.store(name="model_inference_config", node=ModelInferenceHydraConfig) cs.store(name="inference_config", node=InferenceHydraConfig)

View file

@ -90,7 +90,7 @@ class BatchChatCompletionResponse(BaseModel):
completion_message_batch: List[CompletionMessage] completion_message_batch: List[CompletionMessage]
class ModelInference(Protocol): class Inference(Protocol):
@webmethod(route="/inference/completion") @webmethod(route="/inference/completion")
async def completion( async def completion(

View file

@ -1,12 +1,12 @@
from .api.config import ImplType, ModelInferenceConfig from .api.config import ImplType, InferenceConfig
async def get_inference_api_instance(config: ModelInferenceConfig): async def get_inference_api_instance(config: InferenceConfig):
if config.impl_config.impl_type == ImplType.inline.value: if config.impl_config.impl_type == ImplType.inline.value:
from .inference import ModelInferenceImpl from .inference import InferenceImpl
return ModelInferenceImpl(config.impl_config) return InferenceImpl(config.impl_config)
from .client import ModelInferenceClient from .client import InferenceClient
return ModelInferenceClient(config.impl_config.url) return InferenceClient(config.impl_config.url)

View file

@ -10,12 +10,12 @@ from .api import (
ChatCompletionResponseStreamChunk, ChatCompletionResponseStreamChunk,
CompletionRequest, CompletionRequest,
InstructModel, InstructModel,
ModelInference, Inference,
UserMessage, UserMessage,
) )
class ModelInferenceClient(ModelInference): class InferenceClient(Inference):
def __init__(self, base_url: str): def __init__(self, base_url: str):
self.base_url = base_url self.base_url = base_url
@ -48,7 +48,7 @@ class ModelInferenceClient(ModelInference):
async def run_main(host: str, port: int): async def run_main(host: str, port: int):
client = ModelInferenceClient(f"http://{host}:{port}") client = InferenceClient(f"http://{host}:{port}")
message = UserMessage(content="hello world, help me out here") message = UserMessage(content="hello world, help me out here")
req = ChatCompletionRequest( req = ChatCompletionRequest(

View file

@ -18,12 +18,12 @@ from .api.endpoints import (
ChatCompletionRequest, ChatCompletionRequest,
ChatCompletionResponseStreamChunk, ChatCompletionResponseStreamChunk,
CompletionRequest, CompletionRequest,
ModelInference, Inference,
) )
from .model_parallel import LlamaModelParallelGenerator from .model_parallel import LlamaModelParallelGenerator
class ModelInferenceImpl(ModelInference): class InferenceImpl(Inference):
def __init__(self, config: InlineImplConfig) -> None: def __init__(self, config: InlineImplConfig) -> None:
self.config = config self.config = config

View file

@ -11,7 +11,7 @@ from fastapi.responses import StreamingResponse
from omegaconf import OmegaConf from omegaconf import OmegaConf
from toolchain.utils import get_default_config_dir, parse_config from toolchain.utils import get_default_config_dir, parse_config
from .api.config import ModelInferenceHydraConfig from .api.config import InferenceHydraConfig
from .api.endpoints import ChatCompletionRequest, ChatCompletionResponseStreamChunk from .api.endpoints import ChatCompletionRequest, ChatCompletionResponseStreamChunk
from .api_instance import get_inference_api_instance from .api_instance import get_inference_api_instance
@ -43,13 +43,13 @@ async def startup():
global InferenceApiInstance global InferenceApiInstance
config = get_config() config = get_config()
hydra_config = ModelInferenceHydraConfig( hydra_config = InferenceHydraConfig(
**OmegaConf.to_container(config["model_inference_config"], resolve=True) **OmegaConf.to_container(config["inference_config"], resolve=True)
) )
model_inference_config = hydra_config.convert_to_model_inferene_config() inference_config = hydra_config.convert_to_inference_config()
InferenceApiInstance = await get_inference_api_instance( InferenceApiInstance = await get_inference_api_instance(
model_inference_config, inference_config,
) )
await InferenceApiInstance.initialize() await InferenceApiInstance.initialize()

View file

@ -16,7 +16,7 @@ from agentic_system.api import * # noqa: F403
class LlamaStackEndpoints( class LlamaStackEndpoints(
ModelInference, Inference,
AgenticSystem, AgenticSystem,
RewardScoring, RewardScoring,
SyntheticDataGeneration, SyntheticDataGeneration,

File diff suppressed because it is too large Load diff

View file

@ -148,13 +148,13 @@ components:
type: string type: string
step_details: step_details:
oneOf: oneOf:
- $ref: '#/components/schemas/ModelInferenceStep' - $ref: '#/components/schemas/InferenceStep'
- $ref: '#/components/schemas/ToolExecutionStep' - $ref: '#/components/schemas/ToolExecutionStep'
- $ref: '#/components/schemas/ShieldCallStep' - $ref: '#/components/schemas/ShieldCallStep'
- $ref: '#/components/schemas/MemoryRetrievalStep' - $ref: '#/components/schemas/MemoryRetrievalStep'
step_type: step_type:
enum: enum:
- model_inference - inference
- tool_execution - tool_execution
- shield_call - shield_call
- memory_retrieval - memory_retrieval
@ -176,7 +176,7 @@ components:
type: string type: string
step_type: step_type:
enum: enum:
- model_inference - inference
- tool_execution - tool_execution
- shield_call - shield_call
- memory_retrieval - memory_retrieval
@ -210,7 +210,7 @@ components:
type: string type: string
step_type: step_type:
enum: enum:
- model_inference - inference
- tool_execution - tool_execution
- shield_call - shield_call
- memory_retrieval - memory_retrieval
@ -263,171 +263,23 @@ components:
- url - url
- mime_type - mime_type
type: object type: object
BatchChatCompletionRequest:
additionalProperties: false
properties:
available_tools:
items:
$ref: '#/components/schemas/ToolDefinition'
type: array
logprobs:
additionalProperties: false
properties:
top_k:
type: integer
type: object
messages_batch:
items:
items:
oneOf:
- $ref: '#/components/schemas/UserMessage'
- $ref: '#/components/schemas/SystemMessage'
- $ref: '#/components/schemas/ToolResponseMessage'
- $ref: '#/components/schemas/CompletionMessage'
type: array
type: array
model:
$ref: '#/components/schemas/InstructModel'
quantization_config:
oneOf:
- $ref: '#/components/schemas/Bf16QuantizationConfig'
- $ref: '#/components/schemas/Fp8QuantizationConfig'
sampling_params:
$ref: '#/components/schemas/SamplingParams'
required:
- model
- messages_batch
type: object
BatchCompletionRequest:
additionalProperties: false
properties:
content_batch:
items:
oneOf:
- type: string
- $ref: '#/components/schemas/Attachment'
- items:
oneOf:
- type: string
- $ref: '#/components/schemas/Attachment'
type: array
type: array
logprobs:
additionalProperties: false
properties:
top_k:
type: integer
type: object
model:
$ref: '#/components/schemas/PretrainedModel'
quantization_config:
oneOf:
- $ref: '#/components/schemas/Bf16QuantizationConfig'
- $ref: '#/components/schemas/Fp8QuantizationConfig'
sampling_params:
$ref: '#/components/schemas/SamplingParams'
required:
- model
- content_batch
type: object
Bf16QuantizationConfig: Bf16QuantizationConfig:
additionalProperties: false additionalProperties: false
properties: properties:
quantization_type: type:
const: bf16 const: bf16
type: string type: string
required: required:
- quantization_type - type
type: object type: object
BuiltinShield: BuiltinShield:
enum: enum:
- llama_guard - llama_guard
- prompt_guard
- code_scanner_guard - code_scanner_guard
- third_party_shield - third_party_shield
- injection_shield
- jailbreak_shield
type: string type: string
ChatCompletionRequest:
additionalProperties: false
properties:
available_tools:
items:
$ref: '#/components/schemas/ToolDefinition'
type: array
logprobs:
additionalProperties: false
properties:
top_k:
type: integer
type: object
messages:
items:
oneOf:
- $ref: '#/components/schemas/UserMessage'
- $ref: '#/components/schemas/SystemMessage'
- $ref: '#/components/schemas/ToolResponseMessage'
- $ref: '#/components/schemas/CompletionMessage'
type: array
model:
$ref: '#/components/schemas/InstructModel'
quantization_config:
oneOf:
- $ref: '#/components/schemas/Bf16QuantizationConfig'
- $ref: '#/components/schemas/Fp8QuantizationConfig'
sampling_params:
$ref: '#/components/schemas/SamplingParams'
stream:
type: boolean
required:
- model
- messages
type: object
ChatCompletionResponse:
additionalProperties: false
properties:
completion_message:
$ref: '#/components/schemas/CompletionMessage'
logprobs:
items:
$ref: '#/components/schemas/TokenLogProbs'
type: array
required:
- completion_message
type: object
ChatCompletionResponseEvent:
additionalProperties: false
properties:
delta:
oneOf:
- type: string
- $ref: '#/components/schemas/ToolCallDelta'
event_type:
$ref: '#/components/schemas/ChatCompletionResponseEventType'
logprobs:
items:
$ref: '#/components/schemas/TokenLogProbs'
type: array
stop_reason:
$ref: '#/components/schemas/StopReason'
required:
- event_type
- delta
title: Chat completion response event.
type: object
ChatCompletionResponseEventType:
enum:
- start
- complete
- progress
type: string
ChatCompletionResponseStreamChunk:
additionalProperties: false
properties:
event:
$ref: '#/components/schemas/ChatCompletionResponseEvent'
required:
- event
title: SSE-stream of these events.
type: object
CompletionMessage: CompletionMessage:
additionalProperties: false additionalProperties: false
properties: properties:
@ -455,65 +307,6 @@ components:
- stop_reason - stop_reason
- tool_calls - tool_calls
type: object type: object
CompletionRequest:
additionalProperties: false
properties:
content:
oneOf:
- type: string
- $ref: '#/components/schemas/Attachment'
- items:
oneOf:
- type: string
- $ref: '#/components/schemas/Attachment'
type: array
logprobs:
additionalProperties: false
properties:
top_k:
type: integer
type: object
model:
$ref: '#/components/schemas/PretrainedModel'
quantization_config:
oneOf:
- $ref: '#/components/schemas/Bf16QuantizationConfig'
- $ref: '#/components/schemas/Fp8QuantizationConfig'
sampling_params:
$ref: '#/components/schemas/SamplingParams'
stream:
type: boolean
required:
- model
- content
type: object
CompletionResponse:
additionalProperties: false
properties:
completion_message:
$ref: '#/components/schemas/CompletionMessage'
logprobs:
items:
$ref: '#/components/schemas/TokenLogProbs'
type: array
required:
- completion_message
type: object
CompletionResponseStreamChunk:
additionalProperties: false
properties:
delta:
type: string
logprobs:
items:
$ref: '#/components/schemas/TokenLogProbs'
type: array
stop_reason:
$ref: '#/components/schemas/StopReason'
required:
- delta
title: streamed completion response.
type: object
CreateDatasetRequest: CreateDatasetRequest:
additionalProperties: false additionalProperties: false
properties: properties:
@ -737,11 +530,35 @@ components:
Fp8QuantizationConfig: Fp8QuantizationConfig:
additionalProperties: false additionalProperties: false
properties: properties:
quantization_type: type:
const: fp8 const: fp8
type: string type: string
required: required:
- quantization_type - type
type: object
InferenceStep:
additionalProperties: false
properties:
completed_at:
format: date-time
type: string
model_response:
$ref: '#/components/schemas/CompletionMessage'
started_at:
format: date-time
type: string
step_id:
type: string
step_type:
const: inference
type: string
turn_id:
type: string
required:
- turn_id
- step_id
- step_type
- model_response
type: object type: object
InstructModel: InstructModel:
enum: enum:
@ -843,30 +660,6 @@ components:
- documents - documents
- scores - scores
type: object type: object
ModelInferenceStep:
additionalProperties: false
properties:
completed_at:
format: date-time
type: string
model_response:
$ref: '#/components/schemas/CompletionMessage'
started_at:
format: date-time
type: string
step_id:
type: string
step_type:
const: model_inference
type: string
turn_id:
type: string
required:
- turn_id
- step_id
- step_type
- model_response
type: object
OnViolationAction: OnViolationAction:
enum: enum:
- 0 - 0
@ -1408,16 +1201,6 @@ components:
- role - role
- content - content
type: object type: object
TokenLogProbs:
additionalProperties: false
properties:
logprobs_by_token:
additionalProperties:
type: number
type: object
required:
- logprobs_by_token
type: object
ToolCall: ToolCall:
additionalProperties: false additionalProperties: false
properties: properties:
@ -1477,32 +1260,11 @@ components:
type: object type: object
ToolCallParseStatus: ToolCallParseStatus:
enum: enum:
- start - started
- in_progress - in_progress
- failure - failure
- success - success
type: string type: string
ToolDefinition:
additionalProperties: false
properties:
description:
type: string
parameters:
additionalProperties:
$ref: '#/components/schemas/ToolParamDefinition'
type: object
tool_name:
oneOf:
- enum:
- brave_search
- wolfram_alpha
- photogen
- code_interpreter
type: string
- type: string
required:
- tool_name
type: object
ToolExecutionStep: ToolExecutionStep:
additionalProperties: false additionalProperties: false
properties: properties:
@ -1686,7 +1448,7 @@ components:
steps: steps:
items: items:
oneOf: oneOf:
- $ref: '#/components/schemas/ModelInferenceStep' - $ref: '#/components/schemas/InferenceStep'
- $ref: '#/components/schemas/ToolExecutionStep' - $ref: '#/components/schemas/ToolExecutionStep'
- $ref: '#/components/schemas/ShieldCallStep' - $ref: '#/components/schemas/ShieldCallStep'
- $ref: '#/components/schemas/MemoryRetrievalStep' - $ref: '#/components/schemas/MemoryRetrievalStep'
@ -1729,7 +1491,7 @@ info:
description: "This is the specification of the llama stack that provides\n \ description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\ \ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\ \ to\n best leverage Llama Models. The specification is still in\
\ draft and subject to change.\n Generated at 2024-07-19 11:49:56.794897" \ draft and subject to change.\n Generated at 2024-07-21 12:19:33.327857"
title: '[DRAFT] Llama Stack Specification' title: '[DRAFT] Llama Stack Specification'
version: 0.0.1 version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@ -1766,58 +1528,6 @@ paths:
description: OK description: OK
tags: tags:
- AgenticSystem - AgenticSystem
/agentic_system/memory_bank/attach:
post:
parameters:
- in: query
name: agent_id
required: true
schema:
type: string
- in: query
name: session_id
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
items:
type: string
type: array
required: true
responses:
'200':
description: OK
tags:
- AgenticSystem
/agentic_system/memory_bank/detach:
post:
parameters:
- in: query
name: agent_id
required: true
schema:
type: string
- in: query
name: session_id
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
items:
type: string
type: array
required: true
responses:
'200':
description: OK
tags:
- AgenticSystem
/agentic_system/session/create: /agentic_system/session/create:
post: post:
parameters: [] parameters: []
@ -1969,19 +1679,6 @@ paths:
description: OK description: OK
tags: tags:
- Evaluations - Evaluations
/evaluate/job/cancel:
get:
parameters:
- in: query
name: job_uuid
required: true
schema:
type: string
responses:
'200':
description: OK
tags:
- Evaluations
/evaluate/job/logs: /evaluate/job/logs:
get: get:
parameters: parameters:
@ -2082,78 +1779,6 @@ paths:
description: OK description: OK
tags: tags:
- Evaluations - Evaluations
/inference/batch_chat_completion:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/BatchChatCompletionRequest'
required: true
responses:
'200':
content:
application/jsonl:
schema:
$ref: '#/components/schemas/ChatCompletionResponse'
description: OK
tags:
- ModelInference
/inference/batch_completion:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/BatchCompletionRequest'
required: true
responses:
'200':
content:
application/jsonl:
schema:
$ref: '#/components/schemas/CompletionResponse'
description: OK
tags:
- ModelInference
/inference/chat_completion:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/ChatCompletionRequest'
required: true
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
description: SSE-stream of these events.
tags:
- ModelInference
/inference/completion:
post:
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CompletionRequest'
required: true
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/CompletionResponseStreamChunk'
description: streamed completion response.
tags:
- ModelInference
/memory_bank/delete: /memory_bank/delete:
post: post:
parameters: parameters:
@ -2335,19 +1960,6 @@ paths:
description: OK description: OK
tags: tags:
- PostTraining - PostTraining
/post_training/job/cancel:
get:
parameters:
- in: query
name: job_uuid
required: true
schema:
type: string
responses:
'200':
description: OK
tags:
- PostTraining
/post_training/job/logs: /post_training/job/logs:
get: get:
parameters: parameters:
@ -2471,22 +2083,29 @@ security:
servers: servers:
- url: http://any-hosted-llama-stack.com - url: http://any-hosted-llama-stack.com
tags: tags:
- name: RewardScoring
- name: PostTraining
- name: AgenticSystem - name: AgenticSystem
- name: Datasets - name: Datasets
- name: ModelInference
- name: SyntheticDataGeneration
- name: MemoryBanks - name: MemoryBanks
- name: PostTraining
- name: Evaluations - name: Evaluations
- name: RewardScoring - name: SyntheticDataGeneration
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
/>
name: AgenticSystemCreateRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemInstanceConfig"
/>
name: AgenticSystemInstanceConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemToolDefinition"
/>
name: AgenticSystemToolDefinition
- description: <SchemaDefinition schemaRef="#/components/schemas/Attachment" /> - description: <SchemaDefinition schemaRef="#/components/schemas/Attachment" />
name: Attachment name: Attachment
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
/>
name: BatchChatCompletionRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/Bf16QuantizationConfig" - description: <SchemaDefinition schemaRef="#/components/schemas/Bf16QuantizationConfig"
/> />
name: Bf16QuantizationConfig name: Bf16QuantizationConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinShield" />
name: BuiltinShield
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage" - description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
/> />
name: CompletionMessage name: CompletionMessage
@ -2495,19 +2114,28 @@ tags:
name: Fp8QuantizationConfig name: Fp8QuantizationConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/InstructModel" /> - description: <SchemaDefinition schemaRef="#/components/schemas/InstructModel" />
name: InstructModel name: InstructModel
- description: <SchemaDefinition schemaRef="#/components/schemas/OnViolationAction"
/>
name: OnViolationAction
- description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIExecutionConfig"
/>
name: RestAPIExecutionConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIMethod" />
name: RestAPIMethod
- description: <SchemaDefinition schemaRef="#/components/schemas/SamplingParams" /> - description: <SchemaDefinition schemaRef="#/components/schemas/SamplingParams" />
name: SamplingParams name: SamplingParams
- description: <SchemaDefinition schemaRef="#/components/schemas/SamplingStrategy" - description: <SchemaDefinition schemaRef="#/components/schemas/SamplingStrategy"
/> />
name: SamplingStrategy name: SamplingStrategy
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldDefinition"
/>
name: ShieldDefinition
- description: <SchemaDefinition schemaRef="#/components/schemas/StopReason" /> - description: <SchemaDefinition schemaRef="#/components/schemas/StopReason" />
name: StopReason name: StopReason
- description: <SchemaDefinition schemaRef="#/components/schemas/SystemMessage" /> - description: <SchemaDefinition schemaRef="#/components/schemas/SystemMessage" />
name: SystemMessage name: SystemMessage
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCall" /> - description: <SchemaDefinition schemaRef="#/components/schemas/ToolCall" />
name: ToolCall name: ToolCall
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolDefinition" />
name: ToolDefinition
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolParamDefinition" - description: <SchemaDefinition schemaRef="#/components/schemas/ToolParamDefinition"
/> />
name: ToolParamDefinition name: ToolParamDefinition
@ -2518,74 +2146,6 @@ tags:
name: URL name: URL
- description: <SchemaDefinition schemaRef="#/components/schemas/UserMessage" /> - description: <SchemaDefinition schemaRef="#/components/schemas/UserMessage" />
name: UserMessage name: UserMessage
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponse"
/>
name: ChatCompletionResponse
- description: <SchemaDefinition schemaRef="#/components/schemas/TokenLogProbs" />
name: TokenLogProbs
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchCompletionRequest"
/>
name: BatchCompletionRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/PretrainedModel"
/>
name: PretrainedModel
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionResponse"
/>
name: CompletionResponse
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest"
/>
name: ChatCompletionRequest
- description: 'Chat completion response event.
<SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseEvent"
/>'
name: ChatCompletionResponseEvent
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseEventType"
/>
name: ChatCompletionResponseEventType
- description: 'SSE-stream of these events.
<SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseStreamChunk"
/>'
name: ChatCompletionResponseStreamChunk
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallDelta" />
name: ToolCallDelta
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallParseStatus"
/>
name: ToolCallParseStatus
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionRequest"
/>
name: CompletionRequest
- description: 'streamed completion response.
<SchemaDefinition schemaRef="#/components/schemas/CompletionResponseStreamChunk"
/>'
name: CompletionResponseStreamChunk
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
/>
name: AgenticSystemCreateRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemInstanceConfig"
/>
name: AgenticSystemInstanceConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemToolDefinition"
/>
name: AgenticSystemToolDefinition
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinShield" />
name: BuiltinShield
- description: <SchemaDefinition schemaRef="#/components/schemas/OnViolationAction"
/>
name: OnViolationAction
- description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIExecutionConfig"
/>
name: RestAPIExecutionConfig
- description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIMethod" />
name: RestAPIMethod
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldDefinition"
/>
name: ShieldDefinition
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateResponse" - description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateResponse"
/> />
name: AgenticSystemCreateResponse name: AgenticSystemCreateResponse
@ -2622,19 +2182,23 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemTurnResponseTurnStartPayload" - description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemTurnResponseTurnStartPayload"
/> />
name: AgenticSystemTurnResponseTurnStartPayload name: AgenticSystemTurnResponseTurnStartPayload
- description: <SchemaDefinition schemaRef="#/components/schemas/InferenceStep" />
name: InferenceStep
- description: <SchemaDefinition schemaRef="#/components/schemas/MemoryBankDocument" - description: <SchemaDefinition schemaRef="#/components/schemas/MemoryBankDocument"
/> />
name: MemoryBankDocument name: MemoryBankDocument
- description: <SchemaDefinition schemaRef="#/components/schemas/MemoryRetrievalStep" - description: <SchemaDefinition schemaRef="#/components/schemas/MemoryRetrievalStep"
/> />
name: MemoryRetrievalStep name: MemoryRetrievalStep
- description: <SchemaDefinition schemaRef="#/components/schemas/ModelInferenceStep"
/>
name: ModelInferenceStep
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldCallStep" /> - description: <SchemaDefinition schemaRef="#/components/schemas/ShieldCallStep" />
name: ShieldCallStep name: ShieldCallStep
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldResponse" /> - description: <SchemaDefinition schemaRef="#/components/schemas/ShieldResponse" />
name: ShieldResponse name: ShieldResponse
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallDelta" />
name: ToolCallDelta
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallParseStatus"
/>
name: ToolCallParseStatus
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolExecutionStep" - description: <SchemaDefinition schemaRef="#/components/schemas/ToolExecutionStep"
/> />
name: ToolExecutionStep name: ToolExecutionStep
@ -2785,6 +2349,9 @@ tags:
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingSFTRequest" />' <SchemaDefinition schemaRef="#/components/schemas/PostTrainingSFTRequest" />'
name: PostTrainingSFTRequest name: PostTrainingSFTRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/PretrainedModel"
/>
name: PretrainedModel
- description: <SchemaDefinition schemaRef="#/components/schemas/QLoraFinetuningConfig" - description: <SchemaDefinition schemaRef="#/components/schemas/QLoraFinetuningConfig"
/> />
name: QLoraFinetuningConfig name: QLoraFinetuningConfig
@ -2795,7 +2362,6 @@ x-tagGroups:
- Datasets - Datasets
- Evaluations - Evaluations
- MemoryBanks - MemoryBanks
- ModelInference
- PostTraining - PostTraining
- RewardScoring - RewardScoring
- SyntheticDataGeneration - SyntheticDataGeneration
@ -2816,19 +2382,9 @@ x-tagGroups:
- AgenticSystemTurnResponseTurnCompletePayload - AgenticSystemTurnResponseTurnCompletePayload
- AgenticSystemTurnResponseTurnStartPayload - AgenticSystemTurnResponseTurnStartPayload
- Attachment - Attachment
- BatchChatCompletionRequest
- BatchCompletionRequest
- Bf16QuantizationConfig - Bf16QuantizationConfig
- BuiltinShield - BuiltinShield
- ChatCompletionRequest
- ChatCompletionResponse
- ChatCompletionResponseEvent
- ChatCompletionResponseEventType
- ChatCompletionResponseStreamChunk
- CompletionMessage - CompletionMessage
- CompletionRequest
- CompletionResponse
- CompletionResponseStreamChunk
- CreateDatasetRequest - CreateDatasetRequest
- DPOAlignmentConfig - DPOAlignmentConfig
- DialogGenerations - DialogGenerations
@ -2842,12 +2398,12 @@ x-tagGroups:
- EvaluationJobStatusResponse - EvaluationJobStatusResponse
- FinetuningAlgorithm - FinetuningAlgorithm
- Fp8QuantizationConfig - Fp8QuantizationConfig
- InferenceStep
- InstructModel - InstructModel
- LoraFinetuningConfig - LoraFinetuningConfig
- MemoryBank - MemoryBank
- MemoryBankDocument - MemoryBankDocument
- MemoryRetrievalStep - MemoryRetrievalStep
- ModelInferenceStep
- OnViolationAction - OnViolationAction
- OptimizerConfig - OptimizerConfig
- PostTrainingJob - PostTrainingJob
@ -2877,11 +2433,9 @@ x-tagGroups:
- SyntheticDataGenerationRequest - SyntheticDataGenerationRequest
- SyntheticDataGenerationResponse - SyntheticDataGenerationResponse
- SystemMessage - SystemMessage
- TokenLogProbs
- ToolCall - ToolCall
- ToolCallDelta - ToolCallDelta
- ToolCallParseStatus - ToolCallParseStatus
- ToolDefinition
- ToolExecutionStep - ToolExecutionStep
- ToolParamDefinition - ToolParamDefinition
- ToolResponse - ToolResponse

View file

@ -2,4 +2,4 @@
set -x set -x
PYTHONPATH=../../../oss-ops:../.. python3 -m toolchain.spec.generate PYTHONPATH=/data/users/rsm/llama-models:/data/users/rsm/llama-toolchain:/data/users/rsm/llama-agentic-system:../../../oss-ops:../.. python -m toolchain.spec.generate