From 67f0510edd909a2394b429be1ff8d639ea178a49 Mon Sep 17 00:00:00 2001 From: rsm Date: Sun, 21 Jul 2024 12:19:52 -0700 Subject: [PATCH] rename ModelInference to Inference --- create_config.sh | 2 +- toolchain/cli/inference/configure.py | 2 +- toolchain/configs/ashwin.yaml | 4 +- toolchain/configs/chrisluc.yaml | 2 +- toolchain/configs/cyni.yaml | 2 +- toolchain/configs/default.yaml | 2 +- toolchain/configs/hjshah.yaml | 2 +- toolchain/configs/long_seqlen.yaml | 2 +- toolchain/inference/api/config.py | 12 +- toolchain/inference/api/endpoints.py | 2 +- toolchain/inference/api_instance.py | 12 +- toolchain/inference/client.py | 6 +- toolchain/inference/inference.py | 4 +- toolchain/inference/server.py | 10 +- toolchain/spec/generate.py | 2 +- toolchain/spec/openapi.html | 1444 ++++++----------------- toolchain/spec/openapi.yaml | 592 ++-------- toolchain/spec/run_openapi_generator.sh | 2 +- 18 files changed, 468 insertions(+), 1636 deletions(-) diff --git a/create_config.sh b/create_config.sh index bf3a6b28e..2b500217e 100644 --- a/create_config.sh +++ b/create_config.sh @@ -30,7 +30,7 @@ create_parent_dir() { # Function to output the YAML configuration output_yaml() { cat < ${yaml_output_path} -model_inference_config: +inference_config: impl_type: "inline" inline_config: checkpoint_type: "pytorch" diff --git a/toolchain/cli/inference/configure.py b/toolchain/cli/inference/configure.py index df97ebf04..0c0ae61fe 100644 --- a/toolchain/cli/inference/configure.py +++ b/toolchain/cli/inference/configure.py @@ -47,7 +47,7 @@ class InferenceConfigure(Subcommand): yaml_output_path ): yaml_content = textwrap.dedent(f""" - model_inference_config: + inference_config: impl_type: "inline" inline_config: checkpoint_type: "pytorch" diff --git a/toolchain/configs/ashwin.yaml b/toolchain/configs/ashwin.yaml index 8eec6d923..21ab6b880 100644 --- a/toolchain/configs/ashwin.yaml +++ b/toolchain/configs/ashwin.yaml @@ -1,4 +1,4 @@ -model_inference_config: +inference_config: impl_type: "inline" inline_config: checkpoint_type: "pytorch" @@ -7,5 +7,5 @@ model_inference_config: model_parallel_size: 8 max_seq_len: 2048 max_batch_size: 1 - quantization: + quantization: type: "fp8" diff --git a/toolchain/configs/chrisluc.yaml b/toolchain/configs/chrisluc.yaml index be51a534c..c44f9524e 100644 --- a/toolchain/configs/chrisluc.yaml +++ b/toolchain/configs/chrisluc.yaml @@ -1,4 +1,4 @@ -model_inference_config: +inference_config: impl_type: "inline" inline_config: checkpoint_type: "pytorch" diff --git a/toolchain/configs/cyni.yaml b/toolchain/configs/cyni.yaml index d0c47b397..e8edbf036 100644 --- a/toolchain/configs/cyni.yaml +++ b/toolchain/configs/cyni.yaml @@ -1,4 +1,4 @@ -model_inference_config: +inference_config: impl_type: "inline" inline_config: checkpoint_type: "pytorch" diff --git a/toolchain/configs/default.yaml b/toolchain/configs/default.yaml index 642a55f22..d13f37226 100644 --- a/toolchain/configs/default.yaml +++ b/toolchain/configs/default.yaml @@ -1,4 +1,4 @@ -model_inference_config: +inference_config: impl_type: "inline" inline_config: checkpoint_type: "pytorch" diff --git a/toolchain/configs/hjshah.yaml b/toolchain/configs/hjshah.yaml index 98e2660ea..089ab1b5a 100644 --- a/toolchain/configs/hjshah.yaml +++ b/toolchain/configs/hjshah.yaml @@ -1,4 +1,4 @@ -model_inference_config: +inference_config: impl_type: "inline" inline_config: checkpoint_type: "pytorch" diff --git a/toolchain/configs/long_seqlen.yaml b/toolchain/configs/long_seqlen.yaml index e137d0273..9eaeab1bd 100644 --- a/toolchain/configs/long_seqlen.yaml +++ b/toolchain/configs/long_seqlen.yaml @@ -1,4 +1,4 @@ -model_inference_config: +inference_config: impl_type: "inline" inline_config: checkpoint_type: "pytorch" diff --git a/toolchain/inference/api/config.py b/toolchain/inference/api/config.py index 4a6c5145f..5994e805b 100644 --- a/toolchain/inference/api/config.py +++ b/toolchain/inference/api/config.py @@ -75,7 +75,7 @@ class RemoteImplConfig(BaseModel): url: str = Field(..., description="The URL of the remote module") -class ModelInferenceConfig(BaseModel): +class InferenceConfig(BaseModel): impl_config: Annotated[ Union[InlineImplConfig, RemoteImplConfig], Field(discriminator="impl_type"), @@ -130,7 +130,7 @@ class RemoteImplHydraConfig: @dataclass -class ModelInferenceHydraConfig: +class InferenceHydraConfig: impl_type: str inline_config: Optional[InlineImplHydraConfig] = None remote_config: Optional[RemoteImplHydraConfig] = None @@ -142,18 +142,18 @@ class ModelInferenceHydraConfig: if self.impl_type == "remote": assert self.remote_config is not None - def convert_to_model_inferene_config(self): + def convert_to_inference_config(self): if self.impl_type == "inline": inline_config = InlineImplHydraConfig(**self.inline_config) - return ModelInferenceConfig( + return InferenceConfig( impl_config=inline_config.convert_to_inline_impl_config() ) elif self.impl_type == "remote": remote_config = RemoteImplHydraConfig(**self.remote_config) - return ModelInferenceConfig( + return InferenceConfig( impl_config=remote_config.convert_to_remote_impl_config() ) cs = ConfigStore.instance() -cs.store(name="model_inference_config", node=ModelInferenceHydraConfig) +cs.store(name="inference_config", node=InferenceHydraConfig) diff --git a/toolchain/inference/api/endpoints.py b/toolchain/inference/api/endpoints.py index 5b262a99c..20efbd111 100644 --- a/toolchain/inference/api/endpoints.py +++ b/toolchain/inference/api/endpoints.py @@ -90,7 +90,7 @@ class BatchChatCompletionResponse(BaseModel): completion_message_batch: List[CompletionMessage] -class ModelInference(Protocol): +class Inference(Protocol): @webmethod(route="/inference/completion") async def completion( diff --git a/toolchain/inference/api_instance.py b/toolchain/inference/api_instance.py index 6110fd257..d39d642be 100644 --- a/toolchain/inference/api_instance.py +++ b/toolchain/inference/api_instance.py @@ -1,12 +1,12 @@ -from .api.config import ImplType, ModelInferenceConfig +from .api.config import ImplType, InferenceConfig -async def get_inference_api_instance(config: ModelInferenceConfig): +async def get_inference_api_instance(config: InferenceConfig): if config.impl_config.impl_type == ImplType.inline.value: - from .inference import ModelInferenceImpl + from .inference import InferenceImpl - return ModelInferenceImpl(config.impl_config) + return InferenceImpl(config.impl_config) - from .client import ModelInferenceClient + from .client import InferenceClient - return ModelInferenceClient(config.impl_config.url) + return InferenceClient(config.impl_config.url) diff --git a/toolchain/inference/client.py b/toolchain/inference/client.py index a4d2b641f..317637efa 100644 --- a/toolchain/inference/client.py +++ b/toolchain/inference/client.py @@ -10,12 +10,12 @@ from .api import ( ChatCompletionResponseStreamChunk, CompletionRequest, InstructModel, - ModelInference, + Inference, UserMessage, ) -class ModelInferenceClient(ModelInference): +class InferenceClient(Inference): def __init__(self, base_url: str): self.base_url = base_url @@ -48,7 +48,7 @@ class ModelInferenceClient(ModelInference): async def run_main(host: str, port: int): - client = ModelInferenceClient(f"http://{host}:{port}") + client = InferenceClient(f"http://{host}:{port}") message = UserMessage(content="hello world, help me out here") req = ChatCompletionRequest( diff --git a/toolchain/inference/inference.py b/toolchain/inference/inference.py index 5ec1c897d..94228ac7b 100644 --- a/toolchain/inference/inference.py +++ b/toolchain/inference/inference.py @@ -18,12 +18,12 @@ from .api.endpoints import ( ChatCompletionRequest, ChatCompletionResponseStreamChunk, CompletionRequest, - ModelInference, + Inference, ) from .model_parallel import LlamaModelParallelGenerator -class ModelInferenceImpl(ModelInference): +class InferenceImpl(Inference): def __init__(self, config: InlineImplConfig) -> None: self.config = config diff --git a/toolchain/inference/server.py b/toolchain/inference/server.py index a2846f136..01a905b2a 100644 --- a/toolchain/inference/server.py +++ b/toolchain/inference/server.py @@ -11,7 +11,7 @@ from fastapi.responses import StreamingResponse from omegaconf import OmegaConf from toolchain.utils import get_default_config_dir, parse_config -from .api.config import ModelInferenceHydraConfig +from .api.config import InferenceHydraConfig from .api.endpoints import ChatCompletionRequest, ChatCompletionResponseStreamChunk from .api_instance import get_inference_api_instance @@ -43,13 +43,13 @@ async def startup(): global InferenceApiInstance config = get_config() - hydra_config = ModelInferenceHydraConfig( - **OmegaConf.to_container(config["model_inference_config"], resolve=True) + hydra_config = InferenceHydraConfig( + **OmegaConf.to_container(config["inference_config"], resolve=True) ) - model_inference_config = hydra_config.convert_to_model_inferene_config() + inference_config = hydra_config.convert_to_inference_config() InferenceApiInstance = await get_inference_api_instance( - model_inference_config, + inference_config, ) await InferenceApiInstance.initialize() diff --git a/toolchain/spec/generate.py b/toolchain/spec/generate.py index 5b4bd9e04..974885b2b 100644 --- a/toolchain/spec/generate.py +++ b/toolchain/spec/generate.py @@ -16,7 +16,7 @@ from agentic_system.api import * # noqa: F403 class LlamaStackEndpoints( - ModelInference, + Inference, AgenticSystem, RewardScoring, SyntheticDataGeneration, diff --git a/toolchain/spec/openapi.html b/toolchain/spec/openapi.html index b09bf6c48..81d720fcb 100644 --- a/toolchain/spec/openapi.html +++ b/toolchain/spec/openapi.html @@ -21,7 +21,7 @@ "info": { "title": "[DRAFT] Llama Stack Specification", "version": "0.0.1", - "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-07-19 11:49:56.794897" + "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-07-21 12:19:33.327857" }, "servers": [ { @@ -29,213 +29,6 @@ } ], "paths": { - "/agentic_system/memory_bank/attach": { - "post": { - "responses": { - "200": { - "description": "OK" - } - }, - "tags": [ - "AgenticSystem" - ], - "parameters": [ - { - "name": "agent_id", - "in": "query", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "session_id", - "in": "query", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "type": "array", - "items": { - "type": "string" - } - } - } - }, - "required": true - } - } - }, - "/inference/batch_chat_completion": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/jsonl": { - "schema": { - "$ref": "#/components/schemas/ChatCompletionResponse" - } - } - } - } - }, - "tags": [ - "ModelInference" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/BatchChatCompletionRequest" - } - } - }, - "required": true - } - } - }, - "/inference/batch_completion": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/jsonl": { - "schema": { - "$ref": "#/components/schemas/CompletionResponse" - } - } - } - } - }, - "tags": [ - "ModelInference" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/BatchCompletionRequest" - } - } - }, - "required": true - } - } - }, - "/evaluate/job/cancel": { - "get": { - "responses": { - "200": { - "description": "OK" - } - }, - "tags": [ - "Evaluations" - ], - "parameters": [ - { - "name": "job_uuid", - "in": "query", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, - "/post_training/job/cancel": { - "get": { - "responses": { - "200": { - "description": "OK" - } - }, - "tags": [ - "PostTraining" - ], - "parameters": [ - { - "name": "job_uuid", - "in": "query", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, - "/inference/chat_completion": { - "post": { - "responses": { - "200": { - "description": "SSE-stream of these events.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ChatCompletionResponseStreamChunk" - } - } - } - } - }, - "tags": [ - "ModelInference" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ChatCompletionRequest" - } - } - }, - "required": true - } - } - }, - "/inference/completion": { - "post": { - "responses": { - "200": { - "description": "streamed completion response.", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CompletionResponseStreamChunk" - } - } - } - } - }, - "tags": [ - "ModelInference" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CompletionRequest" - } - } - }, - "required": true - } - } - }, "/agentic_system/create": { "post": { "responses": { @@ -464,49 +257,6 @@ } } }, - "/agentic_system/memory_bank/detach": { - "post": { - "responses": { - "200": { - "description": "OK" - } - }, - "tags": [ - "AgenticSystem" - ], - "parameters": [ - { - "name": "agent_id", - "in": "query", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "session_id", - "in": "query", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "type": "array", - "items": { - "type": "string" - } - } - } - }, - "required": true - } - } - }, "/agentic_system/session/get": { "post": { "responses": { @@ -1262,6 +1012,133 @@ "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema", "components": { "schemas": { + "AgenticSystemCreateRequest": { + "type": "object", + "properties": { + "model": { + "$ref": "#/components/schemas/InstructModel" + }, + "instance_config": { + "$ref": "#/components/schemas/AgenticSystemInstanceConfig" + } + }, + "additionalProperties": false, + "required": [ + "model", + "instance_config" + ] + }, + "AgenticSystemInstanceConfig": { + "type": "object", + "properties": { + "instructions": { + "type": "string" + }, + "sampling_params": { + "$ref": "#/components/schemas/SamplingParams" + }, + "available_tools": { + "type": "array", + "items": { + "$ref": "#/components/schemas/AgenticSystemToolDefinition" + } + }, + "input_shields": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ShieldDefinition" + } + }, + "output_shields": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ShieldDefinition" + } + }, + "quantization_config": { + "oneOf": [ + { + "$ref": "#/components/schemas/Bf16QuantizationConfig" + }, + { + "$ref": "#/components/schemas/Fp8QuantizationConfig" + } + ] + }, + "debug_prefix_messages": { + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/SystemMessage" + }, + { + "$ref": "#/components/schemas/ToolResponseMessage" + }, + { + "$ref": "#/components/schemas/CompletionMessage" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "instructions" + ] + }, + "AgenticSystemToolDefinition": { + "type": "object", + "properties": { + "tool_name": { + "oneOf": [ + { + "type": "string", + "enum": [ + "brave_search", + "wolfram_alpha", + "photogen", + "code_interpreter" + ] + }, + { + "type": "string" + } + ] + }, + "description": { + "type": "string" + }, + "parameters": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ToolParamDefinition" + } + }, + "execution_config": { + "$ref": "#/components/schemas/RestAPIExecutionConfig" + }, + "input_shields": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ShieldDefinition" + } + }, + "output_shields": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ShieldDefinition" + } + } + }, + "additionalProperties": false, + "required": [ + "tool_name" + ] + }, "Attachment": { "type": "object", "properties": { @@ -1278,80 +1155,27 @@ "mime_type" ] }, - "BatchChatCompletionRequest": { - "type": "object", - "properties": { - "model": { - "$ref": "#/components/schemas/InstructModel" - }, - "messages_batch": { - "type": "array", - "items": { - "type": "array", - "items": { - "oneOf": [ - { - "$ref": "#/components/schemas/UserMessage" - }, - { - "$ref": "#/components/schemas/SystemMessage" - }, - { - "$ref": "#/components/schemas/ToolResponseMessage" - }, - { - "$ref": "#/components/schemas/CompletionMessage" - } - ] - } - } - }, - "sampling_params": { - "$ref": "#/components/schemas/SamplingParams" - }, - "available_tools": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ToolDefinition" - } - }, - "logprobs": { - "type": "object", - "properties": { - "top_k": { - "type": "integer" - } - }, - "additionalProperties": false - }, - "quantization_config": { - "oneOf": [ - { - "$ref": "#/components/schemas/Bf16QuantizationConfig" - }, - { - "$ref": "#/components/schemas/Fp8QuantizationConfig" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "model", - "messages_batch" - ] - }, "Bf16QuantizationConfig": { "type": "object", "properties": { - "quantization_type": { + "type": { "type": "string", "const": "bf16" } }, "additionalProperties": false, "required": [ - "quantization_type" + "type" + ] + }, + "BuiltinShield": { + "type": "string", + "enum": [ + "llama_guard", + "code_scanner_guard", + "third_party_shield", + "injection_shield", + "jailbreak_shield" ] }, "CompletionMessage": { @@ -1405,14 +1229,14 @@ "Fp8QuantizationConfig": { "type": "object", "properties": { - "quantization_type": { + "type": { "type": "string", "const": "fp8" } }, "additionalProperties": false, "required": [ - "quantization_type" + "type" ] }, "InstructModel": { @@ -1422,6 +1246,57 @@ "llama3_70b_chat" ] }, + "OnViolationAction": { + "type": "integer", + "enum": [ + 0, + 1, + 2 + ] + }, + "RestAPIExecutionConfig": { + "type": "object", + "properties": { + "url": { + "$ref": "#/components/schemas/URL" + }, + "method": { + "$ref": "#/components/schemas/RestAPIMethod" + }, + "params": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "headers": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "body": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "additionalProperties": false, + "required": [ + "url", + "method" + ] + }, + "RestAPIMethod": { + "type": "string", + "enum": [ + "GET", + "POST", + "PUT", + "DELETE" + ] + }, "SamplingParams": { "type": "object", "properties": { @@ -1457,6 +1332,41 @@ "top_k" ] }, + "ShieldDefinition": { + "type": "object", + "properties": { + "shield_type": { + "oneOf": [ + { + "$ref": "#/components/schemas/BuiltinShield" + }, + { + "type": "string" + } + ] + }, + "description": { + "type": "string" + }, + "parameters": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ToolParamDefinition" + } + }, + "on_violation_action": { + "$ref": "#/components/schemas/OnViolationAction" + }, + "execution_config": { + "$ref": "#/components/schemas/RestAPIExecutionConfig" + } + }, + "additionalProperties": false, + "required": [ + "shield_type", + "on_violation_action" + ] + }, "StopReason": { "type": "string", "enum": [ @@ -1598,40 +1508,6 @@ "arguments" ] }, - "ToolDefinition": { - "type": "object", - "properties": { - "tool_name": { - "oneOf": [ - { - "type": "string", - "enum": [ - "brave_search", - "wolfram_alpha", - "photogen", - "code_interpreter" - ] - }, - { - "type": "string" - } - ] - }, - "description": { - "type": "string" - }, - "parameters": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/ToolParamDefinition" - } - } - }, - "additionalProperties": false, - "required": [ - "tool_name" - ] - }, "ToolParamDefinition": { "type": "object", "properties": { @@ -1750,579 +1626,6 @@ "content" ] }, - "ChatCompletionResponse": { - "type": "object", - "properties": { - "completion_message": { - "$ref": "#/components/schemas/CompletionMessage" - }, - "logprobs": { - "type": "array", - "items": { - "$ref": "#/components/schemas/TokenLogProbs" - } - } - }, - "additionalProperties": false, - "required": [ - "completion_message" - ] - }, - "TokenLogProbs": { - "type": "object", - "properties": { - "logprobs_by_token": { - "type": "object", - "additionalProperties": { - "type": "number" - } - } - }, - "additionalProperties": false, - "required": [ - "logprobs_by_token" - ] - }, - "BatchCompletionRequest": { - "type": "object", - "properties": { - "model": { - "$ref": "#/components/schemas/PretrainedModel" - }, - "content_batch": { - "type": "array", - "items": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/Attachment" - }, - { - "type": "array", - "items": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/Attachment" - } - ] - } - } - ] - } - }, - "sampling_params": { - "$ref": "#/components/schemas/SamplingParams" - }, - "logprobs": { - "type": "object", - "properties": { - "top_k": { - "type": "integer" - } - }, - "additionalProperties": false - }, - "quantization_config": { - "oneOf": [ - { - "$ref": "#/components/schemas/Bf16QuantizationConfig" - }, - { - "$ref": "#/components/schemas/Fp8QuantizationConfig" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "model", - "content_batch" - ] - }, - "PretrainedModel": { - "type": "string", - "enum": [ - "llama3_8b", - "llama3_70b" - ] - }, - "CompletionResponse": { - "type": "object", - "properties": { - "completion_message": { - "$ref": "#/components/schemas/CompletionMessage" - }, - "logprobs": { - "type": "array", - "items": { - "$ref": "#/components/schemas/TokenLogProbs" - } - } - }, - "additionalProperties": false, - "required": [ - "completion_message" - ] - }, - "ChatCompletionRequest": { - "type": "object", - "properties": { - "model": { - "$ref": "#/components/schemas/InstructModel" - }, - "messages": { - "type": "array", - "items": { - "oneOf": [ - { - "$ref": "#/components/schemas/UserMessage" - }, - { - "$ref": "#/components/schemas/SystemMessage" - }, - { - "$ref": "#/components/schemas/ToolResponseMessage" - }, - { - "$ref": "#/components/schemas/CompletionMessage" - } - ] - } - }, - "sampling_params": { - "$ref": "#/components/schemas/SamplingParams" - }, - "available_tools": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ToolDefinition" - } - }, - "stream": { - "type": "boolean" - }, - "logprobs": { - "type": "object", - "properties": { - "top_k": { - "type": "integer" - } - }, - "additionalProperties": false - }, - "quantization_config": { - "oneOf": [ - { - "$ref": "#/components/schemas/Bf16QuantizationConfig" - }, - { - "$ref": "#/components/schemas/Fp8QuantizationConfig" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "model", - "messages" - ] - }, - "ChatCompletionResponseEvent": { - "type": "object", - "properties": { - "event_type": { - "$ref": "#/components/schemas/ChatCompletionResponseEventType" - }, - "delta": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/ToolCallDelta" - } - ] - }, - "logprobs": { - "type": "array", - "items": { - "$ref": "#/components/schemas/TokenLogProbs" - } - }, - "stop_reason": { - "$ref": "#/components/schemas/StopReason" - } - }, - "additionalProperties": false, - "required": [ - "event_type", - "delta" - ], - "title": "Chat completion response event." - }, - "ChatCompletionResponseEventType": { - "type": "string", - "enum": [ - "start", - "complete", - "progress" - ] - }, - "ChatCompletionResponseStreamChunk": { - "type": "object", - "properties": { - "event": { - "$ref": "#/components/schemas/ChatCompletionResponseEvent" - } - }, - "additionalProperties": false, - "required": [ - "event" - ], - "title": "SSE-stream of these events." - }, - "ToolCallDelta": { - "type": "object", - "properties": { - "content": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/ToolCall" - } - ] - }, - "parse_status": { - "$ref": "#/components/schemas/ToolCallParseStatus" - } - }, - "additionalProperties": false, - "required": [ - "content", - "parse_status" - ] - }, - "ToolCallParseStatus": { - "type": "string", - "enum": [ - "start", - "in_progress", - "failure", - "success" - ] - }, - "CompletionRequest": { - "type": "object", - "properties": { - "model": { - "$ref": "#/components/schemas/PretrainedModel" - }, - "content": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/Attachment" - }, - { - "type": "array", - "items": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/Attachment" - } - ] - } - } - ] - }, - "sampling_params": { - "$ref": "#/components/schemas/SamplingParams" - }, - "stream": { - "type": "boolean" - }, - "logprobs": { - "type": "object", - "properties": { - "top_k": { - "type": "integer" - } - }, - "additionalProperties": false - }, - "quantization_config": { - "oneOf": [ - { - "$ref": "#/components/schemas/Bf16QuantizationConfig" - }, - { - "$ref": "#/components/schemas/Fp8QuantizationConfig" - } - ] - } - }, - "additionalProperties": false, - "required": [ - "model", - "content" - ] - }, - "CompletionResponseStreamChunk": { - "type": "object", - "properties": { - "delta": { - "type": "string" - }, - "stop_reason": { - "$ref": "#/components/schemas/StopReason" - }, - "logprobs": { - "type": "array", - "items": { - "$ref": "#/components/schemas/TokenLogProbs" - } - } - }, - "additionalProperties": false, - "required": [ - "delta" - ], - "title": "streamed completion response." - }, - "AgenticSystemCreateRequest": { - "type": "object", - "properties": { - "model": { - "$ref": "#/components/schemas/InstructModel" - }, - "instance_config": { - "$ref": "#/components/schemas/AgenticSystemInstanceConfig" - } - }, - "additionalProperties": false, - "required": [ - "model", - "instance_config" - ] - }, - "AgenticSystemInstanceConfig": { - "type": "object", - "properties": { - "instructions": { - "type": "string" - }, - "sampling_params": { - "$ref": "#/components/schemas/SamplingParams" - }, - "available_tools": { - "type": "array", - "items": { - "$ref": "#/components/schemas/AgenticSystemToolDefinition" - } - }, - "input_shields": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ShieldDefinition" - } - }, - "output_shields": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ShieldDefinition" - } - }, - "quantization_config": { - "oneOf": [ - { - "$ref": "#/components/schemas/Bf16QuantizationConfig" - }, - { - "$ref": "#/components/schemas/Fp8QuantizationConfig" - } - ] - }, - "debug_prefix_messages": { - "type": "array", - "items": { - "oneOf": [ - { - "$ref": "#/components/schemas/UserMessage" - }, - { - "$ref": "#/components/schemas/SystemMessage" - }, - { - "$ref": "#/components/schemas/ToolResponseMessage" - }, - { - "$ref": "#/components/schemas/CompletionMessage" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "instructions" - ] - }, - "AgenticSystemToolDefinition": { - "type": "object", - "properties": { - "tool_name": { - "oneOf": [ - { - "type": "string", - "enum": [ - "brave_search", - "wolfram_alpha", - "photogen", - "code_interpreter" - ] - }, - { - "type": "string" - } - ] - }, - "description": { - "type": "string" - }, - "parameters": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/ToolParamDefinition" - } - }, - "execution_config": { - "$ref": "#/components/schemas/RestAPIExecutionConfig" - }, - "input_shields": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ShieldDefinition" - } - }, - "output_shields": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ShieldDefinition" - } - } - }, - "additionalProperties": false, - "required": [ - "tool_name" - ] - }, - "BuiltinShield": { - "type": "string", - "enum": [ - "llama_guard", - "prompt_guard", - "code_scanner_guard", - "third_party_shield" - ] - }, - "OnViolationAction": { - "type": "integer", - "enum": [ - 0, - 1, - 2 - ] - }, - "RestAPIExecutionConfig": { - "type": "object", - "properties": { - "url": { - "$ref": "#/components/schemas/URL" - }, - "method": { - "$ref": "#/components/schemas/RestAPIMethod" - }, - "params": { - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "headers": { - "type": "object", - "additionalProperties": { - "type": "string" - } - }, - "body": { - "type": "object", - "additionalProperties": { - "type": "string" - } - } - }, - "additionalProperties": false, - "required": [ - "url", - "method" - ] - }, - "RestAPIMethod": { - "type": "string", - "enum": [ - "GET", - "POST", - "PUT", - "DELETE" - ] - }, - "ShieldDefinition": { - "type": "object", - "properties": { - "shield_type": { - "oneOf": [ - { - "$ref": "#/components/schemas/BuiltinShield" - }, - { - "type": "string" - } - ] - }, - "description": { - "type": "string" - }, - "parameters": { - "type": "object", - "additionalProperties": { - "$ref": "#/components/schemas/ToolParamDefinition" - } - }, - "on_violation_action": { - "$ref": "#/components/schemas/OnViolationAction" - }, - "execution_config": { - "$ref": "#/components/schemas/RestAPIExecutionConfig" - } - }, - "additionalProperties": false, - "required": [ - "shield_type", - "on_violation_action" - ] - }, "AgenticSystemCreateResponse": { "type": "object", "properties": { @@ -2438,7 +1741,7 @@ "step_type": { "type": "string", "enum": [ - "model_inference", + "inference", "tool_execution", "shield_call", "memory_retrieval" @@ -2447,7 +1750,7 @@ "step_details": { "oneOf": [ { - "$ref": "#/components/schemas/ModelInferenceStep" + "$ref": "#/components/schemas/InferenceStep" }, { "$ref": "#/components/schemas/ToolExecutionStep" @@ -2478,7 +1781,7 @@ "step_type": { "type": "string", "enum": [ - "model_inference", + "inference", "tool_execution", "shield_call", "memory_retrieval" @@ -2514,7 +1817,7 @@ "step_type": { "type": "string", "enum": [ - "model_inference", + "inference", "tool_execution", "shield_call", "memory_retrieval" @@ -2602,6 +1905,39 @@ "turn_id" ] }, + "InferenceStep": { + "type": "object", + "properties": { + "turn_id": { + "type": "string" + }, + "step_id": { + "type": "string" + }, + "started_at": { + "type": "string", + "format": "date-time" + }, + "completed_at": { + "type": "string", + "format": "date-time" + }, + "step_type": { + "type": "string", + "const": "inference" + }, + "model_response": { + "$ref": "#/components/schemas/CompletionMessage" + } + }, + "additionalProperties": false, + "required": [ + "turn_id", + "step_id", + "step_type", + "model_response" + ] + }, "MemoryBankDocument": { "type": "object", "properties": { @@ -2699,39 +2035,6 @@ "scores" ] }, - "ModelInferenceStep": { - "type": "object", - "properties": { - "turn_id": { - "type": "string" - }, - "step_id": { - "type": "string" - }, - "started_at": { - "type": "string", - "format": "date-time" - }, - "completed_at": { - "type": "string", - "format": "date-time" - }, - "step_type": { - "type": "string", - "const": "model_inference" - }, - "model_response": { - "$ref": "#/components/schemas/CompletionMessage" - } - }, - "additionalProperties": false, - "required": [ - "turn_id", - "step_id", - "step_type", - "model_response" - ] - }, "ShieldCallStep": { "type": "object", "properties": { @@ -2794,6 +2097,38 @@ "is_violation" ] }, + "ToolCallDelta": { + "type": "object", + "properties": { + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/ToolCall" + } + ] + }, + "parse_status": { + "$ref": "#/components/schemas/ToolCallParseStatus" + } + }, + "additionalProperties": false, + "required": [ + "content", + "parse_status" + ] + }, + "ToolCallParseStatus": { + "type": "string", + "enum": [ + "started", + "in_progress", + "failure", + "success" + ] + }, "ToolExecutionStep": { "type": "object", "properties": { @@ -2917,7 +2252,7 @@ "items": { "oneOf": [ { - "$ref": "#/components/schemas/ModelInferenceStep" + "$ref": "#/components/schemas/InferenceStep" }, { "$ref": "#/components/schemas/ToolExecutionStep" @@ -4019,6 +3354,13 @@ ], "title": "Request to finetune a model." }, + "PretrainedModel": { + "type": "string", + "enum": [ + "llama3_8b", + "llama3_70b" + ] + }, "QLoraFinetuningConfig": { "type": "object", "properties": { @@ -4059,42 +3401,51 @@ } ], "tags": [ + { + "name": "RewardScoring" + }, + { + "name": "PostTraining" + }, { "name": "AgenticSystem" }, { "name": "Datasets" }, - { - "name": "ModelInference" - }, - { - "name": "SyntheticDataGeneration" - }, { "name": "MemoryBanks" }, - { - "name": "PostTraining" - }, { "name": "Evaluations" }, { - "name": "RewardScoring" + "name": "SyntheticDataGeneration" + }, + { + "name": "AgenticSystemCreateRequest", + "description": "" + }, + { + "name": "AgenticSystemInstanceConfig", + "description": "" + }, + { + "name": "AgenticSystemToolDefinition", + "description": "" }, { "name": "Attachment", "description": "" }, - { - "name": "BatchChatCompletionRequest", - "description": "" - }, { "name": "Bf16QuantizationConfig", "description": "" }, + { + "name": "BuiltinShield", + "description": "" + }, { "name": "CompletionMessage", "description": "" @@ -4107,6 +3458,18 @@ "name": "InstructModel", "description": "" }, + { + "name": "OnViolationAction", + "description": "" + }, + { + "name": "RestAPIExecutionConfig", + "description": "" + }, + { + "name": "RestAPIMethod", + "description": "" + }, { "name": "SamplingParams", "description": "" @@ -4115,6 +3478,10 @@ "name": "SamplingStrategy", "description": "" }, + { + "name": "ShieldDefinition", + "description": "" + }, { "name": "StopReason", "description": "" @@ -4127,10 +3494,6 @@ "name": "ToolCall", "description": "" }, - { - "name": "ToolDefinition", - "description": "" - }, { "name": "ToolParamDefinition", "description": "" @@ -4147,90 +3510,6 @@ "name": "UserMessage", "description": "" }, - { - "name": "ChatCompletionResponse", - "description": "" - }, - { - "name": "TokenLogProbs", - "description": "" - }, - { - "name": "BatchCompletionRequest", - "description": "" - }, - { - "name": "PretrainedModel", - "description": "" - }, - { - "name": "CompletionResponse", - "description": "" - }, - { - "name": "ChatCompletionRequest", - "description": "" - }, - { - "name": "ChatCompletionResponseEvent", - "description": "Chat completion response event.\n\n" - }, - { - "name": "ChatCompletionResponseEventType", - "description": "" - }, - { - "name": "ChatCompletionResponseStreamChunk", - "description": "SSE-stream of these events.\n\n" - }, - { - "name": "ToolCallDelta", - "description": "" - }, - { - "name": "ToolCallParseStatus", - "description": "" - }, - { - "name": "CompletionRequest", - "description": "" - }, - { - "name": "CompletionResponseStreamChunk", - "description": "streamed completion response.\n\n" - }, - { - "name": "AgenticSystemCreateRequest", - "description": "" - }, - { - "name": "AgenticSystemInstanceConfig", - "description": "" - }, - { - "name": "AgenticSystemToolDefinition", - "description": "" - }, - { - "name": "BuiltinShield", - "description": "" - }, - { - "name": "OnViolationAction", - "description": "" - }, - { - "name": "RestAPIExecutionConfig", - "description": "" - }, - { - "name": "RestAPIMethod", - "description": "" - }, - { - "name": "ShieldDefinition", - "description": "" - }, { "name": "AgenticSystemCreateResponse", "description": "" @@ -4275,6 +3554,10 @@ "name": "AgenticSystemTurnResponseTurnStartPayload", "description": "" }, + { + "name": "InferenceStep", + "description": "" + }, { "name": "MemoryBankDocument", "description": "" @@ -4283,10 +3566,6 @@ "name": "MemoryRetrievalStep", "description": "" }, - { - "name": "ModelInferenceStep", - "description": "" - }, { "name": "ShieldCallStep", "description": "" @@ -4295,6 +3574,14 @@ "name": "ShieldResponse", "description": "" }, + { + "name": "ToolCallDelta", + "description": "" + }, + { + "name": "ToolCallParseStatus", + "description": "" + }, { "name": "ToolExecutionStep", "description": "" @@ -4443,6 +3730,10 @@ "name": "PostTrainingSFTRequest", "description": "Request to finetune a model.\n\n" }, + { + "name": "PretrainedModel", + "description": "" + }, { "name": "QLoraFinetuningConfig", "description": "" @@ -4456,7 +3747,6 @@ "Datasets", "Evaluations", "MemoryBanks", - "ModelInference", "PostTraining", "RewardScoring", "SyntheticDataGeneration" @@ -4480,19 +3770,9 @@ "AgenticSystemTurnResponseTurnCompletePayload", "AgenticSystemTurnResponseTurnStartPayload", "Attachment", - "BatchChatCompletionRequest", - "BatchCompletionRequest", "Bf16QuantizationConfig", "BuiltinShield", - "ChatCompletionRequest", - "ChatCompletionResponse", - "ChatCompletionResponseEvent", - "ChatCompletionResponseEventType", - "ChatCompletionResponseStreamChunk", "CompletionMessage", - "CompletionRequest", - "CompletionResponse", - "CompletionResponseStreamChunk", "CreateDatasetRequest", "DPOAlignmentConfig", "DialogGenerations", @@ -4506,12 +3786,12 @@ "EvaluationJobStatusResponse", "FinetuningAlgorithm", "Fp8QuantizationConfig", + "InferenceStep", "InstructModel", "LoraFinetuningConfig", "MemoryBank", "MemoryBankDocument", "MemoryRetrievalStep", - "ModelInferenceStep", "OnViolationAction", "OptimizerConfig", "PostTrainingJob", @@ -4541,11 +3821,9 @@ "SyntheticDataGenerationRequest", "SyntheticDataGenerationResponse", "SystemMessage", - "TokenLogProbs", "ToolCall", "ToolCallDelta", "ToolCallParseStatus", - "ToolDefinition", "ToolExecutionStep", "ToolParamDefinition", "ToolResponse", diff --git a/toolchain/spec/openapi.yaml b/toolchain/spec/openapi.yaml index 06f735cc5..6c73f6175 100644 --- a/toolchain/spec/openapi.yaml +++ b/toolchain/spec/openapi.yaml @@ -148,13 +148,13 @@ components: type: string step_details: oneOf: - - $ref: '#/components/schemas/ModelInferenceStep' + - $ref: '#/components/schemas/InferenceStep' - $ref: '#/components/schemas/ToolExecutionStep' - $ref: '#/components/schemas/ShieldCallStep' - $ref: '#/components/schemas/MemoryRetrievalStep' step_type: enum: - - model_inference + - inference - tool_execution - shield_call - memory_retrieval @@ -176,7 +176,7 @@ components: type: string step_type: enum: - - model_inference + - inference - tool_execution - shield_call - memory_retrieval @@ -210,7 +210,7 @@ components: type: string step_type: enum: - - model_inference + - inference - tool_execution - shield_call - memory_retrieval @@ -263,171 +263,23 @@ components: - url - mime_type type: object - BatchChatCompletionRequest: - additionalProperties: false - properties: - available_tools: - items: - $ref: '#/components/schemas/ToolDefinition' - type: array - logprobs: - additionalProperties: false - properties: - top_k: - type: integer - type: object - messages_batch: - items: - items: - oneOf: - - $ref: '#/components/schemas/UserMessage' - - $ref: '#/components/schemas/SystemMessage' - - $ref: '#/components/schemas/ToolResponseMessage' - - $ref: '#/components/schemas/CompletionMessage' - type: array - type: array - model: - $ref: '#/components/schemas/InstructModel' - quantization_config: - oneOf: - - $ref: '#/components/schemas/Bf16QuantizationConfig' - - $ref: '#/components/schemas/Fp8QuantizationConfig' - sampling_params: - $ref: '#/components/schemas/SamplingParams' - required: - - model - - messages_batch - type: object - BatchCompletionRequest: - additionalProperties: false - properties: - content_batch: - items: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - - items: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - type: array - type: array - logprobs: - additionalProperties: false - properties: - top_k: - type: integer - type: object - model: - $ref: '#/components/schemas/PretrainedModel' - quantization_config: - oneOf: - - $ref: '#/components/schemas/Bf16QuantizationConfig' - - $ref: '#/components/schemas/Fp8QuantizationConfig' - sampling_params: - $ref: '#/components/schemas/SamplingParams' - required: - - model - - content_batch - type: object Bf16QuantizationConfig: additionalProperties: false properties: - quantization_type: + type: const: bf16 type: string required: - - quantization_type + - type type: object BuiltinShield: enum: - llama_guard - - prompt_guard - code_scanner_guard - third_party_shield + - injection_shield + - jailbreak_shield type: string - ChatCompletionRequest: - additionalProperties: false - properties: - available_tools: - items: - $ref: '#/components/schemas/ToolDefinition' - type: array - logprobs: - additionalProperties: false - properties: - top_k: - type: integer - type: object - messages: - items: - oneOf: - - $ref: '#/components/schemas/UserMessage' - - $ref: '#/components/schemas/SystemMessage' - - $ref: '#/components/schemas/ToolResponseMessage' - - $ref: '#/components/schemas/CompletionMessage' - type: array - model: - $ref: '#/components/schemas/InstructModel' - quantization_config: - oneOf: - - $ref: '#/components/schemas/Bf16QuantizationConfig' - - $ref: '#/components/schemas/Fp8QuantizationConfig' - sampling_params: - $ref: '#/components/schemas/SamplingParams' - stream: - type: boolean - required: - - model - - messages - type: object - ChatCompletionResponse: - additionalProperties: false - properties: - completion_message: - $ref: '#/components/schemas/CompletionMessage' - logprobs: - items: - $ref: '#/components/schemas/TokenLogProbs' - type: array - required: - - completion_message - type: object - ChatCompletionResponseEvent: - additionalProperties: false - properties: - delta: - oneOf: - - type: string - - $ref: '#/components/schemas/ToolCallDelta' - event_type: - $ref: '#/components/schemas/ChatCompletionResponseEventType' - logprobs: - items: - $ref: '#/components/schemas/TokenLogProbs' - type: array - stop_reason: - $ref: '#/components/schemas/StopReason' - required: - - event_type - - delta - title: Chat completion response event. - type: object - ChatCompletionResponseEventType: - enum: - - start - - complete - - progress - type: string - ChatCompletionResponseStreamChunk: - additionalProperties: false - properties: - event: - $ref: '#/components/schemas/ChatCompletionResponseEvent' - required: - - event - title: SSE-stream of these events. - type: object CompletionMessage: additionalProperties: false properties: @@ -455,65 +307,6 @@ components: - stop_reason - tool_calls type: object - CompletionRequest: - additionalProperties: false - properties: - content: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - - items: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - type: array - logprobs: - additionalProperties: false - properties: - top_k: - type: integer - type: object - model: - $ref: '#/components/schemas/PretrainedModel' - quantization_config: - oneOf: - - $ref: '#/components/schemas/Bf16QuantizationConfig' - - $ref: '#/components/schemas/Fp8QuantizationConfig' - sampling_params: - $ref: '#/components/schemas/SamplingParams' - stream: - type: boolean - required: - - model - - content - type: object - CompletionResponse: - additionalProperties: false - properties: - completion_message: - $ref: '#/components/schemas/CompletionMessage' - logprobs: - items: - $ref: '#/components/schemas/TokenLogProbs' - type: array - required: - - completion_message - type: object - CompletionResponseStreamChunk: - additionalProperties: false - properties: - delta: - type: string - logprobs: - items: - $ref: '#/components/schemas/TokenLogProbs' - type: array - stop_reason: - $ref: '#/components/schemas/StopReason' - required: - - delta - title: streamed completion response. - type: object CreateDatasetRequest: additionalProperties: false properties: @@ -737,11 +530,35 @@ components: Fp8QuantizationConfig: additionalProperties: false properties: - quantization_type: + type: const: fp8 type: string required: - - quantization_type + - type + type: object + InferenceStep: + additionalProperties: false + properties: + completed_at: + format: date-time + type: string + model_response: + $ref: '#/components/schemas/CompletionMessage' + started_at: + format: date-time + type: string + step_id: + type: string + step_type: + const: inference + type: string + turn_id: + type: string + required: + - turn_id + - step_id + - step_type + - model_response type: object InstructModel: enum: @@ -843,30 +660,6 @@ components: - documents - scores type: object - ModelInferenceStep: - additionalProperties: false - properties: - completed_at: - format: date-time - type: string - model_response: - $ref: '#/components/schemas/CompletionMessage' - started_at: - format: date-time - type: string - step_id: - type: string - step_type: - const: model_inference - type: string - turn_id: - type: string - required: - - turn_id - - step_id - - step_type - - model_response - type: object OnViolationAction: enum: - 0 @@ -1408,16 +1201,6 @@ components: - role - content type: object - TokenLogProbs: - additionalProperties: false - properties: - logprobs_by_token: - additionalProperties: - type: number - type: object - required: - - logprobs_by_token - type: object ToolCall: additionalProperties: false properties: @@ -1477,32 +1260,11 @@ components: type: object ToolCallParseStatus: enum: - - start + - started - in_progress - failure - success type: string - ToolDefinition: - additionalProperties: false - properties: - description: - type: string - parameters: - additionalProperties: - $ref: '#/components/schemas/ToolParamDefinition' - type: object - tool_name: - oneOf: - - enum: - - brave_search - - wolfram_alpha - - photogen - - code_interpreter - type: string - - type: string - required: - - tool_name - type: object ToolExecutionStep: additionalProperties: false properties: @@ -1686,7 +1448,7 @@ components: steps: items: oneOf: - - $ref: '#/components/schemas/ModelInferenceStep' + - $ref: '#/components/schemas/InferenceStep' - $ref: '#/components/schemas/ToolExecutionStep' - $ref: '#/components/schemas/ShieldCallStep' - $ref: '#/components/schemas/MemoryRetrievalStep' @@ -1729,7 +1491,7 @@ info: description: "This is the specification of the llama stack that provides\n \ \ a set of endpoints and their corresponding interfaces that are tailored\ \ to\n best leverage Llama Models. The specification is still in\ - \ draft and subject to change.\n Generated at 2024-07-19 11:49:56.794897" + \ draft and subject to change.\n Generated at 2024-07-21 12:19:33.327857" title: '[DRAFT] Llama Stack Specification' version: 0.0.1 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema @@ -1766,58 +1528,6 @@ paths: description: OK tags: - AgenticSystem - /agentic_system/memory_bank/attach: - post: - parameters: - - in: query - name: agent_id - required: true - schema: - type: string - - in: query - name: session_id - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - items: - type: string - type: array - required: true - responses: - '200': - description: OK - tags: - - AgenticSystem - /agentic_system/memory_bank/detach: - post: - parameters: - - in: query - name: agent_id - required: true - schema: - type: string - - in: query - name: session_id - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - items: - type: string - type: array - required: true - responses: - '200': - description: OK - tags: - - AgenticSystem /agentic_system/session/create: post: parameters: [] @@ -1969,19 +1679,6 @@ paths: description: OK tags: - Evaluations - /evaluate/job/cancel: - get: - parameters: - - in: query - name: job_uuid - required: true - schema: - type: string - responses: - '200': - description: OK - tags: - - Evaluations /evaluate/job/logs: get: parameters: @@ -2082,78 +1779,6 @@ paths: description: OK tags: - Evaluations - /inference/batch_chat_completion: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/BatchChatCompletionRequest' - required: true - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/ChatCompletionResponse' - description: OK - tags: - - ModelInference - /inference/batch_completion: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/BatchCompletionRequest' - required: true - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/CompletionResponse' - description: OK - tags: - - ModelInference - /inference/chat_completion: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/ChatCompletionRequest' - required: true - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/ChatCompletionResponseStreamChunk' - description: SSE-stream of these events. - tags: - - ModelInference - /inference/completion: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CompletionRequest' - required: true - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/CompletionResponseStreamChunk' - description: streamed completion response. - tags: - - ModelInference /memory_bank/delete: post: parameters: @@ -2335,19 +1960,6 @@ paths: description: OK tags: - PostTraining - /post_training/job/cancel: - get: - parameters: - - in: query - name: job_uuid - required: true - schema: - type: string - responses: - '200': - description: OK - tags: - - PostTraining /post_training/job/logs: get: parameters: @@ -2471,22 +2083,29 @@ security: servers: - url: http://any-hosted-llama-stack.com tags: +- name: RewardScoring +- name: PostTraining - name: AgenticSystem - name: Datasets -- name: ModelInference -- name: SyntheticDataGeneration - name: MemoryBanks -- name: PostTraining - name: Evaluations -- name: RewardScoring +- name: SyntheticDataGeneration +- description: + name: AgenticSystemCreateRequest +- description: + name: AgenticSystemInstanceConfig +- description: + name: AgenticSystemToolDefinition - description: name: Attachment -- description: - name: BatchChatCompletionRequest - description: name: Bf16QuantizationConfig +- description: + name: BuiltinShield - description: name: CompletionMessage @@ -2495,19 +2114,28 @@ tags: name: Fp8QuantizationConfig - description: name: InstructModel +- description: + name: OnViolationAction +- description: + name: RestAPIExecutionConfig +- description: + name: RestAPIMethod - description: name: SamplingParams - description: name: SamplingStrategy +- description: + name: ShieldDefinition - description: name: StopReason - description: name: SystemMessage - description: name: ToolCall -- description: - name: ToolDefinition - description: name: ToolParamDefinition @@ -2518,74 +2146,6 @@ tags: name: URL - description: name: UserMessage -- description: - name: ChatCompletionResponse -- description: - name: TokenLogProbs -- description: - name: BatchCompletionRequest -- description: - name: PretrainedModel -- description: - name: CompletionResponse -- description: - name: ChatCompletionRequest -- description: 'Chat completion response event. - - - ' - name: ChatCompletionResponseEvent -- description: - name: ChatCompletionResponseEventType -- description: 'SSE-stream of these events. - - - ' - name: ChatCompletionResponseStreamChunk -- description: - name: ToolCallDelta -- description: - name: ToolCallParseStatus -- description: - name: CompletionRequest -- description: 'streamed completion response. - - - ' - name: CompletionResponseStreamChunk -- description: - name: AgenticSystemCreateRequest -- description: - name: AgenticSystemInstanceConfig -- description: - name: AgenticSystemToolDefinition -- description: - name: BuiltinShield -- description: - name: OnViolationAction -- description: - name: RestAPIExecutionConfig -- description: - name: RestAPIMethod -- description: - name: ShieldDefinition - description: name: AgenticSystemCreateResponse @@ -2622,19 +2182,23 @@ tags: - description: name: AgenticSystemTurnResponseTurnStartPayload +- description: + name: InferenceStep - description: name: MemoryBankDocument - description: name: MemoryRetrievalStep -- description: - name: ModelInferenceStep - description: name: ShieldCallStep - description: name: ShieldResponse +- description: + name: ToolCallDelta +- description: + name: ToolCallParseStatus - description: name: ToolExecutionStep @@ -2785,6 +2349,9 @@ tags: ' name: PostTrainingSFTRequest +- description: + name: PretrainedModel - description: name: QLoraFinetuningConfig @@ -2795,7 +2362,6 @@ x-tagGroups: - Datasets - Evaluations - MemoryBanks - - ModelInference - PostTraining - RewardScoring - SyntheticDataGeneration @@ -2816,19 +2382,9 @@ x-tagGroups: - AgenticSystemTurnResponseTurnCompletePayload - AgenticSystemTurnResponseTurnStartPayload - Attachment - - BatchChatCompletionRequest - - BatchCompletionRequest - Bf16QuantizationConfig - BuiltinShield - - ChatCompletionRequest - - ChatCompletionResponse - - ChatCompletionResponseEvent - - ChatCompletionResponseEventType - - ChatCompletionResponseStreamChunk - CompletionMessage - - CompletionRequest - - CompletionResponse - - CompletionResponseStreamChunk - CreateDatasetRequest - DPOAlignmentConfig - DialogGenerations @@ -2842,12 +2398,12 @@ x-tagGroups: - EvaluationJobStatusResponse - FinetuningAlgorithm - Fp8QuantizationConfig + - InferenceStep - InstructModel - LoraFinetuningConfig - MemoryBank - MemoryBankDocument - MemoryRetrievalStep - - ModelInferenceStep - OnViolationAction - OptimizerConfig - PostTrainingJob @@ -2877,11 +2433,9 @@ x-tagGroups: - SyntheticDataGenerationRequest - SyntheticDataGenerationResponse - SystemMessage - - TokenLogProbs - ToolCall - ToolCallDelta - ToolCallParseStatus - - ToolDefinition - ToolExecutionStep - ToolParamDefinition - ToolResponse diff --git a/toolchain/spec/run_openapi_generator.sh b/toolchain/spec/run_openapi_generator.sh index 5e74f4a8e..bb0171fa3 100644 --- a/toolchain/spec/run_openapi_generator.sh +++ b/toolchain/spec/run_openapi_generator.sh @@ -2,4 +2,4 @@ set -x -PYTHONPATH=../../../oss-ops:../.. python3 -m toolchain.spec.generate +PYTHONPATH=/data/users/rsm/llama-models:/data/users/rsm/llama-toolchain:/data/users/rsm/llama-agentic-system:../../../oss-ops:../.. python -m toolchain.spec.generate