mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-29 07:14:20 +00:00
rename ModelInference to Inference
This commit is contained in:
parent
245461620d
commit
67f0510edd
18 changed files with 468 additions and 1636 deletions
|
@ -30,7 +30,7 @@ create_parent_dir() {
|
||||||
# Function to output the YAML configuration
|
# Function to output the YAML configuration
|
||||||
output_yaml() {
|
output_yaml() {
|
||||||
cat <<EOL > ${yaml_output_path}
|
cat <<EOL > ${yaml_output_path}
|
||||||
model_inference_config:
|
inference_config:
|
||||||
impl_type: "inline"
|
impl_type: "inline"
|
||||||
inline_config:
|
inline_config:
|
||||||
checkpoint_type: "pytorch"
|
checkpoint_type: "pytorch"
|
||||||
|
|
|
@ -47,7 +47,7 @@ class InferenceConfigure(Subcommand):
|
||||||
yaml_output_path
|
yaml_output_path
|
||||||
):
|
):
|
||||||
yaml_content = textwrap.dedent(f"""
|
yaml_content = textwrap.dedent(f"""
|
||||||
model_inference_config:
|
inference_config:
|
||||||
impl_type: "inline"
|
impl_type: "inline"
|
||||||
inline_config:
|
inline_config:
|
||||||
checkpoint_type: "pytorch"
|
checkpoint_type: "pytorch"
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
model_inference_config:
|
inference_config:
|
||||||
impl_type: "inline"
|
impl_type: "inline"
|
||||||
inline_config:
|
inline_config:
|
||||||
checkpoint_type: "pytorch"
|
checkpoint_type: "pytorch"
|
||||||
|
@ -7,5 +7,5 @@ model_inference_config:
|
||||||
model_parallel_size: 8
|
model_parallel_size: 8
|
||||||
max_seq_len: 2048
|
max_seq_len: 2048
|
||||||
max_batch_size: 1
|
max_batch_size: 1
|
||||||
quantization:
|
quantization:
|
||||||
type: "fp8"
|
type: "fp8"
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
model_inference_config:
|
inference_config:
|
||||||
impl_type: "inline"
|
impl_type: "inline"
|
||||||
inline_config:
|
inline_config:
|
||||||
checkpoint_type: "pytorch"
|
checkpoint_type: "pytorch"
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
model_inference_config:
|
inference_config:
|
||||||
impl_type: "inline"
|
impl_type: "inline"
|
||||||
inline_config:
|
inline_config:
|
||||||
checkpoint_type: "pytorch"
|
checkpoint_type: "pytorch"
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
model_inference_config:
|
inference_config:
|
||||||
impl_type: "inline"
|
impl_type: "inline"
|
||||||
inline_config:
|
inline_config:
|
||||||
checkpoint_type: "pytorch"
|
checkpoint_type: "pytorch"
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
model_inference_config:
|
inference_config:
|
||||||
impl_type: "inline"
|
impl_type: "inline"
|
||||||
inline_config:
|
inline_config:
|
||||||
checkpoint_type: "pytorch"
|
checkpoint_type: "pytorch"
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
model_inference_config:
|
inference_config:
|
||||||
impl_type: "inline"
|
impl_type: "inline"
|
||||||
inline_config:
|
inline_config:
|
||||||
checkpoint_type: "pytorch"
|
checkpoint_type: "pytorch"
|
||||||
|
|
|
@ -75,7 +75,7 @@ class RemoteImplConfig(BaseModel):
|
||||||
url: str = Field(..., description="The URL of the remote module")
|
url: str = Field(..., description="The URL of the remote module")
|
||||||
|
|
||||||
|
|
||||||
class ModelInferenceConfig(BaseModel):
|
class InferenceConfig(BaseModel):
|
||||||
impl_config: Annotated[
|
impl_config: Annotated[
|
||||||
Union[InlineImplConfig, RemoteImplConfig],
|
Union[InlineImplConfig, RemoteImplConfig],
|
||||||
Field(discriminator="impl_type"),
|
Field(discriminator="impl_type"),
|
||||||
|
@ -130,7 +130,7 @@ class RemoteImplHydraConfig:
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ModelInferenceHydraConfig:
|
class InferenceHydraConfig:
|
||||||
impl_type: str
|
impl_type: str
|
||||||
inline_config: Optional[InlineImplHydraConfig] = None
|
inline_config: Optional[InlineImplHydraConfig] = None
|
||||||
remote_config: Optional[RemoteImplHydraConfig] = None
|
remote_config: Optional[RemoteImplHydraConfig] = None
|
||||||
|
@ -142,18 +142,18 @@ class ModelInferenceHydraConfig:
|
||||||
if self.impl_type == "remote":
|
if self.impl_type == "remote":
|
||||||
assert self.remote_config is not None
|
assert self.remote_config is not None
|
||||||
|
|
||||||
def convert_to_model_inferene_config(self):
|
def convert_to_inference_config(self):
|
||||||
if self.impl_type == "inline":
|
if self.impl_type == "inline":
|
||||||
inline_config = InlineImplHydraConfig(**self.inline_config)
|
inline_config = InlineImplHydraConfig(**self.inline_config)
|
||||||
return ModelInferenceConfig(
|
return InferenceConfig(
|
||||||
impl_config=inline_config.convert_to_inline_impl_config()
|
impl_config=inline_config.convert_to_inline_impl_config()
|
||||||
)
|
)
|
||||||
elif self.impl_type == "remote":
|
elif self.impl_type == "remote":
|
||||||
remote_config = RemoteImplHydraConfig(**self.remote_config)
|
remote_config = RemoteImplHydraConfig(**self.remote_config)
|
||||||
return ModelInferenceConfig(
|
return InferenceConfig(
|
||||||
impl_config=remote_config.convert_to_remote_impl_config()
|
impl_config=remote_config.convert_to_remote_impl_config()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
cs = ConfigStore.instance()
|
cs = ConfigStore.instance()
|
||||||
cs.store(name="model_inference_config", node=ModelInferenceHydraConfig)
|
cs.store(name="inference_config", node=InferenceHydraConfig)
|
||||||
|
|
|
@ -90,7 +90,7 @@ class BatchChatCompletionResponse(BaseModel):
|
||||||
completion_message_batch: List[CompletionMessage]
|
completion_message_batch: List[CompletionMessage]
|
||||||
|
|
||||||
|
|
||||||
class ModelInference(Protocol):
|
class Inference(Protocol):
|
||||||
|
|
||||||
@webmethod(route="/inference/completion")
|
@webmethod(route="/inference/completion")
|
||||||
async def completion(
|
async def completion(
|
||||||
|
|
|
@ -1,12 +1,12 @@
|
||||||
from .api.config import ImplType, ModelInferenceConfig
|
from .api.config import ImplType, InferenceConfig
|
||||||
|
|
||||||
|
|
||||||
async def get_inference_api_instance(config: ModelInferenceConfig):
|
async def get_inference_api_instance(config: InferenceConfig):
|
||||||
if config.impl_config.impl_type == ImplType.inline.value:
|
if config.impl_config.impl_type == ImplType.inline.value:
|
||||||
from .inference import ModelInferenceImpl
|
from .inference import InferenceImpl
|
||||||
|
|
||||||
return ModelInferenceImpl(config.impl_config)
|
return InferenceImpl(config.impl_config)
|
||||||
|
|
||||||
from .client import ModelInferenceClient
|
from .client import InferenceClient
|
||||||
|
|
||||||
return ModelInferenceClient(config.impl_config.url)
|
return InferenceClient(config.impl_config.url)
|
||||||
|
|
|
@ -10,12 +10,12 @@ from .api import (
|
||||||
ChatCompletionResponseStreamChunk,
|
ChatCompletionResponseStreamChunk,
|
||||||
CompletionRequest,
|
CompletionRequest,
|
||||||
InstructModel,
|
InstructModel,
|
||||||
ModelInference,
|
Inference,
|
||||||
UserMessage,
|
UserMessage,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ModelInferenceClient(ModelInference):
|
class InferenceClient(Inference):
|
||||||
def __init__(self, base_url: str):
|
def __init__(self, base_url: str):
|
||||||
self.base_url = base_url
|
self.base_url = base_url
|
||||||
|
|
||||||
|
@ -48,7 +48,7 @@ class ModelInferenceClient(ModelInference):
|
||||||
|
|
||||||
|
|
||||||
async def run_main(host: str, port: int):
|
async def run_main(host: str, port: int):
|
||||||
client = ModelInferenceClient(f"http://{host}:{port}")
|
client = InferenceClient(f"http://{host}:{port}")
|
||||||
|
|
||||||
message = UserMessage(content="hello world, help me out here")
|
message = UserMessage(content="hello world, help me out here")
|
||||||
req = ChatCompletionRequest(
|
req = ChatCompletionRequest(
|
||||||
|
|
|
@ -18,12 +18,12 @@ from .api.endpoints import (
|
||||||
ChatCompletionRequest,
|
ChatCompletionRequest,
|
||||||
ChatCompletionResponseStreamChunk,
|
ChatCompletionResponseStreamChunk,
|
||||||
CompletionRequest,
|
CompletionRequest,
|
||||||
ModelInference,
|
Inference,
|
||||||
)
|
)
|
||||||
from .model_parallel import LlamaModelParallelGenerator
|
from .model_parallel import LlamaModelParallelGenerator
|
||||||
|
|
||||||
|
|
||||||
class ModelInferenceImpl(ModelInference):
|
class InferenceImpl(Inference):
|
||||||
|
|
||||||
def __init__(self, config: InlineImplConfig) -> None:
|
def __init__(self, config: InlineImplConfig) -> None:
|
||||||
self.config = config
|
self.config = config
|
||||||
|
|
|
@ -11,7 +11,7 @@ from fastapi.responses import StreamingResponse
|
||||||
from omegaconf import OmegaConf
|
from omegaconf import OmegaConf
|
||||||
|
|
||||||
from toolchain.utils import get_default_config_dir, parse_config
|
from toolchain.utils import get_default_config_dir, parse_config
|
||||||
from .api.config import ModelInferenceHydraConfig
|
from .api.config import InferenceHydraConfig
|
||||||
from .api.endpoints import ChatCompletionRequest, ChatCompletionResponseStreamChunk
|
from .api.endpoints import ChatCompletionRequest, ChatCompletionResponseStreamChunk
|
||||||
|
|
||||||
from .api_instance import get_inference_api_instance
|
from .api_instance import get_inference_api_instance
|
||||||
|
@ -43,13 +43,13 @@ async def startup():
|
||||||
global InferenceApiInstance
|
global InferenceApiInstance
|
||||||
|
|
||||||
config = get_config()
|
config = get_config()
|
||||||
hydra_config = ModelInferenceHydraConfig(
|
hydra_config = InferenceHydraConfig(
|
||||||
**OmegaConf.to_container(config["model_inference_config"], resolve=True)
|
**OmegaConf.to_container(config["inference_config"], resolve=True)
|
||||||
)
|
)
|
||||||
model_inference_config = hydra_config.convert_to_model_inferene_config()
|
inference_config = hydra_config.convert_to_inference_config()
|
||||||
|
|
||||||
InferenceApiInstance = await get_inference_api_instance(
|
InferenceApiInstance = await get_inference_api_instance(
|
||||||
model_inference_config,
|
inference_config,
|
||||||
)
|
)
|
||||||
await InferenceApiInstance.initialize()
|
await InferenceApiInstance.initialize()
|
||||||
|
|
||||||
|
|
|
@ -16,7 +16,7 @@ from agentic_system.api import * # noqa: F403
|
||||||
|
|
||||||
|
|
||||||
class LlamaStackEndpoints(
|
class LlamaStackEndpoints(
|
||||||
ModelInference,
|
Inference,
|
||||||
AgenticSystem,
|
AgenticSystem,
|
||||||
RewardScoring,
|
RewardScoring,
|
||||||
SyntheticDataGeneration,
|
SyntheticDataGeneration,
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -148,13 +148,13 @@ components:
|
||||||
type: string
|
type: string
|
||||||
step_details:
|
step_details:
|
||||||
oneOf:
|
oneOf:
|
||||||
- $ref: '#/components/schemas/ModelInferenceStep'
|
- $ref: '#/components/schemas/InferenceStep'
|
||||||
- $ref: '#/components/schemas/ToolExecutionStep'
|
- $ref: '#/components/schemas/ToolExecutionStep'
|
||||||
- $ref: '#/components/schemas/ShieldCallStep'
|
- $ref: '#/components/schemas/ShieldCallStep'
|
||||||
- $ref: '#/components/schemas/MemoryRetrievalStep'
|
- $ref: '#/components/schemas/MemoryRetrievalStep'
|
||||||
step_type:
|
step_type:
|
||||||
enum:
|
enum:
|
||||||
- model_inference
|
- inference
|
||||||
- tool_execution
|
- tool_execution
|
||||||
- shield_call
|
- shield_call
|
||||||
- memory_retrieval
|
- memory_retrieval
|
||||||
|
@ -176,7 +176,7 @@ components:
|
||||||
type: string
|
type: string
|
||||||
step_type:
|
step_type:
|
||||||
enum:
|
enum:
|
||||||
- model_inference
|
- inference
|
||||||
- tool_execution
|
- tool_execution
|
||||||
- shield_call
|
- shield_call
|
||||||
- memory_retrieval
|
- memory_retrieval
|
||||||
|
@ -210,7 +210,7 @@ components:
|
||||||
type: string
|
type: string
|
||||||
step_type:
|
step_type:
|
||||||
enum:
|
enum:
|
||||||
- model_inference
|
- inference
|
||||||
- tool_execution
|
- tool_execution
|
||||||
- shield_call
|
- shield_call
|
||||||
- memory_retrieval
|
- memory_retrieval
|
||||||
|
@ -263,171 +263,23 @@ components:
|
||||||
- url
|
- url
|
||||||
- mime_type
|
- mime_type
|
||||||
type: object
|
type: object
|
||||||
BatchChatCompletionRequest:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
available_tools:
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/ToolDefinition'
|
|
||||||
type: array
|
|
||||||
logprobs:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
top_k:
|
|
||||||
type: integer
|
|
||||||
type: object
|
|
||||||
messages_batch:
|
|
||||||
items:
|
|
||||||
items:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/UserMessage'
|
|
||||||
- $ref: '#/components/schemas/SystemMessage'
|
|
||||||
- $ref: '#/components/schemas/ToolResponseMessage'
|
|
||||||
- $ref: '#/components/schemas/CompletionMessage'
|
|
||||||
type: array
|
|
||||||
type: array
|
|
||||||
model:
|
|
||||||
$ref: '#/components/schemas/InstructModel'
|
|
||||||
quantization_config:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/Bf16QuantizationConfig'
|
|
||||||
- $ref: '#/components/schemas/Fp8QuantizationConfig'
|
|
||||||
sampling_params:
|
|
||||||
$ref: '#/components/schemas/SamplingParams'
|
|
||||||
required:
|
|
||||||
- model
|
|
||||||
- messages_batch
|
|
||||||
type: object
|
|
||||||
BatchCompletionRequest:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
content_batch:
|
|
||||||
items:
|
|
||||||
oneOf:
|
|
||||||
- type: string
|
|
||||||
- $ref: '#/components/schemas/Attachment'
|
|
||||||
- items:
|
|
||||||
oneOf:
|
|
||||||
- type: string
|
|
||||||
- $ref: '#/components/schemas/Attachment'
|
|
||||||
type: array
|
|
||||||
type: array
|
|
||||||
logprobs:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
top_k:
|
|
||||||
type: integer
|
|
||||||
type: object
|
|
||||||
model:
|
|
||||||
$ref: '#/components/schemas/PretrainedModel'
|
|
||||||
quantization_config:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/Bf16QuantizationConfig'
|
|
||||||
- $ref: '#/components/schemas/Fp8QuantizationConfig'
|
|
||||||
sampling_params:
|
|
||||||
$ref: '#/components/schemas/SamplingParams'
|
|
||||||
required:
|
|
||||||
- model
|
|
||||||
- content_batch
|
|
||||||
type: object
|
|
||||||
Bf16QuantizationConfig:
|
Bf16QuantizationConfig:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
properties:
|
properties:
|
||||||
quantization_type:
|
type:
|
||||||
const: bf16
|
const: bf16
|
||||||
type: string
|
type: string
|
||||||
required:
|
required:
|
||||||
- quantization_type
|
- type
|
||||||
type: object
|
type: object
|
||||||
BuiltinShield:
|
BuiltinShield:
|
||||||
enum:
|
enum:
|
||||||
- llama_guard
|
- llama_guard
|
||||||
- prompt_guard
|
|
||||||
- code_scanner_guard
|
- code_scanner_guard
|
||||||
- third_party_shield
|
- third_party_shield
|
||||||
|
- injection_shield
|
||||||
|
- jailbreak_shield
|
||||||
type: string
|
type: string
|
||||||
ChatCompletionRequest:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
available_tools:
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/ToolDefinition'
|
|
||||||
type: array
|
|
||||||
logprobs:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
top_k:
|
|
||||||
type: integer
|
|
||||||
type: object
|
|
||||||
messages:
|
|
||||||
items:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/UserMessage'
|
|
||||||
- $ref: '#/components/schemas/SystemMessage'
|
|
||||||
- $ref: '#/components/schemas/ToolResponseMessage'
|
|
||||||
- $ref: '#/components/schemas/CompletionMessage'
|
|
||||||
type: array
|
|
||||||
model:
|
|
||||||
$ref: '#/components/schemas/InstructModel'
|
|
||||||
quantization_config:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/Bf16QuantizationConfig'
|
|
||||||
- $ref: '#/components/schemas/Fp8QuantizationConfig'
|
|
||||||
sampling_params:
|
|
||||||
$ref: '#/components/schemas/SamplingParams'
|
|
||||||
stream:
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- model
|
|
||||||
- messages
|
|
||||||
type: object
|
|
||||||
ChatCompletionResponse:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
completion_message:
|
|
||||||
$ref: '#/components/schemas/CompletionMessage'
|
|
||||||
logprobs:
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/TokenLogProbs'
|
|
||||||
type: array
|
|
||||||
required:
|
|
||||||
- completion_message
|
|
||||||
type: object
|
|
||||||
ChatCompletionResponseEvent:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
delta:
|
|
||||||
oneOf:
|
|
||||||
- type: string
|
|
||||||
- $ref: '#/components/schemas/ToolCallDelta'
|
|
||||||
event_type:
|
|
||||||
$ref: '#/components/schemas/ChatCompletionResponseEventType'
|
|
||||||
logprobs:
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/TokenLogProbs'
|
|
||||||
type: array
|
|
||||||
stop_reason:
|
|
||||||
$ref: '#/components/schemas/StopReason'
|
|
||||||
required:
|
|
||||||
- event_type
|
|
||||||
- delta
|
|
||||||
title: Chat completion response event.
|
|
||||||
type: object
|
|
||||||
ChatCompletionResponseEventType:
|
|
||||||
enum:
|
|
||||||
- start
|
|
||||||
- complete
|
|
||||||
- progress
|
|
||||||
type: string
|
|
||||||
ChatCompletionResponseStreamChunk:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
event:
|
|
||||||
$ref: '#/components/schemas/ChatCompletionResponseEvent'
|
|
||||||
required:
|
|
||||||
- event
|
|
||||||
title: SSE-stream of these events.
|
|
||||||
type: object
|
|
||||||
CompletionMessage:
|
CompletionMessage:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
properties:
|
properties:
|
||||||
|
@ -455,65 +307,6 @@ components:
|
||||||
- stop_reason
|
- stop_reason
|
||||||
- tool_calls
|
- tool_calls
|
||||||
type: object
|
type: object
|
||||||
CompletionRequest:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
content:
|
|
||||||
oneOf:
|
|
||||||
- type: string
|
|
||||||
- $ref: '#/components/schemas/Attachment'
|
|
||||||
- items:
|
|
||||||
oneOf:
|
|
||||||
- type: string
|
|
||||||
- $ref: '#/components/schemas/Attachment'
|
|
||||||
type: array
|
|
||||||
logprobs:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
top_k:
|
|
||||||
type: integer
|
|
||||||
type: object
|
|
||||||
model:
|
|
||||||
$ref: '#/components/schemas/PretrainedModel'
|
|
||||||
quantization_config:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/Bf16QuantizationConfig'
|
|
||||||
- $ref: '#/components/schemas/Fp8QuantizationConfig'
|
|
||||||
sampling_params:
|
|
||||||
$ref: '#/components/schemas/SamplingParams'
|
|
||||||
stream:
|
|
||||||
type: boolean
|
|
||||||
required:
|
|
||||||
- model
|
|
||||||
- content
|
|
||||||
type: object
|
|
||||||
CompletionResponse:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
completion_message:
|
|
||||||
$ref: '#/components/schemas/CompletionMessage'
|
|
||||||
logprobs:
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/TokenLogProbs'
|
|
||||||
type: array
|
|
||||||
required:
|
|
||||||
- completion_message
|
|
||||||
type: object
|
|
||||||
CompletionResponseStreamChunk:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
delta:
|
|
||||||
type: string
|
|
||||||
logprobs:
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/TokenLogProbs'
|
|
||||||
type: array
|
|
||||||
stop_reason:
|
|
||||||
$ref: '#/components/schemas/StopReason'
|
|
||||||
required:
|
|
||||||
- delta
|
|
||||||
title: streamed completion response.
|
|
||||||
type: object
|
|
||||||
CreateDatasetRequest:
|
CreateDatasetRequest:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
properties:
|
properties:
|
||||||
|
@ -737,11 +530,35 @@ components:
|
||||||
Fp8QuantizationConfig:
|
Fp8QuantizationConfig:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
properties:
|
properties:
|
||||||
quantization_type:
|
type:
|
||||||
const: fp8
|
const: fp8
|
||||||
type: string
|
type: string
|
||||||
required:
|
required:
|
||||||
- quantization_type
|
- type
|
||||||
|
type: object
|
||||||
|
InferenceStep:
|
||||||
|
additionalProperties: false
|
||||||
|
properties:
|
||||||
|
completed_at:
|
||||||
|
format: date-time
|
||||||
|
type: string
|
||||||
|
model_response:
|
||||||
|
$ref: '#/components/schemas/CompletionMessage'
|
||||||
|
started_at:
|
||||||
|
format: date-time
|
||||||
|
type: string
|
||||||
|
step_id:
|
||||||
|
type: string
|
||||||
|
step_type:
|
||||||
|
const: inference
|
||||||
|
type: string
|
||||||
|
turn_id:
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- turn_id
|
||||||
|
- step_id
|
||||||
|
- step_type
|
||||||
|
- model_response
|
||||||
type: object
|
type: object
|
||||||
InstructModel:
|
InstructModel:
|
||||||
enum:
|
enum:
|
||||||
|
@ -843,30 +660,6 @@ components:
|
||||||
- documents
|
- documents
|
||||||
- scores
|
- scores
|
||||||
type: object
|
type: object
|
||||||
ModelInferenceStep:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
completed_at:
|
|
||||||
format: date-time
|
|
||||||
type: string
|
|
||||||
model_response:
|
|
||||||
$ref: '#/components/schemas/CompletionMessage'
|
|
||||||
started_at:
|
|
||||||
format: date-time
|
|
||||||
type: string
|
|
||||||
step_id:
|
|
||||||
type: string
|
|
||||||
step_type:
|
|
||||||
const: model_inference
|
|
||||||
type: string
|
|
||||||
turn_id:
|
|
||||||
type: string
|
|
||||||
required:
|
|
||||||
- turn_id
|
|
||||||
- step_id
|
|
||||||
- step_type
|
|
||||||
- model_response
|
|
||||||
type: object
|
|
||||||
OnViolationAction:
|
OnViolationAction:
|
||||||
enum:
|
enum:
|
||||||
- 0
|
- 0
|
||||||
|
@ -1408,16 +1201,6 @@ components:
|
||||||
- role
|
- role
|
||||||
- content
|
- content
|
||||||
type: object
|
type: object
|
||||||
TokenLogProbs:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
logprobs_by_token:
|
|
||||||
additionalProperties:
|
|
||||||
type: number
|
|
||||||
type: object
|
|
||||||
required:
|
|
||||||
- logprobs_by_token
|
|
||||||
type: object
|
|
||||||
ToolCall:
|
ToolCall:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
properties:
|
properties:
|
||||||
|
@ -1477,32 +1260,11 @@ components:
|
||||||
type: object
|
type: object
|
||||||
ToolCallParseStatus:
|
ToolCallParseStatus:
|
||||||
enum:
|
enum:
|
||||||
- start
|
- started
|
||||||
- in_progress
|
- in_progress
|
||||||
- failure
|
- failure
|
||||||
- success
|
- success
|
||||||
type: string
|
type: string
|
||||||
ToolDefinition:
|
|
||||||
additionalProperties: false
|
|
||||||
properties:
|
|
||||||
description:
|
|
||||||
type: string
|
|
||||||
parameters:
|
|
||||||
additionalProperties:
|
|
||||||
$ref: '#/components/schemas/ToolParamDefinition'
|
|
||||||
type: object
|
|
||||||
tool_name:
|
|
||||||
oneOf:
|
|
||||||
- enum:
|
|
||||||
- brave_search
|
|
||||||
- wolfram_alpha
|
|
||||||
- photogen
|
|
||||||
- code_interpreter
|
|
||||||
type: string
|
|
||||||
- type: string
|
|
||||||
required:
|
|
||||||
- tool_name
|
|
||||||
type: object
|
|
||||||
ToolExecutionStep:
|
ToolExecutionStep:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
properties:
|
properties:
|
||||||
|
@ -1686,7 +1448,7 @@ components:
|
||||||
steps:
|
steps:
|
||||||
items:
|
items:
|
||||||
oneOf:
|
oneOf:
|
||||||
- $ref: '#/components/schemas/ModelInferenceStep'
|
- $ref: '#/components/schemas/InferenceStep'
|
||||||
- $ref: '#/components/schemas/ToolExecutionStep'
|
- $ref: '#/components/schemas/ToolExecutionStep'
|
||||||
- $ref: '#/components/schemas/ShieldCallStep'
|
- $ref: '#/components/schemas/ShieldCallStep'
|
||||||
- $ref: '#/components/schemas/MemoryRetrievalStep'
|
- $ref: '#/components/schemas/MemoryRetrievalStep'
|
||||||
|
@ -1729,7 +1491,7 @@ info:
|
||||||
description: "This is the specification of the llama stack that provides\n \
|
description: "This is the specification of the llama stack that provides\n \
|
||||||
\ a set of endpoints and their corresponding interfaces that are tailored\
|
\ a set of endpoints and their corresponding interfaces that are tailored\
|
||||||
\ to\n best leverage Llama Models. The specification is still in\
|
\ to\n best leverage Llama Models. The specification is still in\
|
||||||
\ draft and subject to change.\n Generated at 2024-07-19 11:49:56.794897"
|
\ draft and subject to change.\n Generated at 2024-07-21 12:19:33.327857"
|
||||||
title: '[DRAFT] Llama Stack Specification'
|
title: '[DRAFT] Llama Stack Specification'
|
||||||
version: 0.0.1
|
version: 0.0.1
|
||||||
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
|
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
|
||||||
|
@ -1766,58 +1528,6 @@ paths:
|
||||||
description: OK
|
description: OK
|
||||||
tags:
|
tags:
|
||||||
- AgenticSystem
|
- AgenticSystem
|
||||||
/agentic_system/memory_bank/attach:
|
|
||||||
post:
|
|
||||||
parameters:
|
|
||||||
- in: query
|
|
||||||
name: agent_id
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
- in: query
|
|
||||||
name: session_id
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
required: true
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
tags:
|
|
||||||
- AgenticSystem
|
|
||||||
/agentic_system/memory_bank/detach:
|
|
||||||
post:
|
|
||||||
parameters:
|
|
||||||
- in: query
|
|
||||||
name: agent_id
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
- in: query
|
|
||||||
name: session_id
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
type: array
|
|
||||||
required: true
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
tags:
|
|
||||||
- AgenticSystem
|
|
||||||
/agentic_system/session/create:
|
/agentic_system/session/create:
|
||||||
post:
|
post:
|
||||||
parameters: []
|
parameters: []
|
||||||
|
@ -1969,19 +1679,6 @@ paths:
|
||||||
description: OK
|
description: OK
|
||||||
tags:
|
tags:
|
||||||
- Evaluations
|
- Evaluations
|
||||||
/evaluate/job/cancel:
|
|
||||||
get:
|
|
||||||
parameters:
|
|
||||||
- in: query
|
|
||||||
name: job_uuid
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
tags:
|
|
||||||
- Evaluations
|
|
||||||
/evaluate/job/logs:
|
/evaluate/job/logs:
|
||||||
get:
|
get:
|
||||||
parameters:
|
parameters:
|
||||||
|
@ -2082,78 +1779,6 @@ paths:
|
||||||
description: OK
|
description: OK
|
||||||
tags:
|
tags:
|
||||||
- Evaluations
|
- Evaluations
|
||||||
/inference/batch_chat_completion:
|
|
||||||
post:
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/BatchChatCompletionRequest'
|
|
||||||
required: true
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
content:
|
|
||||||
application/jsonl:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/ChatCompletionResponse'
|
|
||||||
description: OK
|
|
||||||
tags:
|
|
||||||
- ModelInference
|
|
||||||
/inference/batch_completion:
|
|
||||||
post:
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/BatchCompletionRequest'
|
|
||||||
required: true
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
content:
|
|
||||||
application/jsonl:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/CompletionResponse'
|
|
||||||
description: OK
|
|
||||||
tags:
|
|
||||||
- ModelInference
|
|
||||||
/inference/chat_completion:
|
|
||||||
post:
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/ChatCompletionRequest'
|
|
||||||
required: true
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
|
|
||||||
description: SSE-stream of these events.
|
|
||||||
tags:
|
|
||||||
- ModelInference
|
|
||||||
/inference/completion:
|
|
||||||
post:
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/CompletionRequest'
|
|
||||||
required: true
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/CompletionResponseStreamChunk'
|
|
||||||
description: streamed completion response.
|
|
||||||
tags:
|
|
||||||
- ModelInference
|
|
||||||
/memory_bank/delete:
|
/memory_bank/delete:
|
||||||
post:
|
post:
|
||||||
parameters:
|
parameters:
|
||||||
|
@ -2335,19 +1960,6 @@ paths:
|
||||||
description: OK
|
description: OK
|
||||||
tags:
|
tags:
|
||||||
- PostTraining
|
- PostTraining
|
||||||
/post_training/job/cancel:
|
|
||||||
get:
|
|
||||||
parameters:
|
|
||||||
- in: query
|
|
||||||
name: job_uuid
|
|
||||||
required: true
|
|
||||||
schema:
|
|
||||||
type: string
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: OK
|
|
||||||
tags:
|
|
||||||
- PostTraining
|
|
||||||
/post_training/job/logs:
|
/post_training/job/logs:
|
||||||
get:
|
get:
|
||||||
parameters:
|
parameters:
|
||||||
|
@ -2471,22 +2083,29 @@ security:
|
||||||
servers:
|
servers:
|
||||||
- url: http://any-hosted-llama-stack.com
|
- url: http://any-hosted-llama-stack.com
|
||||||
tags:
|
tags:
|
||||||
|
- name: RewardScoring
|
||||||
|
- name: PostTraining
|
||||||
- name: AgenticSystem
|
- name: AgenticSystem
|
||||||
- name: Datasets
|
- name: Datasets
|
||||||
- name: ModelInference
|
|
||||||
- name: SyntheticDataGeneration
|
|
||||||
- name: MemoryBanks
|
- name: MemoryBanks
|
||||||
- name: PostTraining
|
|
||||||
- name: Evaluations
|
- name: Evaluations
|
||||||
- name: RewardScoring
|
- name: SyntheticDataGeneration
|
||||||
|
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
|
||||||
|
/>
|
||||||
|
name: AgenticSystemCreateRequest
|
||||||
|
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemInstanceConfig"
|
||||||
|
/>
|
||||||
|
name: AgenticSystemInstanceConfig
|
||||||
|
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemToolDefinition"
|
||||||
|
/>
|
||||||
|
name: AgenticSystemToolDefinition
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/Attachment" />
|
- description: <SchemaDefinition schemaRef="#/components/schemas/Attachment" />
|
||||||
name: Attachment
|
name: Attachment
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
|
|
||||||
/>
|
|
||||||
name: BatchChatCompletionRequest
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/Bf16QuantizationConfig"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/Bf16QuantizationConfig"
|
||||||
/>
|
/>
|
||||||
name: Bf16QuantizationConfig
|
name: Bf16QuantizationConfig
|
||||||
|
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinShield" />
|
||||||
|
name: BuiltinShield
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
|
||||||
/>
|
/>
|
||||||
name: CompletionMessage
|
name: CompletionMessage
|
||||||
|
@ -2495,19 +2114,28 @@ tags:
|
||||||
name: Fp8QuantizationConfig
|
name: Fp8QuantizationConfig
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/InstructModel" />
|
- description: <SchemaDefinition schemaRef="#/components/schemas/InstructModel" />
|
||||||
name: InstructModel
|
name: InstructModel
|
||||||
|
- description: <SchemaDefinition schemaRef="#/components/schemas/OnViolationAction"
|
||||||
|
/>
|
||||||
|
name: OnViolationAction
|
||||||
|
- description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIExecutionConfig"
|
||||||
|
/>
|
||||||
|
name: RestAPIExecutionConfig
|
||||||
|
- description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIMethod" />
|
||||||
|
name: RestAPIMethod
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/SamplingParams" />
|
- description: <SchemaDefinition schemaRef="#/components/schemas/SamplingParams" />
|
||||||
name: SamplingParams
|
name: SamplingParams
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/SamplingStrategy"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/SamplingStrategy"
|
||||||
/>
|
/>
|
||||||
name: SamplingStrategy
|
name: SamplingStrategy
|
||||||
|
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldDefinition"
|
||||||
|
/>
|
||||||
|
name: ShieldDefinition
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/StopReason" />
|
- description: <SchemaDefinition schemaRef="#/components/schemas/StopReason" />
|
||||||
name: StopReason
|
name: StopReason
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/SystemMessage" />
|
- description: <SchemaDefinition schemaRef="#/components/schemas/SystemMessage" />
|
||||||
name: SystemMessage
|
name: SystemMessage
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCall" />
|
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCall" />
|
||||||
name: ToolCall
|
name: ToolCall
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolDefinition" />
|
|
||||||
name: ToolDefinition
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolParamDefinition"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolParamDefinition"
|
||||||
/>
|
/>
|
||||||
name: ToolParamDefinition
|
name: ToolParamDefinition
|
||||||
|
@ -2518,74 +2146,6 @@ tags:
|
||||||
name: URL
|
name: URL
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/UserMessage" />
|
- description: <SchemaDefinition schemaRef="#/components/schemas/UserMessage" />
|
||||||
name: UserMessage
|
name: UserMessage
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponse"
|
|
||||||
/>
|
|
||||||
name: ChatCompletionResponse
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/TokenLogProbs" />
|
|
||||||
name: TokenLogProbs
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchCompletionRequest"
|
|
||||||
/>
|
|
||||||
name: BatchCompletionRequest
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/PretrainedModel"
|
|
||||||
/>
|
|
||||||
name: PretrainedModel
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionResponse"
|
|
||||||
/>
|
|
||||||
name: CompletionResponse
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest"
|
|
||||||
/>
|
|
||||||
name: ChatCompletionRequest
|
|
||||||
- description: 'Chat completion response event.
|
|
||||||
|
|
||||||
|
|
||||||
<SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseEvent"
|
|
||||||
/>'
|
|
||||||
name: ChatCompletionResponseEvent
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseEventType"
|
|
||||||
/>
|
|
||||||
name: ChatCompletionResponseEventType
|
|
||||||
- description: 'SSE-stream of these events.
|
|
||||||
|
|
||||||
|
|
||||||
<SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseStreamChunk"
|
|
||||||
/>'
|
|
||||||
name: ChatCompletionResponseStreamChunk
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallDelta" />
|
|
||||||
name: ToolCallDelta
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallParseStatus"
|
|
||||||
/>
|
|
||||||
name: ToolCallParseStatus
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionRequest"
|
|
||||||
/>
|
|
||||||
name: CompletionRequest
|
|
||||||
- description: 'streamed completion response.
|
|
||||||
|
|
||||||
|
|
||||||
<SchemaDefinition schemaRef="#/components/schemas/CompletionResponseStreamChunk"
|
|
||||||
/>'
|
|
||||||
name: CompletionResponseStreamChunk
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
|
|
||||||
/>
|
|
||||||
name: AgenticSystemCreateRequest
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemInstanceConfig"
|
|
||||||
/>
|
|
||||||
name: AgenticSystemInstanceConfig
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemToolDefinition"
|
|
||||||
/>
|
|
||||||
name: AgenticSystemToolDefinition
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinShield" />
|
|
||||||
name: BuiltinShield
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/OnViolationAction"
|
|
||||||
/>
|
|
||||||
name: OnViolationAction
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIExecutionConfig"
|
|
||||||
/>
|
|
||||||
name: RestAPIExecutionConfig
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIMethod" />
|
|
||||||
name: RestAPIMethod
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldDefinition"
|
|
||||||
/>
|
|
||||||
name: ShieldDefinition
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateResponse"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateResponse"
|
||||||
/>
|
/>
|
||||||
name: AgenticSystemCreateResponse
|
name: AgenticSystemCreateResponse
|
||||||
|
@ -2622,19 +2182,23 @@ tags:
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemTurnResponseTurnStartPayload"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemTurnResponseTurnStartPayload"
|
||||||
/>
|
/>
|
||||||
name: AgenticSystemTurnResponseTurnStartPayload
|
name: AgenticSystemTurnResponseTurnStartPayload
|
||||||
|
- description: <SchemaDefinition schemaRef="#/components/schemas/InferenceStep" />
|
||||||
|
name: InferenceStep
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/MemoryBankDocument"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/MemoryBankDocument"
|
||||||
/>
|
/>
|
||||||
name: MemoryBankDocument
|
name: MemoryBankDocument
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/MemoryRetrievalStep"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/MemoryRetrievalStep"
|
||||||
/>
|
/>
|
||||||
name: MemoryRetrievalStep
|
name: MemoryRetrievalStep
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ModelInferenceStep"
|
|
||||||
/>
|
|
||||||
name: ModelInferenceStep
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldCallStep" />
|
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldCallStep" />
|
||||||
name: ShieldCallStep
|
name: ShieldCallStep
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldResponse" />
|
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldResponse" />
|
||||||
name: ShieldResponse
|
name: ShieldResponse
|
||||||
|
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallDelta" />
|
||||||
|
name: ToolCallDelta
|
||||||
|
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallParseStatus"
|
||||||
|
/>
|
||||||
|
name: ToolCallParseStatus
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolExecutionStep"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolExecutionStep"
|
||||||
/>
|
/>
|
||||||
name: ToolExecutionStep
|
name: ToolExecutionStep
|
||||||
|
@ -2785,6 +2349,9 @@ tags:
|
||||||
|
|
||||||
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingSFTRequest" />'
|
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingSFTRequest" />'
|
||||||
name: PostTrainingSFTRequest
|
name: PostTrainingSFTRequest
|
||||||
|
- description: <SchemaDefinition schemaRef="#/components/schemas/PretrainedModel"
|
||||||
|
/>
|
||||||
|
name: PretrainedModel
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/QLoraFinetuningConfig"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/QLoraFinetuningConfig"
|
||||||
/>
|
/>
|
||||||
name: QLoraFinetuningConfig
|
name: QLoraFinetuningConfig
|
||||||
|
@ -2795,7 +2362,6 @@ x-tagGroups:
|
||||||
- Datasets
|
- Datasets
|
||||||
- Evaluations
|
- Evaluations
|
||||||
- MemoryBanks
|
- MemoryBanks
|
||||||
- ModelInference
|
|
||||||
- PostTraining
|
- PostTraining
|
||||||
- RewardScoring
|
- RewardScoring
|
||||||
- SyntheticDataGeneration
|
- SyntheticDataGeneration
|
||||||
|
@ -2816,19 +2382,9 @@ x-tagGroups:
|
||||||
- AgenticSystemTurnResponseTurnCompletePayload
|
- AgenticSystemTurnResponseTurnCompletePayload
|
||||||
- AgenticSystemTurnResponseTurnStartPayload
|
- AgenticSystemTurnResponseTurnStartPayload
|
||||||
- Attachment
|
- Attachment
|
||||||
- BatchChatCompletionRequest
|
|
||||||
- BatchCompletionRequest
|
|
||||||
- Bf16QuantizationConfig
|
- Bf16QuantizationConfig
|
||||||
- BuiltinShield
|
- BuiltinShield
|
||||||
- ChatCompletionRequest
|
|
||||||
- ChatCompletionResponse
|
|
||||||
- ChatCompletionResponseEvent
|
|
||||||
- ChatCompletionResponseEventType
|
|
||||||
- ChatCompletionResponseStreamChunk
|
|
||||||
- CompletionMessage
|
- CompletionMessage
|
||||||
- CompletionRequest
|
|
||||||
- CompletionResponse
|
|
||||||
- CompletionResponseStreamChunk
|
|
||||||
- CreateDatasetRequest
|
- CreateDatasetRequest
|
||||||
- DPOAlignmentConfig
|
- DPOAlignmentConfig
|
||||||
- DialogGenerations
|
- DialogGenerations
|
||||||
|
@ -2842,12 +2398,12 @@ x-tagGroups:
|
||||||
- EvaluationJobStatusResponse
|
- EvaluationJobStatusResponse
|
||||||
- FinetuningAlgorithm
|
- FinetuningAlgorithm
|
||||||
- Fp8QuantizationConfig
|
- Fp8QuantizationConfig
|
||||||
|
- InferenceStep
|
||||||
- InstructModel
|
- InstructModel
|
||||||
- LoraFinetuningConfig
|
- LoraFinetuningConfig
|
||||||
- MemoryBank
|
- MemoryBank
|
||||||
- MemoryBankDocument
|
- MemoryBankDocument
|
||||||
- MemoryRetrievalStep
|
- MemoryRetrievalStep
|
||||||
- ModelInferenceStep
|
|
||||||
- OnViolationAction
|
- OnViolationAction
|
||||||
- OptimizerConfig
|
- OptimizerConfig
|
||||||
- PostTrainingJob
|
- PostTrainingJob
|
||||||
|
@ -2877,11 +2433,9 @@ x-tagGroups:
|
||||||
- SyntheticDataGenerationRequest
|
- SyntheticDataGenerationRequest
|
||||||
- SyntheticDataGenerationResponse
|
- SyntheticDataGenerationResponse
|
||||||
- SystemMessage
|
- SystemMessage
|
||||||
- TokenLogProbs
|
|
||||||
- ToolCall
|
- ToolCall
|
||||||
- ToolCallDelta
|
- ToolCallDelta
|
||||||
- ToolCallParseStatus
|
- ToolCallParseStatus
|
||||||
- ToolDefinition
|
|
||||||
- ToolExecutionStep
|
- ToolExecutionStep
|
||||||
- ToolParamDefinition
|
- ToolParamDefinition
|
||||||
- ToolResponse
|
- ToolResponse
|
||||||
|
|
|
@ -2,4 +2,4 @@
|
||||||
|
|
||||||
set -x
|
set -x
|
||||||
|
|
||||||
PYTHONPATH=../../../oss-ops:../.. python3 -m toolchain.spec.generate
|
PYTHONPATH=/data/users/rsm/llama-models:/data/users/rsm/llama-toolchain:/data/users/rsm/llama-agentic-system:../../../oss-ops:../.. python -m toolchain.spec.generate
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue