mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-29 07:14:20 +00:00
rename ModelInference to Inference
This commit is contained in:
parent
245461620d
commit
67f0510edd
18 changed files with 468 additions and 1636 deletions
|
@ -30,7 +30,7 @@ create_parent_dir() {
|
|||
# Function to output the YAML configuration
|
||||
output_yaml() {
|
||||
cat <<EOL > ${yaml_output_path}
|
||||
model_inference_config:
|
||||
inference_config:
|
||||
impl_type: "inline"
|
||||
inline_config:
|
||||
checkpoint_type: "pytorch"
|
||||
|
|
|
@ -47,7 +47,7 @@ class InferenceConfigure(Subcommand):
|
|||
yaml_output_path
|
||||
):
|
||||
yaml_content = textwrap.dedent(f"""
|
||||
model_inference_config:
|
||||
inference_config:
|
||||
impl_type: "inline"
|
||||
inline_config:
|
||||
checkpoint_type: "pytorch"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
model_inference_config:
|
||||
inference_config:
|
||||
impl_type: "inline"
|
||||
inline_config:
|
||||
checkpoint_type: "pytorch"
|
||||
|
@ -7,5 +7,5 @@ model_inference_config:
|
|||
model_parallel_size: 8
|
||||
max_seq_len: 2048
|
||||
max_batch_size: 1
|
||||
quantization:
|
||||
quantization:
|
||||
type: "fp8"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
model_inference_config:
|
||||
inference_config:
|
||||
impl_type: "inline"
|
||||
inline_config:
|
||||
checkpoint_type: "pytorch"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
model_inference_config:
|
||||
inference_config:
|
||||
impl_type: "inline"
|
||||
inline_config:
|
||||
checkpoint_type: "pytorch"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
model_inference_config:
|
||||
inference_config:
|
||||
impl_type: "inline"
|
||||
inline_config:
|
||||
checkpoint_type: "pytorch"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
model_inference_config:
|
||||
inference_config:
|
||||
impl_type: "inline"
|
||||
inline_config:
|
||||
checkpoint_type: "pytorch"
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
model_inference_config:
|
||||
inference_config:
|
||||
impl_type: "inline"
|
||||
inline_config:
|
||||
checkpoint_type: "pytorch"
|
||||
|
|
|
@ -75,7 +75,7 @@ class RemoteImplConfig(BaseModel):
|
|||
url: str = Field(..., description="The URL of the remote module")
|
||||
|
||||
|
||||
class ModelInferenceConfig(BaseModel):
|
||||
class InferenceConfig(BaseModel):
|
||||
impl_config: Annotated[
|
||||
Union[InlineImplConfig, RemoteImplConfig],
|
||||
Field(discriminator="impl_type"),
|
||||
|
@ -130,7 +130,7 @@ class RemoteImplHydraConfig:
|
|||
|
||||
|
||||
@dataclass
|
||||
class ModelInferenceHydraConfig:
|
||||
class InferenceHydraConfig:
|
||||
impl_type: str
|
||||
inline_config: Optional[InlineImplHydraConfig] = None
|
||||
remote_config: Optional[RemoteImplHydraConfig] = None
|
||||
|
@ -142,18 +142,18 @@ class ModelInferenceHydraConfig:
|
|||
if self.impl_type == "remote":
|
||||
assert self.remote_config is not None
|
||||
|
||||
def convert_to_model_inferene_config(self):
|
||||
def convert_to_inference_config(self):
|
||||
if self.impl_type == "inline":
|
||||
inline_config = InlineImplHydraConfig(**self.inline_config)
|
||||
return ModelInferenceConfig(
|
||||
return InferenceConfig(
|
||||
impl_config=inline_config.convert_to_inline_impl_config()
|
||||
)
|
||||
elif self.impl_type == "remote":
|
||||
remote_config = RemoteImplHydraConfig(**self.remote_config)
|
||||
return ModelInferenceConfig(
|
||||
return InferenceConfig(
|
||||
impl_config=remote_config.convert_to_remote_impl_config()
|
||||
)
|
||||
|
||||
|
||||
cs = ConfigStore.instance()
|
||||
cs.store(name="model_inference_config", node=ModelInferenceHydraConfig)
|
||||
cs.store(name="inference_config", node=InferenceHydraConfig)
|
||||
|
|
|
@ -90,7 +90,7 @@ class BatchChatCompletionResponse(BaseModel):
|
|||
completion_message_batch: List[CompletionMessage]
|
||||
|
||||
|
||||
class ModelInference(Protocol):
|
||||
class Inference(Protocol):
|
||||
|
||||
@webmethod(route="/inference/completion")
|
||||
async def completion(
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
from .api.config import ImplType, ModelInferenceConfig
|
||||
from .api.config import ImplType, InferenceConfig
|
||||
|
||||
|
||||
async def get_inference_api_instance(config: ModelInferenceConfig):
|
||||
async def get_inference_api_instance(config: InferenceConfig):
|
||||
if config.impl_config.impl_type == ImplType.inline.value:
|
||||
from .inference import ModelInferenceImpl
|
||||
from .inference import InferenceImpl
|
||||
|
||||
return ModelInferenceImpl(config.impl_config)
|
||||
return InferenceImpl(config.impl_config)
|
||||
|
||||
from .client import ModelInferenceClient
|
||||
from .client import InferenceClient
|
||||
|
||||
return ModelInferenceClient(config.impl_config.url)
|
||||
return InferenceClient(config.impl_config.url)
|
||||
|
|
|
@ -10,12 +10,12 @@ from .api import (
|
|||
ChatCompletionResponseStreamChunk,
|
||||
CompletionRequest,
|
||||
InstructModel,
|
||||
ModelInference,
|
||||
Inference,
|
||||
UserMessage,
|
||||
)
|
||||
|
||||
|
||||
class ModelInferenceClient(ModelInference):
|
||||
class InferenceClient(Inference):
|
||||
def __init__(self, base_url: str):
|
||||
self.base_url = base_url
|
||||
|
||||
|
@ -48,7 +48,7 @@ class ModelInferenceClient(ModelInference):
|
|||
|
||||
|
||||
async def run_main(host: str, port: int):
|
||||
client = ModelInferenceClient(f"http://{host}:{port}")
|
||||
client = InferenceClient(f"http://{host}:{port}")
|
||||
|
||||
message = UserMessage(content="hello world, help me out here")
|
||||
req = ChatCompletionRequest(
|
||||
|
|
|
@ -18,12 +18,12 @@ from .api.endpoints import (
|
|||
ChatCompletionRequest,
|
||||
ChatCompletionResponseStreamChunk,
|
||||
CompletionRequest,
|
||||
ModelInference,
|
||||
Inference,
|
||||
)
|
||||
from .model_parallel import LlamaModelParallelGenerator
|
||||
|
||||
|
||||
class ModelInferenceImpl(ModelInference):
|
||||
class InferenceImpl(Inference):
|
||||
|
||||
def __init__(self, config: InlineImplConfig) -> None:
|
||||
self.config = config
|
||||
|
|
|
@ -11,7 +11,7 @@ from fastapi.responses import StreamingResponse
|
|||
from omegaconf import OmegaConf
|
||||
|
||||
from toolchain.utils import get_default_config_dir, parse_config
|
||||
from .api.config import ModelInferenceHydraConfig
|
||||
from .api.config import InferenceHydraConfig
|
||||
from .api.endpoints import ChatCompletionRequest, ChatCompletionResponseStreamChunk
|
||||
|
||||
from .api_instance import get_inference_api_instance
|
||||
|
@ -43,13 +43,13 @@ async def startup():
|
|||
global InferenceApiInstance
|
||||
|
||||
config = get_config()
|
||||
hydra_config = ModelInferenceHydraConfig(
|
||||
**OmegaConf.to_container(config["model_inference_config"], resolve=True)
|
||||
hydra_config = InferenceHydraConfig(
|
||||
**OmegaConf.to_container(config["inference_config"], resolve=True)
|
||||
)
|
||||
model_inference_config = hydra_config.convert_to_model_inferene_config()
|
||||
inference_config = hydra_config.convert_to_inference_config()
|
||||
|
||||
InferenceApiInstance = await get_inference_api_instance(
|
||||
model_inference_config,
|
||||
inference_config,
|
||||
)
|
||||
await InferenceApiInstance.initialize()
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@ from agentic_system.api import * # noqa: F403
|
|||
|
||||
|
||||
class LlamaStackEndpoints(
|
||||
ModelInference,
|
||||
Inference,
|
||||
AgenticSystem,
|
||||
RewardScoring,
|
||||
SyntheticDataGeneration,
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -148,13 +148,13 @@ components:
|
|||
type: string
|
||||
step_details:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/ModelInferenceStep'
|
||||
- $ref: '#/components/schemas/InferenceStep'
|
||||
- $ref: '#/components/schemas/ToolExecutionStep'
|
||||
- $ref: '#/components/schemas/ShieldCallStep'
|
||||
- $ref: '#/components/schemas/MemoryRetrievalStep'
|
||||
step_type:
|
||||
enum:
|
||||
- model_inference
|
||||
- inference
|
||||
- tool_execution
|
||||
- shield_call
|
||||
- memory_retrieval
|
||||
|
@ -176,7 +176,7 @@ components:
|
|||
type: string
|
||||
step_type:
|
||||
enum:
|
||||
- model_inference
|
||||
- inference
|
||||
- tool_execution
|
||||
- shield_call
|
||||
- memory_retrieval
|
||||
|
@ -210,7 +210,7 @@ components:
|
|||
type: string
|
||||
step_type:
|
||||
enum:
|
||||
- model_inference
|
||||
- inference
|
||||
- tool_execution
|
||||
- shield_call
|
||||
- memory_retrieval
|
||||
|
@ -263,171 +263,23 @@ components:
|
|||
- url
|
||||
- mime_type
|
||||
type: object
|
||||
BatchChatCompletionRequest:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
available_tools:
|
||||
items:
|
||||
$ref: '#/components/schemas/ToolDefinition'
|
||||
type: array
|
||||
logprobs:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
top_k:
|
||||
type: integer
|
||||
type: object
|
||||
messages_batch:
|
||||
items:
|
||||
items:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/UserMessage'
|
||||
- $ref: '#/components/schemas/SystemMessage'
|
||||
- $ref: '#/components/schemas/ToolResponseMessage'
|
||||
- $ref: '#/components/schemas/CompletionMessage'
|
||||
type: array
|
||||
type: array
|
||||
model:
|
||||
$ref: '#/components/schemas/InstructModel'
|
||||
quantization_config:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/Bf16QuantizationConfig'
|
||||
- $ref: '#/components/schemas/Fp8QuantizationConfig'
|
||||
sampling_params:
|
||||
$ref: '#/components/schemas/SamplingParams'
|
||||
required:
|
||||
- model
|
||||
- messages_batch
|
||||
type: object
|
||||
BatchCompletionRequest:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
content_batch:
|
||||
items:
|
||||
oneOf:
|
||||
- type: string
|
||||
- $ref: '#/components/schemas/Attachment'
|
||||
- items:
|
||||
oneOf:
|
||||
- type: string
|
||||
- $ref: '#/components/schemas/Attachment'
|
||||
type: array
|
||||
type: array
|
||||
logprobs:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
top_k:
|
||||
type: integer
|
||||
type: object
|
||||
model:
|
||||
$ref: '#/components/schemas/PretrainedModel'
|
||||
quantization_config:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/Bf16QuantizationConfig'
|
||||
- $ref: '#/components/schemas/Fp8QuantizationConfig'
|
||||
sampling_params:
|
||||
$ref: '#/components/schemas/SamplingParams'
|
||||
required:
|
||||
- model
|
||||
- content_batch
|
||||
type: object
|
||||
Bf16QuantizationConfig:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
quantization_type:
|
||||
type:
|
||||
const: bf16
|
||||
type: string
|
||||
required:
|
||||
- quantization_type
|
||||
- type
|
||||
type: object
|
||||
BuiltinShield:
|
||||
enum:
|
||||
- llama_guard
|
||||
- prompt_guard
|
||||
- code_scanner_guard
|
||||
- third_party_shield
|
||||
- injection_shield
|
||||
- jailbreak_shield
|
||||
type: string
|
||||
ChatCompletionRequest:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
available_tools:
|
||||
items:
|
||||
$ref: '#/components/schemas/ToolDefinition'
|
||||
type: array
|
||||
logprobs:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
top_k:
|
||||
type: integer
|
||||
type: object
|
||||
messages:
|
||||
items:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/UserMessage'
|
||||
- $ref: '#/components/schemas/SystemMessage'
|
||||
- $ref: '#/components/schemas/ToolResponseMessage'
|
||||
- $ref: '#/components/schemas/CompletionMessage'
|
||||
type: array
|
||||
model:
|
||||
$ref: '#/components/schemas/InstructModel'
|
||||
quantization_config:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/Bf16QuantizationConfig'
|
||||
- $ref: '#/components/schemas/Fp8QuantizationConfig'
|
||||
sampling_params:
|
||||
$ref: '#/components/schemas/SamplingParams'
|
||||
stream:
|
||||
type: boolean
|
||||
required:
|
||||
- model
|
||||
- messages
|
||||
type: object
|
||||
ChatCompletionResponse:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
completion_message:
|
||||
$ref: '#/components/schemas/CompletionMessage'
|
||||
logprobs:
|
||||
items:
|
||||
$ref: '#/components/schemas/TokenLogProbs'
|
||||
type: array
|
||||
required:
|
||||
- completion_message
|
||||
type: object
|
||||
ChatCompletionResponseEvent:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
delta:
|
||||
oneOf:
|
||||
- type: string
|
||||
- $ref: '#/components/schemas/ToolCallDelta'
|
||||
event_type:
|
||||
$ref: '#/components/schemas/ChatCompletionResponseEventType'
|
||||
logprobs:
|
||||
items:
|
||||
$ref: '#/components/schemas/TokenLogProbs'
|
||||
type: array
|
||||
stop_reason:
|
||||
$ref: '#/components/schemas/StopReason'
|
||||
required:
|
||||
- event_type
|
||||
- delta
|
||||
title: Chat completion response event.
|
||||
type: object
|
||||
ChatCompletionResponseEventType:
|
||||
enum:
|
||||
- start
|
||||
- complete
|
||||
- progress
|
||||
type: string
|
||||
ChatCompletionResponseStreamChunk:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
event:
|
||||
$ref: '#/components/schemas/ChatCompletionResponseEvent'
|
||||
required:
|
||||
- event
|
||||
title: SSE-stream of these events.
|
||||
type: object
|
||||
CompletionMessage:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
|
@ -455,65 +307,6 @@ components:
|
|||
- stop_reason
|
||||
- tool_calls
|
||||
type: object
|
||||
CompletionRequest:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
content:
|
||||
oneOf:
|
||||
- type: string
|
||||
- $ref: '#/components/schemas/Attachment'
|
||||
- items:
|
||||
oneOf:
|
||||
- type: string
|
||||
- $ref: '#/components/schemas/Attachment'
|
||||
type: array
|
||||
logprobs:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
top_k:
|
||||
type: integer
|
||||
type: object
|
||||
model:
|
||||
$ref: '#/components/schemas/PretrainedModel'
|
||||
quantization_config:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/Bf16QuantizationConfig'
|
||||
- $ref: '#/components/schemas/Fp8QuantizationConfig'
|
||||
sampling_params:
|
||||
$ref: '#/components/schemas/SamplingParams'
|
||||
stream:
|
||||
type: boolean
|
||||
required:
|
||||
- model
|
||||
- content
|
||||
type: object
|
||||
CompletionResponse:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
completion_message:
|
||||
$ref: '#/components/schemas/CompletionMessage'
|
||||
logprobs:
|
||||
items:
|
||||
$ref: '#/components/schemas/TokenLogProbs'
|
||||
type: array
|
||||
required:
|
||||
- completion_message
|
||||
type: object
|
||||
CompletionResponseStreamChunk:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
delta:
|
||||
type: string
|
||||
logprobs:
|
||||
items:
|
||||
$ref: '#/components/schemas/TokenLogProbs'
|
||||
type: array
|
||||
stop_reason:
|
||||
$ref: '#/components/schemas/StopReason'
|
||||
required:
|
||||
- delta
|
||||
title: streamed completion response.
|
||||
type: object
|
||||
CreateDatasetRequest:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
|
@ -737,11 +530,35 @@ components:
|
|||
Fp8QuantizationConfig:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
quantization_type:
|
||||
type:
|
||||
const: fp8
|
||||
type: string
|
||||
required:
|
||||
- quantization_type
|
||||
- type
|
||||
type: object
|
||||
InferenceStep:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
completed_at:
|
||||
format: date-time
|
||||
type: string
|
||||
model_response:
|
||||
$ref: '#/components/schemas/CompletionMessage'
|
||||
started_at:
|
||||
format: date-time
|
||||
type: string
|
||||
step_id:
|
||||
type: string
|
||||
step_type:
|
||||
const: inference
|
||||
type: string
|
||||
turn_id:
|
||||
type: string
|
||||
required:
|
||||
- turn_id
|
||||
- step_id
|
||||
- step_type
|
||||
- model_response
|
||||
type: object
|
||||
InstructModel:
|
||||
enum:
|
||||
|
@ -843,30 +660,6 @@ components:
|
|||
- documents
|
||||
- scores
|
||||
type: object
|
||||
ModelInferenceStep:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
completed_at:
|
||||
format: date-time
|
||||
type: string
|
||||
model_response:
|
||||
$ref: '#/components/schemas/CompletionMessage'
|
||||
started_at:
|
||||
format: date-time
|
||||
type: string
|
||||
step_id:
|
||||
type: string
|
||||
step_type:
|
||||
const: model_inference
|
||||
type: string
|
||||
turn_id:
|
||||
type: string
|
||||
required:
|
||||
- turn_id
|
||||
- step_id
|
||||
- step_type
|
||||
- model_response
|
||||
type: object
|
||||
OnViolationAction:
|
||||
enum:
|
||||
- 0
|
||||
|
@ -1408,16 +1201,6 @@ components:
|
|||
- role
|
||||
- content
|
||||
type: object
|
||||
TokenLogProbs:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
logprobs_by_token:
|
||||
additionalProperties:
|
||||
type: number
|
||||
type: object
|
||||
required:
|
||||
- logprobs_by_token
|
||||
type: object
|
||||
ToolCall:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
|
@ -1477,32 +1260,11 @@ components:
|
|||
type: object
|
||||
ToolCallParseStatus:
|
||||
enum:
|
||||
- start
|
||||
- started
|
||||
- in_progress
|
||||
- failure
|
||||
- success
|
||||
type: string
|
||||
ToolDefinition:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
description:
|
||||
type: string
|
||||
parameters:
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/ToolParamDefinition'
|
||||
type: object
|
||||
tool_name:
|
||||
oneOf:
|
||||
- enum:
|
||||
- brave_search
|
||||
- wolfram_alpha
|
||||
- photogen
|
||||
- code_interpreter
|
||||
type: string
|
||||
- type: string
|
||||
required:
|
||||
- tool_name
|
||||
type: object
|
||||
ToolExecutionStep:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
|
@ -1686,7 +1448,7 @@ components:
|
|||
steps:
|
||||
items:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/ModelInferenceStep'
|
||||
- $ref: '#/components/schemas/InferenceStep'
|
||||
- $ref: '#/components/schemas/ToolExecutionStep'
|
||||
- $ref: '#/components/schemas/ShieldCallStep'
|
||||
- $ref: '#/components/schemas/MemoryRetrievalStep'
|
||||
|
@ -1729,7 +1491,7 @@ info:
|
|||
description: "This is the specification of the llama stack that provides\n \
|
||||
\ a set of endpoints and their corresponding interfaces that are tailored\
|
||||
\ to\n best leverage Llama Models. The specification is still in\
|
||||
\ draft and subject to change.\n Generated at 2024-07-19 11:49:56.794897"
|
||||
\ draft and subject to change.\n Generated at 2024-07-21 12:19:33.327857"
|
||||
title: '[DRAFT] Llama Stack Specification'
|
||||
version: 0.0.1
|
||||
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
|
||||
|
@ -1766,58 +1528,6 @@ paths:
|
|||
description: OK
|
||||
tags:
|
||||
- AgenticSystem
|
||||
/agentic_system/memory_bank/attach:
|
||||
post:
|
||||
parameters:
|
||||
- in: query
|
||||
name: agent_id
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- in: query
|
||||
name: session_id
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
required: true
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
tags:
|
||||
- AgenticSystem
|
||||
/agentic_system/memory_bank/detach:
|
||||
post:
|
||||
parameters:
|
||||
- in: query
|
||||
name: agent_id
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
- in: query
|
||||
name: session_id
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
required: true
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
tags:
|
||||
- AgenticSystem
|
||||
/agentic_system/session/create:
|
||||
post:
|
||||
parameters: []
|
||||
|
@ -1969,19 +1679,6 @@ paths:
|
|||
description: OK
|
||||
tags:
|
||||
- Evaluations
|
||||
/evaluate/job/cancel:
|
||||
get:
|
||||
parameters:
|
||||
- in: query
|
||||
name: job_uuid
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
tags:
|
||||
- Evaluations
|
||||
/evaluate/job/logs:
|
||||
get:
|
||||
parameters:
|
||||
|
@ -2082,78 +1779,6 @@ paths:
|
|||
description: OK
|
||||
tags:
|
||||
- Evaluations
|
||||
/inference/batch_chat_completion:
|
||||
post:
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/BatchChatCompletionRequest'
|
||||
required: true
|
||||
responses:
|
||||
'200':
|
||||
content:
|
||||
application/jsonl:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ChatCompletionResponse'
|
||||
description: OK
|
||||
tags:
|
||||
- ModelInference
|
||||
/inference/batch_completion:
|
||||
post:
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/BatchCompletionRequest'
|
||||
required: true
|
||||
responses:
|
||||
'200':
|
||||
content:
|
||||
application/jsonl:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CompletionResponse'
|
||||
description: OK
|
||||
tags:
|
||||
- ModelInference
|
||||
/inference/chat_completion:
|
||||
post:
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ChatCompletionRequest'
|
||||
required: true
|
||||
responses:
|
||||
'200':
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
|
||||
description: SSE-stream of these events.
|
||||
tags:
|
||||
- ModelInference
|
||||
/inference/completion:
|
||||
post:
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CompletionRequest'
|
||||
required: true
|
||||
responses:
|
||||
'200':
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CompletionResponseStreamChunk'
|
||||
description: streamed completion response.
|
||||
tags:
|
||||
- ModelInference
|
||||
/memory_bank/delete:
|
||||
post:
|
||||
parameters:
|
||||
|
@ -2335,19 +1960,6 @@ paths:
|
|||
description: OK
|
||||
tags:
|
||||
- PostTraining
|
||||
/post_training/job/cancel:
|
||||
get:
|
||||
parameters:
|
||||
- in: query
|
||||
name: job_uuid
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
tags:
|
||||
- PostTraining
|
||||
/post_training/job/logs:
|
||||
get:
|
||||
parameters:
|
||||
|
@ -2471,22 +2083,29 @@ security:
|
|||
servers:
|
||||
- url: http://any-hosted-llama-stack.com
|
||||
tags:
|
||||
- name: RewardScoring
|
||||
- name: PostTraining
|
||||
- name: AgenticSystem
|
||||
- name: Datasets
|
||||
- name: ModelInference
|
||||
- name: SyntheticDataGeneration
|
||||
- name: MemoryBanks
|
||||
- name: PostTraining
|
||||
- name: Evaluations
|
||||
- name: RewardScoring
|
||||
- name: SyntheticDataGeneration
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
|
||||
/>
|
||||
name: AgenticSystemCreateRequest
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemInstanceConfig"
|
||||
/>
|
||||
name: AgenticSystemInstanceConfig
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemToolDefinition"
|
||||
/>
|
||||
name: AgenticSystemToolDefinition
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/Attachment" />
|
||||
name: Attachment
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
|
||||
/>
|
||||
name: BatchChatCompletionRequest
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/Bf16QuantizationConfig"
|
||||
/>
|
||||
name: Bf16QuantizationConfig
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinShield" />
|
||||
name: BuiltinShield
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
|
||||
/>
|
||||
name: CompletionMessage
|
||||
|
@ -2495,19 +2114,28 @@ tags:
|
|||
name: Fp8QuantizationConfig
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/InstructModel" />
|
||||
name: InstructModel
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/OnViolationAction"
|
||||
/>
|
||||
name: OnViolationAction
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIExecutionConfig"
|
||||
/>
|
||||
name: RestAPIExecutionConfig
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIMethod" />
|
||||
name: RestAPIMethod
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/SamplingParams" />
|
||||
name: SamplingParams
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/SamplingStrategy"
|
||||
/>
|
||||
name: SamplingStrategy
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldDefinition"
|
||||
/>
|
||||
name: ShieldDefinition
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/StopReason" />
|
||||
name: StopReason
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/SystemMessage" />
|
||||
name: SystemMessage
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCall" />
|
||||
name: ToolCall
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolDefinition" />
|
||||
name: ToolDefinition
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolParamDefinition"
|
||||
/>
|
||||
name: ToolParamDefinition
|
||||
|
@ -2518,74 +2146,6 @@ tags:
|
|||
name: URL
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/UserMessage" />
|
||||
name: UserMessage
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponse"
|
||||
/>
|
||||
name: ChatCompletionResponse
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/TokenLogProbs" />
|
||||
name: TokenLogProbs
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/BatchCompletionRequest"
|
||||
/>
|
||||
name: BatchCompletionRequest
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/PretrainedModel"
|
||||
/>
|
||||
name: PretrainedModel
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionResponse"
|
||||
/>
|
||||
name: CompletionResponse
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest"
|
||||
/>
|
||||
name: ChatCompletionRequest
|
||||
- description: 'Chat completion response event.
|
||||
|
||||
|
||||
<SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseEvent"
|
||||
/>'
|
||||
name: ChatCompletionResponseEvent
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseEventType"
|
||||
/>
|
||||
name: ChatCompletionResponseEventType
|
||||
- description: 'SSE-stream of these events.
|
||||
|
||||
|
||||
<SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponseStreamChunk"
|
||||
/>'
|
||||
name: ChatCompletionResponseStreamChunk
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallDelta" />
|
||||
name: ToolCallDelta
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallParseStatus"
|
||||
/>
|
||||
name: ToolCallParseStatus
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionRequest"
|
||||
/>
|
||||
name: CompletionRequest
|
||||
- description: 'streamed completion response.
|
||||
|
||||
|
||||
<SchemaDefinition schemaRef="#/components/schemas/CompletionResponseStreamChunk"
|
||||
/>'
|
||||
name: CompletionResponseStreamChunk
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
|
||||
/>
|
||||
name: AgenticSystemCreateRequest
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemInstanceConfig"
|
||||
/>
|
||||
name: AgenticSystemInstanceConfig
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemToolDefinition"
|
||||
/>
|
||||
name: AgenticSystemToolDefinition
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinShield" />
|
||||
name: BuiltinShield
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/OnViolationAction"
|
||||
/>
|
||||
name: OnViolationAction
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIExecutionConfig"
|
||||
/>
|
||||
name: RestAPIExecutionConfig
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/RestAPIMethod" />
|
||||
name: RestAPIMethod
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldDefinition"
|
||||
/>
|
||||
name: ShieldDefinition
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateResponse"
|
||||
/>
|
||||
name: AgenticSystemCreateResponse
|
||||
|
@ -2622,19 +2182,23 @@ tags:
|
|||
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemTurnResponseTurnStartPayload"
|
||||
/>
|
||||
name: AgenticSystemTurnResponseTurnStartPayload
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/InferenceStep" />
|
||||
name: InferenceStep
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/MemoryBankDocument"
|
||||
/>
|
||||
name: MemoryBankDocument
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/MemoryRetrievalStep"
|
||||
/>
|
||||
name: MemoryRetrievalStep
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ModelInferenceStep"
|
||||
/>
|
||||
name: ModelInferenceStep
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldCallStep" />
|
||||
name: ShieldCallStep
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldResponse" />
|
||||
name: ShieldResponse
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallDelta" />
|
||||
name: ToolCallDelta
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolCallParseStatus"
|
||||
/>
|
||||
name: ToolCallParseStatus
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ToolExecutionStep"
|
||||
/>
|
||||
name: ToolExecutionStep
|
||||
|
@ -2785,6 +2349,9 @@ tags:
|
|||
|
||||
<SchemaDefinition schemaRef="#/components/schemas/PostTrainingSFTRequest" />'
|
||||
name: PostTrainingSFTRequest
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/PretrainedModel"
|
||||
/>
|
||||
name: PretrainedModel
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/QLoraFinetuningConfig"
|
||||
/>
|
||||
name: QLoraFinetuningConfig
|
||||
|
@ -2795,7 +2362,6 @@ x-tagGroups:
|
|||
- Datasets
|
||||
- Evaluations
|
||||
- MemoryBanks
|
||||
- ModelInference
|
||||
- PostTraining
|
||||
- RewardScoring
|
||||
- SyntheticDataGeneration
|
||||
|
@ -2816,19 +2382,9 @@ x-tagGroups:
|
|||
- AgenticSystemTurnResponseTurnCompletePayload
|
||||
- AgenticSystemTurnResponseTurnStartPayload
|
||||
- Attachment
|
||||
- BatchChatCompletionRequest
|
||||
- BatchCompletionRequest
|
||||
- Bf16QuantizationConfig
|
||||
- BuiltinShield
|
||||
- ChatCompletionRequest
|
||||
- ChatCompletionResponse
|
||||
- ChatCompletionResponseEvent
|
||||
- ChatCompletionResponseEventType
|
||||
- ChatCompletionResponseStreamChunk
|
||||
- CompletionMessage
|
||||
- CompletionRequest
|
||||
- CompletionResponse
|
||||
- CompletionResponseStreamChunk
|
||||
- CreateDatasetRequest
|
||||
- DPOAlignmentConfig
|
||||
- DialogGenerations
|
||||
|
@ -2842,12 +2398,12 @@ x-tagGroups:
|
|||
- EvaluationJobStatusResponse
|
||||
- FinetuningAlgorithm
|
||||
- Fp8QuantizationConfig
|
||||
- InferenceStep
|
||||
- InstructModel
|
||||
- LoraFinetuningConfig
|
||||
- MemoryBank
|
||||
- MemoryBankDocument
|
||||
- MemoryRetrievalStep
|
||||
- ModelInferenceStep
|
||||
- OnViolationAction
|
||||
- OptimizerConfig
|
||||
- PostTrainingJob
|
||||
|
@ -2877,11 +2433,9 @@ x-tagGroups:
|
|||
- SyntheticDataGenerationRequest
|
||||
- SyntheticDataGenerationResponse
|
||||
- SystemMessage
|
||||
- TokenLogProbs
|
||||
- ToolCall
|
||||
- ToolCallDelta
|
||||
- ToolCallParseStatus
|
||||
- ToolDefinition
|
||||
- ToolExecutionStep
|
||||
- ToolParamDefinition
|
||||
- ToolResponse
|
||||
|
|
|
@ -2,4 +2,4 @@
|
|||
|
||||
set -x
|
||||
|
||||
PYTHONPATH=../../../oss-ops:../.. python3 -m toolchain.spec.generate
|
||||
PYTHONPATH=/data/users/rsm/llama-models:/data/users/rsm/llama-toolchain:/data/users/rsm/llama-agentic-system:../../../oss-ops:../.. python -m toolchain.spec.generate
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue