diff --git a/llama_toolchain/agentic_system/api/api.py b/llama_toolchain/agentic_system/api/api.py index b8be54861..95af3727b 100644 --- a/llama_toolchain/agentic_system/api/api.py +++ b/llama_toolchain/agentic_system/api/api.py @@ -416,7 +416,16 @@ class AgenticSystem(Protocol): @webmethod(route="/agentic_system/turn/create") async def create_agentic_system_turn( self, - request: AgenticSystemTurnCreateRequest, + agent_id: str, + session_id: str, + messages: List[ + Union[ + UserMessage, + ToolResponseMessage, + ] + ], + attachments: Optional[List[Attachment]] = None, + stream: Optional[bool] = False, ) -> AgenticSystemTurnResponseStreamChunk: ... @webmethod(route="/agentic_system/turn/get") diff --git a/llama_toolchain/agentic_system/client.py b/llama_toolchain/agentic_system/client.py index e30e90376..52cf0dee2 100644 --- a/llama_toolchain/agentic_system/client.py +++ b/llama_toolchain/agentic_system/client.py @@ -73,9 +73,7 @@ class AgenticSystemClient(AgenticSystem): async with client.stream( "POST", f"{self.base_url}/agentic_system/turn/create", - json={ - "request": encodable_dict(request), - }, + json=encodable_dict(request), headers={"Content-Type": "application/json"}, timeout=20, ) as response: diff --git a/llama_toolchain/agentic_system/meta_reference/agent_instance.py b/llama_toolchain/agentic_system/meta_reference/agent_instance.py index 36c3d19e8..202f42a3c 100644 --- a/llama_toolchain/agentic_system/meta_reference/agent_instance.py +++ b/llama_toolchain/agentic_system/meta_reference/agent_instance.py @@ -388,19 +388,17 @@ class ChatAgent(ShieldRunnerMixin): ) ) - req = ChatCompletionRequest( - model=self.agent_config.model, - messages=input_messages, + tool_calls = [] + content = "" + stop_reason = None + async for chunk in self.inference_api.chat_completion( + self.agent_config.model, + input_messages, tools=self._get_tools(), tool_prompt_format=self.agent_config.tool_prompt_format, stream=True, sampling_params=sampling_params, - ) - - tool_calls = [] - content = "" - stop_reason = None - async for chunk in self.inference_api.chat_completion(req): + ): event = chunk.event if event.event_type == ChatCompletionResponseEventType.start: continue diff --git a/llama_toolchain/agentic_system/meta_reference/agentic_system.py b/llama_toolchain/agentic_system/meta_reference/agentic_system.py index 9caa3a75b..3990ab58a 100644 --- a/llama_toolchain/agentic_system/meta_reference/agentic_system.py +++ b/llama_toolchain/agentic_system/meta_reference/agentic_system.py @@ -114,8 +114,26 @@ class MetaReferenceAgenticSystemImpl(AgenticSystem): async def create_agentic_system_turn( self, - request: AgenticSystemTurnCreateRequest, + agent_id: str, + session_id: str, + messages: List[ + Union[ + UserMessage, + ToolResponseMessage, + ] + ], + attachments: Optional[List[Attachment]] = None, + stream: Optional[bool] = False, ) -> AsyncGenerator: + # wrapper request to make it easier to pass around (internal only, not exposed to API) + request = AgenticSystemTurnCreateRequest( + agent_id=agent_id, + session_id=session_id, + messages=messages, + attachments=attachments, + stream=stream, + ) + agent_id = request.agent_id assert agent_id in AGENT_INSTANCES_BY_ID, f"System {agent_id} not found" agent = AGENT_INSTANCES_BY_ID[agent_id] diff --git a/llama_toolchain/batch_inference/api/api.py b/llama_toolchain/batch_inference/api/api.py index a02815388..3d67120dd 100644 --- a/llama_toolchain/batch_inference/api/api.py +++ b/llama_toolchain/batch_inference/api/api.py @@ -51,11 +51,21 @@ class BatchInference(Protocol): @webmethod(route="/batch_inference/completion") async def batch_completion( self, - request: BatchCompletionRequest, + model: str, + content_batch: List[InterleavedTextMedia], + sampling_params: Optional[SamplingParams] = SamplingParams(), + logprobs: Optional[LogProbConfig] = None, ) -> BatchCompletionResponse: ... @webmethod(route="/batch_inference/chat_completion") async def batch_chat_completion( self, - request: BatchChatCompletionRequest, + model: str, + messages_batch: List[List[Message]], + sampling_params: Optional[SamplingParams] = SamplingParams(), + # zero-shot tool definitions as input to the model + tools: Optional[List[ToolDefinition]] = list, + tool_choice: Optional[ToolChoice] = ToolChoice.auto, + tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json, + logprobs: Optional[LogProbConfig] = None, ) -> BatchChatCompletionResponse: ... diff --git a/llama_toolchain/dataset/api/api.py b/llama_toolchain/dataset/api/api.py index c22fc01b0..2fa8bb4e5 100644 --- a/llama_toolchain/dataset/api/api.py +++ b/llama_toolchain/dataset/api/api.py @@ -46,7 +46,8 @@ class Datasets(Protocol): @webmethod(route="/datasets/create") def create_dataset( self, - request: CreateDatasetRequest, + uuid: str, + dataset: TrainEvalDataset, ) -> None: ... @webmethod(route="/datasets/get") diff --git a/llama_toolchain/evaluations/api/api.py b/llama_toolchain/evaluations/api/api.py index b8f3fa825..898dc2822 100644 --- a/llama_toolchain/evaluations/api/api.py +++ b/llama_toolchain/evaluations/api/api.py @@ -86,19 +86,19 @@ class Evaluations(Protocol): @webmethod(route="/evaluate/text_generation/") def evaluate_text_generation( self, - request: EvaluateTextGenerationRequest, + metrics: List[TextGenerationMetric], ) -> EvaluationJob: ... @webmethod(route="/evaluate/question_answering/") def evaluate_question_answering( self, - request: EvaluateQuestionAnsweringRequest, + metrics: List[QuestionAnsweringMetric], ) -> EvaluationJob: ... @webmethod(route="/evaluate/summarization/") def evaluate_summarization( self, - request: EvaluateSummarizationRequest, + metrics: List[SummarizationMetric], ) -> EvaluationJob: ... @webmethod(route="/evaluate/jobs") diff --git a/llama_toolchain/inference/adapters/fireworks/fireworks.py b/llama_toolchain/inference/adapters/fireworks/fireworks.py index b0eb41017..e51a730de 100644 --- a/llama_toolchain/inference/adapters/fireworks/fireworks.py +++ b/llama_toolchain/inference/adapters/fireworks/fireworks.py @@ -76,7 +76,28 @@ class FireworksInferenceAdapter(Inference): return options - async def chat_completion(self, request: ChatCompletionRequest) -> AsyncGenerator: + async def chat_completion( + self, + model: str, + messages: List[Message], + sampling_params: Optional[SamplingParams] = SamplingParams(), + tools: Optional[List[ToolDefinition]] = list(), + tool_choice: Optional[ToolChoice] = ToolChoice.auto, + tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json, + stream: Optional[bool] = False, + logprobs: Optional[LogProbConfig] = None, + ) -> AsyncGenerator: + request = ChatCompletionRequest( + model=model, + messages=messages, + sampling_params=sampling_params, + tools=tools, + tool_choice=tool_choice, + tool_prompt_format=tool_prompt_format, + stream=stream, + logprobs=logprobs, + ) + messages = prepare_messages(request) # accumulate sampling params and other options to pass to fireworks diff --git a/llama_toolchain/inference/adapters/ollama/ollama.py b/llama_toolchain/inference/adapters/ollama/ollama.py index 375257ea9..92fbf7585 100644 --- a/llama_toolchain/inference/adapters/ollama/ollama.py +++ b/llama_toolchain/inference/adapters/ollama/ollama.py @@ -84,7 +84,28 @@ class OllamaInferenceAdapter(Inference): return options - async def chat_completion(self, request: ChatCompletionRequest) -> AsyncGenerator: + async def chat_completion( + self, + model: str, + messages: List[Message], + sampling_params: Optional[SamplingParams] = SamplingParams(), + tools: Optional[List[ToolDefinition]] = list(), + tool_choice: Optional[ToolChoice] = ToolChoice.auto, + tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json, + stream: Optional[bool] = False, + logprobs: Optional[LogProbConfig] = None, + ) -> AsyncGenerator: + request = ChatCompletionRequest( + model=model, + messages=messages, + sampling_params=sampling_params, + tools=tools, + tool_choice=tool_choice, + tool_prompt_format=tool_prompt_format, + stream=stream, + logprobs=logprobs, + ) + messages = prepare_messages(request) # accumulate sampling params and other options to pass to ollama options = self.get_ollama_chat_options(request) diff --git a/llama_toolchain/inference/adapters/tgi/tgi.py b/llama_toolchain/inference/adapters/tgi/tgi.py index bb7b99d02..7b1028817 100644 --- a/llama_toolchain/inference/adapters/tgi/tgi.py +++ b/llama_toolchain/inference/adapters/tgi/tgi.py @@ -82,7 +82,28 @@ class TGIAdapter(Inference): return options - async def chat_completion(self, request: ChatCompletionRequest) -> AsyncGenerator: + async def chat_completion( + self, + model: str, + messages: List[Message], + sampling_params: Optional[SamplingParams] = SamplingParams(), + tools: Optional[List[ToolDefinition]] = list(), + tool_choice: Optional[ToolChoice] = ToolChoice.auto, + tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json, + stream: Optional[bool] = False, + logprobs: Optional[LogProbConfig] = None, + ) -> AsyncGenerator: + request = ChatCompletionRequest( + model=model, + messages=messages, + sampling_params=sampling_params, + tools=tools, + tool_choice=tool_choice, + tool_prompt_format=tool_prompt_format, + stream=stream, + logprobs=logprobs, + ) + messages = prepare_messages(request) model_input = self.formatter.encode_dialog_prompt(messages) prompt = self.tokenizer.decode(model_input.tokens) diff --git a/llama_toolchain/inference/adapters/together/together.py b/llama_toolchain/inference/adapters/together/together.py index 4800de6ad..76403a85b 100644 --- a/llama_toolchain/inference/adapters/together/together.py +++ b/llama_toolchain/inference/adapters/together/together.py @@ -76,7 +76,29 @@ class TogetherInferenceAdapter(Inference): return options - async def chat_completion(self, request: ChatCompletionRequest) -> AsyncGenerator: + async def chat_completion( + self, + model: str, + messages: List[Message], + sampling_params: Optional[SamplingParams] = SamplingParams(), + tools: Optional[List[ToolDefinition]] = list(), + tool_choice: Optional[ToolChoice] = ToolChoice.auto, + tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json, + stream: Optional[bool] = False, + logprobs: Optional[LogProbConfig] = None, + ) -> AsyncGenerator: + # wrapper request to make it easier to pass around (internal only, not exposed to API) + request = ChatCompletionRequest( + model=model, + messages=messages, + sampling_params=sampling_params, + tools=tools, + tool_choice=tool_choice, + tool_prompt_format=tool_prompt_format, + stream=stream, + logprobs=logprobs, + ) + # accumulate sampling params and other options to pass to together options = self.get_together_chat_options(request) together_model = self.resolve_together_model(request.model) diff --git a/llama_toolchain/inference/api/api.py b/llama_toolchain/inference/api/api.py index 7298cb27b..8887d312f 100644 --- a/llama_toolchain/inference/api/api.py +++ b/llama_toolchain/inference/api/api.py @@ -85,6 +85,8 @@ class CompletionRequest(BaseModel): @json_schema_type class CompletionResponse(BaseModel): + """Completion response.""" + completion_message: CompletionMessage logprobs: Optional[List[TokenLogProbs]] = None @@ -108,6 +110,8 @@ class BatchCompletionRequest(BaseModel): @json_schema_type class BatchCompletionResponse(BaseModel): + """Batch completion response.""" + completion_message_batch: List[CompletionMessage] @@ -137,6 +141,8 @@ class ChatCompletionResponseStreamChunk(BaseModel): @json_schema_type class ChatCompletionResponse(BaseModel): + """Chat completion response.""" + completion_message: CompletionMessage logprobs: Optional[List[TokenLogProbs]] = None @@ -170,13 +176,25 @@ class Inference(Protocol): @webmethod(route="/inference/completion") async def completion( self, - request: CompletionRequest, + model: str, + content: InterleavedTextMedia, + sampling_params: Optional[SamplingParams] = SamplingParams(), + stream: Optional[bool] = False, + logprobs: Optional[LogProbConfig] = None, ) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ... @webmethod(route="/inference/chat_completion") async def chat_completion( self, - request: ChatCompletionRequest, + model: str, + messages: List[Message], + sampling_params: Optional[SamplingParams] = SamplingParams(), + # zero-shot tool definitions as input to the model + tools: Optional[List[ToolDefinition]] = list, + tool_choice: Optional[ToolChoice] = ToolChoice.auto, + tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json, + stream: Optional[bool] = False, + logprobs: Optional[LogProbConfig] = None, ) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ... @webmethod(route="/inference/embeddings") diff --git a/llama_toolchain/inference/client.py b/llama_toolchain/inference/client.py index 5ba9314bc..c57433a8f 100644 --- a/llama_toolchain/inference/client.py +++ b/llama_toolchain/inference/client.py @@ -10,10 +10,10 @@ from typing import Any, AsyncGenerator import fire import httpx -from pydantic import BaseModel -from termcolor import cprint from llama_toolchain.core.datatypes import RemoteProviderConfig +from pydantic import BaseModel +from termcolor import cprint from .api import ( ChatCompletionRequest, @@ -52,9 +52,7 @@ class InferenceClient(Inference): async with client.stream( "POST", f"{self.base_url}/inference/chat_completion", - json={ - "request": encodable_dict(request), - }, + json=encodable_dict(request), headers={"Content-Type": "application/json"}, timeout=20, ) as response: diff --git a/llama_toolchain/inference/meta_reference/inference.py b/llama_toolchain/inference/meta_reference/inference.py index 187d5baae..247c08f23 100644 --- a/llama_toolchain/inference/meta_reference/inference.py +++ b/llama_toolchain/inference/meta_reference/inference.py @@ -22,9 +22,12 @@ from llama_toolchain.inference.api import ( ToolCallParseStatus, ) from llama_toolchain.inference.prepare_messages import prepare_messages + from .config import MetaReferenceImplConfig from .model_parallel import LlamaModelParallelGenerator +from llama_models.llama3.api.datatypes import * # noqa: F403 +from llama_toolchain.inference.api import * # noqa: F403 # there's a single model parallel process running serving the model. for now, # we don't support multiple concurrent requests to this process. @@ -50,10 +53,30 @@ class MetaReferenceInferenceImpl(Inference): # hm, when stream=False, we should not be doing SSE :/ which is what the # top-level server is going to do. make the typing more specific here async def chat_completion( - self, request: ChatCompletionRequest + self, + model: str, + messages: List[Message], + sampling_params: Optional[SamplingParams] = SamplingParams(), + tools: Optional[List[ToolDefinition]] = list(), + tool_choice: Optional[ToolChoice] = ToolChoice.auto, + tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json, + stream: Optional[bool] = False, + logprobs: Optional[LogProbConfig] = None, ) -> AsyncIterator[ Union[ChatCompletionResponseStreamChunk, ChatCompletionResponse] ]: + # wrapper request to make it easier to pass around (internal only, not exposed to API) + request = ChatCompletionRequest( + model=model, + messages=messages, + sampling_params=sampling_params, + tools=tools, + tool_choice=tool_choice, + tool_prompt_format=tool_prompt_format, + stream=stream, + logprobs=logprobs, + ) + messages = prepare_messages(request) model = resolve_model(request.model) if model is None: diff --git a/llama_toolchain/post_training/api/api.py b/llama_toolchain/post_training/api/api.py index 447a729fb..378515f83 100644 --- a/llama_toolchain/post_training/api/api.py +++ b/llama_toolchain/post_training/api/api.py @@ -179,13 +179,33 @@ class PostTraining(Protocol): @webmethod(route="/post_training/supervised_fine_tune") def supervised_fine_tune( self, - request: PostTrainingSFTRequest, + job_uuid: str, + model: str, + dataset: TrainEvalDataset, + validation_dataset: TrainEvalDataset, + algorithm: FinetuningAlgorithm, + algorithm_config: Union[ + LoraFinetuningConfig, QLoraFinetuningConfig, DoraFinetuningConfig + ], + optimizer_config: OptimizerConfig, + training_config: TrainingConfig, + hyperparam_search_config: Dict[str, Any], + logger_config: Dict[str, Any], ) -> PostTrainingJob: ... @webmethod(route="/post_training/preference_optimize") def preference_optimize( self, - request: PostTrainingRLHFRequest, + job_uuid: str, + finetuned_model: URL, + dataset: TrainEvalDataset, + validation_dataset: TrainEvalDataset, + algorithm: RLHFAlgorithm, + algorithm_config: Union[DPOAlignmentConfig], + optimizer_config: OptimizerConfig, + training_config: TrainingConfig, + hyperparam_search_config: Dict[str, Any], + logger_config: Dict[str, Any], ) -> PostTrainingJob: ... @webmethod(route="/post_training/jobs") diff --git a/llama_toolchain/reward_scoring/api/api.py b/llama_toolchain/reward_scoring/api/api.py index c91931f09..9d689f232 100644 --- a/llama_toolchain/reward_scoring/api/api.py +++ b/llama_toolchain/reward_scoring/api/api.py @@ -50,5 +50,6 @@ class RewardScoring(Protocol): @webmethod(route="/reward_scoring/score") def reward_score( self, - request: RewardScoringRequest, + dialog_generations: List[DialogGenerations], + model: str, ) -> Union[RewardScoringResponse]: ... diff --git a/llama_toolchain/safety/api/api.py b/llama_toolchain/safety/api/api.py index 96682d172..631cfa992 100644 --- a/llama_toolchain/safety/api/api.py +++ b/llama_toolchain/safety/api/api.py @@ -86,5 +86,6 @@ class Safety(Protocol): @webmethod(route="/safety/run_shields") async def run_shields( self, - request: RunShieldRequest, + messages: List[Message], + shields: List[ShieldDefinition], ) -> RunShieldResponse: ... diff --git a/llama_toolchain/safety/client.py b/llama_toolchain/safety/client.py index 0cf7deae8..26a9813b3 100644 --- a/llama_toolchain/safety/client.py +++ b/llama_toolchain/safety/client.py @@ -13,10 +13,10 @@ import fire import httpx from llama_models.llama3.api.datatypes import UserMessage -from pydantic import BaseModel -from termcolor import cprint from llama_toolchain.core.datatypes import RemoteProviderConfig +from pydantic import BaseModel +from termcolor import cprint from .api import * # noqa: F403 @@ -43,9 +43,7 @@ class SafetyClient(Safety): async with httpx.AsyncClient() as client: response = await client.post( f"{self.base_url}/safety/run_shields", - json={ - "request": encodable_dict(request), - }, + json=encodable_dict(request), headers={"Content-Type": "application/json"}, timeout=20, ) diff --git a/llama_toolchain/safety/meta_reference/safety.py b/llama_toolchain/safety/meta_reference/safety.py index e71ac09a2..6c75e74e8 100644 --- a/llama_toolchain/safety/meta_reference/safety.py +++ b/llama_toolchain/safety/meta_reference/safety.py @@ -52,13 +52,12 @@ class MetaReferenceSafetyImpl(Safety): async def run_shields( self, - request: RunShieldRequest, + messages: List[Message], + shields: List[ShieldDefinition], ) -> RunShieldResponse: - shields = [shield_config_to_shield(c, self.config) for c in request.shields] + shields = [shield_config_to_shield(c, self.config) for c in shields] - responses = await asyncio.gather( - *[shield.run(request.messages) for shield in shields] - ) + responses = await asyncio.gather(*[shield.run(messages) for shield in shields]) return RunShieldResponse(responses=responses) diff --git a/llama_toolchain/synthetic_data_generation/api/api.py b/llama_toolchain/synthetic_data_generation/api/api.py index 44b8327a9..9a6c487af 100644 --- a/llama_toolchain/synthetic_data_generation/api/api.py +++ b/llama_toolchain/synthetic_data_generation/api/api.py @@ -48,5 +48,7 @@ class SyntheticDataGeneration(Protocol): @webmethod(route="/synthetic_data_generation/generate") def synthetic_data_generate( self, - request: SyntheticDataGenerationRequest, + dialogs: List[Message], + filtering_function: FilteringFunction = FilteringFunction.none, + model: Optional[str] = None, ) -> Union[SyntheticDataGenerationResponse]: ... diff --git a/llama_toolchain/telemetry/api/api.py b/llama_toolchain/telemetry/api/api.py index 100836b46..2546c1ede 100644 --- a/llama_toolchain/telemetry/api/api.py +++ b/llama_toolchain/telemetry/api/api.py @@ -125,7 +125,7 @@ Event = Annotated[ class Telemetry(Protocol): @webmethod(route="/telemetry/log_event") - async def log_event(self, event: Event): ... + async def log_event(self, event: Event) -> None: ... @webmethod(route="/telemetry/get_trace", method="GET") async def get_trace(self, trace_id: str) -> Trace: ... diff --git a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html index 211290ce1..6e7fe287f 100644 --- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html +++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html @@ -21,7 +21,7 @@ "info": { "title": "[DRAFT] Llama Stack Specification", "version": "0.0.1", - "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-10 16:42:15.870336" + "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-09-11 16:05:23.016090" }, "servers": [ { @@ -51,7 +51,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/BatchChatCompletionRequestWrapper" + "$ref": "#/components/schemas/BatchChatCompletionRequest" } } }, @@ -81,7 +81,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/BatchCompletionRequestWrapper" + "$ref": "#/components/schemas/BatchCompletionRequest" } } }, @@ -139,11 +139,18 @@ "post": { "responses": { "200": { - "description": "SSE-stream of these events.", + "description": "Chat completion response. **OR** SSE-stream of these events.", "content": { "text/event-stream": { "schema": { - "$ref": "#/components/schemas/ChatCompletionResponseStreamChunk" + "oneOf": [ + { + "$ref": "#/components/schemas/ChatCompletionResponse" + }, + { + "$ref": "#/components/schemas/ChatCompletionResponseStreamChunk" + } + ] } } } @@ -157,7 +164,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/ChatCompletionRequestWrapper" + "$ref": "#/components/schemas/ChatCompletionRequest" } } }, @@ -169,11 +176,18 @@ "post": { "responses": { "200": { - "description": "streamed completion response.", + "description": "Completion response. **OR** streamed completion response.", "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/CompletionResponseStreamChunk" + "oneOf": [ + { + "$ref": "#/components/schemas/CompletionResponse" + }, + { + "$ref": "#/components/schemas/CompletionResponseStreamChunk" + } + ] } } } @@ -187,7 +201,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/CompletionRequestWrapper" + "$ref": "#/components/schemas/CompletionRequest" } } }, @@ -277,7 +291,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/AgenticSystemTurnCreateRequestWrapper" + "$ref": "#/components/schemas/CreateAgenticSystemTurnRequest" } } }, @@ -300,37 +314,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/CreateDatasetRequestWrapper" - } - } - }, - "required": true - } - } - }, - "/experiments/create": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Experiment" - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CreateExperimentRequestWrapper" + "$ref": "#/components/schemas/CreateDatasetRequest" } } }, @@ -368,36 +352,6 @@ } } }, - "/experiments/create_run": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Run" - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CreateRunRequestWrapper" - } - } - }, - "required": true - } - } - }, "/agentic_system/delete": { "post": { "responses": { @@ -572,7 +526,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/EvaluateQuestionAnsweringRequestWrapper" + "$ref": "#/components/schemas/EvaluateQuestionAnsweringRequest" } } }, @@ -602,7 +556,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/EvaluateSummarizationRequestWrapper" + "$ref": "#/components/schemas/EvaluateSummarizationRequest" } } }, @@ -632,7 +586,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/EvaluateTextGenerationRequestWrapper" + "$ref": "#/components/schemas/EvaluateTextGenerationRequest" } } }, @@ -769,35 +723,6 @@ ] } }, - "/artifacts/get": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Artifact" - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [ - { - "name": "artifact_id", - "in": "query", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, "/datasets/get": { "get": { "responses": { @@ -973,65 +898,6 @@ "parameters": [] } }, - "/experiments/get": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Experiment" - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [ - { - "name": "experiment_id", - "in": "query", - "required": true, - "schema": { - "type": "string" - } - } - ] - } - }, - "/logging/get_logs": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/jsonl": { - "schema": { - "$ref": "#/components/schemas/Log" - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/LogSearchRequestWrapper" - } - } - }, - "required": true - } - } - }, "/memory_banks/get": { "get": { "responses": { @@ -1068,15 +934,15 @@ ] } }, - "/runs/metrics": { + "/telemetry/get_trace": { "get": { "responses": { "200": { "description": "OK", "content": { - "application/jsonl": { + "application/json": { "schema": { - "$ref": "#/components/schemas/Metric" + "$ref": "#/components/schemas/Trace" } } } @@ -1087,7 +953,7 @@ ], "parameters": [ { - "name": "run_id", + "name": "trace_id", "in": "query", "required": true, "schema": { @@ -1227,56 +1093,6 @@ } } }, - "/experiments/artifacts/get": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/jsonl": { - "schema": { - "$ref": "#/components/schemas/Artifact" - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ListArtifactsRequest" - } - } - }, - "required": true - } - } - }, - "/experiments/list": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/jsonl": { - "schema": { - "$ref": "#/components/schemas/Experiment" - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [] - } - }, "/memory_banks/list": { "get": { "responses": { @@ -1297,7 +1113,7 @@ "parameters": [] } }, - "/logging/log_messages": { + "/telemetry/log_event": { "post": { "responses": { "200": { @@ -1312,30 +1128,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/LogMessagesRequestWrapper" - } - } - }, - "required": true - } - } - }, - "/runs/log_metrics": { - "post": { - "responses": { - "200": { - "description": "OK" - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/LogMetricsRequestWrapper" + "$ref": "#/components/schemas/LogEventRequest" } } }, @@ -1365,7 +1158,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/PostTrainingRLHFRequestWrapper" + "$ref": "#/components/schemas/PreferenceOptimizeRequest" } } }, @@ -1425,7 +1218,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/RewardScoringRequestWrapper" + "$ref": "#/components/schemas/RewardScoreRequest" } } }, @@ -1455,7 +1248,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/RunShieldRequestWrapper" + "$ref": "#/components/schemas/RunShieldsRequest" } } }, @@ -1485,7 +1278,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/PostTrainingSFTRequestWrapper" + "$ref": "#/components/schemas/SupervisedFineTuneRequest" } } }, @@ -1515,7 +1308,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/SyntheticDataGenerationRequestWrapper" + "$ref": "#/components/schemas/SyntheticDataGenerateRequest" } } }, @@ -1545,160 +1338,11 @@ "required": true } } - }, - "/experiments/update": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Experiment" - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/UpdateExperimentRequestWrapper" - } - } - }, - "required": true - } - } - }, - "/runs/update": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Run" - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/UpdateRunRequestWrapper" - } - } - }, - "required": true - } - } - }, - "/experiments/artifacts/upload": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Artifact" - } - } - } - } - }, - "tags": [ - "Telemetry" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/UploadArtifactRequestWrapper" - } - } - }, - "required": true - } - } } }, "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema", "components": { "schemas": { - "BatchChatCompletionRequest": { - "type": "object", - "properties": { - "model": { - "type": "string" - }, - "messages_batch": { - "type": "array", - "items": { - "type": "array", - "items": { - "oneOf": [ - { - "$ref": "#/components/schemas/UserMessage" - }, - { - "$ref": "#/components/schemas/SystemMessage" - }, - { - "$ref": "#/components/schemas/ToolResponseMessage" - }, - { - "$ref": "#/components/schemas/CompletionMessage" - } - ] - } - } - }, - "sampling_params": { - "$ref": "#/components/schemas/SamplingParams" - }, - "tools": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ToolDefinition" - } - }, - "tool_choice": { - "$ref": "#/components/schemas/ToolChoice" - }, - "tool_prompt_format": { - "$ref": "#/components/schemas/ToolPromptFormat" - }, - "logprobs": { - "type": "object", - "properties": { - "top_k": { - "type": "integer" - } - }, - "additionalProperties": false - } - }, - "additionalProperties": false, - "required": [ - "model", - "messages_batch" - ] - }, "BuiltinTool": { "type": "string", "enum": [ @@ -2050,16 +1694,63 @@ "content" ] }, - "BatchChatCompletionRequestWrapper": { + "BatchChatCompletionRequest": { "type": "object", "properties": { - "request": { - "$ref": "#/components/schemas/BatchChatCompletionRequest" + "model": { + "type": "string" + }, + "messages_batch": { + "type": "array", + "items": { + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/SystemMessage" + }, + { + "$ref": "#/components/schemas/ToolResponseMessage" + }, + { + "$ref": "#/components/schemas/CompletionMessage" + } + ] + } + } + }, + "sampling_params": { + "$ref": "#/components/schemas/SamplingParams" + }, + "tools": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolDefinition" + } + }, + "tool_choice": { + "$ref": "#/components/schemas/ToolChoice" + }, + "tool_prompt_format": { + "$ref": "#/components/schemas/ToolPromptFormat" + }, + "logprobs": { + "type": "object", + "properties": { + "top_k": { + "type": "integer" + } + }, + "additionalProperties": false } }, "additionalProperties": false, "required": [ - "request" + "model", + "messages_batch" ] }, "BatchChatCompletionResponse": { @@ -2118,18 +1809,6 @@ "content_batch" ] }, - "BatchCompletionRequestWrapper": { - "type": "object", - "properties": { - "request": { - "$ref": "#/components/schemas/BatchCompletionRequest" - } - }, - "additionalProperties": false, - "required": [ - "request" - ] - }, "BatchCompletionResponse": { "type": "object", "properties": { @@ -2228,17 +1907,24 @@ "messages" ] }, - "ChatCompletionRequestWrapper": { + "ChatCompletionResponse": { "type": "object", "properties": { - "request": { - "$ref": "#/components/schemas/ChatCompletionRequest" + "completion_message": { + "$ref": "#/components/schemas/CompletionMessage" + }, + "logprobs": { + "type": "array", + "items": { + "$ref": "#/components/schemas/TokenLogProbs" + } } }, "additionalProperties": false, "required": [ - "request" - ] + "completion_message" + ], + "title": "Chat completion response." }, "ChatCompletionResponseEvent": { "type": "object", @@ -2382,17 +2068,24 @@ "content" ] }, - "CompletionRequestWrapper": { + "CompletionResponse": { "type": "object", "properties": { - "request": { - "$ref": "#/components/schemas/CompletionRequest" + "completion_message": { + "$ref": "#/components/schemas/CompletionMessage" + }, + "logprobs": { + "type": "array", + "items": { + "$ref": "#/components/schemas/TokenLogProbs" + } } }, "additionalProperties": false, "required": [ - "request" - ] + "completion_message" + ], + "title": "Completion response." }, "CompletionResponseStreamChunk": { "type": "object", @@ -3031,234 +2724,38 @@ "session_id" ] }, - "AgenticSystemTurnCreateRequest": { + "Attachment": { "type": "object", "properties": { - "sampling_params": { - "$ref": "#/components/schemas/SamplingParams" - }, - "input_shields": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ShieldDefinition" - } - }, - "output_shields": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ShieldDefinition" - } - }, - "tools": { - "type": "array", - "items": { - "oneOf": [ - { - "$ref": "#/components/schemas/SearchToolDefinition" - }, - { - "$ref": "#/components/schemas/WolframAlphaToolDefinition" - }, - { - "$ref": "#/components/schemas/PhotogenToolDefinition" - }, - { - "$ref": "#/components/schemas/CodeInterpreterToolDefinition" - }, - { - "$ref": "#/components/schemas/FunctionCallToolDefinition" - }, - { - "type": "object", - "properties": { - "input_shields": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ShieldDefinition" - } - }, - "output_shields": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ShieldDefinition" - } - }, - "type": { - "type": "string", - "const": "memory" - }, - "memory_bank_configs": { - "type": "array", - "items": { - "oneOf": [ - { - "type": "object", - "properties": { - "bank_id": { - "type": "string" - }, - "type": { - "type": "string", - "const": "vector" - } - }, - "additionalProperties": false, - "required": [ - "bank_id", - "type" - ] - }, - { - "type": "object", - "properties": { - "bank_id": { - "type": "string" - }, - "type": { - "type": "string", - "const": "keyvalue" - }, - "keys": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "additionalProperties": false, - "required": [ - "bank_id", - "type", - "keys" - ] - }, - { - "type": "object", - "properties": { - "bank_id": { - "type": "string" - }, - "type": { - "type": "string", - "const": "keyword" - } - }, - "additionalProperties": false, - "required": [ - "bank_id", - "type" - ] - }, - { - "type": "object", - "properties": { - "bank_id": { - "type": "string" - }, - "type": { - "type": "string", - "const": "graph" - }, - "entities": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "additionalProperties": false, - "required": [ - "bank_id", - "type", - "entities" - ] - } - ] - } - }, - "query_generator_config": { - "oneOf": [ - { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "default" - }, - "sep": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "type", - "sep" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "llm" - }, - "model": { - "type": "string" - }, - "template": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "type", - "model", - "template" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "custom" - } - }, - "additionalProperties": false, - "required": [ - "type" - ] - } - ] - }, - "max_tokens_in_context": { - "type": "integer" - }, - "max_chunks": { - "type": "integer" - } - }, - "additionalProperties": false, - "required": [ - "type", - "memory_bank_configs", - "query_generator_config", - "max_tokens_in_context", - "max_chunks" - ] + "content": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" } - ] - } + }, + { + "$ref": "#/components/schemas/URL" + } + ] }, - "tool_choice": { - "$ref": "#/components/schemas/ToolChoice" - }, - "tool_prompt_format": { - "$ref": "#/components/schemas/ToolPromptFormat" - }, - "instructions": { + "mime_type": { "type": "string" - }, + } + }, + "additionalProperties": false, + "required": [ + "content", + "mime_type" + ] + }, + "CreateAgenticSystemTurnRequest": { + "type": "object", + "properties": { "agent_id": { "type": "string" }, @@ -3295,47 +2792,6 @@ "messages" ] }, - "Attachment": { - "type": "object", - "properties": { - "content": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - }, - { - "$ref": "#/components/schemas/URL" - } - ] - }, - "mime_type": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "content", - "mime_type" - ] - }, - "AgenticSystemTurnCreateRequestWrapper": { - "type": "object", - "properties": { - "request": { - "$ref": "#/components/schemas/AgenticSystemTurnCreateRequest" - } - }, - "additionalProperties": false, - "required": [ - "request" - ] - }, "AgenticSystemTurnResponseEvent": { "type": "object", "properties": { @@ -3835,23 +3291,6 @@ ], "title": "A single turn in an interaction with an Agentic System." }, - "CreateDatasetRequest": { - "type": "object", - "properties": { - "uuid": { - "type": "string" - }, - "dataset": { - "$ref": "#/components/schemas/TrainEvalDataset" - } - }, - "additionalProperties": false, - "required": [ - "uuid", - "dataset" - ], - "title": "Request to create a dataset." - }, "TrainEvalDataset": { "type": "object", "properties": { @@ -3907,130 +3346,20 @@ "json" ] }, - "CreateDatasetRequestWrapper": { + "CreateDatasetRequest": { "type": "object", "properties": { - "request": { - "$ref": "#/components/schemas/CreateDatasetRequest" - } - }, - "additionalProperties": false, - "required": [ - "request" - ] - }, - "CreateExperimentRequest": { - "type": "object", - "properties": { - "name": { + "uuid": { "type": "string" }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } + "dataset": { + "$ref": "#/components/schemas/TrainEvalDataset" } }, "additionalProperties": false, "required": [ - "name" - ] - }, - "CreateExperimentRequestWrapper": { - "type": "object", - "properties": { - "request": { - "$ref": "#/components/schemas/CreateExperimentRequest" - } - }, - "additionalProperties": false, - "required": [ - "request" - ] - }, - "Experiment": { - "type": "object", - "properties": { - "id": { - "type": "string" - }, - "name": { - "type": "string" - }, - "status": { - "$ref": "#/components/schemas/ExperimentStatus" - }, - "created_at": { - "type": "string", - "format": "date-time" - }, - "updated_at": { - "type": "string", - "format": "date-time" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "id", - "name", - "status", - "created_at", - "updated_at", - "metadata" - ] - }, - "ExperimentStatus": { - "type": "string", - "enum": [ - "not_started", - "running", - "completed", - "failed" + "uuid", + "dataset" ] }, "CreateMemoryBankRequest": { @@ -4203,110 +3532,6 @@ "config" ] }, - "CreateRunRequest": { - "type": "object", - "properties": { - "experiment_id": { - "type": "string" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "experiment_id" - ] - }, - "CreateRunRequestWrapper": { - "type": "object", - "properties": { - "request": { - "$ref": "#/components/schemas/CreateRunRequest" - } - }, - "additionalProperties": false, - "required": [ - "request" - ] - }, - "Run": { - "type": "object", - "properties": { - "id": { - "type": "string" - }, - "experiment_id": { - "type": "string" - }, - "status": { - "type": "string" - }, - "started_at": { - "type": "string", - "format": "date-time" - }, - "ended_at": { - "type": "string", - "format": "date-time" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "id", - "experiment_id", - "status", - "started_at", - "metadata" - ] - }, "DeleteAgenticSystemRequest": { "type": "object", "properties": { @@ -4425,24 +3650,9 @@ "embeddings" ] }, - "Checkpoint": { - "description": "Checkpoint created during training runs" - }, "EvaluateQuestionAnsweringRequest": { "type": "object", "properties": { - "job_uuid": { - "type": "string" - }, - "dataset": { - "$ref": "#/components/schemas/TrainEvalDataset" - }, - "checkpoint": { - "$ref": "#/components/schemas/Checkpoint" - }, - "sampling_params": { - "$ref": "#/components/schemas/SamplingParams" - }, "metrics": { "type": "array", "items": { @@ -4456,24 +3666,7 @@ }, "additionalProperties": false, "required": [ - "job_uuid", - "dataset", - "checkpoint", - "sampling_params", "metrics" - ], - "title": "Request to evaluate question answering." - }, - "EvaluateQuestionAnsweringRequestWrapper": { - "type": "object", - "properties": { - "request": { - "$ref": "#/components/schemas/EvaluateQuestionAnsweringRequest" - } - }, - "additionalProperties": false, - "required": [ - "request" ] }, "EvaluationJob": { @@ -4491,18 +3684,6 @@ "EvaluateSummarizationRequest": { "type": "object", "properties": { - "job_uuid": { - "type": "string" - }, - "dataset": { - "$ref": "#/components/schemas/TrainEvalDataset" - }, - "checkpoint": { - "$ref": "#/components/schemas/Checkpoint" - }, - "sampling_params": { - "$ref": "#/components/schemas/SamplingParams" - }, "metrics": { "type": "array", "items": { @@ -4516,41 +3697,12 @@ }, "additionalProperties": false, "required": [ - "job_uuid", - "dataset", - "checkpoint", - "sampling_params", "metrics" - ], - "title": "Request to evaluate summarization." - }, - "EvaluateSummarizationRequestWrapper": { - "type": "object", - "properties": { - "request": { - "$ref": "#/components/schemas/EvaluateSummarizationRequest" - } - }, - "additionalProperties": false, - "required": [ - "request" ] }, "EvaluateTextGenerationRequest": { "type": "object", "properties": { - "job_uuid": { - "type": "string" - }, - "dataset": { - "$ref": "#/components/schemas/TrainEvalDataset" - }, - "checkpoint": { - "$ref": "#/components/schemas/Checkpoint" - }, - "sampling_params": { - "$ref": "#/components/schemas/SamplingParams" - }, "metrics": { "type": "array", "items": { @@ -4565,24 +3717,7 @@ }, "additionalProperties": false, "required": [ - "job_uuid", - "dataset", - "checkpoint", - "sampling_params", "metrics" - ], - "title": "Request to evaluate text generation." - }, - "EvaluateTextGenerationRequestWrapper": { - "type": "object", - "properties": { - "request": { - "$ref": "#/components/schemas/EvaluateTextGenerationRequest" - } - }, - "additionalProperties": false, - "required": [ - "request" ] }, "GetAgenticSystemSessionRequest": { @@ -4654,74 +3789,6 @@ "step" ] }, - "Artifact": { - "type": "object", - "properties": { - "id": { - "type": "string" - }, - "name": { - "type": "string" - }, - "type": { - "$ref": "#/components/schemas/ArtifactType" - }, - "size": { - "type": "integer" - }, - "created_at": { - "type": "string", - "format": "date-time" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "id", - "name", - "type", - "size", - "created_at", - "metadata" - ] - }, - "ArtifactType": { - "type": "string", - "enum": [ - "model", - "dataset", - "checkpoint", - "plot", - "metric", - "config", - "code", - "other" - ] - }, "GetDocumentsRequest": { "type": "object", "properties": { @@ -4833,140 +3900,34 @@ "job_uuid" ] }, - "LogSearchRequest": { + "Trace": { "type": "object", "properties": { - "query": { + "trace_id": { "type": "string" }, - "filters": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "query" - ] - }, - "LogSearchRequestWrapper": { - "type": "object", - "properties": { - "request": { - "$ref": "#/components/schemas/LogSearchRequest" - } - }, - "additionalProperties": false, - "required": [ - "request" - ] - }, - "Log": { - "type": "object", - "properties": { - "message": { + "root_span_id": { "type": "string" }, - "level": { - "type": "string" - }, - "timestamp": { + "start_time": { "type": "string", "format": "date-time" }, - "additional_info": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "message", - "level", - "timestamp", - "additional_info" - ] - }, - "Metric": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "value": { - "oneOf": [ - { - "type": "number" - }, - { - "type": "integer" - }, - { - "type": "string" - }, - { - "type": "boolean" - } - ] - }, - "timestamp": { + "end_time": { "type": "string", "format": "date-time" - }, - "run_id": { - "type": "string" } }, "additionalProperties": false, "required": [ - "name", - "value", - "timestamp", - "run_id" + "trace_id", + "root_span_id", + "start_time" ] }, + "Checkpoint": { + "description": "Checkpoint created during training runs" + }, "PostTrainingJobArtifactsResponse": { "type": "object", "properties": { @@ -5111,77 +4072,272 @@ "documents" ] }, - "ListArtifactsRequest": { - "type": "object", - "properties": { - "experiment_id": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "experiment_id" + "LogSeverity": { + "type": "string", + "enum": [ + "verbose", + "debug", + "info", + "warn", + "error", + "critical" ] }, - "LogMessagesRequest": { + "MetricEvent": { "type": "object", "properties": { - "logs": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Log" - } - }, - "run_id": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "logs" - ] - }, - "LogMessagesRequestWrapper": { - "type": "object", - "properties": { - "request": { - "$ref": "#/components/schemas/LogMessagesRequest" - } - }, - "additionalProperties": false, - "required": [ - "request" - ] - }, - "LogMetricsRequest": { - "type": "object", - "properties": { - "run_id": { + "trace_id": { "type": "string" }, - "metrics": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Metric" + "span_id": { + "type": "string" + }, + "timestamp": { + "type": "string", + "format": "date-time" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] } + }, + "type": { + "type": "string", + "const": "metric" + }, + "metric": { + "type": "string" + }, + "value": { + "oneOf": [ + { + "type": "integer" + }, + { + "type": "number" + } + ] + }, + "unit": { + "type": "string" } }, "additionalProperties": false, "required": [ - "run_id", - "metrics" + "trace_id", + "span_id", + "timestamp", + "type", + "metric", + "value", + "unit" ] }, - "LogMetricsRequestWrapper": { + "SpanEndPayload": { "type": "object", "properties": { - "request": { - "$ref": "#/components/schemas/LogMetricsRequest" + "type": { + "type": "string", + "const": "span_end" + }, + "status": { + "$ref": "#/components/schemas/SpanStatus" } }, "additionalProperties": false, "required": [ - "request" + "type", + "status" + ] + }, + "SpanStartPayload": { + "type": "object", + "properties": { + "type": { + "type": "string", + "const": "span_start" + }, + "name": { + "type": "string" + }, + "parent_span_id": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "type", + "name" + ] + }, + "SpanStatus": { + "type": "string", + "enum": [ + "ok", + "error" + ] + }, + "StructuredLogEvent": { + "type": "object", + "properties": { + "trace_id": { + "type": "string" + }, + "span_id": { + "type": "string" + }, + "timestamp": { + "type": "string", + "format": "date-time" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "type": { + "type": "string", + "const": "structured_log" + }, + "payload": { + "oneOf": [ + { + "$ref": "#/components/schemas/SpanStartPayload" + }, + { + "$ref": "#/components/schemas/SpanEndPayload" + } + ] + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "span_id", + "timestamp", + "type", + "payload" + ] + }, + "UnstructuredLogEvent": { + "type": "object", + "properties": { + "trace_id": { + "type": "string" + }, + "span_id": { + "type": "string" + }, + "timestamp": { + "type": "string", + "format": "date-time" + }, + "attributes": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + }, + "type": { + "type": "string", + "const": "unstructured_log" + }, + "message": { + "type": "string" + }, + "severity": { + "$ref": "#/components/schemas/LogSeverity" + } + }, + "additionalProperties": false, + "required": [ + "trace_id", + "span_id", + "timestamp", + "type", + "message", + "severity" + ] + }, + "LogEventRequest": { + "type": "object", + "properties": { + "event": { + "oneOf": [ + { + "$ref": "#/components/schemas/UnstructuredLogEvent" + }, + { + "$ref": "#/components/schemas/MetricEvent" + }, + { + "$ref": "#/components/schemas/StructuredLogEvent" + } + ] + } + }, + "additionalProperties": false, + "required": [ + "event" ] }, "DPOAlignmentConfig": { @@ -5237,7 +4393,49 @@ "weight_decay" ] }, - "PostTrainingRLHFRequest": { + "RLHFAlgorithm": { + "type": "string", + "enum": [ + "dpo" + ] + }, + "TrainingConfig": { + "type": "object", + "properties": { + "n_epochs": { + "type": "integer" + }, + "batch_size": { + "type": "integer" + }, + "shuffle": { + "type": "boolean" + }, + "n_iters": { + "type": "integer" + }, + "enable_activation_checkpointing": { + "type": "boolean" + }, + "memory_efficient_fsdp_wrap": { + "type": "boolean" + }, + "fsdp_cpu_offload": { + "type": "boolean" + } + }, + "additionalProperties": false, + "required": [ + "n_epochs", + "batch_size", + "shuffle", + "n_iters", + "enable_activation_checkpointing", + "memory_efficient_fsdp_wrap", + "fsdp_cpu_offload" + ] + }, + "PreferenceOptimizeRequest": { "type": "object", "properties": { "job_uuid": { @@ -5327,61 +4525,6 @@ "training_config", "hyperparam_search_config", "logger_config" - ], - "title": "Request to finetune a model." - }, - "RLHFAlgorithm": { - "type": "string", - "enum": [ - "dpo" - ] - }, - "TrainingConfig": { - "type": "object", - "properties": { - "n_epochs": { - "type": "integer" - }, - "batch_size": { - "type": "integer" - }, - "shuffle": { - "type": "boolean" - }, - "n_iters": { - "type": "integer" - }, - "enable_activation_checkpointing": { - "type": "boolean" - }, - "memory_efficient_fsdp_wrap": { - "type": "boolean" - }, - "fsdp_cpu_offload": { - "type": "boolean" - } - }, - "additionalProperties": false, - "required": [ - "n_epochs", - "batch_size", - "shuffle", - "n_iters", - "enable_activation_checkpointing", - "memory_efficient_fsdp_wrap", - "fsdp_cpu_offload" - ] - }, - "PostTrainingRLHFRequestWrapper": { - "type": "object", - "properties": { - "request": { - "$ref": "#/components/schemas/PostTrainingRLHFRequest" - } - }, - "additionalProperties": false, - "required": [ - "request" ] }, "QueryDocumentsRequest": { @@ -5532,7 +4675,7 @@ "sampled_generations" ] }, - "RewardScoringRequest": { + "RewardScoreRequest": { "type": "object", "properties": { "dialog_generations": { @@ -5549,19 +4692,6 @@ "required": [ "dialog_generations", "model" - ], - "title": "Request to score a reward function. A list of prompts and a list of responses per prompt." - }, - "RewardScoringRequestWrapper": { - "type": "object", - "properties": { - "request": { - "$ref": "#/components/schemas/RewardScoringRequest" - } - }, - "additionalProperties": false, - "required": [ - "request" ] }, "RewardScoringResponse": { @@ -5644,7 +4774,7 @@ "score" ] }, - "RunShieldRequest": { + "RunShieldsRequest": { "type": "object", "properties": { "messages": { @@ -5679,18 +4809,6 @@ "shields" ] }, - "RunShieldRequestWrapper": { - "type": "object", - "properties": { - "request": { - "$ref": "#/components/schemas/RunShieldRequest" - } - }, - "additionalProperties": false, - "required": [ - "request" - ] - }, "RunShieldResponse": { "type": "object", "properties": { @@ -5777,7 +4895,38 @@ "alpha" ] }, - "PostTrainingSFTRequest": { + "QLoraFinetuningConfig": { + "type": "object", + "properties": { + "lora_attn_modules": { + "type": "array", + "items": { + "type": "string" + } + }, + "apply_lora_to_mlp": { + "type": "boolean" + }, + "apply_lora_to_output": { + "type": "boolean" + }, + "rank": { + "type": "integer" + }, + "alpha": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "lora_attn_modules", + "apply_lora_to_mlp", + "apply_lora_to_output", + "rank", + "alpha" + ] + }, + "SupervisedFineTuneRequest": { "type": "object", "properties": { "job_uuid": { @@ -5877,53 +5026,9 @@ "training_config", "hyperparam_search_config", "logger_config" - ], - "title": "Request to finetune a model." - }, - "QLoraFinetuningConfig": { - "type": "object", - "properties": { - "lora_attn_modules": { - "type": "array", - "items": { - "type": "string" - } - }, - "apply_lora_to_mlp": { - "type": "boolean" - }, - "apply_lora_to_output": { - "type": "boolean" - }, - "rank": { - "type": "integer" - }, - "alpha": { - "type": "integer" - } - }, - "additionalProperties": false, - "required": [ - "lora_attn_modules", - "apply_lora_to_mlp", - "apply_lora_to_output", - "rank", - "alpha" ] }, - "PostTrainingSFTRequestWrapper": { - "type": "object", - "properties": { - "request": { - "$ref": "#/components/schemas/PostTrainingSFTRequest" - } - }, - "additionalProperties": false, - "required": [ - "request" - ] - }, - "SyntheticDataGenerationRequest": { + "SyntheticDataGenerateRequest": { "type": "object", "properties": { "dialogs": { @@ -5965,19 +5070,6 @@ "required": [ "dialogs", "filtering_function" - ], - "title": "Request to generate synthetic data. A small batch of prompts and a filtering function" - }, - "SyntheticDataGenerationRequestWrapper": { - "type": "object", - "properties": { - "request": { - "$ref": "#/components/schemas/SyntheticDataGenerationRequest" - } - }, - "additionalProperties": false, - "required": [ - "request" ] }, "SyntheticDataGenerationResponse": { @@ -6039,176 +5131,6 @@ "bank_id", "documents" ] - }, - "UpdateExperimentRequest": { - "type": "object", - "properties": { - "experiment_id": { - "type": "string" - }, - "status": { - "$ref": "#/components/schemas/ExperimentStatus" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "experiment_id" - ] - }, - "UpdateExperimentRequestWrapper": { - "type": "object", - "properties": { - "request": { - "$ref": "#/components/schemas/UpdateExperimentRequest" - } - }, - "additionalProperties": false, - "required": [ - "request" - ] - }, - "UpdateRunRequest": { - "type": "object", - "properties": { - "run_id": { - "type": "string" - }, - "status": { - "type": "string" - }, - "ended_at": { - "type": "string", - "format": "date-time" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "run_id" - ] - }, - "UpdateRunRequestWrapper": { - "type": "object", - "properties": { - "request": { - "$ref": "#/components/schemas/UpdateRunRequest" - } - }, - "additionalProperties": false, - "required": [ - "request" - ] - }, - "UploadArtifactRequest": { - "type": "object", - "properties": { - "experiment_id": { - "type": "string" - }, - "name": { - "type": "string" - }, - "artifact_type": { - "type": "string" - }, - "content": { - "type": "string", - "contentEncoding": "base64" - }, - "metadata": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "experiment_id", - "name", - "artifact_type", - "content" - ] - }, - "UploadArtifactRequestWrapper": { - "type": "object", - "properties": { - "request": { - "$ref": "#/components/schemas/UploadArtifactRequest" - } - }, - "additionalProperties": false, - "required": [ - "request" - ] } }, "responses": {} @@ -6219,42 +5141,38 @@ } ], "tags": [ - { - "name": "BatchInference" - }, - { - "name": "Safety" - }, - { - "name": "Telemetry" - }, - { - "name": "RewardScoring" - }, - { - "name": "Memory" - }, { "name": "SyntheticDataGeneration" }, - { - "name": "Inference" - }, - { - "name": "Evaluations" - }, - { - "name": "PostTraining" - }, { "name": "Datasets" }, { - "name": "AgenticSystem" + "name": "Evaluations" }, { - "name": "BatchChatCompletionRequest", - "description": "" + "name": "Safety" + }, + { + "name": "Inference" + }, + { + "name": "Telemetry" + }, + { + "name": "PostTraining" + }, + { + "name": "Memory" + }, + { + "name": "RewardScoring" + }, + { + "name": "BatchInference" + }, + { + "name": "AgenticSystem" }, { "name": "BuiltinTool", @@ -6309,8 +5227,8 @@ "description": "" }, { - "name": "BatchChatCompletionRequestWrapper", - "description": "" + "name": "BatchChatCompletionRequest", + "description": "" }, { "name": "BatchChatCompletionResponse", @@ -6320,10 +5238,6 @@ "name": "BatchCompletionRequest", "description": "" }, - { - "name": "BatchCompletionRequestWrapper", - "description": "" - }, { "name": "BatchCompletionResponse", "description": "" @@ -6341,8 +5255,8 @@ "description": "" }, { - "name": "ChatCompletionRequestWrapper", - "description": "" + "name": "ChatCompletionResponse", + "description": "Chat completion response.\n\n" }, { "name": "ChatCompletionResponseEvent", @@ -6373,8 +5287,8 @@ "description": "" }, { - "name": "CompletionRequestWrapper", - "description": "" + "name": "CompletionResponse", + "description": "Completion response.\n\n" }, { "name": "CompletionResponseStreamChunk", @@ -6444,17 +5358,13 @@ "name": "AgenticSystemSessionCreateResponse", "description": "" }, - { - "name": "AgenticSystemTurnCreateRequest", - "description": "" - }, { "name": "Attachment", "description": "" }, { - "name": "AgenticSystemTurnCreateRequestWrapper", - "description": "" + "name": "CreateAgenticSystemTurnRequest", + "description": "" }, { "name": "AgenticSystemTurnResponseEvent", @@ -6512,10 +5422,6 @@ "name": "Turn", "description": "A single turn in an interaction with an Agentic System.\n\n" }, - { - "name": "CreateDatasetRequest", - "description": "Request to create a dataset.\n\n" - }, { "name": "TrainEvalDataset", "description": "Dataset to be used for training or evaluating language models.\n\n" @@ -6525,24 +5431,8 @@ "description": "" }, { - "name": "CreateDatasetRequestWrapper", - "description": "" - }, - { - "name": "CreateExperimentRequest", - "description": "" - }, - { - "name": "CreateExperimentRequestWrapper", - "description": "" - }, - { - "name": "Experiment", - "description": "" - }, - { - "name": "ExperimentStatus", - "description": "" + "name": "CreateDatasetRequest", + "description": "" }, { "name": "CreateMemoryBankRequest", @@ -6552,18 +5442,6 @@ "name": "MemoryBank", "description": "" }, - { - "name": "CreateRunRequest", - "description": "" - }, - { - "name": "CreateRunRequestWrapper", - "description": "" - }, - { - "name": "Run", - "description": "" - }, { "name": "DeleteAgenticSystemRequest", "description": "" @@ -6592,17 +5470,9 @@ "name": "EmbeddingsResponse", "description": "" }, - { - "name": "Checkpoint", - "description": "Checkpoint created during training runs\n\n" - }, { "name": "EvaluateQuestionAnsweringRequest", - "description": "Request to evaluate question answering.\n\n" - }, - { - "name": "EvaluateQuestionAnsweringRequestWrapper", - "description": "" + "description": "" }, { "name": "EvaluationJob", @@ -6610,19 +5480,11 @@ }, { "name": "EvaluateSummarizationRequest", - "description": "Request to evaluate summarization.\n\n" - }, - { - "name": "EvaluateSummarizationRequestWrapper", - "description": "" + "description": "" }, { "name": "EvaluateTextGenerationRequest", - "description": "Request to evaluate text generation.\n\n" - }, - { - "name": "EvaluateTextGenerationRequestWrapper", - "description": "" + "description": "" }, { "name": "GetAgenticSystemSessionRequest", @@ -6636,14 +5498,6 @@ "name": "AgenticSystemStepResponse", "description": "" }, - { - "name": "Artifact", - "description": "" - }, - { - "name": "ArtifactType", - "description": "" - }, { "name": "GetDocumentsRequest", "description": "" @@ -6665,20 +5519,12 @@ "description": "" }, { - "name": "LogSearchRequest", - "description": "" + "name": "Trace", + "description": "" }, { - "name": "LogSearchRequestWrapper", - "description": "" - }, - { - "name": "Log", - "description": "" - }, - { - "name": "Metric", - "description": "" + "name": "Checkpoint", + "description": "Checkpoint created during training runs\n\n" }, { "name": "PostTrainingJobArtifactsResponse", @@ -6705,24 +5551,36 @@ "description": "" }, { - "name": "ListArtifactsRequest", - "description": "" + "name": "LogSeverity", + "description": "" }, { - "name": "LogMessagesRequest", - "description": "" + "name": "MetricEvent", + "description": "" }, { - "name": "LogMessagesRequestWrapper", - "description": "" + "name": "SpanEndPayload", + "description": "" }, { - "name": "LogMetricsRequest", - "description": "" + "name": "SpanStartPayload", + "description": "" }, { - "name": "LogMetricsRequestWrapper", - "description": "" + "name": "SpanStatus", + "description": "" + }, + { + "name": "StructuredLogEvent", + "description": "" + }, + { + "name": "UnstructuredLogEvent", + "description": "" + }, + { + "name": "LogEventRequest", + "description": "" }, { "name": "DPOAlignmentConfig", @@ -6732,10 +5590,6 @@ "name": "OptimizerConfig", "description": "" }, - { - "name": "PostTrainingRLHFRequest", - "description": "Request to finetune a model.\n\n" - }, { "name": "RLHFAlgorithm", "description": "" @@ -6745,8 +5599,8 @@ "description": "" }, { - "name": "PostTrainingRLHFRequestWrapper", - "description": "" + "name": "PreferenceOptimizeRequest", + "description": "" }, { "name": "QueryDocumentsRequest", @@ -6761,12 +5615,8 @@ "description": "" }, { - "name": "RewardScoringRequest", - "description": "Request to score a reward function. A list of prompts and a list of responses per prompt.\n\n" - }, - { - "name": "RewardScoringRequestWrapper", - "description": "" + "name": "RewardScoreRequest", + "description": "" }, { "name": "RewardScoringResponse", @@ -6781,12 +5631,8 @@ "description": "" }, { - "name": "RunShieldRequest", - "description": "" - }, - { - "name": "RunShieldRequestWrapper", - "description": "" + "name": "RunShieldsRequest", + "description": "" }, { "name": "RunShieldResponse", @@ -6804,25 +5650,17 @@ "name": "LoraFinetuningConfig", "description": "" }, - { - "name": "PostTrainingSFTRequest", - "description": "Request to finetune a model.\n\n" - }, { "name": "QLoraFinetuningConfig", "description": "" }, { - "name": "PostTrainingSFTRequestWrapper", - "description": "" + "name": "SupervisedFineTuneRequest", + "description": "" }, { - "name": "SyntheticDataGenerationRequest", - "description": "Request to generate synthetic data. A small batch of prompts and a filtering function\n\n" - }, - { - "name": "SyntheticDataGenerationRequestWrapper", - "description": "" + "name": "SyntheticDataGenerateRequest", + "description": "" }, { "name": "SyntheticDataGenerationResponse", @@ -6831,30 +5669,6 @@ { "name": "UpdateDocumentsRequest", "description": "" - }, - { - "name": "UpdateExperimentRequest", - "description": "" - }, - { - "name": "UpdateExperimentRequestWrapper", - "description": "" - }, - { - "name": "UpdateRunRequest", - "description": "" - }, - { - "name": "UpdateRunRequestWrapper", - "description": "" - }, - { - "name": "UploadArtifactRequest", - "description": "" - }, - { - "name": "UploadArtifactRequestWrapper", - "description": "" } ], "x-tagGroups": [ @@ -6881,8 +5695,6 @@ "AgenticSystemCreateResponse", "AgenticSystemSessionCreateResponse", "AgenticSystemStepResponse", - "AgenticSystemTurnCreateRequest", - "AgenticSystemTurnCreateRequestWrapper", "AgenticSystemTurnResponseEvent", "AgenticSystemTurnResponseStepCompletePayload", "AgenticSystemTurnResponseStepProgressPayload", @@ -6890,21 +5702,17 @@ "AgenticSystemTurnResponseStreamChunk", "AgenticSystemTurnResponseTurnCompletePayload", "AgenticSystemTurnResponseTurnStartPayload", - "Artifact", - "ArtifactType", "Attachment", "BatchChatCompletionRequest", - "BatchChatCompletionRequestWrapper", "BatchChatCompletionResponse", "BatchCompletionRequest", - "BatchCompletionRequestWrapper", "BatchCompletionResponse", "BuiltinShield", "BuiltinTool", "CancelEvaluationJobRequest", "CancelTrainingJobRequest", "ChatCompletionRequest", - "ChatCompletionRequestWrapper", + "ChatCompletionResponse", "ChatCompletionResponseEvent", "ChatCompletionResponseEventType", "ChatCompletionResponseStreamChunk", @@ -6912,17 +5720,13 @@ "CodeInterpreterToolDefinition", "CompletionMessage", "CompletionRequest", - "CompletionRequestWrapper", + "CompletionResponse", "CompletionResponseStreamChunk", "CreateAgenticSystemRequest", "CreateAgenticSystemSessionRequest", + "CreateAgenticSystemTurnRequest", "CreateDatasetRequest", - "CreateDatasetRequestWrapper", - "CreateExperimentRequest", - "CreateExperimentRequestWrapper", "CreateMemoryBankRequest", - "CreateRunRequest", - "CreateRunRequestWrapper", "DPOAlignmentConfig", "DeleteAgenticSystemRequest", "DeleteAgenticSystemSessionRequest", @@ -6934,36 +5738,25 @@ "EmbeddingsRequest", "EmbeddingsResponse", "EvaluateQuestionAnsweringRequest", - "EvaluateQuestionAnsweringRequestWrapper", "EvaluateSummarizationRequest", - "EvaluateSummarizationRequestWrapper", "EvaluateTextGenerationRequest", - "EvaluateTextGenerationRequestWrapper", "EvaluationJob", "EvaluationJobArtifactsResponse", "EvaluationJobLogStream", "EvaluationJobStatusResponse", - "Experiment", - "ExperimentStatus", "FinetuningAlgorithm", "FunctionCallToolDefinition", "GetAgenticSystemSessionRequest", "GetDocumentsRequest", "InferenceStep", "InsertDocumentsRequest", - "ListArtifactsRequest", - "Log", - "LogMessagesRequest", - "LogMessagesRequestWrapper", - "LogMetricsRequest", - "LogMetricsRequestWrapper", - "LogSearchRequest", - "LogSearchRequestWrapper", + "LogEventRequest", + "LogSeverity", "LoraFinetuningConfig", "MemoryBank", "MemoryBankDocument", "MemoryRetrievalStep", - "Metric", + "MetricEvent", "OnViolationAction", "OptimizerConfig", "PhotogenToolDefinition", @@ -6972,23 +5765,17 @@ "PostTrainingJobLogStream", "PostTrainingJobStatus", "PostTrainingJobStatusResponse", - "PostTrainingRLHFRequest", - "PostTrainingRLHFRequestWrapper", - "PostTrainingSFTRequest", - "PostTrainingSFTRequestWrapper", + "PreferenceOptimizeRequest", "QLoraFinetuningConfig", "QueryDocumentsRequest", "QueryDocumentsResponse", "RLHFAlgorithm", "RestAPIExecutionConfig", "RestAPIMethod", - "RewardScoringRequest", - "RewardScoringRequestWrapper", + "RewardScoreRequest", "RewardScoringResponse", - "Run", - "RunShieldRequest", - "RunShieldRequestWrapper", "RunShieldResponse", + "RunShieldsRequest", "SamplingParams", "SamplingStrategy", "ScoredDialogGenerations", @@ -6998,9 +5785,13 @@ "ShieldCallStep", "ShieldDefinition", "ShieldResponse", + "SpanEndPayload", + "SpanStartPayload", + "SpanStatus", "StopReason", - "SyntheticDataGenerationRequest", - "SyntheticDataGenerationRequestWrapper", + "StructuredLogEvent", + "SupervisedFineTuneRequest", + "SyntheticDataGenerateRequest", "SyntheticDataGenerationResponse", "SystemMessage", "TokenLogProbs", @@ -7014,18 +5805,14 @@ "ToolPromptFormat", "ToolResponse", "ToolResponseMessage", + "Trace", "TrainEvalDataset", "TrainEvalDatasetColumnType", "TrainingConfig", "Turn", "URL", + "UnstructuredLogEvent", "UpdateDocumentsRequest", - "UpdateExperimentRequest", - "UpdateExperimentRequestWrapper", - "UpdateRunRequest", - "UpdateRunRequestWrapper", - "UploadArtifactRequest", - "UploadArtifactRequestWrapper", "UserMessage", "WolframAlphaToolDefinition" ] diff --git a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml index 322645813..4d1b27bb7 100644 --- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml +++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml @@ -180,180 +180,6 @@ components: required: - step type: object - AgenticSystemTurnCreateRequest: - additionalProperties: false - properties: - agent_id: - type: string - attachments: - items: - $ref: '#/components/schemas/Attachment' - type: array - input_shields: - items: - $ref: '#/components/schemas/ShieldDefinition' - type: array - instructions: - type: string - messages: - items: - oneOf: - - $ref: '#/components/schemas/UserMessage' - - $ref: '#/components/schemas/ToolResponseMessage' - type: array - output_shields: - items: - $ref: '#/components/schemas/ShieldDefinition' - type: array - sampling_params: - $ref: '#/components/schemas/SamplingParams' - session_id: - type: string - stream: - type: boolean - tool_choice: - $ref: '#/components/schemas/ToolChoice' - tool_prompt_format: - $ref: '#/components/schemas/ToolPromptFormat' - tools: - items: - oneOf: - - $ref: '#/components/schemas/SearchToolDefinition' - - $ref: '#/components/schemas/WolframAlphaToolDefinition' - - $ref: '#/components/schemas/PhotogenToolDefinition' - - $ref: '#/components/schemas/CodeInterpreterToolDefinition' - - $ref: '#/components/schemas/FunctionCallToolDefinition' - - additionalProperties: false - properties: - input_shields: - items: - $ref: '#/components/schemas/ShieldDefinition' - type: array - max_chunks: - type: integer - max_tokens_in_context: - type: integer - memory_bank_configs: - items: - oneOf: - - additionalProperties: false - properties: - bank_id: - type: string - type: - const: vector - type: string - required: - - bank_id - - type - type: object - - additionalProperties: false - properties: - bank_id: - type: string - keys: - items: - type: string - type: array - type: - const: keyvalue - type: string - required: - - bank_id - - type - - keys - type: object - - additionalProperties: false - properties: - bank_id: - type: string - type: - const: keyword - type: string - required: - - bank_id - - type - type: object - - additionalProperties: false - properties: - bank_id: - type: string - entities: - items: - type: string - type: array - type: - const: graph - type: string - required: - - bank_id - - type - - entities - type: object - type: array - output_shields: - items: - $ref: '#/components/schemas/ShieldDefinition' - type: array - query_generator_config: - oneOf: - - additionalProperties: false - properties: - sep: - type: string - type: - const: default - type: string - required: - - type - - sep - type: object - - additionalProperties: false - properties: - model: - type: string - template: - type: string - type: - const: llm - type: string - required: - - type - - model - - template - type: object - - additionalProperties: false - properties: - type: - const: custom - type: string - required: - - type - type: object - type: - const: memory - type: string - required: - - type - - memory_bank_configs - - query_generator_config - - max_tokens_in_context - - max_chunks - type: object - type: array - required: - - agent_id - - session_id - - messages - type: object - AgenticSystemTurnCreateRequestWrapper: - additionalProperties: false - properties: - request: - $ref: '#/components/schemas/AgenticSystemTurnCreateRequest' - required: - - request - type: object AgenticSystemTurnResponseEvent: additionalProperties: false properties: @@ -480,49 +306,6 @@ components: - event_type - turn_id type: object - Artifact: - additionalProperties: false - properties: - created_at: - format: date-time - type: string - id: - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - name: - type: string - size: - type: integer - type: - $ref: '#/components/schemas/ArtifactType' - required: - - id - - name - - type - - size - - created_at - - metadata - type: object - ArtifactType: - enum: - - model - - dataset - - checkpoint - - plot - - metric - - config - - code - - other - type: string Attachment: additionalProperties: false properties: @@ -574,14 +357,6 @@ components: - model - messages_batch type: object - BatchChatCompletionRequestWrapper: - additionalProperties: false - properties: - request: - $ref: '#/components/schemas/BatchChatCompletionRequest' - required: - - request - type: object BatchChatCompletionResponse: additionalProperties: false properties: @@ -617,14 +392,6 @@ components: - model - content_batch type: object - BatchCompletionRequestWrapper: - additionalProperties: false - properties: - request: - $ref: '#/components/schemas/BatchCompletionRequest' - required: - - request - type: object BatchCompletionResponse: additionalProperties: false properties: @@ -701,13 +468,18 @@ components: - model - messages type: object - ChatCompletionRequestWrapper: + ChatCompletionResponse: additionalProperties: false properties: - request: - $ref: '#/components/schemas/ChatCompletionRequest' + completion_message: + $ref: '#/components/schemas/CompletionMessage' + logprobs: + items: + $ref: '#/components/schemas/TokenLogProbs' + type: array required: - - request + - completion_message + title: Chat completion response. type: object ChatCompletionResponseEvent: additionalProperties: false @@ -817,13 +589,18 @@ components: - model - content type: object - CompletionRequestWrapper: + CompletionResponse: additionalProperties: false properties: - request: - $ref: '#/components/schemas/CompletionRequest' + completion_message: + $ref: '#/components/schemas/CompletionMessage' + logprobs: + items: + $ref: '#/components/schemas/TokenLogProbs' + type: array required: - - request + - completion_message + title: Completion response. type: object CompletionResponseStreamChunk: additionalProperties: false @@ -859,6 +636,30 @@ components: - agent_id - session_name type: object + CreateAgenticSystemTurnRequest: + additionalProperties: false + properties: + agent_id: + type: string + attachments: + items: + $ref: '#/components/schemas/Attachment' + type: array + messages: + items: + oneOf: + - $ref: '#/components/schemas/UserMessage' + - $ref: '#/components/schemas/ToolResponseMessage' + type: array + session_id: + type: string + stream: + type: boolean + required: + - agent_id + - session_id + - messages + type: object CreateDatasetRequest: additionalProperties: false properties: @@ -869,41 +670,6 @@ components: required: - uuid - dataset - title: Request to create a dataset. - type: object - CreateDatasetRequestWrapper: - additionalProperties: false - properties: - request: - $ref: '#/components/schemas/CreateDatasetRequest' - required: - - request - type: object - CreateExperimentRequest: - additionalProperties: false - properties: - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - name: - type: string - required: - - name - type: object - CreateExperimentRequestWrapper: - additionalProperties: false - properties: - request: - $ref: '#/components/schemas/CreateExperimentRequest' - required: - - request type: object CreateMemoryBankRequest: additionalProperties: false @@ -958,32 +724,6 @@ components: - name - config type: object - CreateRunRequest: - additionalProperties: false - properties: - experiment_id: - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - required: - - experiment_id - type: object - CreateRunRequestWrapper: - additionalProperties: false - properties: - request: - $ref: '#/components/schemas/CreateRunRequest' - required: - - request - type: object DPOAlignmentConfig: additionalProperties: false properties: @@ -1126,12 +866,6 @@ components: EvaluateQuestionAnsweringRequest: additionalProperties: false properties: - checkpoint: - $ref: '#/components/schemas/Checkpoint' - dataset: - $ref: '#/components/schemas/TrainEvalDataset' - job_uuid: - type: string metrics: items: enum: @@ -1139,33 +873,12 @@ components: - f1 type: string type: array - sampling_params: - $ref: '#/components/schemas/SamplingParams' required: - - job_uuid - - dataset - - checkpoint - - sampling_params - metrics - title: Request to evaluate question answering. - type: object - EvaluateQuestionAnsweringRequestWrapper: - additionalProperties: false - properties: - request: - $ref: '#/components/schemas/EvaluateQuestionAnsweringRequest' - required: - - request type: object EvaluateSummarizationRequest: additionalProperties: false properties: - checkpoint: - $ref: '#/components/schemas/Checkpoint' - dataset: - $ref: '#/components/schemas/TrainEvalDataset' - job_uuid: - type: string metrics: items: enum: @@ -1173,33 +886,12 @@ components: - bleu type: string type: array - sampling_params: - $ref: '#/components/schemas/SamplingParams' required: - - job_uuid - - dataset - - checkpoint - - sampling_params - metrics - title: Request to evaluate summarization. - type: object - EvaluateSummarizationRequestWrapper: - additionalProperties: false - properties: - request: - $ref: '#/components/schemas/EvaluateSummarizationRequest' - required: - - request type: object EvaluateTextGenerationRequest: additionalProperties: false properties: - checkpoint: - $ref: '#/components/schemas/Checkpoint' - dataset: - $ref: '#/components/schemas/TrainEvalDataset' - job_uuid: - type: string metrics: items: enum: @@ -1208,23 +900,8 @@ components: - bleu type: string type: array - sampling_params: - $ref: '#/components/schemas/SamplingParams' required: - - job_uuid - - dataset - - checkpoint - - sampling_params - metrics - title: Request to evaluate text generation. - type: object - EvaluateTextGenerationRequestWrapper: - additionalProperties: false - properties: - request: - $ref: '#/components/schemas/EvaluateTextGenerationRequest' - required: - - request type: object EvaluationJob: additionalProperties: false @@ -1259,46 +936,6 @@ components: required: - job_uuid type: object - Experiment: - additionalProperties: false - properties: - created_at: - format: date-time - type: string - id: - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - name: - type: string - status: - $ref: '#/components/schemas/ExperimentStatus' - updated_at: - format: date-time - type: string - required: - - id - - name - - status - - created_at - - updated_at - - metadata - type: object - ExperimentStatus: - enum: - - not_started - - running - - completed - - failed - type: string FinetuningAlgorithm: enum: - full @@ -1393,107 +1030,26 @@ components: - bank_id - documents type: object - ListArtifactsRequest: + LogEventRequest: additionalProperties: false properties: - experiment_id: - type: string + event: + oneOf: + - $ref: '#/components/schemas/UnstructuredLogEvent' + - $ref: '#/components/schemas/MetricEvent' + - $ref: '#/components/schemas/StructuredLogEvent' required: - - experiment_id - type: object - Log: - additionalProperties: false - properties: - additional_info: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - level: - type: string - message: - type: string - timestamp: - format: date-time - type: string - required: - - message - - level - - timestamp - - additional_info - type: object - LogMessagesRequest: - additionalProperties: false - properties: - logs: - items: - $ref: '#/components/schemas/Log' - type: array - run_id: - type: string - required: - - logs - type: object - LogMessagesRequestWrapper: - additionalProperties: false - properties: - request: - $ref: '#/components/schemas/LogMessagesRequest' - required: - - request - type: object - LogMetricsRequest: - additionalProperties: false - properties: - metrics: - items: - $ref: '#/components/schemas/Metric' - type: array - run_id: - type: string - required: - - run_id - - metrics - type: object - LogMetricsRequestWrapper: - additionalProperties: false - properties: - request: - $ref: '#/components/schemas/LogMetricsRequest' - required: - - request - type: object - LogSearchRequest: - additionalProperties: false - properties: - filters: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - query: - type: string - required: - - query - type: object - LogSearchRequestWrapper: - additionalProperties: false - properties: - request: - $ref: '#/components/schemas/LogSearchRequest' - required: - - request + - event type: object + LogSeverity: + enum: + - verbose + - debug + - info + - warn + - error + - critical + type: string LoraFinetuningConfig: additionalProperties: false properties: @@ -1635,27 +1191,45 @@ components: - memory_bank_ids - inserted_context type: object - Metric: + MetricEvent: additionalProperties: false properties: - name: + attributes: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + metric: type: string - run_id: + span_id: type: string timestamp: format: date-time type: string + trace_id: + type: string + type: + const: metric + type: string + unit: + type: string value: oneOf: - - type: number - type: integer - - type: string - - type: boolean + - type: number required: - - name - - value + - trace_id + - span_id - timestamp - - run_id + - type + - metric + - value + - unit type: object OnViolationAction: enum: @@ -1782,7 +1356,7 @@ components: - checkpoints title: Status of a finetuning job. type: object - PostTrainingRLHFRequest: + PreferenceOptimizeRequest: additionalProperties: false properties: algorithm: @@ -1832,78 +1406,6 @@ components: - training_config - hyperparam_search_config - logger_config - title: Request to finetune a model. - type: object - PostTrainingRLHFRequestWrapper: - additionalProperties: false - properties: - request: - $ref: '#/components/schemas/PostTrainingRLHFRequest' - required: - - request - type: object - PostTrainingSFTRequest: - additionalProperties: false - properties: - algorithm: - $ref: '#/components/schemas/FinetuningAlgorithm' - algorithm_config: - oneOf: - - $ref: '#/components/schemas/LoraFinetuningConfig' - - $ref: '#/components/schemas/QLoraFinetuningConfig' - - $ref: '#/components/schemas/DoraFinetuningConfig' - dataset: - $ref: '#/components/schemas/TrainEvalDataset' - hyperparam_search_config: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - job_uuid: - type: string - logger_config: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - model: - type: string - optimizer_config: - $ref: '#/components/schemas/OptimizerConfig' - training_config: - $ref: '#/components/schemas/TrainingConfig' - validation_dataset: - $ref: '#/components/schemas/TrainEvalDataset' - required: - - job_uuid - - model - - dataset - - validation_dataset - - algorithm - - algorithm_config - - optimizer_config - - training_config - - hyperparam_search_config - - logger_config - title: Request to finetune a model. - type: object - PostTrainingSFTRequestWrapper: - additionalProperties: false - properties: - request: - $ref: '#/components/schemas/PostTrainingSFTRequest' - required: - - request type: object QLoraFinetuningConfig: additionalProperties: false @@ -2035,7 +1537,7 @@ components: - PUT - DELETE type: string - RewardScoringRequest: + RewardScoreRequest: additionalProperties: false properties: dialog_generations: @@ -2047,16 +1549,6 @@ components: required: - dialog_generations - model - title: Request to score a reward function. A list of prompts and a list of responses - per prompt. - type: object - RewardScoringRequestWrapper: - additionalProperties: false - properties: - request: - $ref: '#/components/schemas/RewardScoringRequest' - required: - - request type: object RewardScoringResponse: additionalProperties: false @@ -2070,39 +1562,17 @@ components: title: Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold. type: object - Run: + RunShieldResponse: additionalProperties: false properties: - ended_at: - format: date-time - type: string - experiment_id: - type: string - id: - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - started_at: - format: date-time - type: string - status: - type: string + responses: + items: + $ref: '#/components/schemas/ShieldResponse' + type: array required: - - id - - experiment_id - - status - - started_at - - metadata + - responses type: object - RunShieldRequest: + RunShieldsRequest: additionalProperties: false properties: messages: @@ -2121,24 +1591,6 @@ components: - messages - shields type: object - RunShieldRequestWrapper: - additionalProperties: false - properties: - request: - $ref: '#/components/schemas/RunShieldRequest' - required: - - request - type: object - RunShieldResponse: - additionalProperties: false - properties: - responses: - items: - $ref: '#/components/schemas/ShieldResponse' - type: array - required: - - responses - type: object SamplingParams: additionalProperties: false properties: @@ -2307,13 +1759,132 @@ components: - shield_type - is_violation type: object + SpanEndPayload: + additionalProperties: false + properties: + status: + $ref: '#/components/schemas/SpanStatus' + type: + const: span_end + type: string + required: + - type + - status + type: object + SpanStartPayload: + additionalProperties: false + properties: + name: + type: string + parent_span_id: + type: string + type: + const: span_start + type: string + required: + - type + - name + type: object + SpanStatus: + enum: + - ok + - error + type: string StopReason: enum: - end_of_turn - end_of_message - out_of_tokens type: string - SyntheticDataGenerationRequest: + StructuredLogEvent: + additionalProperties: false + properties: + attributes: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + payload: + oneOf: + - $ref: '#/components/schemas/SpanStartPayload' + - $ref: '#/components/schemas/SpanEndPayload' + span_id: + type: string + timestamp: + format: date-time + type: string + trace_id: + type: string + type: + const: structured_log + type: string + required: + - trace_id + - span_id + - timestamp + - type + - payload + type: object + SupervisedFineTuneRequest: + additionalProperties: false + properties: + algorithm: + $ref: '#/components/schemas/FinetuningAlgorithm' + algorithm_config: + oneOf: + - $ref: '#/components/schemas/LoraFinetuningConfig' + - $ref: '#/components/schemas/QLoraFinetuningConfig' + - $ref: '#/components/schemas/DoraFinetuningConfig' + dataset: + $ref: '#/components/schemas/TrainEvalDataset' + hyperparam_search_config: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + job_uuid: + type: string + logger_config: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + model: + type: string + optimizer_config: + $ref: '#/components/schemas/OptimizerConfig' + training_config: + $ref: '#/components/schemas/TrainingConfig' + validation_dataset: + $ref: '#/components/schemas/TrainEvalDataset' + required: + - job_uuid + - model + - dataset + - validation_dataset + - algorithm + - algorithm_config + - optimizer_config + - training_config + - hyperparam_search_config + - logger_config + type: object + SyntheticDataGenerateRequest: additionalProperties: false properties: dialogs: @@ -2339,16 +1910,6 @@ components: required: - dialogs - filtering_function - title: Request to generate synthetic data. A small batch of prompts and a filtering - function - type: object - SyntheticDataGenerationRequestWrapper: - additionalProperties: false - properties: - request: - $ref: '#/components/schemas/SyntheticDataGenerationRequest' - required: - - request type: object SyntheticDataGenerationResponse: additionalProperties: false @@ -2580,6 +2141,24 @@ components: - tool_name - content type: object + Trace: + additionalProperties: false + properties: + end_time: + format: date-time + type: string + root_span_id: + type: string + start_time: + format: date-time + type: string + trace_id: + type: string + required: + - trace_id + - root_span_id + - start_time + type: object TrainEvalDataset: additionalProperties: false properties: @@ -2685,6 +2264,41 @@ components: format: uri pattern: ^(https?://|file://|data:) type: string + UnstructuredLogEvent: + additionalProperties: false + properties: + attributes: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + message: + type: string + severity: + $ref: '#/components/schemas/LogSeverity' + span_id: + type: string + timestamp: + format: date-time + type: string + trace_id: + type: string + type: + const: unstructured_log + type: string + required: + - trace_id + - span_id + - timestamp + - type + - message + - severity + type: object UpdateDocumentsRequest: additionalProperties: false properties: @@ -2698,101 +2312,6 @@ components: - bank_id - documents type: object - UpdateExperimentRequest: - additionalProperties: false - properties: - experiment_id: - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - status: - $ref: '#/components/schemas/ExperimentStatus' - required: - - experiment_id - type: object - UpdateExperimentRequestWrapper: - additionalProperties: false - properties: - request: - $ref: '#/components/schemas/UpdateExperimentRequest' - required: - - request - type: object - UpdateRunRequest: - additionalProperties: false - properties: - ended_at: - format: date-time - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - run_id: - type: string - status: - type: string - required: - - run_id - type: object - UpdateRunRequestWrapper: - additionalProperties: false - properties: - request: - $ref: '#/components/schemas/UpdateRunRequest' - required: - - request - type: object - UploadArtifactRequest: - additionalProperties: false - properties: - artifact_type: - type: string - content: - contentEncoding: base64 - type: string - experiment_id: - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - name: - type: string - required: - - experiment_id - - name - - artifact_type - - content - type: object - UploadArtifactRequestWrapper: - additionalProperties: false - properties: - request: - $ref: '#/components/schemas/UploadArtifactRequest' - required: - - request - type: object UserMessage: additionalProperties: false properties: @@ -2838,7 +2357,7 @@ info: description: "This is the specification of the llama stack that provides\n \ \ a set of endpoints and their corresponding interfaces that are tailored\ \ to\n best leverage Llama Models. The specification is still in\ - \ draft and subject to change.\n Generated at 2024-09-10 16:42:15.870336" + \ draft and subject to change.\n Generated at 2024-09-11 16:05:23.016090" title: '[DRAFT] Llama Stack Specification' version: 0.0.1 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema @@ -2970,7 +2489,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/AgenticSystemTurnCreateRequestWrapper' + $ref: '#/components/schemas/CreateAgenticSystemTurnRequest' required: true responses: '200': @@ -3003,23 +2522,6 @@ paths: description: OK tags: - AgenticSystem - /artifacts/get: - get: - parameters: - - in: query - name: artifact_id - required: true - schema: - type: string - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/Artifact' - description: OK - tags: - - Telemetry /batch_inference/chat_completion: post: parameters: [] @@ -3027,7 +2529,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/BatchChatCompletionRequestWrapper' + $ref: '#/components/schemas/BatchChatCompletionRequest' required: true responses: '200': @@ -3045,7 +2547,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/BatchCompletionRequestWrapper' + $ref: '#/components/schemas/BatchCompletionRequest' required: true responses: '200': @@ -3063,7 +2565,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CreateDatasetRequestWrapper' + $ref: '#/components/schemas/CreateDatasetRequest' required: true responses: '200': @@ -3185,7 +2687,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/EvaluateQuestionAnsweringRequestWrapper' + $ref: '#/components/schemas/EvaluateQuestionAnsweringRequest' required: true responses: '200': @@ -3203,7 +2705,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/EvaluateSummarizationRequestWrapper' + $ref: '#/components/schemas/EvaluateSummarizationRequest' required: true responses: '200': @@ -3221,7 +2723,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/EvaluateTextGenerationRequestWrapper' + $ref: '#/components/schemas/EvaluateTextGenerationRequest' required: true responses: '200': @@ -3232,125 +2734,6 @@ paths: description: OK tags: - Evaluations - /experiments/artifacts/get: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/ListArtifactsRequest' - required: true - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/Artifact' - description: OK - tags: - - Telemetry - /experiments/artifacts/upload: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/UploadArtifactRequestWrapper' - required: true - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/Artifact' - description: OK - tags: - - Telemetry - /experiments/create: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CreateExperimentRequestWrapper' - required: true - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/Experiment' - description: OK - tags: - - Telemetry - /experiments/create_run: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CreateRunRequestWrapper' - required: true - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/Run' - description: OK - tags: - - Telemetry - /experiments/get: - get: - parameters: - - in: query - name: experiment_id - required: true - schema: - type: string - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/Experiment' - description: OK - tags: - - Telemetry - /experiments/list: - get: - parameters: [] - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/Experiment' - description: OK - tags: - - Telemetry - /experiments/update: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/UpdateExperimentRequestWrapper' - required: true - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/Experiment' - description: OK - tags: - - Telemetry /inference/chat_completion: post: parameters: [] @@ -3358,15 +2741,17 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ChatCompletionRequestWrapper' + $ref: '#/components/schemas/ChatCompletionRequest' required: true responses: '200': content: text/event-stream: schema: - $ref: '#/components/schemas/ChatCompletionResponseStreamChunk' - description: SSE-stream of these events. + oneOf: + - $ref: '#/components/schemas/ChatCompletionResponse' + - $ref: '#/components/schemas/ChatCompletionResponseStreamChunk' + description: Chat completion response. **OR** SSE-stream of these events. tags: - Inference /inference/completion: @@ -3376,15 +2761,17 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CompletionRequestWrapper' + $ref: '#/components/schemas/CompletionRequest' required: true responses: '200': content: application/json: schema: - $ref: '#/components/schemas/CompletionResponseStreamChunk' - description: streamed completion response. + oneOf: + - $ref: '#/components/schemas/CompletionResponse' + - $ref: '#/components/schemas/CompletionResponseStreamChunk' + description: Completion response. **OR** streamed completion response. tags: - Inference /inference/embeddings: @@ -3405,38 +2792,6 @@ paths: description: OK tags: - Inference - /logging/get_logs: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/LogSearchRequestWrapper' - required: true - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/Log' - description: OK - tags: - - Telemetry - /logging/log_messages: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/LogMessagesRequestWrapper' - required: true - responses: - '200': - description: OK - tags: - - Telemetry /memory_bank/documents/delete: post: parameters: [] @@ -3671,7 +3026,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/PostTrainingRLHFRequestWrapper' + $ref: '#/components/schemas/PreferenceOptimizeRequest' required: true responses: '200': @@ -3689,7 +3044,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/PostTrainingSFTRequestWrapper' + $ref: '#/components/schemas/SupervisedFineTuneRequest' required: true responses: '200': @@ -3707,7 +3062,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RewardScoringRequestWrapper' + $ref: '#/components/schemas/RewardScoreRequest' required: true responses: '200': @@ -3718,55 +3073,6 @@ paths: description: OK tags: - RewardScoring - /runs/log_metrics: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/LogMetricsRequestWrapper' - required: true - responses: - '200': - description: OK - tags: - - Telemetry - /runs/metrics: - get: - parameters: - - in: query - name: run_id - required: true - schema: - type: string - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/Metric' - description: OK - tags: - - Telemetry - /runs/update: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/UpdateRunRequestWrapper' - required: true - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/Run' - description: OK - tags: - - Telemetry /safety/run_shields: post: parameters: [] @@ -3774,7 +3080,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RunShieldRequestWrapper' + $ref: '#/components/schemas/RunShieldsRequest' required: true responses: '200': @@ -3792,7 +3098,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/SyntheticDataGenerationRequestWrapper' + $ref: '#/components/schemas/SyntheticDataGenerateRequest' required: true responses: '200': @@ -3803,25 +3109,53 @@ paths: description: OK tags: - SyntheticDataGeneration + /telemetry/get_trace: + get: + parameters: + - in: query + name: trace_id + required: true + schema: + type: string + responses: + '200': + content: + application/json: + schema: + $ref: '#/components/schemas/Trace' + description: OK + tags: + - Telemetry + /telemetry/log_event: + post: + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/LogEventRequest' + required: true + responses: + '200': + description: OK + tags: + - Telemetry security: - Default: [] servers: - url: http://any-hosted-llama-stack.com tags: -- name: BatchInference -- name: Safety -- name: Telemetry -- name: RewardScoring -- name: Memory - name: SyntheticDataGeneration -- name: Inference -- name: Evaluations -- name: PostTraining - name: Datasets +- name: Evaluations +- name: Safety +- name: Inference +- name: Telemetry +- name: PostTraining +- name: Memory +- name: RewardScoring +- name: BatchInference - name: AgenticSystem -- description: - name: BatchChatCompletionRequest - description: name: BuiltinTool - description: name: UserMessage -- description: - name: BatchChatCompletionRequestWrapper + name: BatchChatCompletionRequest - description: name: BatchChatCompletionResponse - description: name: BatchCompletionRequest -- description: - name: BatchCompletionRequestWrapper - description: name: BatchCompletionResponse @@ -3885,9 +3216,11 @@ tags: - description: name: ChatCompletionRequest -- description: - name: ChatCompletionRequestWrapper +- description: 'Chat completion response. + + + ' + name: ChatCompletionResponse - description: 'Chat completion response event. @@ -3913,9 +3246,11 @@ tags: - description: name: CompletionRequest -- description: - name: CompletionRequestWrapper +- description: 'Completion response. + + + ' + name: CompletionResponse - description: 'streamed completion response. @@ -3966,14 +3301,11 @@ tags: - description: name: AgenticSystemSessionCreateResponse -- description: - name: AgenticSystemTurnCreateRequest - description: name: Attachment -- description: - name: AgenticSystemTurnCreateRequestWrapper + name: CreateAgenticSystemTurnRequest - description: 'Streamed agent execution response. @@ -4017,11 +3349,6 @@ tags: ' name: Turn -- description: 'Request to create a dataset. - - - ' - name: CreateDatasetRequest - description: 'Dataset to be used for training or evaluating language models. @@ -4030,33 +3357,14 @@ tags: - description: name: TrainEvalDatasetColumnType -- description: - name: CreateDatasetRequestWrapper -- description: - name: CreateExperimentRequest -- description: - name: CreateExperimentRequestWrapper -- description: - name: Experiment -- description: - name: ExperimentStatus + name: CreateDatasetRequest - description: name: CreateMemoryBankRequest - description: name: MemoryBank -- description: - name: CreateRunRequest -- description: - name: CreateRunRequestWrapper -- description: - name: Run - description: name: DeleteAgenticSystemRequest @@ -4078,40 +3386,17 @@ tags: - description: name: EmbeddingsResponse -- description: 'Checkpoint created during training runs - - - ' - name: Checkpoint -- description: 'Request to evaluate question answering. - - - ' - name: EvaluateQuestionAnsweringRequest -- description: - name: EvaluateQuestionAnsweringRequestWrapper + name: EvaluateQuestionAnsweringRequest - description: name: EvaluationJob -- description: 'Request to evaluate summarization. - - - ' +- description: name: EvaluateSummarizationRequest -- description: - name: EvaluateSummarizationRequestWrapper -- description: 'Request to evaluate text generation. - - - ' name: EvaluateTextGenerationRequest -- description: - name: EvaluateTextGenerationRequestWrapper - description: name: GetAgenticSystemSessionRequest @@ -4123,10 +3408,6 @@ tags: - description: name: AgenticSystemStepResponse -- description: - name: Artifact -- description: - name: ArtifactType - description: name: GetDocumentsRequest @@ -4145,16 +3426,13 @@ tags: - description: name: EvaluationJobStatusResponse -- description: - name: LogSearchRequest -- description: - name: LogSearchRequestWrapper -- description: - name: Log -- description: - name: Metric +- description: + name: Trace +- description: 'Checkpoint created during training runs + + + ' + name: Checkpoint - description: 'Artifacts of a finetuning job. @@ -4181,39 +3459,39 @@ tags: - description: name: InsertDocumentsRequest -- description: + name: LogSeverity +- description: + name: MetricEvent +- description: + name: SpanEndPayload +- description: - name: ListArtifactsRequest -- description: + name: SpanStatus +- description: - name: LogMessagesRequest -- description: - name: LogMessagesRequestWrapper -- description: - name: LogMetricsRequest -- description: - name: LogMetricsRequestWrapper + name: LogEventRequest - description: name: DPOAlignmentConfig - description: name: OptimizerConfig -- description: 'Request to finetune a model. - - - ' - name: PostTrainingRLHFRequest - description: name: RLHFAlgorithm - description: name: TrainingConfig -- description: - name: PostTrainingRLHFRequestWrapper + name: PreferenceOptimizeRequest - description: name: QueryDocumentsRequest @@ -4223,15 +3501,9 @@ tags: - description: name: DialogGenerations -- description: 'Request to score a reward function. A list of prompts and a list of - responses per prompt. - - - ' - name: RewardScoringRequest -- description: - name: RewardScoringRequestWrapper + name: RewardScoreRequest - description: 'Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold. @@ -4243,12 +3515,9 @@ tags: name: ScoredDialogGenerations - description: name: ScoredMessage -- description: - name: RunShieldRequest -- description: - name: RunShieldRequestWrapper + name: RunShieldsRequest - description: name: RunShieldResponse @@ -4261,27 +3530,15 @@ tags: - description: name: LoraFinetuningConfig -- description: 'Request to finetune a model. - - - ' - name: PostTrainingSFTRequest - description: name: QLoraFinetuningConfig -- description: - name: PostTrainingSFTRequestWrapper -- description: 'Request to generate synthetic data. A small batch of prompts and a - filtering function - - - ' - name: SyntheticDataGenerationRequest -- description: - name: SyntheticDataGenerationRequestWrapper + name: SyntheticDataGenerateRequest - description: 'Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold. @@ -4292,24 +3549,6 @@ tags: - description: name: UpdateDocumentsRequest -- description: - name: UpdateExperimentRequest -- description: - name: UpdateExperimentRequestWrapper -- description: - name: UpdateRunRequest -- description: - name: UpdateRunRequestWrapper -- description: - name: UploadArtifactRequest -- description: - name: UploadArtifactRequestWrapper x-tagGroups: - name: Operations tags: @@ -4330,8 +3569,6 @@ x-tagGroups: - AgenticSystemCreateResponse - AgenticSystemSessionCreateResponse - AgenticSystemStepResponse - - AgenticSystemTurnCreateRequest - - AgenticSystemTurnCreateRequestWrapper - AgenticSystemTurnResponseEvent - AgenticSystemTurnResponseStepCompletePayload - AgenticSystemTurnResponseStepProgressPayload @@ -4339,21 +3576,17 @@ x-tagGroups: - AgenticSystemTurnResponseStreamChunk - AgenticSystemTurnResponseTurnCompletePayload - AgenticSystemTurnResponseTurnStartPayload - - Artifact - - ArtifactType - Attachment - BatchChatCompletionRequest - - BatchChatCompletionRequestWrapper - BatchChatCompletionResponse - BatchCompletionRequest - - BatchCompletionRequestWrapper - BatchCompletionResponse - BuiltinShield - BuiltinTool - CancelEvaluationJobRequest - CancelTrainingJobRequest - ChatCompletionRequest - - ChatCompletionRequestWrapper + - ChatCompletionResponse - ChatCompletionResponseEvent - ChatCompletionResponseEventType - ChatCompletionResponseStreamChunk @@ -4361,17 +3594,13 @@ x-tagGroups: - CodeInterpreterToolDefinition - CompletionMessage - CompletionRequest - - CompletionRequestWrapper + - CompletionResponse - CompletionResponseStreamChunk - CreateAgenticSystemRequest - CreateAgenticSystemSessionRequest + - CreateAgenticSystemTurnRequest - CreateDatasetRequest - - CreateDatasetRequestWrapper - - CreateExperimentRequest - - CreateExperimentRequestWrapper - CreateMemoryBankRequest - - CreateRunRequest - - CreateRunRequestWrapper - DPOAlignmentConfig - DeleteAgenticSystemRequest - DeleteAgenticSystemSessionRequest @@ -4383,36 +3612,25 @@ x-tagGroups: - EmbeddingsRequest - EmbeddingsResponse - EvaluateQuestionAnsweringRequest - - EvaluateQuestionAnsweringRequestWrapper - EvaluateSummarizationRequest - - EvaluateSummarizationRequestWrapper - EvaluateTextGenerationRequest - - EvaluateTextGenerationRequestWrapper - EvaluationJob - EvaluationJobArtifactsResponse - EvaluationJobLogStream - EvaluationJobStatusResponse - - Experiment - - ExperimentStatus - FinetuningAlgorithm - FunctionCallToolDefinition - GetAgenticSystemSessionRequest - GetDocumentsRequest - InferenceStep - InsertDocumentsRequest - - ListArtifactsRequest - - Log - - LogMessagesRequest - - LogMessagesRequestWrapper - - LogMetricsRequest - - LogMetricsRequestWrapper - - LogSearchRequest - - LogSearchRequestWrapper + - LogEventRequest + - LogSeverity - LoraFinetuningConfig - MemoryBank - MemoryBankDocument - MemoryRetrievalStep - - Metric + - MetricEvent - OnViolationAction - OptimizerConfig - PhotogenToolDefinition @@ -4421,23 +3639,17 @@ x-tagGroups: - PostTrainingJobLogStream - PostTrainingJobStatus - PostTrainingJobStatusResponse - - PostTrainingRLHFRequest - - PostTrainingRLHFRequestWrapper - - PostTrainingSFTRequest - - PostTrainingSFTRequestWrapper + - PreferenceOptimizeRequest - QLoraFinetuningConfig - QueryDocumentsRequest - QueryDocumentsResponse - RLHFAlgorithm - RestAPIExecutionConfig - RestAPIMethod - - RewardScoringRequest - - RewardScoringRequestWrapper + - RewardScoreRequest - RewardScoringResponse - - Run - - RunShieldRequest - - RunShieldRequestWrapper - RunShieldResponse + - RunShieldsRequest - SamplingParams - SamplingStrategy - ScoredDialogGenerations @@ -4447,9 +3659,13 @@ x-tagGroups: - ShieldCallStep - ShieldDefinition - ShieldResponse + - SpanEndPayload + - SpanStartPayload + - SpanStatus - StopReason - - SyntheticDataGenerationRequest - - SyntheticDataGenerationRequestWrapper + - StructuredLogEvent + - SupervisedFineTuneRequest + - SyntheticDataGenerateRequest - SyntheticDataGenerationResponse - SystemMessage - TokenLogProbs @@ -4463,17 +3679,13 @@ x-tagGroups: - ToolPromptFormat - ToolResponse - ToolResponseMessage + - Trace - TrainEvalDataset - TrainEvalDatasetColumnType - TrainingConfig - Turn - URL + - UnstructuredLogEvent - UpdateDocumentsRequest - - UpdateExperimentRequest - - UpdateExperimentRequestWrapper - - UpdateRunRequest - - UpdateRunRequestWrapper - - UploadArtifactRequest - - UploadArtifactRequestWrapper - UserMessage - WolframAlphaToolDefinition diff --git a/rfcs/openapi_generator/pyopenapi/generator.py b/rfcs/openapi_generator/pyopenapi/generator.py index a711d9f68..f6be71854 100644 --- a/rfcs/openapi_generator/pyopenapi/generator.py +++ b/rfcs/openapi_generator/pyopenapi/generator.py @@ -471,14 +471,9 @@ class Generator: from dataclasses import make_dataclass - if len(op.request_params) == 1 and "Request" in first[1].__name__: - # TODO(ashwin): Undo the "Request" hack and this entire block eventually - request_name = first[1].__name__ + "Wrapper" - request_type = make_dataclass(request_name, op.request_params) - else: - op_name = "".join(word.capitalize() for word in op.name.split("_")) - request_name = f"{op_name}Request" - request_type = make_dataclass(request_name, op.request_params) + op_name = "".join(word.capitalize() for word in op.name.split("_")) + request_name = f"{op_name}Request" + request_type = make_dataclass(request_name, op.request_params) requestBody = RequestBody( content={ diff --git a/tests/test_inference.py b/tests/test_inference.py index 277cf7e8a..800046355 100644 --- a/tests/test_inference.py +++ b/tests/test_inference.py @@ -249,7 +249,12 @@ class InferenceTests(unittest.IsolatedAsyncioTestCase): stream=True, tools=[ToolDefinition(tool_name=BuiltinTool.brave_search)], ) - iterator = self.api.chat_completion(request) + iterator = self.api.chat_completion( + request.model, + request.messages, + stream=request.stream, + tools=request.tools, + ) events = [] async for chunk in iterator: diff --git a/tests/test_ollama_inference.py b/tests/test_ollama_inference.py index f5b172e69..c3cef3a10 100644 --- a/tests/test_ollama_inference.py +++ b/tests/test_ollama_inference.py @@ -61,7 +61,9 @@ class OllamaInferenceTests(unittest.IsolatedAsyncioTestCase): ], stream=False, ) - iterator = self.api.chat_completion(request) + iterator = self.api.chat_completion( + request.model, request.messages, stream=request.stream + ) async for r in iterator: response = r print(response.completion_message.content)