diff --git a/source/agentic_system_types.py b/source/agentic_system_types.py deleted file mode 100644 index 91823586f..000000000 --- a/source/agentic_system_types.py +++ /dev/null @@ -1,98 +0,0 @@ -from dataclasses import dataclass, field -from enum import Enum -from typing import Any, Dict, List, Optional, Set, Union - -from model_types import ( - BuiltinTool, - Content, - InstructModel, - Message, - PretrainedModel, - SamplingParams, - SafetyViolation, - StopReason, - ToolCall, - ToolDefinition, - ToolResponse, -) - -from strong_typing.schema import json_schema_type - - -class ExecutionStepType(Enum): - """The type of execution step.""" - - model_inference = "model_inference" - tool_execution = "tool_execution" - safety_filtering = "safety_filtering" - memory_retrieval = "memory_retrieval" - - -@dataclass -class ExecutionStepBase: - """An agentic system turn can consist of one or more such execution steps.""" - - step_type: ExecutionStepType - uuid: str - - -@dataclass -class ModelInferenceStep(ExecutionStepBase): - step_type = ExecutionStepType.model_inference - text: str - logprobs: Optional[Dict[str, Any]] = None - - -@dataclass -class ToolExecutionStep(ExecutionStepBase): - step_type = ExecutionStepType.tool_execution - - # we could be calling multiple tools in a single step (in parallel) - tool_calls: List[ToolCall] - tool_responses: List[ToolResponse] - - -@dataclass -class SafetyFilteringStep(ExecutionStepBase): - step_type = ExecutionStepType.safety_filtering - violation: Optional[SafetyViolation] = None - - -@json_schema_type -@dataclass -class MemoryBank: - uuid: str - name: str - - -@dataclass -class MemoryBankDocument: - uuid: str - content: bytes - metadata: Dict[str, Any] - mime_type: str - - -@dataclass -class MemoryRetrievalStep(ExecutionStepBase): - step_type = ExecutionStepType.memory_retrieval - documents: List[MemoryBankDocument] - scores: List[float] - - -ExecutionStep = Union[ - ModelInferenceStep, - ToolExecutionStep, - SafetyFilteringStep, - MemoryRetrievalStep, -] - - -@json_schema_type -@dataclass -class AgenticSystemTurn: - """A single turn in an interaction with an Agentic System.""" - - user_messages: List[Message] - steps: List[ExecutionStep] - response_message: Message diff --git a/source/api_definitions.py b/source/api_definitions.py deleted file mode 100644 index d09ccb5fd..000000000 --- a/source/api_definitions.py +++ /dev/null @@ -1,563 +0,0 @@ -from dataclasses import dataclass, field -from datetime import datetime -from enum import Enum -from typing import Any, Dict, List, Optional, Protocol, Set, Tuple, Union - -import yaml -from agentic_system_types import ( - AgenticSystemTurn, - ExecutionStepType, - MemoryBank, - MemoryBankDocument, - SafetyViolation, -) - -from model_types import ( - BuiltinTool, - Content, - Dialog, - InstructModel, - Message, - PretrainedModel, - RewardModel, - SamplingParams, - ShieldConfig, - StopReason, - ToolCall, - ToolDefinition, - ToolResponse, - URL, -) - -from post_training_types import ( - Checkpoint, - Dataset, - DoraFinetuningConfig, - DPOAlignmentConfig, - FinetuningAlgorithm, - LoraFinetuningConfig, - OptimizerConfig, - PostTrainingJobLogStream, - PostTrainingJobStatus, - QLoraFinetuningConfig, - RLHFAlgorithm, - TrainingConfig, -) - -from pyopenapi import Info, Options, Server, Specification, webmethod -from strong_typing.schema import json_schema_type - - -@json_schema_type -@dataclass -class CompletionRequest: - content: Content - model: PretrainedModel - sampling_params: SamplingParams = SamplingParams() - 
max_tokens: int = 0 - stream: bool = False - logprobs: bool = False - - -@json_schema_type -@dataclass -class CompletionResponse: - """Normal completion response.""" - - content: Content - stop_reason: Optional[StopReason] = None - logprobs: Optional[Dict[str, Any]] = None - - -@json_schema_type -@dataclass -class CompletionResponseStreamChunk: - """streamed completion response.""" - - text_delta: str - stop_reason: Optional[StopReason] = None - logprobs: Optional[Dict[str, Any]] = None - - -@json_schema_type -@dataclass -class ChatCompletionRequest: - model: InstructModel - dialog: Dialog - sampling_params: SamplingParams = SamplingParams() - - # zero-shot tool definitions as input to the model - available_tools: List[ToolDefinition] = field(default_factory=list) - - max_tokens: int = 0 - stream: bool = False - logprobs: bool = False - - -@json_schema_type -@dataclass -class ChatCompletionResponse: - """Normal chat completion response.""" - - content: Content - - # note: multiple tool calls can be generated in a single response - tool_calls: List[ToolCall] = field(default_factory=list) - - stop_reason: Optional[StopReason] = None - logprobs: Optional[Dict[str, Any]] = None - - -@json_schema_type -@dataclass -class ChatCompletionResponseStreamChunk: - """Streamed chat completion response. The actual response is a series of such objects.""" - - text_delta: str - stop_reason: Optional[StopReason] = None - tool_call: Optional[ToolCall] = None - - -@json_schema_type -@dataclass -class BatchCompletionRequest: - model: PretrainedModel - content_batch: List[Content] - sampling_params: SamplingParams = SamplingParams() - max_tokens: int = 0 - logprobs: bool = False - - -@json_schema_type -@dataclass -class BatchChatCompletionRequest: - model: InstructModel - batch_dialogs: List[Dialog] - sampling_params: SamplingParams = SamplingParams() - - # zero-shot tool definitions as input to the model - available_tools: List[ToolDefinition] = field(default_factory=list) - - max_tokens: int = 0 - logprobs: bool = False - - -class Inference(Protocol): - - @webmethod(route="/inference/completion") - def post_completion( - self, - request: CompletionRequest, - ) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ... - - @webmethod(route="/inference/chat_completion") - def post_chat_completion( - self, - request: ChatCompletionRequest, - ) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ... - - @webmethod(route="/inference/batch_completion") - def post_batch_completion( - self, - request: BatchCompletionRequest, - ) -> List[CompletionResponse]: ... - - @webmethod(route="/inference/batch_chat_completion") - def post_batch_chat_completion( - self, - request: BatchChatCompletionRequest, - ) -> List[ChatCompletionResponse]: ... - - -@dataclass -class AgenticSystemCreateRequest: - uuid: str - - instructions: str - model: InstructModel - - # zero-shot or built-in tool configurations as input to the model - available_tools: List[ToolDefinition] = field(default_factory=list) - - # tools which aren't executable are emitted as tool calls which the users can - # execute themselves. 
- executable_tools: Set[str] = field(default_factory=set) - - memory_bank_uuids: List[str] = field(default_factory=list) - - input_shields: List[ShieldConfig] = field(default_factory=list) - output_shields: List[ShieldConfig] = field(default_factory=list) - - -@json_schema_type -@dataclass -class AgenticSystemCreateResponse: - agent_uuid: str - - -@json_schema_type -@dataclass -class AgenticSystemExecuteRequest: - agent_uuid: str - messages: List[Message] - turn_history: List[AgenticSystemTurn] = None - stream: bool = False - - -@json_schema_type -@dataclass -class AgenticSystemExecuteResponse: - """non-stream response from the agentic system.""" - - turn: AgenticSystemTurn - - -class AgenticSystemExecuteResponseEventType(Enum): - """The type of event.""" - - step_start = "step_start" - step_end = "step_end" - step_progress = "step_progress" - - -@json_schema_type -@dataclass -class AgenticSystemExecuteResponseStreamChunk: - """Streamed agent execution response.""" - - event_type: AgenticSystemExecuteResponseEventType - - step_uuid: str - step_type: ExecutionStepType - - # TODO(ashwin): maybe add more structure here and do this as a proper tagged union - violation: Optional[SafetyViolation] = None - tool_call: Optional[ToolCall] = None - tool_response_delta: Optional[ToolResponse] = None - response_text_delta: Optional[str] = None - retrieved_document: Optional[MemoryBankDocument] = None - - stop_reason: Optional[StopReason] = None - - -class AgenticSystem(Protocol): - - @webmethod(route="/agentic_system/create") - def create_agentic_system( - self, - request: AgenticSystemCreateRequest, - ) -> AgenticSystemCreateResponse: ... - - @webmethod(route="/agentic_system/execute") - def create_agentic_system_execute( - self, - request: AgenticSystemExecuteRequest, - ) -> Union[ - AgenticSystemExecuteResponse, AgenticSystemExecuteResponseStreamChunk - ]: ... - - @webmethod(route="/agentic_system/delete") - def delete_agentic_system( - self, - agent_id: str, - ) -> None: ... - - -class MemoryBanks(Protocol): - @webmethod(route="/memory_banks/create") - def post_create_memory_bank( - self, - bank_uuid: str, - bank_name: str, - documents: List[MemoryBankDocument], - ) -> None: ... - - @webmethod(route="/memory_banks/get") - def get_memory_banks( - self - ) -> List[MemoryBank]: ... - - @webmethod(route="/memory_banks/drop") - def delete_memory_bank( - self, - bank_uuid: str, - ) -> str: ... - - @webmethod(route="/memory_bank/insert") - def post_insert_memory_documents( - self, - bank_uuid: str, - documents: List[MemoryBankDocument], - ) -> None: ... - - @webmethod(route="/memory_bank/update") - def post_update_memory_documents( - self, - bank_uuid: str, - documents: List[MemoryBankDocument], - ) -> None: ... - - @webmethod(route="/memory_bank/get") - def get_memory_documents( - self, - bank_uuid: str, - document_uuids: List[str], - ) -> List[MemoryBankDocument]: ... - - @webmethod(route="/memory_bank/delete") - def delete_memory_documents( - self, - bank_uuid: str, - document_uuids: List[str], - ) -> List[str]: ... - - -@dataclass -class KPromptGenerations: - dialog: Dialog - k_generations: List[Message] - - -@json_schema_type -@dataclass -class ScoredMessage: - message: Message - score: float - - -@json_schema_type -@dataclass -class KScoredPromptGenerations: - prompt: Message - k_scored_generations: List[ScoredMessage] - - -@json_schema_type -@dataclass -class RewardScoringRequest: - """Request to score a reward function. 
A list of prompts and a list of responses per prompt.""" - - prompt_generations: List[KPromptGenerations] - model: RewardModel - - -@json_schema_type -@dataclass -class RewardScoringResponse: - """Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold.""" - - scored_generations: List[KScoredPromptGenerations] - - -class RewardScoring(Protocol): - @webmethod(route="/reward_scoring/score") - def post_score( - self, - request: RewardScoringRequest, - ) -> Union[RewardScoringResponse]: ... - - -class FilteringFunction(Enum): - """The type of filtering function.""" - - none = "none" - random = "random" - top_k = "top_k" - top_p = "top_p" - top_k_top_p = "top_k_top_p" - sigmoid = "sigmoid" - - -@json_schema_type -@dataclass -class SyntheticDataGenerationRequest: - """Request to generate synthetic data. A small batch of prompts and a filtering function""" - - prompts: List[Message] - filtering_function: FilteringFunction = FilteringFunction.none - reward_scoring: Optional[RewardScoring] = None - - -@json_schema_type -@dataclass -class SyntheticDataGenerationResponse: - """Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.""" - - synthetic_data: List[KScoredPromptGenerations] - statistics: Optional[Dict[str, Any]] = None - - -class SyntheticDataGeneration(Protocol): - @webmethod(route="/synthetic_data_generation/generate") - def post_generate( - self, - request: SyntheticDataGenerationRequest, - ) -> Union[SyntheticDataGenerationResponse]: ... - - -@json_schema_type -@dataclass -class CreateDatasetRequest: - """Request to create a dataset.""" - - uuid: str - dataset: Dataset - - -class Datasets(Protocol): - @webmethod(route="/datasets/create") - def create_dataset( - self, - request: CreateDatasetRequest, - ) -> None: ... - - @webmethod(route="/datasets/get") - def get_dataset( - self, - dataset_id: str, - ) -> Dataset: ... - - @webmethod(route="/datasets/delete") - def delete_dataset( - self, - dataset_id: str, - ) -> None: ... 
- - -@json_schema_type -@dataclass -class PostTrainingSFTRequest: - """Request to finetune a model.""" - - job_uuid: str - - model: PretrainedModel - dataset: Dataset - validation_dataset: Dataset - - algorithm: FinetuningAlgorithm - algorithm_config: Union[ - LoraFinetuningConfig, QLoraFinetuningConfig, DoraFinetuningConfig - ] - - optimizer_config: OptimizerConfig - training_config: TrainingConfig - - # TODO: define these - hyperparam_search_config: Dict[str, Any] - logger_config: Dict[str, Any] - - -@json_schema_type -@dataclass -class PostTrainingRLHFRequest: - """Request to finetune a model.""" - - job_uuid: str - - finetuned_model: URL - - dataset: Dataset - validation_dataset: Dataset - - algorithm: RLHFAlgorithm - algorithm_config: Union[DPOAlignmentConfig] - - optimizer_config: OptimizerConfig - training_config: TrainingConfig - - # TODO: define these - hyperparam_search_config: Dict[str, Any] - logger_config: Dict[str, Any] - - -@json_schema_type -@dataclass -class PostTrainingJobStatusResponse: - """Status of a finetuning job.""" - - job_uuid: str - status: PostTrainingJobStatus - - scheduled_at: Optional[datetime] = None - started_at: Optional[datetime] = None - completed_at: Optional[datetime] = None - - resources_allocated: Optional[Dict[str, Any]] = None - - checkpoints: List[Checkpoint] = field(default_factory=list) - - -@json_schema_type -@dataclass -class PostTrainingJobArtifactsResponse: - """Artifacts of a finetuning job.""" - - job_uuid: str - checkpoints: List[Checkpoint] = field(default_factory=list) - - # TODO(ashwin): metrics, evals - - -class PostTraining(Protocol): - @webmethod(route="/post_training/supervised_fine_tune/") - def post_supervised_fine_tune( - self, - request: PostTrainingSFTRequest, - ) -> None: ... - - @webmethod(route="/post_training/preference_optimize/") - def post_preference_optimize( - self, - request: PostTrainingRLHFRequest, - ) -> None: ... - - # sends SSE stream of logs - @webmethod(route="/post_training/job/logs") - def get_training_log_stream(self, job_uuid: str) -> PostTrainingJobLogStream: ... - - @webmethod(route="/post_training/job/status") - def get_training_job_status( - self, job_uuid: str - ) -> PostTrainingJobStatusResponse: ... - - @webmethod(route="/post_training/job/cancel") - def cancel_training_job(self, job_uuid: str) -> None: ... - - @webmethod(route="/post_training/job/artifacts") - def get_training_job_artifacts( - self, job_uuid: str - ) -> PostTrainingJobArtifactsResponse: ... - - -class LlamaStackEndpoints( - Inference, - AgenticSystem, - RewardScoring, - SyntheticDataGeneration, - Datasets, - PostTraining, - MemoryBanks, -): ... - - -if __name__ == "__main__": - print("Converting the spec to YAML (openapi.yaml) and HTML (openapi.html)") - spec = Specification( - LlamaStackEndpoints, - Options( - server=Server(url="http://any-hosted-llama-stack.com"), - info=Info( - title="[DRAFT] Llama Stack Specification", - version="0.0.1", - description="""This is the specification of the llama stack that provides - a set of endpoints and their corresponding interfaces that are tailored to - best leverage Llama Models. 
The specification is still in draft and subject to change.""", - ), - ), - ) - with open("openapi.yaml", "w", encoding="utf-8") as fp: - yaml.dump(spec.get_json(), fp, allow_unicode=True) - - with open("openapi.html", "w") as fp: - spec.write_html(fp, pretty_print=True) diff --git a/source/client.py b/source/client.py deleted file mode 100644 index 222845b91..000000000 --- a/source/client.py +++ /dev/null @@ -1,59 +0,0 @@ -import requests -from dataclasses import dataclass, field, asdict -from typing import List, Set, Optional, Union, Protocol -from enum import Enum - -import json - -from model_types import * -from agentic_system_types import * -from api_definitions import * - -class EnumEncoder(json.JSONEncoder): - def default(self, obj): - if isinstance(obj, Enum): - return obj.value - elif isinstance(obj, set): - return list(obj) - return json.JSONEncoder.default(self, obj) - - -class AgenticSystemClient: - def __init__(self, base_url: str): - self.base_url = base_url - - def create_agentic_system(self, request: AgenticSystemCreateRequest) -> AgenticSystemCreateResponse: - response = requests.post(f"{self.base_url}/agentic_system/create", data=json.dumps(asdict(request), cls=EnumEncoder), headers={'Content-Type': 'application/json'}) - response.raise_for_status() - return AgenticSystemCreateResponse(**response.json()) - - def execute_agentic_system(self, request: AgenticSystemExecuteRequest) -> Union[AgenticSystemExecuteResponse, AgenticSystemExecuteResponseStreamChunk]: - response = requests.post(f"{self.base_url}/agentic_system/execute", data=json.dumps(asdict(request), cls=EnumEncoder), headers={'Content-Type': 'application/json'}) - response.raise_for_status() - response_json = response.json() - if 'turn' in response_json: - return AgenticSystemExecuteResponse(**response_json) - else: - return AgenticSystemExecuteResponseStreamChunk(**response_json) - -# Example usage -if __name__ == "__main__": - client = AgenticSystemClient("http://localhost:5000") - - # Create a new agentic system - create_request = AgenticSystemCreateRequest( - instructions="Your instructions here", - model=InstructModel.llama3_8b_chat, - ) - create_response = client.create_agentic_system(create_request) - print("Agent ID:", create_response.agent_id) - - # Execute the agentic system - execute_request = AgenticSystemExecuteRequest( - agent_id=create_response.agent_id, - messages=[Message(role="user", content="Tell me a joke")], - turn_history=[], - stream=False - ) - execute_response = client.execute_agentic_system(execute_request) - print("Execute Response:", execute_response) diff --git a/source/codegen/openapi-generator-cli.jar b/source/codegen/openapi-generator-cli.jar deleted file mode 100644 index 7102a497d..000000000 Binary files a/source/codegen/openapi-generator-cli.jar and /dev/null differ diff --git a/source/create_code.sh b/source/create_code.sh deleted file mode 100644 index fa927ba7d..000000000 --- a/source/create_code.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -set -euo pipefail -set -x - -export JAVA_HOME=/usr/local/java-runtime/impl/11 - -$JAVA_HOME/bin/java -jar codegen/openapi-generator-cli.jar \ - generate \ - -i openapi.yaml \ - -g python-flask \ - -o /tmp/foo \ - --log-to-stderr \ - --global-property debugModels,debugOperations,debugOpenAPI,debugSupportingFiles diff --git a/source/generate.sh b/source/generate.sh deleted file mode 100644 index b30929cfb..000000000 --- a/source/generate.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -PYTHONPATH=. 
python3 api_definitions.py diff --git a/source/model_types.py b/source/model_types.py deleted file mode 100644 index 9e6e3dc4b..000000000 --- a/source/model_types.py +++ /dev/null @@ -1,149 +0,0 @@ -from dataclasses import dataclass, field -from enum import Enum -from typing import Any, Dict, List, Optional, Set, Union - -from strong_typing.schema import json_schema_type - - -class ShieldType(Enum): - """The type of safety shield.""" - - llama_guard = "llama_guard" - prompt_guard = "prompt_guard" - code_guard = "code_guard" - - -@json_schema_type -@dataclass -class ShieldConfig: - shield_type: ShieldType - params: Dict[str, Any] = field(default_factory=dict) - - -@dataclass -class SafetyViolation: - violation_type: str - details: str - suggested_user_response: Optional[str] = None - - -@json_schema_type( - schema={"type": "string", "format": "uri", "pattern": "^(https?://|file://|data:)"} -) -@dataclass -class URL: - url: str - - def __str__(self) -> str: - return self.url - - -@json_schema_type -@dataclass -class Attachment: - """ - Attachments are used to refer to external resources, such as images, videos, audio, etc. - - """ - - url: URL - mime_type: str - -# TODO(ashwin): make this better named maybe InterleavedTextMedia -Content = Union[ - str, - Attachment, - List[Union[str, Attachment]], -] - - -class Role(Enum): - system = "system" - user = "user" - assistant = "assistant" - tool = "tool" - - -@dataclass -class ToolCall: - """ - A tool call is a request to a tool. - """ - - tool_name: str - arguments: Dict[str, Any] - - -@dataclass -class ToolResponse: - tool_name: str - content: Content - - -# TODO: we need to document the parameters for the tool calls -class BuiltinTool(Enum): - web_search = "web_search" - math = "math" - image_gen = "image_gen" - code_interpreter = "code_interpreter" - - -@dataclass -class ToolDefinition: - tool_name: Union[BuiltinTool, str] - parameters: Optional[Dict[str, Any]] = None - input_shields: List[ShieldConfig] = field(default_factory=list) - output_shields: List[ShieldConfig] = field(default_factory=list) - - -class StopReason(Enum): - """ - Stop reasons are used to indicate why the model stopped generating text. - """ - - not_stopped = "not_stopped" - finished_ok = "finished_ok" - max_tokens = "max_tokens" - - -@json_schema_type -@dataclass -class Message: - role: Role - - # input to the model or output from the model - content: Content - - # output from the model - tool_calls: List[ToolCall] = field(default_factory=list) - - # input to the model - tool_responses: List[ToolResponse] = field(default_factory=list) - - -@json_schema_type -@dataclass -class Dialog: - message: Message - message_history: List[Message] = None - - -@dataclass -class SamplingParams: - temperature: float = 0.0 - strategy: str = "greedy" - top_p: float = 0.95 - top_k: int = 0 - - -class PretrainedModel(Enum): - llama3_8b = "llama3_8b" - llama3_70b = "llama3_70b" - - -class InstructModel(Enum): - llama3_8b_chat = "llama3_8b_chat" - llama3_70b_chat = "llama3_70b_chat" - -class RewardModel(Enum): - llama3_405b_reward = "llama3_405b_reward" diff --git a/source/openapi.html b/source/openapi.html deleted file mode 100644 index a8b758635..000000000 --- a/source/openapi.html +++ /dev/null @@ -1,3597 +0,0 @@ - - - - - - - OpenAPI specification - - - - - - - -
- - - diff --git a/source/openapi.yaml b/source/openapi.yaml deleted file mode 100644 index 418cbb1b4..000000000 --- a/source/openapi.yaml +++ /dev/null @@ -1,2249 +0,0 @@ -components: - responses: {} - schemas: - AgenticSystemCreateRequest: - additionalProperties: false - properties: - available_tools: - items: - additionalProperties: false - properties: - input_shields: - items: - $ref: '#/components/schemas/ShieldConfig' - type: array - output_shields: - items: - $ref: '#/components/schemas/ShieldConfig' - type: array - parameters: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - tool_name: - oneOf: - - enum: - - web_search - - math - - image_gen - - code_interpreter - type: string - - type: string - required: - - tool_name - - input_shields - - output_shields - type: object - type: array - executable_tools: - items: - type: string - type: array - uniqueItems: true - input_shields: - items: - $ref: '#/components/schemas/ShieldConfig' - type: array - instructions: - type: string - memory_bank_uuids: - items: - type: string - type: array - model: - enum: - - llama3_8b_chat - - llama3_70b_chat - type: string - output_shields: - items: - $ref: '#/components/schemas/ShieldConfig' - type: array - uuid: - type: string - required: - - uuid - - instructions - - model - - available_tools - - executable_tools - - memory_bank_uuids - - input_shields - - output_shields - type: object - AgenticSystemCreateResponse: - additionalProperties: false - properties: - agent_uuid: - type: string - required: - - agent_uuid - type: object - AgenticSystemExecuteRequest: - additionalProperties: false - properties: - agent_uuid: - type: string - messages: - items: - $ref: '#/components/schemas/Message' - type: array - stream: - default: false - type: boolean - turn_history: - items: - $ref: '#/components/schemas/AgenticSystemTurn' - type: array - required: - - agent_uuid - - messages - - turn_history - - stream - type: object - AgenticSystemExecuteResponse: - additionalProperties: false - properties: - turn: - $ref: '#/components/schemas/AgenticSystemTurn' - required: - - turn - title: non-stream response from the agentic system. - type: object - AgenticSystemExecuteResponseStreamChunk: - additionalProperties: false - properties: - event_type: - enum: - - step_start - - step_end - - step_progress - title: The type of event. - type: string - response_text_delta: - type: string - retrieved_document: - additionalProperties: false - properties: - content: - contentEncoding: base64 - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - mime_type: - type: string - uuid: - type: string - required: - - uuid - - content - - metadata - - mime_type - type: object - step_type: - enum: - - model_inference - - tool_execution - - safety_filtering - - memory_retrieval - title: The type of execution step. - type: string - step_uuid: - type: string - stop_reason: - enum: - - not_stopped - - finished_ok - - max_tokens - title: Stop reasons are used to indicate why the model stopped generating - text. 
- type: string - tool_call: - additionalProperties: false - properties: - arguments: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - tool_name: - type: string - required: - - tool_name - - arguments - title: A tool call is a request to a tool. - type: object - tool_response_delta: - additionalProperties: false - properties: - content: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - - items: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - type: array - tool_name: - type: string - required: - - tool_name - - content - type: object - violation: - additionalProperties: false - properties: - details: - type: string - suggested_user_response: - type: string - violation_type: - type: string - required: - - violation_type - - details - type: object - required: - - event_type - - step_uuid - - step_type - title: Streamed agent execution response. - type: object - AgenticSystemTurn: - additionalProperties: false - properties: - response_message: - $ref: '#/components/schemas/Message' - steps: - items: - oneOf: - - additionalProperties: false - properties: - logprobs: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - step_type: - default: model_inference - enum: - - model_inference - - tool_execution - - safety_filtering - - memory_retrieval - title: The type of execution step. - type: string - text: - type: string - uuid: - type: string - required: - - step_type - - uuid - - text - type: object - - additionalProperties: false - properties: - step_type: - default: tool_execution - enum: - - model_inference - - tool_execution - - safety_filtering - - memory_retrieval - title: The type of execution step. - type: string - tool_calls: - items: - additionalProperties: false - properties: - arguments: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - tool_name: - type: string - required: - - tool_name - - arguments - title: A tool call is a request to a tool. - type: object - type: array - tool_responses: - items: - additionalProperties: false - properties: - content: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - - items: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - type: array - tool_name: - type: string - required: - - tool_name - - content - type: object - type: array - uuid: - type: string - required: - - step_type - - uuid - - tool_calls - - tool_responses - type: object - - additionalProperties: false - properties: - step_type: - default: safety_filtering - enum: - - model_inference - - tool_execution - - safety_filtering - - memory_retrieval - title: The type of execution step. 
- type: string - uuid: - type: string - violation: - additionalProperties: false - properties: - details: - type: string - suggested_user_response: - type: string - violation_type: - type: string - required: - - violation_type - - details - type: object - required: - - step_type - - uuid - type: object - - additionalProperties: false - properties: - documents: - items: - additionalProperties: false - properties: - content: - contentEncoding: base64 - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - mime_type: - type: string - uuid: - type: string - required: - - uuid - - content - - metadata - - mime_type - type: object - type: array - scores: - items: - type: number - type: array - step_type: - default: memory_retrieval - enum: - - model_inference - - tool_execution - - safety_filtering - - memory_retrieval - title: The type of execution step. - type: string - uuid: - type: string - required: - - step_type - - uuid - - documents - - scores - type: object - type: array - user_messages: - items: - $ref: '#/components/schemas/Message' - type: array - required: - - user_messages - - steps - - response_message - title: A single turn in an interaction with an Agentic System. - type: object - Attachment: - additionalProperties: false - properties: - mime_type: - type: string - url: - $ref: '#/components/schemas/URL' - required: - - url - - mime_type - title: Attachments are used to refer to external resources, such as images, - videos, audio, etc. - type: object - BatchChatCompletionRequest: - additionalProperties: false - properties: - available_tools: - items: - additionalProperties: false - properties: - input_shields: - items: - $ref: '#/components/schemas/ShieldConfig' - type: array - output_shields: - items: - $ref: '#/components/schemas/ShieldConfig' - type: array - parameters: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - tool_name: - oneOf: - - enum: - - web_search - - math - - image_gen - - code_interpreter - type: string - - type: string - required: - - tool_name - - input_shields - - output_shields - type: object - type: array - batch_dialogs: - items: - $ref: '#/components/schemas/Dialog' - type: array - logprobs: - default: false - type: boolean - max_tokens: - default: 0 - type: integer - model: - enum: - - llama3_8b_chat - - llama3_70b_chat - type: string - sampling_params: - additionalProperties: false - properties: - strategy: - default: greedy - type: string - temperature: - default: 0.0 - type: number - top_k: - default: 0 - type: integer - top_p: - default: 0.95 - type: number - required: - - temperature - - strategy - - top_p - - top_k - type: object - required: - - model - - batch_dialogs - - sampling_params - - available_tools - - max_tokens - - logprobs - type: object - BatchCompletionRequest: - additionalProperties: false - properties: - content_batch: - items: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - - items: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - type: array - type: array - logprobs: - default: false - type: boolean - max_tokens: - default: 0 - type: integer - model: - enum: - - llama3_8b - - llama3_70b - type: string - sampling_params: - additionalProperties: false - properties: - strategy: - default: greedy - type: string - temperature: - default: 0.0 - type: number - 
top_k: - default: 0 - type: integer - top_p: - default: 0.95 - type: number - required: - - temperature - - strategy - - top_p - - top_k - type: object - required: - - model - - content_batch - - sampling_params - - max_tokens - - logprobs - type: object - ChatCompletionRequest: - additionalProperties: false - properties: - available_tools: - items: - additionalProperties: false - properties: - input_shields: - items: - $ref: '#/components/schemas/ShieldConfig' - type: array - output_shields: - items: - $ref: '#/components/schemas/ShieldConfig' - type: array - parameters: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - tool_name: - oneOf: - - enum: - - web_search - - math - - image_gen - - code_interpreter - type: string - - type: string - required: - - tool_name - - input_shields - - output_shields - type: object - type: array - dialog: - $ref: '#/components/schemas/Dialog' - logprobs: - default: false - type: boolean - max_tokens: - default: 0 - type: integer - model: - enum: - - llama3_8b_chat - - llama3_70b_chat - type: string - sampling_params: - additionalProperties: false - properties: - strategy: - default: greedy - type: string - temperature: - default: 0.0 - type: number - top_k: - default: 0 - type: integer - top_p: - default: 0.95 - type: number - required: - - temperature - - strategy - - top_p - - top_k - type: object - stream: - default: false - type: boolean - required: - - model - - dialog - - sampling_params - - available_tools - - max_tokens - - stream - - logprobs - type: object - ChatCompletionResponse: - additionalProperties: false - properties: - content: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - - items: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - type: array - logprobs: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - stop_reason: - enum: - - not_stopped - - finished_ok - - max_tokens - title: Stop reasons are used to indicate why the model stopped generating - text. - type: string - tool_calls: - items: - additionalProperties: false - properties: - arguments: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - tool_name: - type: string - required: - - tool_name - - arguments - title: A tool call is a request to a tool. - type: object - type: array - required: - - content - - tool_calls - title: Normal chat completion response. - type: object - ChatCompletionResponseStreamChunk: - additionalProperties: false - properties: - stop_reason: - enum: - - not_stopped - - finished_ok - - max_tokens - title: Stop reasons are used to indicate why the model stopped generating - text. - type: string - text_delta: - type: string - tool_call: - additionalProperties: false - properties: - arguments: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - tool_name: - type: string - required: - - tool_name - - arguments - title: A tool call is a request to a tool. - type: object - required: - - text_delta - title: Streamed chat completion response. The actual response is a series of - such objects. 
- type: object - CompletionRequest: - additionalProperties: false - properties: - content: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - - items: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - type: array - logprobs: - default: false - type: boolean - max_tokens: - default: 0 - type: integer - model: - enum: - - llama3_8b - - llama3_70b - type: string - sampling_params: - additionalProperties: false - properties: - strategy: - default: greedy - type: string - temperature: - default: 0.0 - type: number - top_k: - default: 0 - type: integer - top_p: - default: 0.95 - type: number - required: - - temperature - - strategy - - top_p - - top_k - type: object - stream: - default: false - type: boolean - required: - - content - - model - - sampling_params - - max_tokens - - stream - - logprobs - type: object - CompletionResponse: - additionalProperties: false - properties: - content: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - - items: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - type: array - logprobs: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - stop_reason: - enum: - - not_stopped - - finished_ok - - max_tokens - title: Stop reasons are used to indicate why the model stopped generating - text. - type: string - required: - - content - title: Normal completion response. - type: object - CompletionResponseStreamChunk: - additionalProperties: false - properties: - logprobs: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - stop_reason: - enum: - - not_stopped - - finished_ok - - max_tokens - title: Stop reasons are used to indicate why the model stopped generating - text. - type: string - text_delta: - type: string - required: - - text_delta - title: streamed completion response. - type: object - CreateDatasetRequest: - additionalProperties: false - properties: - dataset: - $ref: '#/components/schemas/Dataset' - uuid: - type: string - required: - - uuid - - dataset - title: Request to create a dataset. - type: object - DPOAlignmentConfig: - additionalProperties: false - properties: - epsilon: - type: number - gamma: - type: number - reward_clip: - type: number - reward_scale: - type: number - required: - - reward_scale - - reward_clip - - epsilon - - gamma - type: object - Dataset: - additionalProperties: false - properties: - columns: - additionalProperties: - enum: - - dialog - - text - - media - - number - - json - type: string - type: object - content_url: - $ref: '#/components/schemas/URL' - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - required: - - columns - - content_url - - metadata - title: Dataset to be used for training or evaluating language models. 
- type: object - Dialog: - additionalProperties: false - properties: - message: - $ref: '#/components/schemas/Message' - message_history: - items: - $ref: '#/components/schemas/Message' - type: array - required: - - message - - message_history - type: object - DoraFinetuningConfig: - additionalProperties: false - properties: - alpha: - type: integer - apply_lora_to_mlp: - type: boolean - apply_lora_to_output: - type: boolean - lora_attn_modules: - items: - type: string - type: array - rank: - type: integer - required: - - lora_attn_modules - - apply_lora_to_mlp - - apply_lora_to_output - - rank - - alpha - type: object - KScoredPromptGenerations: - additionalProperties: false - properties: - k_scored_generations: - items: - $ref: '#/components/schemas/ScoredMessage' - type: array - prompt: - $ref: '#/components/schemas/Message' - required: - - prompt - - k_scored_generations - type: object - LoraFinetuningConfig: - additionalProperties: false - properties: - alpha: - type: integer - apply_lora_to_mlp: - type: boolean - apply_lora_to_output: - type: boolean - lora_attn_modules: - items: - type: string - type: array - rank: - type: integer - required: - - lora_attn_modules - - apply_lora_to_mlp - - apply_lora_to_output - - rank - - alpha - type: object - MemoryBank: - additionalProperties: false - properties: - name: - type: string - uuid: - type: string - required: - - uuid - - name - type: object - MemoryBankDocument: - additionalProperties: false - properties: - content: - contentEncoding: base64 - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - mime_type: - type: string - uuid: - type: string - required: - - uuid - - content - - metadata - - mime_type - type: object - Message: - additionalProperties: false - properties: - content: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - - items: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - type: array - role: - enum: - - system - - user - - assistant - - tool - type: string - tool_calls: - items: - additionalProperties: false - properties: - arguments: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - tool_name: - type: string - required: - - tool_name - - arguments - title: A tool call is a request to a tool. 
- type: object - type: array - tool_responses: - items: - additionalProperties: false - properties: - content: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - - items: - oneOf: - - type: string - - $ref: '#/components/schemas/Attachment' - type: array - tool_name: - type: string - required: - - tool_name - - content - type: object - type: array - required: - - role - - content - - tool_calls - - tool_responses - type: object - OptimizerConfig: - additionalProperties: false - properties: - lr: - type: number - lr_min: - type: number - optimizer_type: - enum: - - adam - - adamw - - sgd - type: string - weight_decay: - type: number - required: - - optimizer_type - - lr - - lr_min - - weight_decay - type: object - PostTrainingJobArtifactsResponse: - additionalProperties: false - properties: - checkpoints: - items: - additionalProperties: false - properties: - iters: - type: integer - path: - $ref: '#/components/schemas/URL' - required: - - iters - - path - type: object - type: array - job_uuid: - type: string - required: - - job_uuid - - checkpoints - title: Artifacts of a finetuning job. - type: object - PostTrainingJobLogStream: - additionalProperties: false - properties: - job_uuid: - type: string - log_lines: - items: - type: string - type: array - required: - - job_uuid - - log_lines - title: Stream of logs from a finetuning job. - type: object - PostTrainingJobStatusResponse: - additionalProperties: false - properties: - checkpoints: - items: - additionalProperties: false - properties: - iters: - type: integer - path: - $ref: '#/components/schemas/URL' - required: - - iters - - path - type: object - type: array - completed_at: - format: date-time - type: string - job_uuid: - type: string - resources_allocated: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - scheduled_at: - format: date-time - type: string - started_at: - format: date-time - type: string - status: - enum: - - running - - completed - - failed - - scheduled - type: string - required: - - job_uuid - - status - - checkpoints - title: Status of a finetuning job. - type: object - PostTrainingRLHFRequest: - additionalProperties: false - properties: - algorithm: - enum: - - dpo - type: string - algorithm_config: - $ref: '#/components/schemas/DPOAlignmentConfig' - dataset: - $ref: '#/components/schemas/Dataset' - finetuned_model: - $ref: '#/components/schemas/URL' - hyperparam_search_config: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - job_uuid: - type: string - logger_config: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - optimizer_config: - $ref: '#/components/schemas/OptimizerConfig' - training_config: - $ref: '#/components/schemas/TrainingConfig' - validation_dataset: - $ref: '#/components/schemas/Dataset' - required: - - job_uuid - - finetuned_model - - dataset - - validation_dataset - - algorithm - - algorithm_config - - optimizer_config - - training_config - - hyperparam_search_config - - logger_config - title: Request to finetune a model. 
- type: object - PostTrainingSFTRequest: - additionalProperties: false - properties: - algorithm: - enum: - - full - - lora - - qlora - - dora - type: string - algorithm_config: - oneOf: - - $ref: '#/components/schemas/LoraFinetuningConfig' - - $ref: '#/components/schemas/QLoraFinetuningConfig' - - $ref: '#/components/schemas/DoraFinetuningConfig' - dataset: - $ref: '#/components/schemas/Dataset' - hyperparam_search_config: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - job_uuid: - type: string - logger_config: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - model: - enum: - - llama3_8b - - llama3_70b - type: string - optimizer_config: - $ref: '#/components/schemas/OptimizerConfig' - training_config: - $ref: '#/components/schemas/TrainingConfig' - validation_dataset: - $ref: '#/components/schemas/Dataset' - required: - - job_uuid - - model - - dataset - - validation_dataset - - algorithm - - algorithm_config - - optimizer_config - - training_config - - hyperparam_search_config - - logger_config - title: Request to finetune a model. - type: object - QLoraFinetuningConfig: - additionalProperties: false - properties: - alpha: - type: integer - apply_lora_to_mlp: - type: boolean - apply_lora_to_output: - type: boolean - lora_attn_modules: - items: - type: string - type: array - rank: - type: integer - required: - - lora_attn_modules - - apply_lora_to_mlp - - apply_lora_to_output - - rank - - alpha - type: object - RewardScoringRequest: - additionalProperties: false - properties: - model: - enum: - - llama3_405b_reward - type: string - prompt_generations: - items: - additionalProperties: false - properties: - dialog: - $ref: '#/components/schemas/Dialog' - k_generations: - items: - $ref: '#/components/schemas/Message' - type: array - required: - - dialog - - k_generations - type: object - type: array - required: - - prompt_generations - - model - title: Request to score a reward function. A list of prompts and a list of responses - per prompt. - type: object - RewardScoringResponse: - additionalProperties: false - properties: - scored_generations: - items: - $ref: '#/components/schemas/KScoredPromptGenerations' - type: array - required: - - scored_generations - title: Response from the reward scoring. Batch of (prompt, response, score) - tuples that pass the threshold. - type: object - ScoredMessage: - additionalProperties: false - properties: - message: - $ref: '#/components/schemas/Message' - score: - type: number - required: - - message - - score - type: object - ShieldConfig: - additionalProperties: false - properties: - params: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - shield_type: - enum: - - llama_guard - - prompt_guard - - code_guard - title: The type of safety shield. - type: string - required: - - shield_type - - params - type: object - SyntheticDataGenerationRequest: - additionalProperties: false - properties: - filtering_function: - default: none - enum: - - none - - random - - top_k - - top_p - - top_k_top_p - - sigmoid - title: The type of filtering function. - type: string - prompts: - items: - $ref: '#/components/schemas/Message' - type: array - reward_scoring: - type: object - required: - - prompts - - filtering_function - title: Request to generate synthetic data. 
A small batch of prompts and a filtering - function - type: object - SyntheticDataGenerationResponse: - additionalProperties: false - properties: - statistics: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - synthetic_data: - items: - $ref: '#/components/schemas/KScoredPromptGenerations' - type: array - required: - - synthetic_data - title: Response from the synthetic data generation. Batch of (prompt, response, - score) tuples that pass the threshold. - type: object - TrainingConfig: - additionalProperties: false - properties: - batch_size: - type: integer - enable_activation_checkpointing: - type: boolean - fsdp_cpu_offload: - type: boolean - memory_efficient_fsdp_wrap: - type: boolean - n_epochs: - type: integer - n_iters: - type: integer - shuffle: - type: boolean - required: - - n_epochs - - batch_size - - shuffle - - n_iters - - enable_activation_checkpointing - - memory_efficient_fsdp_wrap - - fsdp_cpu_offload - type: object - URL: - format: uri - pattern: ^(https?://|file://|data:) - type: string -info: - description: "This is the specification of the llama stack that provides \n \ - \ a set of endpoints and their corresponding interfaces that are tailored\ - \ to \n best leverage Llama Models. The specification is still\ - \ in draft and subject to change." - title: '[DRAFT] Llama Stack Specification' - version: 0.0.1 -jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema -openapi: 3.1.0 -paths: - /agentic_system/create: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/AgenticSystemCreateRequest' - required: true - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/AgenticSystemCreateResponse' - description: OK - tags: - - AgenticSystem - /agentic_system/delete: - delete: - parameters: - - in: query - name: agent_id - required: true - schema: - type: string - responses: - '200': - description: OK - tags: - - AgenticSystem - /agentic_system/execute: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/AgenticSystemExecuteRequest' - required: true - responses: - '200': - content: - application/json: - schema: - oneOf: - - $ref: '#/components/schemas/AgenticSystemExecuteResponse' - - $ref: '#/components/schemas/AgenticSystemExecuteResponseStreamChunk' - description: non-stream response from the agentic system. **OR** Streamed - agent execution response. 
- tags: - - AgenticSystem - /datasets/create: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CreateDatasetRequest' - required: true - responses: - '200': - description: OK - tags: - - Datasets - /datasets/delete: - delete: - parameters: - - in: query - name: dataset_id - required: true - schema: - type: string - responses: - '200': - description: OK - tags: - - Datasets - /datasets/get: - get: - parameters: - - in: query - name: dataset_id - required: true - schema: - type: string - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/Dataset' - description: OK - tags: - - Datasets - /inference/batch_chat_completion: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/BatchChatCompletionRequest' - required: true - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/ChatCompletionResponse' - description: OK - tags: - - Inference - /inference/batch_completion: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/BatchCompletionRequest' - required: true - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/CompletionResponse' - description: OK - tags: - - Inference - /inference/chat_completion: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/ChatCompletionRequest' - required: true - responses: - '200': - content: - application/json: - schema: - oneOf: - - $ref: '#/components/schemas/ChatCompletionResponse' - - $ref: '#/components/schemas/ChatCompletionResponseStreamChunk' - description: Normal chat completion response. **OR** Streamed chat completion - response. The actual response is a series of such objects. - tags: - - Inference - /inference/completion: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/CompletionRequest' - required: true - responses: - '200': - content: - application/json: - schema: - oneOf: - - $ref: '#/components/schemas/CompletionResponse' - - $ref: '#/components/schemas/CompletionResponseStreamChunk' - description: Normal completion response. **OR** streamed completion response. 
- tags: - - Inference - /memory_bank/delete: - post: - parameters: - - in: query - name: bank_uuid - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - items: - type: string - type: array - required: true - responses: - '200': - content: - application/jsonl: - schema: - type: string - description: OK - tags: - - MemoryBanks - /memory_bank/get: - post: - parameters: - - in: query - name: bank_uuid - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - items: - type: string - type: array - required: true - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/MemoryBankDocument' - description: OK - tags: - - MemoryBanks - /memory_bank/insert: - post: - parameters: - - in: query - name: bank_uuid - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - items: - additionalProperties: false - properties: - content: - contentEncoding: base64 - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - mime_type: - type: string - uuid: - type: string - required: - - uuid - - content - - metadata - - mime_type - type: object - type: array - required: true - responses: - '200': - description: OK - tags: - - MemoryBanks - /memory_bank/update: - post: - parameters: - - in: query - name: bank_uuid - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - items: - additionalProperties: false - properties: - content: - contentEncoding: base64 - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - mime_type: - type: string - uuid: - type: string - required: - - uuid - - content - - metadata - - mime_type - type: object - type: array - required: true - responses: - '200': - description: OK - tags: - - MemoryBanks - /memory_banks/create: - post: - parameters: - - in: query - name: bank_uuid - required: true - schema: - type: string - - in: query - name: bank_name - required: true - schema: - type: string - requestBody: - content: - application/json: - schema: - items: - additionalProperties: false - properties: - content: - contentEncoding: base64 - type: string - metadata: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - mime_type: - type: string - uuid: - type: string - required: - - uuid - - content - - metadata - - mime_type - type: object - type: array - required: true - responses: - '200': - description: OK - tags: - - MemoryBanks - /memory_banks/drop: - delete: - parameters: - - in: query - name: bank_uuid - required: true - schema: - type: string - responses: - '200': - content: - application/json: - schema: - type: string - description: OK - tags: - - MemoryBanks - /memory_banks/get: - get: - parameters: [] - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/MemoryBank' - description: OK - tags: - - MemoryBanks - /post_training/job/artifacts: - get: - parameters: - - in: query - name: job_uuid - required: true - schema: - type: string - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/PostTrainingJobArtifactsResponse' - description: OK - tags: - - PostTraining - 
/post_training/job/logs: - get: - parameters: - - in: query - name: job_uuid - required: true - schema: - type: string - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/PostTrainingJobLogStream' - description: OK - tags: - - PostTraining - /post_training/job/status: - get: - parameters: - - in: query - name: job_uuid - required: true - schema: - type: string - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/PostTrainingJobStatusResponse' - description: OK - tags: - - PostTraining - /post_training/preference_optimize/: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/PostTrainingRLHFRequest' - required: true - responses: - '200': - description: OK - tags: - - PostTraining - /post_training/supervised_fine_tune/: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/PostTrainingSFTRequest' - required: true - responses: - '200': - description: OK - tags: - - PostTraining - /reward_scoring/score: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/RewardScoringRequest' - required: true - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/RewardScoringResponse' - description: OK - tags: - - RewardScoring - /synthetic_data_generation/generate: - post: - parameters: [] - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/SyntheticDataGenerationRequest' - required: true - responses: - '200': - content: - application/json: - schema: - $ref: '#/components/schemas/SyntheticDataGenerationResponse' - description: OK - tags: - - SyntheticDataGeneration -security: -- Default: [] -servers: -- url: http://any-hosted-llama-stack.com -tags: -- name: Datasets -- name: SyntheticDataGeneration -- name: Inference -- name: MemoryBanks -- name: RewardScoring -- name: AgenticSystem -- name: PostTraining -- description: - name: ShieldConfig -- description: - name: AgenticSystemCreateRequest -- description: - name: AgenticSystemCreateResponse -- description: - name: AgenticSystemExecuteRequest -- description: 'A single turn in an interaction with an Agentic System. - - - ' - name: AgenticSystemTurn -- description: 'Attachments are used to refer to external resources, such as images, - videos, audio, etc. - - - ' - name: Attachment -- description: - name: Message -- description: - name: URL -- description: 'non-stream response from the agentic system. - - - ' - name: AgenticSystemExecuteResponse -- description: 'Streamed agent execution response. - - - ' - name: AgenticSystemExecuteResponseStreamChunk -- description: 'Request to create a dataset. - - - ' - name: CreateDatasetRequest -- description: 'Dataset to be used for training or evaluating language models. - - - ' - name: Dataset -- description: - name: MemoryBank -- description: - name: MemoryBankDocument -- description: 'Artifacts of a finetuning job. - - - ' - name: PostTrainingJobArtifactsResponse -- description: 'Status of a finetuning job. - - - ' - name: PostTrainingJobStatusResponse -- description: 'Stream of logs from a finetuning job. - - - ' - name: PostTrainingJobLogStream -- description: - name: BatchChatCompletionRequest -- description: - name: Dialog -- description: 'Normal chat completion response. 
- - - ' - name: ChatCompletionResponse -- description: - name: BatchCompletionRequest -- description: 'Normal completion response. - - - ' - name: CompletionResponse -- description: - name: ChatCompletionRequest -- description: 'Streamed chat completion response. The actual response is a series - of such objects. - - - ' - name: ChatCompletionResponseStreamChunk -- description: - name: CompletionRequest -- description: 'streamed completion response. - - - ' - name: CompletionResponseStreamChunk -- description: 'Request to generate synthetic data. A small batch of prompts and a - filtering function - - - ' - name: SyntheticDataGenerationRequest -- description: - name: KScoredPromptGenerations -- description: - name: ScoredMessage -- description: 'Response from the synthetic data generation. Batch of (prompt, response, - score) tuples that pass the threshold. - - - ' - name: SyntheticDataGenerationResponse -- description: - name: DPOAlignmentConfig -- description: - name: OptimizerConfig -- description: 'Request to finetune a model. - - - ' - name: PostTrainingRLHFRequest -- description: - name: TrainingConfig -- description: 'Request to score a reward function. A list of prompts and a list of - responses per prompt. - - - ' - name: RewardScoringRequest -- description: 'Response from the reward scoring. Batch of (prompt, response, score) - tuples that pass the threshold. - - - ' - name: RewardScoringResponse -- description: - name: DoraFinetuningConfig -- description: - name: LoraFinetuningConfig -- description: 'Request to finetune a model. - - - ' - name: PostTrainingSFTRequest -- description: - name: QLoraFinetuningConfig -x-tagGroups: -- name: Operations - tags: - - AgenticSystem - - Datasets - - Inference - - MemoryBanks - - PostTraining - - RewardScoring - - SyntheticDataGeneration -- name: Types - tags: - - AgenticSystemCreateRequest - - AgenticSystemCreateResponse - - AgenticSystemExecuteRequest - - AgenticSystemExecuteResponse - - AgenticSystemExecuteResponseStreamChunk - - AgenticSystemTurn - - Attachment - - BatchChatCompletionRequest - - BatchCompletionRequest - - ChatCompletionRequest - - ChatCompletionResponse - - ChatCompletionResponseStreamChunk - - CompletionRequest - - CompletionResponse - - CompletionResponseStreamChunk - - CreateDatasetRequest - - DPOAlignmentConfig - - Dataset - - Dialog - - DoraFinetuningConfig - - KScoredPromptGenerations - - LoraFinetuningConfig - - MemoryBank - - MemoryBankDocument - - Message - - OptimizerConfig - - PostTrainingJobArtifactsResponse - - PostTrainingJobLogStream - - PostTrainingJobStatusResponse - - PostTrainingRLHFRequest - - PostTrainingSFTRequest - - QLoraFinetuningConfig - - RewardScoringRequest - - RewardScoringResponse - - ScoredMessage - - ShieldConfig - - SyntheticDataGenerationRequest - - SyntheticDataGenerationResponse - - TrainingConfig - - URL diff --git a/source/post_training_types.py b/source/post_training_types.py deleted file mode 100644 index f67fce4d8..000000000 --- a/source/post_training_types.py +++ /dev/null @@ -1,119 +0,0 @@ -from dataclasses import dataclass, field -from enum import Enum -from typing import Any, Dict, List, Optional, Set, Union - -from model_types import Message, URL - -from strong_typing.schema import json_schema_type - - -class DatasetColumnType(Enum): - dialog = "dialog" - text = "text" - media = "media" - number = "number" - json = "json" - - -@json_schema_type -@dataclass -class Dataset: - """Dataset to be used for training or evaluating language models.""" - - # TODO(ashwin): figure out if 
we need to add an enum for a "dataset type" - - columns: Dict[str, DatasetColumnType] - content_url: URL - metadata: Dict[str, Any] = field(default_factory=dict) - - -class OptimizerType(Enum): - adam = "adam" - adamw = "adamw" - sgd = "sgd" - - -@json_schema_type -@dataclass -class OptimizerConfig: - optimizer_type: OptimizerType - lr: float - lr_min: float - weight_decay: float - - -@json_schema_type -@dataclass -class TrainingConfig: - n_epochs: int - batch_size: int - shuffle: bool - n_iters: int - - enable_activation_checkpointing: bool - memory_efficient_fsdp_wrap: bool - fsdp_cpu_offload: bool - - -class FinetuningAlgorithm(Enum): - full = "full" - lora = "lora" - qlora = "qlora" - dora = "dora" - - -@json_schema_type -@dataclass -class LoraFinetuningConfig: - lora_attn_modules: List[str] - apply_lora_to_mlp: bool - apply_lora_to_output: bool - rank: int - alpha: int - - -@json_schema_type -@dataclass -class QLoraFinetuningConfig(LoraFinetuningConfig): - pass - - -@json_schema_type -@dataclass -class DoraFinetuningConfig(LoraFinetuningConfig): - pass - - -@json_schema_type -@dataclass -class PostTrainingJobLogStream: - """Stream of logs from a finetuning job.""" - - job_uuid: str - log_lines: List[str] - - -class PostTrainingJobStatus(Enum): - running = "running" - completed = "completed" - failed = "failed" - scheduled = "scheduled" - - -@dataclass -class Checkpoint: - iters: int - path: URL - - -class RLHFAlgorithm(Enum): - dpo = "dpo" - - -@json_schema_type -@dataclass -class DPOAlignmentConfig: - reward_scale: float - reward_clip: float - epsilon: float - gamma: float diff --git a/source/server.py b/source/server.py deleted file mode 100644 index e69e7bb06..000000000 --- a/source/server.py +++ /dev/null @@ -1,47 +0,0 @@ -from flask import Flask, request, jsonify -from dataclasses import dataclass, field -from typing import List, Set, Optional, Union, Protocol -from enum import Enum - -app = Flask(__name__) - -from model_types import * -from agentic_system_types import * -from api_definitions import * - -class AgenticSystemImpl(AgenticSystem): - def create_agentic_system(self, request: AgenticSystemCreateRequest) -> AgenticSystemCreateResponse: - # Mock implementation - return AgenticSystemCreateResponse(agent_id="12345") - - def create_agentic_system_execute(self, request: AgenticSystemExecuteRequest) -> Union[AgenticSystemExecuteResponse, AgenticSystemExecuteResponseStreamChunk]: - # Mock implementation - return AgenticSystemExecuteResponse( - turn=AgenticSystemTurn( - user_messages=[], - steps=[], - response_message=Message( - role="assistant", - content="Hello, I am an agent. I can help you with your tasks. What can I help you with?", - ) - ) - ) - -agentic_system = AgenticSystemImpl() - -@app.route("/agentic_system/create", methods=["POST"]) -def create_agentic_system(): - data = request.json - create_request = AgenticSystemCreateRequest(**data) - response = agentic_system.create_agentic_system(create_request) - return jsonify(response) - -@app.route("/agentic_system/execute", methods=["POST"]) -def create_agentic_system_execute(): - data = request.json - execute_request = AgenticSystemExecuteRequest(**data) - response = agentic_system.create_agentic_system_execute(execute_request) - return jsonify(response) - -if __name__ == "__main__": - app.run(debug=True)
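For reference, the deleted openapi.yaml above declares /memory_bank/insert as a POST that takes bank_uuid as a query parameter and a JSON array of documents whose content field carries base64-encoded bytes (contentEncoding: base64). The client sketch below is illustrative only; the base URL, bank UUID, and document values are placeholders, not values taken from the spec.

# Minimal client sketch for the deleted /memory_bank/insert route.
# BASE_URL and BANK_UUID are placeholders; the spec's server entry is a dummy host.
import base64

import requests

BASE_URL = "http://localhost:5000"
BANK_UUID = "example-bank-uuid"

document = {
    "uuid": "example-doc-uuid",
    # content is declared with contentEncoding: base64, so raw bytes are
    # base64-encoded before being placed in the JSON body.
    "content": base64.b64encode(b"hello memory bank").decode("ascii"),
    "metadata": {"source": "example"},
    "mime_type": "text/plain",
}

# bank_uuid travels as a query parameter; the body is a JSON array of documents.
resp = requests.post(
    f"{BASE_URL}/memory_bank/insert",
    params={"bank_uuid": BANK_UUID},
    json=[document],
)
resp.raise_for_status()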
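Similarly, the deleted post_training_types.py defines plain dataclasses for fine-tuning configuration. Below is a minimal sketch of how they compose, assuming the module is still importable; the hyperparameter values and the lora_attn_modules names are placeholders chosen for illustration, not recommendations.

# Illustrative composition of the deleted post-training config dataclasses.
from post_training_types import (
    LoraFinetuningConfig,
    OptimizerConfig,
    OptimizerType,
    TrainingConfig,
)

# Optimizer and schedule settings; all values are placeholders.
optimizer = OptimizerConfig(
    optimizer_type=OptimizerType.adamw,
    lr=1e-4,
    lr_min=1e-6,
    weight_decay=0.01,
)

# Loop-level settings, including the FSDP and activation-checkpointing flags.
training = TrainingConfig(
    n_epochs=1,
    batch_size=8,
    shuffle=True,
    n_iters=1000,
    enable_activation_checkpointing=True,
    memory_efficient_fsdp_wrap=True,
    fsdp_cpu_offload=False,
)

# LoRA adapter settings; the module names are hypothetical attention projections.
lora = LoraFinetuningConfig(
    lora_attn_modules=["q_proj", "v_proj"],
    apply_lora_to_mlp=False,
    apply_lora_to_output=False,
    rank=8,
    alpha=16,
)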