diff --git a/source/agentic_system_types.py b/source/agentic_system_types.py
deleted file mode 100644
index 91823586f..000000000
--- a/source/agentic_system_types.py
+++ /dev/null
@@ -1,98 +0,0 @@
-from dataclasses import dataclass, field
-from enum import Enum
-from typing import Any, Dict, List, Optional, Set, Union
-
-from model_types import (
- BuiltinTool,
- Content,
- InstructModel,
- Message,
- PretrainedModel,
- SamplingParams,
- SafetyViolation,
- StopReason,
- ToolCall,
- ToolDefinition,
- ToolResponse,
-)
-
-from strong_typing.schema import json_schema_type
-
-
-class ExecutionStepType(Enum):
- """The type of execution step."""
-
- model_inference = "model_inference"
- tool_execution = "tool_execution"
- safety_filtering = "safety_filtering"
- memory_retrieval = "memory_retrieval"
-
-
-@dataclass
-class ExecutionStepBase:
- """An agentic system turn can consist of one or more such execution steps."""
-
- step_type: ExecutionStepType
- uuid: str
-
-
-@dataclass
-class ModelInferenceStep(ExecutionStepBase):
- step_type = ExecutionStepType.model_inference
- text: str
- logprobs: Optional[Dict[str, Any]] = None
-
-
-@dataclass
-class ToolExecutionStep(ExecutionStepBase):
- step_type = ExecutionStepType.tool_execution
-
- # we could be calling multiple tools in a single step (in parallel)
- tool_calls: List[ToolCall]
- tool_responses: List[ToolResponse]
-
-
-@dataclass
-class SafetyFilteringStep(ExecutionStepBase):
- step_type = ExecutionStepType.safety_filtering
- violation: Optional[SafetyViolation] = None
-
-
-@json_schema_type
-@dataclass
-class MemoryBank:
- uuid: str
- name: str
-
-
-@dataclass
-class MemoryBankDocument:
- uuid: str
- content: bytes
- metadata: Dict[str, Any]
- mime_type: str
-
-
-@dataclass
-class MemoryRetrievalStep(ExecutionStepBase):
- step_type = ExecutionStepType.memory_retrieval
- documents: List[MemoryBankDocument]
- scores: List[float]
-
-
-ExecutionStep = Union[
- ModelInferenceStep,
- ToolExecutionStep,
- SafetyFilteringStep,
- MemoryRetrievalStep,
-]
-
-
-@json_schema_type
-@dataclass
-class AgenticSystemTurn:
- """A single turn in an interaction with an Agentic System."""
-
- user_messages: List[Message]
- steps: List[ExecutionStep]
- response_message: Message
diff --git a/source/api_definitions.py b/source/api_definitions.py
deleted file mode 100644
index d09ccb5fd..000000000
--- a/source/api_definitions.py
+++ /dev/null
@@ -1,563 +0,0 @@
-from dataclasses import dataclass, field
-from datetime import datetime
-from enum import Enum
-from typing import Any, Dict, List, Optional, Protocol, Set, Tuple, Union
-
-import yaml
-from agentic_system_types import (
- AgenticSystemTurn,
- ExecutionStepType,
- MemoryBank,
- MemoryBankDocument,
- SafetyViolation,
-)
-
-from model_types import (
- BuiltinTool,
- Content,
- Dialog,
- InstructModel,
- Message,
- PretrainedModel,
- RewardModel,
- SamplingParams,
- ShieldConfig,
- StopReason,
- ToolCall,
- ToolDefinition,
- ToolResponse,
- URL,
-)
-
-from post_training_types import (
- Checkpoint,
- Dataset,
- DoraFinetuningConfig,
- DPOAlignmentConfig,
- FinetuningAlgorithm,
- LoraFinetuningConfig,
- OptimizerConfig,
- PostTrainingJobLogStream,
- PostTrainingJobStatus,
- QLoraFinetuningConfig,
- RLHFAlgorithm,
- TrainingConfig,
-)
-
-from pyopenapi import Info, Options, Server, Specification, webmethod
-from strong_typing.schema import json_schema_type
-
-
-@json_schema_type
-@dataclass
-class CompletionRequest:
- content: Content
- model: PretrainedModel
-    sampling_params: SamplingParams = field(default_factory=SamplingParams)
- max_tokens: int = 0
- stream: bool = False
- logprobs: bool = False
-
-
-@json_schema_type
-@dataclass
-class CompletionResponse:
- """Normal completion response."""
-
- content: Content
- stop_reason: Optional[StopReason] = None
- logprobs: Optional[Dict[str, Any]] = None
-
-
-@json_schema_type
-@dataclass
-class CompletionResponseStreamChunk:
- """streamed completion response."""
-
- text_delta: str
- stop_reason: Optional[StopReason] = None
- logprobs: Optional[Dict[str, Any]] = None
-
-
-@json_schema_type
-@dataclass
-class ChatCompletionRequest:
- model: InstructModel
- dialog: Dialog
-    sampling_params: SamplingParams = field(default_factory=SamplingParams)
-
- # zero-shot tool definitions as input to the model
- available_tools: List[ToolDefinition] = field(default_factory=list)
-
- max_tokens: int = 0
- stream: bool = False
- logprobs: bool = False
-
-
-@json_schema_type
-@dataclass
-class ChatCompletionResponse:
- """Normal chat completion response."""
-
- content: Content
-
- # note: multiple tool calls can be generated in a single response
- tool_calls: List[ToolCall] = field(default_factory=list)
-
- stop_reason: Optional[StopReason] = None
- logprobs: Optional[Dict[str, Any]] = None
-
-
-@json_schema_type
-@dataclass
-class ChatCompletionResponseStreamChunk:
- """Streamed chat completion response. The actual response is a series of such objects."""
-
- text_delta: str
- stop_reason: Optional[StopReason] = None
- tool_call: Optional[ToolCall] = None
-
-
-@json_schema_type
-@dataclass
-class BatchCompletionRequest:
- model: PretrainedModel
- content_batch: List[Content]
-    sampling_params: SamplingParams = field(default_factory=SamplingParams)
- max_tokens: int = 0
- logprobs: bool = False
-
-
-@json_schema_type
-@dataclass
-class BatchChatCompletionRequest:
- model: InstructModel
- batch_dialogs: List[Dialog]
-    sampling_params: SamplingParams = field(default_factory=SamplingParams)
-
- # zero-shot tool definitions as input to the model
- available_tools: List[ToolDefinition] = field(default_factory=list)
-
- max_tokens: int = 0
- logprobs: bool = False
-
-
-class Inference(Protocol):
-
- @webmethod(route="/inference/completion")
- def post_completion(
- self,
- request: CompletionRequest,
- ) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ...
-
- @webmethod(route="/inference/chat_completion")
- def post_chat_completion(
- self,
- request: ChatCompletionRequest,
- ) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...
-
- @webmethod(route="/inference/batch_completion")
- def post_batch_completion(
- self,
- request: BatchCompletionRequest,
- ) -> List[CompletionResponse]: ...
-
- @webmethod(route="/inference/batch_chat_completion")
- def post_batch_chat_completion(
- self,
- request: BatchChatCompletionRequest,
- ) -> List[ChatCompletionResponse]: ...
-
-
-@dataclass
-class AgenticSystemCreateRequest:
- uuid: str
-
- instructions: str
- model: InstructModel
-
- # zero-shot or built-in tool configurations as input to the model
- available_tools: List[ToolDefinition] = field(default_factory=list)
-
- # tools which aren't executable are emitted as tool calls which the users can
- # execute themselves.
- executable_tools: Set[str] = field(default_factory=set)
-
- memory_bank_uuids: List[str] = field(default_factory=list)
-
- input_shields: List[ShieldConfig] = field(default_factory=list)
- output_shields: List[ShieldConfig] = field(default_factory=list)
-
-
-@json_schema_type
-@dataclass
-class AgenticSystemCreateResponse:
- agent_uuid: str
-
-
-@json_schema_type
-@dataclass
-class AgenticSystemExecuteRequest:
- agent_uuid: str
- messages: List[Message]
- turn_history: List[AgenticSystemTurn] = None
- stream: bool = False
-
-
-@json_schema_type
-@dataclass
-class AgenticSystemExecuteResponse:
- """non-stream response from the agentic system."""
-
- turn: AgenticSystemTurn
-
-
-class AgenticSystemExecuteResponseEventType(Enum):
- """The type of event."""
-
- step_start = "step_start"
- step_end = "step_end"
- step_progress = "step_progress"
-
-
-@json_schema_type
-@dataclass
-class AgenticSystemExecuteResponseStreamChunk:
- """Streamed agent execution response."""
-
- event_type: AgenticSystemExecuteResponseEventType
-
- step_uuid: str
- step_type: ExecutionStepType
-
- # TODO(ashwin): maybe add more structure here and do this as a proper tagged union
- violation: Optional[SafetyViolation] = None
- tool_call: Optional[ToolCall] = None
- tool_response_delta: Optional[ToolResponse] = None
- response_text_delta: Optional[str] = None
- retrieved_document: Optional[MemoryBankDocument] = None
-
- stop_reason: Optional[StopReason] = None
-
-
-class AgenticSystem(Protocol):
-
- @webmethod(route="/agentic_system/create")
- def create_agentic_system(
- self,
- request: AgenticSystemCreateRequest,
- ) -> AgenticSystemCreateResponse: ...
-
- @webmethod(route="/agentic_system/execute")
- def create_agentic_system_execute(
- self,
- request: AgenticSystemExecuteRequest,
- ) -> Union[
- AgenticSystemExecuteResponse, AgenticSystemExecuteResponseStreamChunk
- ]: ...
-
- @webmethod(route="/agentic_system/delete")
- def delete_agentic_system(
- self,
- agent_id: str,
- ) -> None: ...
-
-
-class MemoryBanks(Protocol):
- @webmethod(route="/memory_banks/create")
- def post_create_memory_bank(
- self,
- bank_uuid: str,
- bank_name: str,
- documents: List[MemoryBankDocument],
- ) -> None: ...
-
- @webmethod(route="/memory_banks/get")
- def get_memory_banks(
- self
- ) -> List[MemoryBank]: ...
-
- @webmethod(route="/memory_banks/drop")
- def delete_memory_bank(
- self,
- bank_uuid: str,
- ) -> str: ...
-
- @webmethod(route="/memory_bank/insert")
- def post_insert_memory_documents(
- self,
- bank_uuid: str,
- documents: List[MemoryBankDocument],
- ) -> None: ...
-
- @webmethod(route="/memory_bank/update")
- def post_update_memory_documents(
- self,
- bank_uuid: str,
- documents: List[MemoryBankDocument],
- ) -> None: ...
-
- @webmethod(route="/memory_bank/get")
- def get_memory_documents(
- self,
- bank_uuid: str,
- document_uuids: List[str],
- ) -> List[MemoryBankDocument]: ...
-
- @webmethod(route="/memory_bank/delete")
- def delete_memory_documents(
- self,
- bank_uuid: str,
- document_uuids: List[str],
- ) -> List[str]: ...
-
-
-@dataclass
-class KPromptGenerations:
- dialog: Dialog
- k_generations: List[Message]
-
-
-@json_schema_type
-@dataclass
-class ScoredMessage:
- message: Message
- score: float
-
-
-@json_schema_type
-@dataclass
-class KScoredPromptGenerations:
- prompt: Message
- k_scored_generations: List[ScoredMessage]
-
-
-@json_schema_type
-@dataclass
-class RewardScoringRequest:
- """Request to score a reward function. A list of prompts and a list of responses per prompt."""
-
- prompt_generations: List[KPromptGenerations]
- model: RewardModel
-
-
-@json_schema_type
-@dataclass
-class RewardScoringResponse:
- """Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold."""
-
- scored_generations: List[KScoredPromptGenerations]
-
-
-class RewardScoring(Protocol):
- @webmethod(route="/reward_scoring/score")
- def post_score(
- self,
- request: RewardScoringRequest,
-    ) -> RewardScoringResponse: ...
-
-
-class FilteringFunction(Enum):
- """The type of filtering function."""
-
- none = "none"
- random = "random"
- top_k = "top_k"
- top_p = "top_p"
- top_k_top_p = "top_k_top_p"
- sigmoid = "sigmoid"
-
-
-@json_schema_type
-@dataclass
-class SyntheticDataGenerationRequest:
- """Request to generate synthetic data. A small batch of prompts and a filtering function"""
-
- prompts: List[Message]
- filtering_function: FilteringFunction = FilteringFunction.none
- reward_scoring: Optional[RewardScoring] = None
-
-
-@json_schema_type
-@dataclass
-class SyntheticDataGenerationResponse:
- """Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."""
-
- synthetic_data: List[KScoredPromptGenerations]
- statistics: Optional[Dict[str, Any]] = None
-
-
-class SyntheticDataGeneration(Protocol):
- @webmethod(route="/synthetic_data_generation/generate")
- def post_generate(
- self,
- request: SyntheticDataGenerationRequest,
-    ) -> SyntheticDataGenerationResponse: ...
-
-
-@json_schema_type
-@dataclass
-class CreateDatasetRequest:
- """Request to create a dataset."""
-
- uuid: str
- dataset: Dataset
-
-
-class Datasets(Protocol):
- @webmethod(route="/datasets/create")
- def create_dataset(
- self,
- request: CreateDatasetRequest,
- ) -> None: ...
-
- @webmethod(route="/datasets/get")
- def get_dataset(
- self,
- dataset_id: str,
- ) -> Dataset: ...
-
- @webmethod(route="/datasets/delete")
- def delete_dataset(
- self,
- dataset_id: str,
- ) -> None: ...
-
-
-@json_schema_type
-@dataclass
-class PostTrainingSFTRequest:
- """Request to finetune a model."""
-
- job_uuid: str
-
- model: PretrainedModel
- dataset: Dataset
- validation_dataset: Dataset
-
- algorithm: FinetuningAlgorithm
- algorithm_config: Union[
- LoraFinetuningConfig, QLoraFinetuningConfig, DoraFinetuningConfig
- ]
-
- optimizer_config: OptimizerConfig
- training_config: TrainingConfig
-
- # TODO: define these
- hyperparam_search_config: Dict[str, Any]
- logger_config: Dict[str, Any]
-
-
-@json_schema_type
-@dataclass
-class PostTrainingRLHFRequest:
- """Request to finetune a model."""
-
- job_uuid: str
-
- finetuned_model: URL
-
- dataset: Dataset
- validation_dataset: Dataset
-
- algorithm: RLHFAlgorithm
- algorithm_config: Union[DPOAlignmentConfig]
-
- optimizer_config: OptimizerConfig
- training_config: TrainingConfig
-
- # TODO: define these
- hyperparam_search_config: Dict[str, Any]
- logger_config: Dict[str, Any]
-
-
-@json_schema_type
-@dataclass
-class PostTrainingJobStatusResponse:
- """Status of a finetuning job."""
-
- job_uuid: str
- status: PostTrainingJobStatus
-
- scheduled_at: Optional[datetime] = None
- started_at: Optional[datetime] = None
- completed_at: Optional[datetime] = None
-
- resources_allocated: Optional[Dict[str, Any]] = None
-
- checkpoints: List[Checkpoint] = field(default_factory=list)
-
-
-@json_schema_type
-@dataclass
-class PostTrainingJobArtifactsResponse:
- """Artifacts of a finetuning job."""
-
- job_uuid: str
- checkpoints: List[Checkpoint] = field(default_factory=list)
-
- # TODO(ashwin): metrics, evals
-
-
-class PostTraining(Protocol):
- @webmethod(route="/post_training/supervised_fine_tune/")
- def post_supervised_fine_tune(
- self,
- request: PostTrainingSFTRequest,
- ) -> None: ...
-
- @webmethod(route="/post_training/preference_optimize/")
- def post_preference_optimize(
- self,
- request: PostTrainingRLHFRequest,
- ) -> None: ...
-
- # sends SSE stream of logs
- @webmethod(route="/post_training/job/logs")
- def get_training_log_stream(self, job_uuid: str) -> PostTrainingJobLogStream: ...
-
- @webmethod(route="/post_training/job/status")
- def get_training_job_status(
- self, job_uuid: str
- ) -> PostTrainingJobStatusResponse: ...
-
- @webmethod(route="/post_training/job/cancel")
- def cancel_training_job(self, job_uuid: str) -> None: ...
-
- @webmethod(route="/post_training/job/artifacts")
- def get_training_job_artifacts(
- self, job_uuid: str
- ) -> PostTrainingJobArtifactsResponse: ...
-
-
-class LlamaStackEndpoints(
- Inference,
- AgenticSystem,
- RewardScoring,
- SyntheticDataGeneration,
- Datasets,
- PostTraining,
- MemoryBanks,
-): ...
-
-
-if __name__ == "__main__":
- print("Converting the spec to YAML (openapi.yaml) and HTML (openapi.html)")
- spec = Specification(
- LlamaStackEndpoints,
- Options(
- server=Server(url="http://any-hosted-llama-stack.com"),
- info=Info(
- title="[DRAFT] Llama Stack Specification",
- version="0.0.1",
- description="""This is the specification of the llama stack that provides
- a set of endpoints and their corresponding interfaces that are tailored to
- best leverage Llama Models. The specification is still in draft and subject to change.""",
- ),
- ),
- )
- with open("openapi.yaml", "w", encoding="utf-8") as fp:
- yaml.dump(spec.get_json(), fp, allow_unicode=True)
-
- with open("openapi.html", "w") as fp:
- spec.write_html(fp, pretty_print=True)
diff --git a/source/client.py b/source/client.py
deleted file mode 100644
index 222845b91..000000000
--- a/source/client.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import requests
-from dataclasses import dataclass, field, asdict
-from typing import List, Set, Optional, Union, Protocol
-from enum import Enum
-
-import json
-
-from model_types import *
-from agentic_system_types import *
-from api_definitions import *
-
-class EnumEncoder(json.JSONEncoder):
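-    """Serialize Enum members by value and sets as lists for JSON output."""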
- def default(self, obj):
- if isinstance(obj, Enum):
- return obj.value
- elif isinstance(obj, set):
- return list(obj)
- return json.JSONEncoder.default(self, obj)
-
-
-class AgenticSystemClient:
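-    """Minimal HTTP client for the agentic_system create and execute endpoints."""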
- def __init__(self, base_url: str):
- self.base_url = base_url
-
- def create_agentic_system(self, request: AgenticSystemCreateRequest) -> AgenticSystemCreateResponse:
- response = requests.post(f"{self.base_url}/agentic_system/create", data=json.dumps(asdict(request), cls=EnumEncoder), headers={'Content-Type': 'application/json'})
- response.raise_for_status()
- return AgenticSystemCreateResponse(**response.json())
-
- def execute_agentic_system(self, request: AgenticSystemExecuteRequest) -> Union[AgenticSystemExecuteResponse, AgenticSystemExecuteResponseStreamChunk]:
- response = requests.post(f"{self.base_url}/agentic_system/execute", data=json.dumps(asdict(request), cls=EnumEncoder), headers={'Content-Type': 'application/json'})
- response.raise_for_status()
- response_json = response.json()
- if 'turn' in response_json:
- return AgenticSystemExecuteResponse(**response_json)
- else:
- return AgenticSystemExecuteResponseStreamChunk(**response_json)
-
-# Example usage
-if __name__ == "__main__":
- client = AgenticSystemClient("http://localhost:5000")
-
- # Create a new agentic system
-    create_request = AgenticSystemCreateRequest(
-        uuid="example-agent-uuid",
-        instructions="Your instructions here",
-        model=InstructModel.llama3_8b_chat,
-    )
- create_response = client.create_agentic_system(create_request)
-    print("Agent UUID:", create_response.agent_uuid)
-
- # Execute the agentic system
- execute_request = AgenticSystemExecuteRequest(
-        agent_uuid=create_response.agent_uuid,
-        messages=[Message(role=Role.user, content="Tell me a joke")],
- turn_history=[],
- stream=False
- )
- execute_response = client.execute_agentic_system(execute_request)
- print("Execute Response:", execute_response)
diff --git a/source/codegen/openapi-generator-cli.jar b/source/codegen/openapi-generator-cli.jar
deleted file mode 100644
index 7102a497d..000000000
Binary files a/source/codegen/openapi-generator-cli.jar and /dev/null differ
diff --git a/source/create_code.sh b/source/create_code.sh
deleted file mode 100644
index fa927ba7d..000000000
--- a/source/create_code.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash
-
-set -euo pipefail
-set -x
-
-export JAVA_HOME=/usr/local/java-runtime/impl/11
-
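-# Generate a python-flask server stub from openapi.yaml into /tmp/foo.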
-$JAVA_HOME/bin/java -jar codegen/openapi-generator-cli.jar \
- generate \
- -i openapi.yaml \
- -g python-flask \
- -o /tmp/foo \
- --log-to-stderr \
- --global-property debugModels,debugOperations,debugOpenAPI,debugSupportingFiles
diff --git a/source/generate.sh b/source/generate.sh
deleted file mode 100644
index b30929cfb..000000000
--- a/source/generate.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/bash
-
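-# Regenerate openapi.yaml and openapi.html from the Python API definitions.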
-PYTHONPATH=. python3 api_definitions.py
diff --git a/source/model_types.py b/source/model_types.py
deleted file mode 100644
index 9e6e3dc4b..000000000
--- a/source/model_types.py
+++ /dev/null
@@ -1,149 +0,0 @@
-from dataclasses import dataclass, field
-from enum import Enum
-from typing import Any, Dict, List, Optional, Set, Union
-
-from strong_typing.schema import json_schema_type
-
-
-class ShieldType(Enum):
- """The type of safety shield."""
-
- llama_guard = "llama_guard"
- prompt_guard = "prompt_guard"
- code_guard = "code_guard"
-
-
-@json_schema_type
-@dataclass
-class ShieldConfig:
- shield_type: ShieldType
- params: Dict[str, Any] = field(default_factory=dict)
-
-
-@dataclass
-class SafetyViolation:
- violation_type: str
- details: str
- suggested_user_response: Optional[str] = None
-
-
-@json_schema_type(
- schema={"type": "string", "format": "uri", "pattern": "^(https?://|file://|data:)"}
-)
-@dataclass
-class URL:
- url: str
-
- def __str__(self) -> str:
- return self.url
-
-
-@json_schema_type
-@dataclass
-class Attachment:
- """
- Attachments are used to refer to external resources, such as images, videos, audio, etc.
-
- """
-
- url: URL
- mime_type: str
-
-# TODO(ashwin): make this better named maybe InterleavedTextMedia
-Content = Union[
- str,
- Attachment,
- List[Union[str, Attachment]],
-]
-
-
-class Role(Enum):
- system = "system"
- user = "user"
- assistant = "assistant"
- tool = "tool"
-
-
-@dataclass
-class ToolCall:
- """
- A tool call is a request to a tool.
- """
-
- tool_name: str
- arguments: Dict[str, Any]
-
-
-@dataclass
-class ToolResponse:
- tool_name: str
- content: Content
-
-
-# TODO: we need to document the parameters for the tool calls
-class BuiltinTool(Enum):
- web_search = "web_search"
- math = "math"
- image_gen = "image_gen"
- code_interpreter = "code_interpreter"
-
-
-@dataclass
-class ToolDefinition:
- tool_name: Union[BuiltinTool, str]
- parameters: Optional[Dict[str, Any]] = None
- input_shields: List[ShieldConfig] = field(default_factory=list)
- output_shields: List[ShieldConfig] = field(default_factory=list)
-
-
-class StopReason(Enum):
- """
- Stop reasons are used to indicate why the model stopped generating text.
- """
-
- not_stopped = "not_stopped"
- finished_ok = "finished_ok"
- max_tokens = "max_tokens"
-
-
-@json_schema_type
-@dataclass
-class Message:
- role: Role
-
- # input to the model or output from the model
- content: Content
-
- # output from the model
- tool_calls: List[ToolCall] = field(default_factory=list)
-
- # input to the model
- tool_responses: List[ToolResponse] = field(default_factory=list)
-
-
-@json_schema_type
-@dataclass
-class Dialog:
- message: Message
- message_history: List[Message] = None
-
-
-@dataclass
-class SamplingParams:
- temperature: float = 0.0
- strategy: str = "greedy"
- top_p: float = 0.95
- top_k: int = 0
-
-
-class PretrainedModel(Enum):
- llama3_8b = "llama3_8b"
- llama3_70b = "llama3_70b"
-
-
-class InstructModel(Enum):
- llama3_8b_chat = "llama3_8b_chat"
- llama3_70b_chat = "llama3_70b_chat"
-
-class RewardModel(Enum):
- llama3_405b_reward = "llama3_405b_reward"
diff --git a/source/openapi.html b/source/openapi.html
deleted file mode 100644
index a8b758635..000000000
--- a/source/openapi.html
+++ /dev/null
@@ -1,3597 +0,0 @@
- OpenAPI specification
diff --git a/source/openapi.yaml b/source/openapi.yaml
deleted file mode 100644
index 418cbb1b4..000000000
--- a/source/openapi.yaml
+++ /dev/null
@@ -1,2249 +0,0 @@
-components:
- responses: {}
- schemas:
- AgenticSystemCreateRequest:
- additionalProperties: false
- properties:
- available_tools:
- items:
- additionalProperties: false
- properties:
- input_shields:
- items:
- $ref: '#/components/schemas/ShieldConfig'
- type: array
- output_shields:
- items:
- $ref: '#/components/schemas/ShieldConfig'
- type: array
- parameters:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- tool_name:
- oneOf:
- - enum:
- - web_search
- - math
- - image_gen
- - code_interpreter
- type: string
- - type: string
- required:
- - tool_name
- - input_shields
- - output_shields
- type: object
- type: array
- executable_tools:
- items:
- type: string
- type: array
- uniqueItems: true
- input_shields:
- items:
- $ref: '#/components/schemas/ShieldConfig'
- type: array
- instructions:
- type: string
- memory_bank_uuids:
- items:
- type: string
- type: array
- model:
- enum:
- - llama3_8b_chat
- - llama3_70b_chat
- type: string
- output_shields:
- items:
- $ref: '#/components/schemas/ShieldConfig'
- type: array
- uuid:
- type: string
- required:
- - uuid
- - instructions
- - model
- - available_tools
- - executable_tools
- - memory_bank_uuids
- - input_shields
- - output_shields
- type: object
- AgenticSystemCreateResponse:
- additionalProperties: false
- properties:
- agent_uuid:
- type: string
- required:
- - agent_uuid
- type: object
- AgenticSystemExecuteRequest:
- additionalProperties: false
- properties:
- agent_uuid:
- type: string
- messages:
- items:
- $ref: '#/components/schemas/Message'
- type: array
- stream:
- default: false
- type: boolean
- turn_history:
- items:
- $ref: '#/components/schemas/AgenticSystemTurn'
- type: array
- required:
- - agent_uuid
- - messages
- - turn_history
- - stream
- type: object
- AgenticSystemExecuteResponse:
- additionalProperties: false
- properties:
- turn:
- $ref: '#/components/schemas/AgenticSystemTurn'
- required:
- - turn
- title: non-stream response from the agentic system.
- type: object
- AgenticSystemExecuteResponseStreamChunk:
- additionalProperties: false
- properties:
- event_type:
- enum:
- - step_start
- - step_end
- - step_progress
- title: The type of event.
- type: string
- response_text_delta:
- type: string
- retrieved_document:
- additionalProperties: false
- properties:
- content:
- contentEncoding: base64
- type: string
- metadata:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- mime_type:
- type: string
- uuid:
- type: string
- required:
- - uuid
- - content
- - metadata
- - mime_type
- type: object
- step_type:
- enum:
- - model_inference
- - tool_execution
- - safety_filtering
- - memory_retrieval
- title: The type of execution step.
- type: string
- step_uuid:
- type: string
- stop_reason:
- enum:
- - not_stopped
- - finished_ok
- - max_tokens
- title: Stop reasons are used to indicate why the model stopped generating
- text.
- type: string
- tool_call:
- additionalProperties: false
- properties:
- arguments:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- tool_name:
- type: string
- required:
- - tool_name
- - arguments
- title: A tool call is a request to a tool.
- type: object
- tool_response_delta:
- additionalProperties: false
- properties:
- content:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
- - items:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
- type: array
- tool_name:
- type: string
- required:
- - tool_name
- - content
- type: object
- violation:
- additionalProperties: false
- properties:
- details:
- type: string
- suggested_user_response:
- type: string
- violation_type:
- type: string
- required:
- - violation_type
- - details
- type: object
- required:
- - event_type
- - step_uuid
- - step_type
- title: Streamed agent execution response.
- type: object
- AgenticSystemTurn:
- additionalProperties: false
- properties:
- response_message:
- $ref: '#/components/schemas/Message'
- steps:
- items:
- oneOf:
- - additionalProperties: false
- properties:
- logprobs:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- step_type:
- default: model_inference
- enum:
- - model_inference
- - tool_execution
- - safety_filtering
- - memory_retrieval
- title: The type of execution step.
- type: string
- text:
- type: string
- uuid:
- type: string
- required:
- - step_type
- - uuid
- - text
- type: object
- - additionalProperties: false
- properties:
- step_type:
- default: tool_execution
- enum:
- - model_inference
- - tool_execution
- - safety_filtering
- - memory_retrieval
- title: The type of execution step.
- type: string
- tool_calls:
- items:
- additionalProperties: false
- properties:
- arguments:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- tool_name:
- type: string
- required:
- - tool_name
- - arguments
- title: A tool call is a request to a tool.
- type: object
- type: array
- tool_responses:
- items:
- additionalProperties: false
- properties:
- content:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
- - items:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
- type: array
- tool_name:
- type: string
- required:
- - tool_name
- - content
- type: object
- type: array
- uuid:
- type: string
- required:
- - step_type
- - uuid
- - tool_calls
- - tool_responses
- type: object
- - additionalProperties: false
- properties:
- step_type:
- default: safety_filtering
- enum:
- - model_inference
- - tool_execution
- - safety_filtering
- - memory_retrieval
- title: The type of execution step.
- type: string
- uuid:
- type: string
- violation:
- additionalProperties: false
- properties:
- details:
- type: string
- suggested_user_response:
- type: string
- violation_type:
- type: string
- required:
- - violation_type
- - details
- type: object
- required:
- - step_type
- - uuid
- type: object
- - additionalProperties: false
- properties:
- documents:
- items:
- additionalProperties: false
- properties:
- content:
- contentEncoding: base64
- type: string
- metadata:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- mime_type:
- type: string
- uuid:
- type: string
- required:
- - uuid
- - content
- - metadata
- - mime_type
- type: object
- type: array
- scores:
- items:
- type: number
- type: array
- step_type:
- default: memory_retrieval
- enum:
- - model_inference
- - tool_execution
- - safety_filtering
- - memory_retrieval
- title: The type of execution step.
- type: string
- uuid:
- type: string
- required:
- - step_type
- - uuid
- - documents
- - scores
- type: object
- type: array
- user_messages:
- items:
- $ref: '#/components/schemas/Message'
- type: array
- required:
- - user_messages
- - steps
- - response_message
- title: A single turn in an interaction with an Agentic System.
- type: object
- Attachment:
- additionalProperties: false
- properties:
- mime_type:
- type: string
- url:
- $ref: '#/components/schemas/URL'
- required:
- - url
- - mime_type
- title: Attachments are used to refer to external resources, such as images,
- videos, audio, etc.
- type: object
- BatchChatCompletionRequest:
- additionalProperties: false
- properties:
- available_tools:
- items:
- additionalProperties: false
- properties:
- input_shields:
- items:
- $ref: '#/components/schemas/ShieldConfig'
- type: array
- output_shields:
- items:
- $ref: '#/components/schemas/ShieldConfig'
- type: array
- parameters:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- tool_name:
- oneOf:
- - enum:
- - web_search
- - math
- - image_gen
- - code_interpreter
- type: string
- - type: string
- required:
- - tool_name
- - input_shields
- - output_shields
- type: object
- type: array
- batch_dialogs:
- items:
- $ref: '#/components/schemas/Dialog'
- type: array
- logprobs:
- default: false
- type: boolean
- max_tokens:
- default: 0
- type: integer
- model:
- enum:
- - llama3_8b_chat
- - llama3_70b_chat
- type: string
- sampling_params:
- additionalProperties: false
- properties:
- strategy:
- default: greedy
- type: string
- temperature:
- default: 0.0
- type: number
- top_k:
- default: 0
- type: integer
- top_p:
- default: 0.95
- type: number
- required:
- - temperature
- - strategy
- - top_p
- - top_k
- type: object
- required:
- - model
- - batch_dialogs
- - sampling_params
- - available_tools
- - max_tokens
- - logprobs
- type: object
- BatchCompletionRequest:
- additionalProperties: false
- properties:
- content_batch:
- items:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
- - items:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
- type: array
- type: array
- logprobs:
- default: false
- type: boolean
- max_tokens:
- default: 0
- type: integer
- model:
- enum:
- - llama3_8b
- - llama3_70b
- type: string
- sampling_params:
- additionalProperties: false
- properties:
- strategy:
- default: greedy
- type: string
- temperature:
- default: 0.0
- type: number
- top_k:
- default: 0
- type: integer
- top_p:
- default: 0.95
- type: number
- required:
- - temperature
- - strategy
- - top_p
- - top_k
- type: object
- required:
- - model
- - content_batch
- - sampling_params
- - max_tokens
- - logprobs
- type: object
- ChatCompletionRequest:
- additionalProperties: false
- properties:
- available_tools:
- items:
- additionalProperties: false
- properties:
- input_shields:
- items:
- $ref: '#/components/schemas/ShieldConfig'
- type: array
- output_shields:
- items:
- $ref: '#/components/schemas/ShieldConfig'
- type: array
- parameters:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- tool_name:
- oneOf:
- - enum:
- - web_search
- - math
- - image_gen
- - code_interpreter
- type: string
- - type: string
- required:
- - tool_name
- - input_shields
- - output_shields
- type: object
- type: array
- dialog:
- $ref: '#/components/schemas/Dialog'
- logprobs:
- default: false
- type: boolean
- max_tokens:
- default: 0
- type: integer
- model:
- enum:
- - llama3_8b_chat
- - llama3_70b_chat
- type: string
- sampling_params:
- additionalProperties: false
- properties:
- strategy:
- default: greedy
- type: string
- temperature:
- default: 0.0
- type: number
- top_k:
- default: 0
- type: integer
- top_p:
- default: 0.95
- type: number
- required:
- - temperature
- - strategy
- - top_p
- - top_k
- type: object
- stream:
- default: false
- type: boolean
- required:
- - model
- - dialog
- - sampling_params
- - available_tools
- - max_tokens
- - stream
- - logprobs
- type: object
- ChatCompletionResponse:
- additionalProperties: false
- properties:
- content:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
- - items:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
- type: array
- logprobs:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- stop_reason:
- enum:
- - not_stopped
- - finished_ok
- - max_tokens
- title: Stop reasons are used to indicate why the model stopped generating
- text.
- type: string
- tool_calls:
- items:
- additionalProperties: false
- properties:
- arguments:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- tool_name:
- type: string
- required:
- - tool_name
- - arguments
- title: A tool call is a request to a tool.
- type: object
- type: array
- required:
- - content
- - tool_calls
- title: Normal chat completion response.
- type: object
- ChatCompletionResponseStreamChunk:
- additionalProperties: false
- properties:
- stop_reason:
- enum:
- - not_stopped
- - finished_ok
- - max_tokens
- title: Stop reasons are used to indicate why the model stopped generating
- text.
- type: string
- text_delta:
- type: string
- tool_call:
- additionalProperties: false
- properties:
- arguments:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- tool_name:
- type: string
- required:
- - tool_name
- - arguments
- title: A tool call is a request to a tool.
- type: object
- required:
- - text_delta
- title: Streamed chat completion response. The actual response is a series of
- such objects.
- type: object
- CompletionRequest:
- additionalProperties: false
- properties:
- content:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
- - items:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
- type: array
- logprobs:
- default: false
- type: boolean
- max_tokens:
- default: 0
- type: integer
- model:
- enum:
- - llama3_8b
- - llama3_70b
- type: string
- sampling_params:
- additionalProperties: false
- properties:
- strategy:
- default: greedy
- type: string
- temperature:
- default: 0.0
- type: number
- top_k:
- default: 0
- type: integer
- top_p:
- default: 0.95
- type: number
- required:
- - temperature
- - strategy
- - top_p
- - top_k
- type: object
- stream:
- default: false
- type: boolean
- required:
- - content
- - model
- - sampling_params
- - max_tokens
- - stream
- - logprobs
- type: object
- CompletionResponse:
- additionalProperties: false
- properties:
- content:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
- - items:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
- type: array
- logprobs:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- stop_reason:
- enum:
- - not_stopped
- - finished_ok
- - max_tokens
- title: Stop reasons are used to indicate why the model stopped generating
- text.
- type: string
- required:
- - content
- title: Normal completion response.
- type: object
- CompletionResponseStreamChunk:
- additionalProperties: false
- properties:
- logprobs:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- stop_reason:
- enum:
- - not_stopped
- - finished_ok
- - max_tokens
- title: Stop reasons are used to indicate why the model stopped generating
- text.
- type: string
- text_delta:
- type: string
- required:
- - text_delta
- title: streamed completion response.
- type: object
- CreateDatasetRequest:
- additionalProperties: false
- properties:
- dataset:
- $ref: '#/components/schemas/Dataset'
- uuid:
- type: string
- required:
- - uuid
- - dataset
- title: Request to create a dataset.
- type: object
- DPOAlignmentConfig:
- additionalProperties: false
- properties:
- epsilon:
- type: number
- gamma:
- type: number
- reward_clip:
- type: number
- reward_scale:
- type: number
- required:
- - reward_scale
- - reward_clip
- - epsilon
- - gamma
- type: object
- Dataset:
- additionalProperties: false
- properties:
- columns:
- additionalProperties:
- enum:
- - dialog
- - text
- - media
- - number
- - json
- type: string
- type: object
- content_url:
- $ref: '#/components/schemas/URL'
- metadata:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- required:
- - columns
- - content_url
- - metadata
- title: Dataset to be used for training or evaluating language models.
- type: object
- Dialog:
- additionalProperties: false
- properties:
- message:
- $ref: '#/components/schemas/Message'
- message_history:
- items:
- $ref: '#/components/schemas/Message'
- type: array
- required:
- - message
- - message_history
- type: object
- DoraFinetuningConfig:
- additionalProperties: false
- properties:
- alpha:
- type: integer
- apply_lora_to_mlp:
- type: boolean
- apply_lora_to_output:
- type: boolean
- lora_attn_modules:
- items:
- type: string
- type: array
- rank:
- type: integer
- required:
- - lora_attn_modules
- - apply_lora_to_mlp
- - apply_lora_to_output
- - rank
- - alpha
- type: object
- KScoredPromptGenerations:
- additionalProperties: false
- properties:
- k_scored_generations:
- items:
- $ref: '#/components/schemas/ScoredMessage'
- type: array
- prompt:
- $ref: '#/components/schemas/Message'
- required:
- - prompt
- - k_scored_generations
- type: object
- LoraFinetuningConfig:
- additionalProperties: false
- properties:
- alpha:
- type: integer
- apply_lora_to_mlp:
- type: boolean
- apply_lora_to_output:
- type: boolean
- lora_attn_modules:
- items:
- type: string
- type: array
- rank:
- type: integer
- required:
- - lora_attn_modules
- - apply_lora_to_mlp
- - apply_lora_to_output
- - rank
- - alpha
- type: object
- MemoryBank:
- additionalProperties: false
- properties:
- name:
- type: string
- uuid:
- type: string
- required:
- - uuid
- - name
- type: object
- MemoryBankDocument:
- additionalProperties: false
- properties:
- content:
- contentEncoding: base64
- type: string
- metadata:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- mime_type:
- type: string
- uuid:
- type: string
- required:
- - uuid
- - content
- - metadata
- - mime_type
- type: object
- Message:
- additionalProperties: false
- properties:
- content:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
- - items:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
- type: array
- role:
- enum:
- - system
- - user
- - assistant
- - tool
- type: string
- tool_calls:
- items:
- additionalProperties: false
- properties:
- arguments:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- tool_name:
- type: string
- required:
- - tool_name
- - arguments
- title: A tool call is a request to a tool.
- type: object
- type: array
- tool_responses:
- items:
- additionalProperties: false
- properties:
- content:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
- - items:
- oneOf:
- - type: string
- - $ref: '#/components/schemas/Attachment'
- type: array
- tool_name:
- type: string
- required:
- - tool_name
- - content
- type: object
- type: array
- required:
- - role
- - content
- - tool_calls
- - tool_responses
- type: object
- OptimizerConfig:
- additionalProperties: false
- properties:
- lr:
- type: number
- lr_min:
- type: number
- optimizer_type:
- enum:
- - adam
- - adamw
- - sgd
- type: string
- weight_decay:
- type: number
- required:
- - optimizer_type
- - lr
- - lr_min
- - weight_decay
- type: object
- PostTrainingJobArtifactsResponse:
- additionalProperties: false
- properties:
- checkpoints:
- items:
- additionalProperties: false
- properties:
- iters:
- type: integer
- path:
- $ref: '#/components/schemas/URL'
- required:
- - iters
- - path
- type: object
- type: array
- job_uuid:
- type: string
- required:
- - job_uuid
- - checkpoints
- title: Artifacts of a finetuning job.
- type: object
- PostTrainingJobLogStream:
- additionalProperties: false
- properties:
- job_uuid:
- type: string
- log_lines:
- items:
- type: string
- type: array
- required:
- - job_uuid
- - log_lines
- title: Stream of logs from a finetuning job.
- type: object
- PostTrainingJobStatusResponse:
- additionalProperties: false
- properties:
- checkpoints:
- items:
- additionalProperties: false
- properties:
- iters:
- type: integer
- path:
- $ref: '#/components/schemas/URL'
- required:
- - iters
- - path
- type: object
- type: array
- completed_at:
- format: date-time
- type: string
- job_uuid:
- type: string
- resources_allocated:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- scheduled_at:
- format: date-time
- type: string
- started_at:
- format: date-time
- type: string
- status:
- enum:
- - running
- - completed
- - failed
- - scheduled
- type: string
- required:
- - job_uuid
- - status
- - checkpoints
- title: Status of a finetuning job.
- type: object
- PostTrainingRLHFRequest:
- additionalProperties: false
- properties:
- algorithm:
- enum:
- - dpo
- type: string
- algorithm_config:
- $ref: '#/components/schemas/DPOAlignmentConfig'
- dataset:
- $ref: '#/components/schemas/Dataset'
- finetuned_model:
- $ref: '#/components/schemas/URL'
- hyperparam_search_config:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- job_uuid:
- type: string
- logger_config:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- optimizer_config:
- $ref: '#/components/schemas/OptimizerConfig'
- training_config:
- $ref: '#/components/schemas/TrainingConfig'
- validation_dataset:
- $ref: '#/components/schemas/Dataset'
- required:
- - job_uuid
- - finetuned_model
- - dataset
- - validation_dataset
- - algorithm
- - algorithm_config
- - optimizer_config
- - training_config
- - hyperparam_search_config
- - logger_config
- title: Request to finetune a model.
- type: object
- PostTrainingSFTRequest:
- additionalProperties: false
- properties:
- algorithm:
- enum:
- - full
- - lora
- - qlora
- - dora
- type: string
- algorithm_config:
- oneOf:
- - $ref: '#/components/schemas/LoraFinetuningConfig'
- - $ref: '#/components/schemas/QLoraFinetuningConfig'
- - $ref: '#/components/schemas/DoraFinetuningConfig'
- dataset:
- $ref: '#/components/schemas/Dataset'
- hyperparam_search_config:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- job_uuid:
- type: string
- logger_config:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- model:
- enum:
- - llama3_8b
- - llama3_70b
- type: string
- optimizer_config:
- $ref: '#/components/schemas/OptimizerConfig'
- training_config:
- $ref: '#/components/schemas/TrainingConfig'
- validation_dataset:
- $ref: '#/components/schemas/Dataset'
- required:
- - job_uuid
- - model
- - dataset
- - validation_dataset
- - algorithm
- - algorithm_config
- - optimizer_config
- - training_config
- - hyperparam_search_config
- - logger_config
- title: Request to finetune a model.
- type: object
- QLoraFinetuningConfig:
- additionalProperties: false
- properties:
- alpha:
- type: integer
- apply_lora_to_mlp:
- type: boolean
- apply_lora_to_output:
- type: boolean
- lora_attn_modules:
- items:
- type: string
- type: array
- rank:
- type: integer
- required:
- - lora_attn_modules
- - apply_lora_to_mlp
- - apply_lora_to_output
- - rank
- - alpha
- type: object
- RewardScoringRequest:
- additionalProperties: false
- properties:
- model:
- enum:
- - llama3_405b_reward
- type: string
- prompt_generations:
- items:
- additionalProperties: false
- properties:
- dialog:
- $ref: '#/components/schemas/Dialog'
- k_generations:
- items:
- $ref: '#/components/schemas/Message'
- type: array
- required:
- - dialog
- - k_generations
- type: object
- type: array
- required:
- - prompt_generations
- - model
- title: Request to score a reward function. A list of prompts and a list of responses
- per prompt.
- type: object
- RewardScoringResponse:
- additionalProperties: false
- properties:
- scored_generations:
- items:
- $ref: '#/components/schemas/KScoredPromptGenerations'
- type: array
- required:
- - scored_generations
- title: Response from the reward scoring. Batch of (prompt, response, score)
- tuples that pass the threshold.
- type: object
- ScoredMessage:
- additionalProperties: false
- properties:
- message:
- $ref: '#/components/schemas/Message'
- score:
- type: number
- required:
- - message
- - score
- type: object
- ShieldConfig:
- additionalProperties: false
- properties:
- params:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- shield_type:
- enum:
- - llama_guard
- - prompt_guard
- - code_guard
- title: The type of safety shield.
- type: string
- required:
- - shield_type
- - params
- type: object
- SyntheticDataGenerationRequest:
- additionalProperties: false
- properties:
- filtering_function:
- default: none
- enum:
- - none
- - random
- - top_k
- - top_p
- - top_k_top_p
- - sigmoid
- title: The type of filtering function.
- type: string
- prompts:
- items:
- $ref: '#/components/schemas/Message'
- type: array
- reward_scoring:
- type: object
- required:
- - prompts
- - filtering_function
- title: Request to generate synthetic data. A small batch of prompts and a filtering
- function
- type: object
- SyntheticDataGenerationResponse:
- additionalProperties: false
- properties:
- statistics:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- synthetic_data:
- items:
- $ref: '#/components/schemas/KScoredPromptGenerations'
- type: array
- required:
- - synthetic_data
- title: Response from the synthetic data generation. Batch of (prompt, response,
- score) tuples that pass the threshold.
- type: object
- TrainingConfig:
- additionalProperties: false
- properties:
- batch_size:
- type: integer
- enable_activation_checkpointing:
- type: boolean
- fsdp_cpu_offload:
- type: boolean
- memory_efficient_fsdp_wrap:
- type: boolean
- n_epochs:
- type: integer
- n_iters:
- type: integer
- shuffle:
- type: boolean
- required:
- - n_epochs
- - batch_size
- - shuffle
- - n_iters
- - enable_activation_checkpointing
- - memory_efficient_fsdp_wrap
- - fsdp_cpu_offload
- type: object
- URL:
- format: uri
- pattern: ^(https?://|file://|data:)
- type: string
-info:
- description: "This is the specification of the llama stack that provides \n \
- \ a set of endpoints and their corresponding interfaces that are tailored\
- \ to \n best leverage Llama Models. The specification is still\
- \ in draft and subject to change."
- title: '[DRAFT] Llama Stack Specification'
- version: 0.0.1
-jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
-openapi: 3.1.0
-paths:
- /agentic_system/create:
- post:
- parameters: []
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/AgenticSystemCreateRequest'
- required: true
- responses:
- '200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/AgenticSystemCreateResponse'
- description: OK
- tags:
- - AgenticSystem
- /agentic_system/delete:
- delete:
- parameters:
- - in: query
- name: agent_id
- required: true
- schema:
- type: string
- responses:
- '200':
- description: OK
- tags:
- - AgenticSystem
- /agentic_system/execute:
- post:
- parameters: []
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/AgenticSystemExecuteRequest'
- required: true
- responses:
- '200':
- content:
- application/json:
- schema:
- oneOf:
- - $ref: '#/components/schemas/AgenticSystemExecuteResponse'
- - $ref: '#/components/schemas/AgenticSystemExecuteResponseStreamChunk'
- description: non-stream response from the agentic system. **OR** Streamed
- agent execution response.
- tags:
- - AgenticSystem
- /datasets/create:
- post:
- parameters: []
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/CreateDatasetRequest'
- required: true
- responses:
- '200':
- description: OK
- tags:
- - Datasets
- /datasets/delete:
- delete:
- parameters:
- - in: query
- name: dataset_id
- required: true
- schema:
- type: string
- responses:
- '200':
- description: OK
- tags:
- - Datasets
- /datasets/get:
- get:
- parameters:
- - in: query
- name: dataset_id
- required: true
- schema:
- type: string
- responses:
- '200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/Dataset'
- description: OK
- tags:
- - Datasets
- /inference/batch_chat_completion:
- post:
- parameters: []
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/BatchChatCompletionRequest'
- required: true
- responses:
- '200':
- content:
- application/jsonl:
- schema:
- $ref: '#/components/schemas/ChatCompletionResponse'
- description: OK
- tags:
- - Inference
- /inference/batch_completion:
- post:
- parameters: []
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/BatchCompletionRequest'
- required: true
- responses:
- '200':
- content:
- application/jsonl:
- schema:
- $ref: '#/components/schemas/CompletionResponse'
- description: OK
- tags:
- - Inference
- /inference/chat_completion:
- post:
- parameters: []
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/ChatCompletionRequest'
- required: true
- responses:
- '200':
- content:
- application/json:
- schema:
- oneOf:
- - $ref: '#/components/schemas/ChatCompletionResponse'
- - $ref: '#/components/schemas/ChatCompletionResponseStreamChunk'
- description: Normal chat completion response. **OR** Streamed chat completion
- response. The actual response is a series of such objects.
- tags:
- - Inference
- /inference/completion:
- post:
- parameters: []
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/CompletionRequest'
- required: true
- responses:
- '200':
- content:
- application/json:
- schema:
- oneOf:
- - $ref: '#/components/schemas/CompletionResponse'
- - $ref: '#/components/schemas/CompletionResponseStreamChunk'
- description: Normal completion response. **OR** streamed completion response.
- tags:
- - Inference
- /memory_bank/delete:
- post:
- parameters:
- - in: query
- name: bank_uuid
- required: true
- schema:
- type: string
- requestBody:
- content:
- application/json:
- schema:
- items:
- type: string
- type: array
- required: true
- responses:
- '200':
- content:
- application/jsonl:
- schema:
- type: string
- description: OK
- tags:
- - MemoryBanks
- /memory_bank/get:
- post:
- parameters:
- - in: query
- name: bank_uuid
- required: true
- schema:
- type: string
- requestBody:
- content:
- application/json:
- schema:
- items:
- type: string
- type: array
- required: true
- responses:
- '200':
- content:
- application/jsonl:
- schema:
- $ref: '#/components/schemas/MemoryBankDocument'
- description: OK
- tags:
- - MemoryBanks
- /memory_bank/insert:
- post:
- parameters:
- - in: query
- name: bank_uuid
- required: true
- schema:
- type: string
- requestBody:
- content:
- application/json:
- schema:
- items:
- additionalProperties: false
- properties:
- content:
- contentEncoding: base64
- type: string
- metadata:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- mime_type:
- type: string
- uuid:
- type: string
- required:
- - uuid
- - content
- - metadata
- - mime_type
- type: object
- type: array
- required: true
- responses:
- '200':
- description: OK
- tags:
- - MemoryBanks
- /memory_bank/update:
- post:
- parameters:
- - in: query
- name: bank_uuid
- required: true
- schema:
- type: string
- requestBody:
- content:
- application/json:
- schema:
- items:
- additionalProperties: false
- properties:
- content:
- contentEncoding: base64
- type: string
- metadata:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- mime_type:
- type: string
- uuid:
- type: string
- required:
- - uuid
- - content
- - metadata
- - mime_type
- type: object
- type: array
- required: true
- responses:
- '200':
- description: OK
- tags:
- - MemoryBanks
- /memory_banks/create:
- post:
- parameters:
- - in: query
- name: bank_uuid
- required: true
- schema:
- type: string
- - in: query
- name: bank_name
- required: true
- schema:
- type: string
- requestBody:
- content:
- application/json:
- schema:
- items:
- additionalProperties: false
- properties:
- content:
- contentEncoding: base64
- type: string
- metadata:
- additionalProperties:
- oneOf:
- - type: 'null'
- - type: boolean
- - type: number
- - type: string
- - type: array
- - type: object
- type: object
- mime_type:
- type: string
- uuid:
- type: string
- required:
- - uuid
- - content
- - metadata
- - mime_type
- type: object
- type: array
- required: true
- responses:
- '200':
- description: OK
- tags:
- - MemoryBanks
- /memory_banks/drop:
- delete:
- parameters:
- - in: query
- name: bank_uuid
- required: true
- schema:
- type: string
- responses:
- '200':
- content:
- application/json:
- schema:
- type: string
- description: OK
- tags:
- - MemoryBanks
- /memory_banks/get:
- get:
- parameters: []
- responses:
- '200':
- content:
- application/jsonl:
- schema:
- $ref: '#/components/schemas/MemoryBank'
- description: OK
- tags:
- - MemoryBanks
- /post_training/job/artifacts:
- get:
- parameters:
- - in: query
- name: job_uuid
- required: true
- schema:
- type: string
- responses:
- '200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/PostTrainingJobArtifactsResponse'
- description: OK
- tags:
- - PostTraining
- /post_training/job/logs:
- get:
- parameters:
- - in: query
- name: job_uuid
- required: true
- schema:
- type: string
- responses:
- '200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/PostTrainingJobLogStream'
- description: OK
- tags:
- - PostTraining
- /post_training/job/status:
- get:
- parameters:
- - in: query
- name: job_uuid
- required: true
- schema:
- type: string
- responses:
- '200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/PostTrainingJobStatusResponse'
- description: OK
- tags:
- - PostTraining
- /post_training/preference_optimize/:
- post:
- parameters: []
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/PostTrainingRLHFRequest'
- required: true
- responses:
- '200':
- description: OK
- tags:
- - PostTraining
- /post_training/supervised_fine_tune/:
- post:
- parameters: []
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/PostTrainingSFTRequest'
- required: true
- responses:
- '200':
- description: OK
- tags:
- - PostTraining
- /reward_scoring/score:
- post:
- parameters: []
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/RewardScoringRequest'
- required: true
- responses:
- '200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/RewardScoringResponse'
- description: OK
- tags:
- - RewardScoring
- /synthetic_data_generation/generate:
- post:
- parameters: []
- requestBody:
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/SyntheticDataGenerationRequest'
- required: true
- responses:
- '200':
- content:
- application/json:
- schema:
- $ref: '#/components/schemas/SyntheticDataGenerationResponse'
- description: OK
- tags:
- - SyntheticDataGeneration
-security:
-- Default: []
-servers:
-- url: http://any-hosted-llama-stack.com
-tags:
-- name: Datasets
-- name: SyntheticDataGeneration
-- name: Inference
-- name: MemoryBanks
-- name: RewardScoring
-- name: AgenticSystem
-- name: PostTraining
-- description:
- name: ShieldConfig
-- description:
- name: AgenticSystemCreateRequest
-- description:
- name: AgenticSystemCreateResponse
-- description:
- name: AgenticSystemExecuteRequest
-- description: 'A single turn in an interaction with an Agentic System.
-
-
- '
- name: AgenticSystemTurn
-- description: 'Attachments are used to refer to external resources, such as images,
- videos, audio, etc.
-
-
- '
- name: Attachment
-- description:
- name: Message
-- description:
- name: URL
-- description: 'Non-streaming response from the agentic system.
-
-
- '
- name: AgenticSystemExecuteResponse
-- description: 'Streamed agent execution response.
-
-
- '
- name: AgenticSystemExecuteResponseStreamChunk
-- description: 'Request to create a dataset.
-
-
- '
- name: CreateDatasetRequest
-- description: 'Dataset to be used for training or evaluating language models.
-
-
- '
- name: Dataset
-- description:
- name: MemoryBank
-- description:
- name: MemoryBankDocument
-- description: 'Artifacts of a finetuning job.
-
-
- '
- name: PostTrainingJobArtifactsResponse
-- description: 'Status of a finetuning job.
-
-
- '
- name: PostTrainingJobStatusResponse
-- description: 'Stream of logs from a finetuning job.
-
-
- '
- name: PostTrainingJobLogStream
-- description:
- name: BatchChatCompletionRequest
-- description:
- name: Dialog
-- description: 'Normal chat completion response.
-
-
- '
- name: ChatCompletionResponse
-- description:
- name: BatchCompletionRequest
-- description: 'Normal completion response.
-
-
- '
- name: CompletionResponse
-- description:
- name: ChatCompletionRequest
-- description: 'Streamed chat completion response. The actual response is a series
- of such objects.
-
-
- '
- name: ChatCompletionResponseStreamChunk
-- description:
- name: CompletionRequest
-- description: 'Streamed completion response.
-
-
- '
- name: CompletionResponseStreamChunk
-- description: 'Request to generate synthetic data. A small batch of prompts and a
- filtering function.
-
-
- '
- name: SyntheticDataGenerationRequest
-- description:
- name: KScoredPromptGenerations
-- description:
- name: ScoredMessage
-- description: 'Response from the synthetic data generation. Batch of (prompt, response,
- score) tuples that pass the threshold.
-
-
- '
- name: SyntheticDataGenerationResponse
-- description:
- name: DPOAlignmentConfig
-- description:
- name: OptimizerConfig
-- description: 'Request to run preference optimization (RLHF) on a model.
-
-
- '
- name: PostTrainingRLHFRequest
-- description:
- name: TrainingConfig
-- description: 'Request to score a reward function. A list of prompts and a list of
- responses per prompt.
-
-
- '
- name: RewardScoringRequest
-- description: 'Response from the reward scoring. Batch of (prompt, response, score)
- tuples that pass the threshold.
-
-
- '
- name: RewardScoringResponse
-- description:
- name: DoraFinetuningConfig
-- description:
- name: LoraFinetuningConfig
-- description: 'Request to finetune a model.
-
-
- '
- name: PostTrainingSFTRequest
-- description:
- name: QLoraFinetuningConfig
-x-tagGroups:
-- name: Operations
- tags:
- - AgenticSystem
- - Datasets
- - Inference
- - MemoryBanks
- - PostTraining
- - RewardScoring
- - SyntheticDataGeneration
-- name: Types
- tags:
- - AgenticSystemCreateRequest
- - AgenticSystemCreateResponse
- - AgenticSystemExecuteRequest
- - AgenticSystemExecuteResponse
- - AgenticSystemExecuteResponseStreamChunk
- - AgenticSystemTurn
- - Attachment
- - BatchChatCompletionRequest
- - BatchCompletionRequest
- - ChatCompletionRequest
- - ChatCompletionResponse
- - ChatCompletionResponseStreamChunk
- - CompletionRequest
- - CompletionResponse
- - CompletionResponseStreamChunk
- - CreateDatasetRequest
- - DPOAlignmentConfig
- - Dataset
- - Dialog
- - DoraFinetuningConfig
- - KScoredPromptGenerations
- - LoraFinetuningConfig
- - MemoryBank
- - MemoryBankDocument
- - Message
- - OptimizerConfig
- - PostTrainingJobArtifactsResponse
- - PostTrainingJobLogStream
- - PostTrainingJobStatusResponse
- - PostTrainingRLHFRequest
- - PostTrainingSFTRequest
- - QLoraFinetuningConfig
- - RewardScoringRequest
- - RewardScoringResponse
- - ScoredMessage
- - ShieldConfig
- - SyntheticDataGenerationRequest
- - SyntheticDataGenerationResponse
- - TrainingConfig
- - URL
diff --git a/source/post_training_types.py b/source/post_training_types.py
deleted file mode 100644
index f67fce4d8..000000000
--- a/source/post_training_types.py
+++ /dev/null
@@ -1,119 +0,0 @@
-from dataclasses import dataclass, field
-from enum import Enum
-from typing import Any, Dict, List, Optional, Set, Union
-
-from model_types import Message, URL
-
-from strong_typing.schema import json_schema_type
-
-
-class DatasetColumnType(Enum):
- dialog = "dialog"
- text = "text"
- media = "media"
- number = "number"
- json = "json"
-
-
-@json_schema_type
-@dataclass
-class Dataset:
- """Dataset to be used for training or evaluating language models."""
-
- # TODO(ashwin): figure out if we need to add an enum for a "dataset type"
-
- columns: Dict[str, DatasetColumnType]
- content_url: URL
- metadata: Dict[str, Any] = field(default_factory=dict)
-
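-# Illustrative construction (the exact URL constructor lives in model_types and
-# may take different arguments; field names below match the dataclass above):
-#   Dataset(
-#       columns={"dialog": DatasetColumnType.dialog, "score": DatasetColumnType.number},
-#       content_url=URL("https://example.com/datasets/train.jsonl"),
-#   )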
-
-class OptimizerType(Enum):
- adam = "adam"
- adamw = "adamw"
- sgd = "sgd"
-
-
-@json_schema_type
-@dataclass
-class OptimizerConfig:
- optimizer_type: OptimizerType
- lr: float
- lr_min: float
- weight_decay: float
-
-
-@json_schema_type
-@dataclass
-class TrainingConfig:
- n_epochs: int
- batch_size: int
- shuffle: bool
- n_iters: int
-
- enable_activation_checkpointing: bool
- memory_efficient_fsdp_wrap: bool
- fsdp_cpu_offload: bool
-
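-# Illustrative settings (placeholder values, not recommendations):
-#   OptimizerConfig(optimizer_type=OptimizerType.adamw, lr=1e-5, lr_min=1e-6, weight_decay=0.01)
-#   TrainingConfig(
-#       n_epochs=1, batch_size=8, shuffle=True, n_iters=1000,
-#       enable_activation_checkpointing=True,
-#       memory_efficient_fsdp_wrap=True,
-#       fsdp_cpu_offload=False,
-#   )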
-
-class FinetuningAlgorithm(Enum):
- full = "full"
- lora = "lora"
- qlora = "qlora"
- dora = "dora"
-
-
-@json_schema_type
-@dataclass
-class LoraFinetuningConfig:
- lora_attn_modules: List[str]
- apply_lora_to_mlp: bool
- apply_lora_to_output: bool
- rank: int
- alpha: int
-
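-# Illustrative LoRA settings (placeholder values); QLoraFinetuningConfig and
-# DoraFinetuningConfig below take the same fields:
-#   LoraFinetuningConfig(
-#       lora_attn_modules=["q_proj", "v_proj"],
-#       apply_lora_to_mlp=False,
-#       apply_lora_to_output=False,
-#       rank=8,
-#       alpha=16,
-#   )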
-
-@json_schema_type
-@dataclass
-class QLoraFinetuningConfig(LoraFinetuningConfig):
- pass
-
-
-@json_schema_type
-@dataclass
-class DoraFinetuningConfig(LoraFinetuningConfig):
- pass
-
-
-@json_schema_type
-@dataclass
-class PostTrainingJobLogStream:
- """Stream of logs from a finetuning job."""
-
- job_uuid: str
- log_lines: List[str]
-
-
-class PostTrainingJobStatus(Enum):
- running = "running"
- completed = "completed"
- failed = "failed"
- scheduled = "scheduled"
-
-
-@dataclass
-class Checkpoint:
- iters: int
- path: URL
-
-
-class RLHFAlgorithm(Enum):
- dpo = "dpo"
-
-
-@json_schema_type
-@dataclass
-class DPOAlignmentConfig:
- reward_scale: float
- reward_clip: float
- epsilon: float
- gamma: float
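-
-# Illustrative DPO settings (placeholder values):
-#   DPOAlignmentConfig(reward_scale=1.0, reward_clip=5.0, epsilon=0.1, gamma=0.99)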
diff --git a/source/server.py b/source/server.py
deleted file mode 100644
index e69e7bb06..000000000
--- a/source/server.py
+++ /dev/null
@@ -1,47 +0,0 @@
-from dataclasses import dataclass, field
-from enum import Enum
-from typing import List, Optional, Protocol, Set, Union
-
-from flask import Flask, jsonify, request
-
-from model_types import *
-from agentic_system_types import *
-from api_definitions import *
-
-app = Flask(__name__)
-
-class AgenticSystemImpl(AgenticSystem):
- def create_agentic_system(self, request: AgenticSystemCreateRequest) -> AgenticSystemCreateResponse:
- # Mock implementation
- return AgenticSystemCreateResponse(agent_id="12345")
-
- def create_agentic_system_execute(self, request: AgenticSystemExecuteRequest) -> Union[AgenticSystemExecuteResponse, AgenticSystemExecuteResponseStreamChunk]:
- # Mock implementation
- return AgenticSystemExecuteResponse(
- turn=AgenticSystemTurn(
- user_messages=[],
- steps=[],
- response_message=Message(
- role="assistant",
- content="Hello, I am an agent. I can help you with your tasks. What can I help you with?",
- )
- )
- )
-
-agentic_system = AgenticSystemImpl()
-
-@app.route("/agentic_system/create", methods=["POST"])
-def create_agentic_system():
- data = request.json
- create_request = AgenticSystemCreateRequest(**data)
- response = agentic_system.create_agentic_system(create_request)
- return jsonify(response)
-
-@app.route("/agentic_system/execute", methods=["POST"])
-def create_agentic_system_execute():
- data = request.json
- execute_request = AgenticSystemExecuteRequest(**data)
- response = agentic_system.create_agentic_system_execute(execute_request)
- return jsonify(response)
-
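-# Illustrative check once the server is running (the JSON payload shape depends on
-# AgenticSystemExecuteRequest as defined in api_definitions.py):
-#   import requests
-#   requests.post("http://127.0.0.1:5000/agentic_system/execute", json={...})
-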
-if __name__ == "__main__":
- app.run(debug=True)