diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 000000000..e3d9f3f5c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+PyYAML
+json-strong-typing
+python-openapi
diff --git a/source/defn.py b/source/defn.py
new file mode 100644
index 000000000..48289fd0e
--- /dev/null
+++ b/source/defn.py
@@ -0,0 +1,226 @@
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Any, Dict, List, Optional, Protocol, Union
+
+import yaml
+
+from pyopenapi import Info, Options, Server, Specification, webmethod
+from strong_typing.schema import json_schema_type
+
+
+@json_schema_type(
+ schema={"type": "string", "format": "uri", "pattern": "^(https?://|file://|data:)"}
+)
+@dataclass
+class URL:
+ url: str
+
+ def __str__(self) -> str:
+ return self.url
+
+
+@json_schema_type
+@dataclass
+class Attachment:
+ """
+ Attachments are used to refer to external resources, such as images, videos, audio, etc.
+
+ """
+
+ url: URL
+ mime_type: str
+
+
+Content = Union[
+ str,
+ Attachment,
+ List[Union[str, Attachment]],
+]
+
+
+class Role(Enum):
+ system = "system"
+ user = "user"
+ assistant = "assistant"
+ tool = "tool"
+
+
+class StopReason(Enum):
+ """
+ Stop reasons are used to indicate why the model stopped generating text.
+ """
+
+ not_stopped = "not_stopped"
+ finished_ok = "finished_ok"
+ max_tokens = "max_tokens"
+
+
+@dataclass
+class ToolCall:
+ """
+ A tool call is a request to a tool.
+ """
+
+ tool_name: str
+ arguments: Dict[str, Any]
+
+
+@dataclass
+class ToolResponse:
+ tool_name: str
+ response: str
+
+
+@dataclass
+class ToolDefinition:
+ tool_name: str
+ parameters: Dict[str, Any]
+
+
+@json_schema_type
+@dataclass
+class Message:
+ role: Role
+
+ # input to the model or output from the model
+ content: Content
+
+ # zero-shot tool definitions as input to the model
+ tool_definitions: List[ToolDefinition] = field(default_factory=list)
+
+ # output from the model
+ tool_calls: List[ToolCall] = field(default_factory=list)
+
+ # input to the model
+ tool_responses: List[ToolResponse] = field(default_factory=list)
+
+
+@json_schema_type
+@dataclass
+class CompletionResponse:
+ """Normal completion response."""
+ content: Content
+ stop_reason: StopReason
+ logprobs: Optional[Dict[str, Any]] = None
+
+
+@json_schema_type
+@dataclass
+class StreamedCompletionResponse:
+ """streamed completion response."""
+ text_delta: str
+ stop_reason: StopReason
+ logprobs: Optional[Dict[str, Any]] = None
+
+
+@json_schema_type
+@dataclass
+class ChatCompletionResponse:
+ """Normal chat completion response."""
+
+ content: Content
+ stop_reason: StopReason
+ tool_calls: List[ToolCall] = field(default_factory=list)
+ logprobs: Optional[Dict[str, Any]] = None
+
+
+@json_schema_type
+@dataclass
+class StreamedChatCompletionResponse:
+ """Streamed chat completion response."""
+
+ text_delta: str
+ stop_reason: StopReason
+ tool_call: Optional[ToolCall] = None
+
+
+@dataclass
+class SamplingParams:
+ temperature: float = 0.0
+ strategy: str = "greedy"
+ top_p: float = 0.95
+ top_k: int = 0
+
+
+class PretrainedModel(Enum):
+ llama3_8b = "llama3_8b"
+ llama3_70b = "llama3_70b"
+
+
+class InstructModel(Enum):
+ llama3_8b_chat = "llama3_8b_chat"
+ llama3_70b_chat = "llama3_70b_chat"
+
+
+@json_schema_type
+@dataclass
+class CompletionRequest:
+ content: Content
+ model: PretrainedModel = PretrainedModel.llama3_8b
+    sampling_params: SamplingParams = field(default_factory=SamplingParams)
+ max_tokens: int = 0
+ stream: bool = False
+ logprobs: bool = False
+
+
+@json_schema_type
+@dataclass
+class ChatCompletionRequest:
+ message: Message
+    message_history: Optional[List[Message]] = None
+ model: InstructModel = InstructModel.llama3_8b_chat
+    sampling_params: SamplingParams = field(default_factory=SamplingParams)
+ max_tokens: int = 0
+ stream: bool = False
+ logprobs: bool = False
+
+
+class Inference(Protocol):
+
+ def post_completion(
+ self,
+ request: CompletionRequest,
+ ) -> Union[CompletionResponse, StreamedCompletionResponse]: ...
+
+ def post_chat_completion(
+ self,
+ request: ChatCompletionRequest,
+ ) -> Union[ChatCompletionResponse, StreamedChatCompletionResponse]: ...
+
+
+
+@json_schema_type
+@dataclass
+class AgenticSystemExecuteRequest:
+ message: Message
+    message_history: Optional[List[Message]] = None
+ model: InstructModel = InstructModel.llama3_8b_chat
+    sampling_params: SamplingParams = field(default_factory=SamplingParams)
+
+class AgenticSystem(Protocol):
+
+ @webmethod(route="/agentic/system/execute")
+ def create_agentic_system_execute(self,) -> str: ...
+
+
+class Endpoint(Inference, AgenticSystem): ...
+
+
+if __name__ == "__main__":
+ print("Converting the spec to YAML (openapi.yaml) and HTML (openapi.html)")
+ spec = Specification(
+ Endpoint,
+ Options(
+ server=Server(url="http://llama.meta.com"),
+ info=Info(
+ title="Llama Stack specification",
+ version="0.1",
+ description="This is the llama stack",
+ ),
+ ),
+ )
+ with open("openapi.yaml", "w", encoding="utf-8") as fp:
+ yaml.dump(spec.get_json(), fp, allow_unicode=True)
+
+ with open("openapi.html", "w") as fp:
+ spec.write_html(fp, pretty_print=True)
diff --git a/source/openapi.html b/source/openapi.html
new file mode 100644
index 000000000..c4290cb7f
--- /dev/null
+++ b/source/openapi.html
@@ -0,0 +1,842 @@
+
+
+
+
+
+
+ OpenAPI specification
+
+
+
+
+
+
+
+
+
+
+
diff --git a/source/openapi.yaml b/source/openapi.yaml
new file mode 100644
index 000000000..4ec2c1e31
--- /dev/null
+++ b/source/openapi.yaml
@@ -0,0 +1,492 @@
+components:
+ responses: {}
+ schemas:
+ Attachment:
+ additionalProperties: false
+ properties:
+ mime_type:
+ type: string
+ url:
+ $ref: '#/components/schemas/URL'
+ required:
+ - url
+ - mime_type
+ title: Attachments are used to refer to external resources, such as images,
+ videos, audio, etc.
+ type: object
+ ChatCompletionRequest:
+ additionalProperties: false
+ properties:
+ logprobs:
+ default: false
+ type: boolean
+ max_tokens:
+ default: 0
+ type: integer
+ message:
+ $ref: '#/components/schemas/Message'
+ message_history:
+ items:
+ $ref: '#/components/schemas/Message'
+ type: array
+ model:
+ default: llama3_8b_chat
+ enum:
+ - llama3_8b_chat
+ - llama3_70b_chat
+ type: string
+ sampling_params:
+ additionalProperties: false
+ properties:
+ strategy:
+ default: greedy
+ type: string
+ temperature:
+ default: 0.0
+ type: number
+ top_k:
+ default: 0
+ type: integer
+ top_p:
+ default: 0.95
+ type: number
+ required:
+ - temperature
+ - strategy
+ - top_p
+ - top_k
+ type: object
+ stream:
+ default: false
+ type: boolean
+ required:
+ - message
+ - message_history
+ - model
+ - sampling_params
+ - max_tokens
+ - stream
+ - logprobs
+ type: object
+ ChatCompletionResponse:
+ additionalProperties: false
+ properties:
+ content:
+ oneOf:
+ - type: string
+ - $ref: '#/components/schemas/Attachment'
+ - items:
+ oneOf:
+ - type: string
+ - $ref: '#/components/schemas/Attachment'
+ type: array
+ logprobs:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ stop_reason:
+ enum:
+ - not_stopped
+ - finished_ok
+ - max_tokens
+ title: Stop reasons are used to indicate why the model stopped generating
+ text.
+ type: string
+ tool_calls:
+ items:
+ additionalProperties: false
+ properties:
+ arguments:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ tool_name:
+ type: string
+ required:
+ - tool_name
+ - arguments
+ title: A tool call is a request to a tool.
+ type: object
+ type: array
+ required:
+ - content
+ - stop_reason
+ - tool_calls
+ title: Normal chat completion response.
+ type: object
+ CompletionRequest:
+ additionalProperties: false
+ properties:
+ content:
+ oneOf:
+ - type: string
+ - $ref: '#/components/schemas/Attachment'
+ - items:
+ oneOf:
+ - type: string
+ - $ref: '#/components/schemas/Attachment'
+ type: array
+ logprobs:
+ default: false
+ type: boolean
+ max_tokens:
+ default: 0
+ type: integer
+ model:
+ default: llama3_8b
+ enum:
+ - llama3_8b
+ - llama3_70b
+ type: string
+ sampling_params:
+ additionalProperties: false
+ properties:
+ strategy:
+ default: greedy
+ type: string
+ temperature:
+ default: 0.0
+ type: number
+ top_k:
+ default: 0
+ type: integer
+ top_p:
+ default: 0.95
+ type: number
+ required:
+ - temperature
+ - strategy
+ - top_p
+ - top_k
+ type: object
+ stream:
+ default: false
+ type: boolean
+ required:
+ - content
+ - model
+ - sampling_params
+ - max_tokens
+ - stream
+ - logprobs
+ type: object
+ CompletionResponse:
+ additionalProperties: false
+ properties:
+ content:
+ oneOf:
+ - type: string
+ - $ref: '#/components/schemas/Attachment'
+ - items:
+ oneOf:
+ - type: string
+ - $ref: '#/components/schemas/Attachment'
+ type: array
+ logprobs:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ stop_reason:
+ enum:
+ - not_stopped
+ - finished_ok
+ - max_tokens
+ title: Stop reasons are used to indicate why the model stopped generating
+ text.
+ type: string
+ required:
+ - content
+ - stop_reason
+ title: Normal completion response.
+ type: object
+ Message:
+ additionalProperties: false
+ properties:
+ content:
+ oneOf:
+ - type: string
+ - $ref: '#/components/schemas/Attachment'
+ - items:
+ oneOf:
+ - type: string
+ - $ref: '#/components/schemas/Attachment'
+ type: array
+ role:
+ enum:
+ - system
+ - user
+ - assistant
+ - tool
+ type: string
+ tool_calls:
+ items:
+ additionalProperties: false
+ properties:
+ arguments:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ tool_name:
+ type: string
+ required:
+ - tool_name
+ - arguments
+ title: A tool call is a request to a tool.
+ type: object
+ type: array
+ tool_definitions:
+ items:
+ additionalProperties: false
+ properties:
+ parameters:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ tool_name:
+ type: string
+ required:
+ - tool_name
+ - parameters
+ type: object
+ type: array
+ tool_responses:
+ items:
+ additionalProperties: false
+ properties:
+ response:
+ type: string
+ tool_name:
+ type: string
+ required:
+ - tool_name
+ - response
+ type: object
+ type: array
+ required:
+ - role
+ - content
+ - tool_definitions
+ - tool_calls
+ - tool_responses
+ type: object
+ StreamedChatCompletionResponse:
+ additionalProperties: false
+ properties:
+ stop_reason:
+ enum:
+ - not_stopped
+ - finished_ok
+ - max_tokens
+ title: Stop reasons are used to indicate why the model stopped generating
+ text.
+ type: string
+ text_delta:
+ type: string
+ tool_call:
+ additionalProperties: false
+ properties:
+ arguments:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ tool_name:
+ type: string
+ required:
+ - tool_name
+ - arguments
+ title: A tool call is a request to a tool.
+ type: object
+ required:
+ - text_delta
+ - stop_reason
+ title: Streamed chat completion response.
+ type: object
+ StreamedCompletionResponse:
+ additionalProperties: false
+ properties:
+ logprobs:
+ additionalProperties:
+ oneOf:
+ - type: 'null'
+ - type: boolean
+ - type: number
+ - type: string
+ - type: array
+ - type: object
+ type: object
+ stop_reason:
+ enum:
+ - not_stopped
+ - finished_ok
+ - max_tokens
+ title: Stop reasons are used to indicate why the model stopped generating
+ text.
+ type: string
+ text_delta:
+ type: string
+ required:
+ - text_delta
+ - stop_reason
+ title: streamed completion response.
+ type: object
+ URL:
+ format: uri
+ pattern: ^(https?://|file://|data:)
+ type: string
+info:
+ description: This is the llama stack
+ title: Llama Stack specification
+ version: '0.1'
+jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
+openapi: 3.1.0
+paths:
+ /agentic/system/execute:
+ get:
+ parameters: []
+ responses:
+ '200':
+ content:
+ application/json:
+ schema:
+ type: string
+ description: OK
+ tags:
+ - AgenticSystem
+ /chat_completion:
+ post:
+ parameters: []
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/ChatCompletionRequest'
+ required: true
+ responses:
+ '200':
+ content:
+ application/json:
+ schema:
+ oneOf:
+ - $ref: '#/components/schemas/ChatCompletionResponse'
+ - $ref: '#/components/schemas/StreamedChatCompletionResponse'
+ description: Normal chat completion response. **OR** Streamed chat completion
+ response.
+ tags:
+ - Inference
+ /completion:
+ post:
+ parameters: []
+ requestBody:
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/CompletionRequest'
+ required: true
+ responses:
+ '200':
+ content:
+ application/json:
+ schema:
+ oneOf:
+ - $ref: '#/components/schemas/CompletionResponse'
+ - $ref: '#/components/schemas/StreamedCompletionResponse'
+ description: Normal completion response. **OR** streamed completion response.
+ tags:
+ - Inference
+security:
+- Default: []
+servers:
+- url: http://llama.meta.com
+tags:
+- name: AgenticSystem
+- name: Inference
+- description: 'Attachments are used to refer to external resources, such as images,
+ videos, audio, etc.
+
+
+ '
+ name: Attachment
+- description:
+ name: ChatCompletionRequest
+- description:
+ name: Message
+- description:
+ name: URL
+- description: 'Normal chat completion response.
+
+
+ '
+ name: ChatCompletionResponse
+- description: 'Streamed chat completion response.
+
+
+ '
+ name: StreamedChatCompletionResponse
+- description:
+ name: CompletionRequest
+- description: 'Normal completion response.
+
+
+ '
+ name: CompletionResponse
+- description: 'streamed completion response.
+
+
+ '
+ name: StreamedCompletionResponse
+x-tagGroups:
+- name: Operations
+ tags:
+ - AgenticSystem
+ - Inference
+- name: Types
+ tags:
+ - Attachment
+ - ChatCompletionRequest
+ - ChatCompletionResponse
+ - CompletionRequest
+ - CompletionResponse
+ - Message
+ - StreamedChatCompletionResponse
+ - StreamedCompletionResponse
+ - URL
diff --git a/source/run.sh b/source/run.sh
new file mode 100644
index 000000000..980d979d2
--- /dev/null
+++ b/source/run.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+PYTHONPATH=. python3 defn.py