making the API python based with a converter script

2025-06-28 19:04:19 +00:00 · 2024-07-07 17:01:14 -07:00 · 2024-07-07 17:01:14 -07:00 · 722d20c6de
commit 722d20c6de
parent 1a2b17af7f
5 changed files with 1565 additions and 0 deletions
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,2 @@
+json-strong-typing
+python-openapi
--- a/source/defn.py
+++ b/source/defn.py
@ -0,0 +1,226 @@
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Any, Dict, List, Optional, Protocol, Union
+
+import yaml
+
+from pyopenapi import Info, Options, Server, Specification, webmethod
+from strong_typing.schema import json_schema_type
+
+
+# Wrapper around a URL string. The explicit schema handed to json_schema_type
+# describes the type as a plain string with "uri" format whose value must
+# start with http://, https://, file:// or data: (see the pattern below).
+@json_schema_type(
+    schema={"type": "string", "format": "uri", "pattern": "^(https?://|file://|data:)"}
+)
+@dataclass
+class URL:
+    url: str
+
+    def __str__(self) -> str:
+        # Render as the raw URL text.
+        return self.url
+
+
+@json_schema_type
+@dataclass
+class Attachment:
+    """
+    Attachments are used to refer to external resources, such as images, videos, audio, etc.
+
+    """
+
+    # location of the external resource
+    url: URL
+    # media type of the resource — presumably a MIME type string such as
+    # "image/png"; TODO confirm expected values
+    mime_type: str
+
+
+# Payload type shared by messages, requests and responses: a plain string,
+# a single Attachment, or a list that may mix strings and Attachments.
+Content = Union[
+    str,
+    Attachment,
+    List[Union[str, Attachment]],
+]
+
+
+# Author role attached to a Message (Message.role). Each member's value is its
+# own name as a string. Documented with a `#` comment rather than a docstring
+# because enum docstrings in this module surface in the generated spec.
+class Role(Enum):
+    system = "system"
+    user = "user"
+    assistant = "assistant"
+    tool = "tool"
+
+
+# NOTE: the docstring below is part of the generated output — it appears as the
+# "title" of every stop_reason property in the committed openapi.html.
+class StopReason(Enum):
+    """
+    Stop reasons are used to indicate why the model stopped generating text.
+    """
+
+    # presumably: generation has not ended yet (e.g. a mid-stream chunk) — TODO confirm
+    not_stopped = "not_stopped"
+    # presumably: the model ended generation on its own — TODO confirm
+    finished_ok = "finished_ok"
+    # presumably: generation was cut off by the max_tokens limit — TODO confirm
+    max_tokens = "max_tokens"
+
+
+# Not decorated with json_schema_type: in the committed openapi.html this type
+# is inlined wherever it is used instead of being a named component schema.
+@dataclass
+class ToolCall:
+    """
+    A tool call is a request to a tool.
+    """
+
+    # name of the tool being invoked
+    tool_name: str
+    # free-form mapping of argument name to value
+    arguments: Dict[str, Any]
+
+
+# Result of a tool invocation, passed back to the model via
+# Message.tool_responses. Inlined (not a named component) in the generated spec.
+@dataclass
+class ToolResponse:
+    # name of the tool that produced this response
+    tool_name: str
+    # the tool's output as a string
+    response: str
+
+
+# Declaration of a tool offered to the model via Message.tool_definitions.
+# Inlined (not a named component) in the generated spec.
+@dataclass
+class ToolDefinition:
+    # name under which the tool is exposed
+    tool_name: str
+    # free-form mapping describing the tool's parameters — exact structure is
+    # not fixed by this file; TODO confirm expected shape
+    parameters: Dict[str, Any]
+
+
+# A single conversation message. The three tool-related lists default to
+# empty via default_factory, so each Message gets its own list objects.
+@json_schema_type
+@dataclass
+class Message:
+    # who authored the message
+    role: Role
+
+    # input to the model or output from the model
+    content: Content
+
+    # zero-shot tool definitions as input to the model
+    tool_definitions: List[ToolDefinition] = field(default_factory=list)
+
+    # output from the model
+    tool_calls: List[ToolCall] = field(default_factory=list)
+
+    # input to the model
+    tool_responses: List[ToolResponse] = field(default_factory=list)
+
+
+# One of the two return types of Inference.post_completion. The class
+# docstring is emitted into the generated spec (schema title and response
+# description), so edit it with the rendered output in mind.
+@json_schema_type
+@dataclass
+class CompletionResponse:
+    """Normal completion response."""
+    # generated content
+    content: Content
+    # why generation ended
+    stop_reason: StopReason
+    # optional free-form log-probability data; absent (None) by default
+    logprobs: Optional[Dict[str, Any]] = None
+
+
+# One of the two return types of Inference.post_completion, carrying an
+# incremental piece of text. The class docstring is emitted verbatim into the
+# generated spec's response description.
+@json_schema_type
+@dataclass
+class StreamedCompletionResponse:
+    """streamed completion response."""
+    # newly generated text for this chunk
+    text_delta: str
+    # stop state reported with this chunk
+    stop_reason: StopReason
+    # optional free-form log-probability data; absent (None) by default
+    logprobs: Optional[Dict[str, Any]] = None
+
+
+# One of the two return types of Inference.post_chat_completion.
+@json_schema_type
+@dataclass
+class ChatCompletionResponse:
+    """Normal chat completion response."""
+
+    # generated content
+    content: Content
+    # why generation ended
+    stop_reason: StopReason
+    # tool invocations requested by the model; empty list by default
+    tool_calls: List[ToolCall] = field(default_factory=list)
+    # optional free-form log-probability data; absent (None) by default
+    logprobs: Optional[Dict[str, Any]] = None
+
+
+# One of the two return types of Inference.post_chat_completion, carrying an
+# incremental piece of text. Unlike StreamedCompletionResponse it has no
+# logprobs field and may carry a single tool call.
+@json_schema_type
+@dataclass
+class StreamedChatCompletionResponse:
+    """Streamed chat completion response."""
+
+    # newly generated text for this chunk
+    text_delta: str
+    # stop state reported with this chunk
+    stop_reason: StopReason
+    # at most one tool call per chunk; None when the chunk carries none
+    tool_call: Optional[ToolCall] = None
+
+
+# Sampling settings embedded in the request types. Every field has a default,
+# so SamplingParams() is constructible with no arguments. Not decorated with
+# json_schema_type, so it is inlined in the generated spec.
+@dataclass
+class SamplingParams:
+    temperature: float = 0.0
+    # free-form strategy name; "greedy" is the only value visible in this file
+    strategy: str = "greedy"
+    top_p: float = 0.95
+    # NOTE(review): 0 presumably means "top-k disabled" — confirm
+    top_k: int = 0
+
+
+# Model identifiers accepted by CompletionRequest.model.
+class PretrainedModel(Enum):
+    llama3_8b = "llama3_8b"
+    llama3_70b = "llama3_70b"
+
+
+# Model identifiers accepted by ChatCompletionRequest.model and
+# AgenticSystemExecuteRequest.model.
+class InstructModel(Enum):
+    llama3_8b_chat = "llama3_8b_chat"
+    llama3_70b_chat = "llama3_70b_chat"
+
+
+# Request body for Inference.post_completion. Documented with `#` comments
+# rather than a docstring because class docstrings on schema types are emitted
+# into the generated spec.
+@json_schema_type
+@dataclass
+class CompletionRequest:
+    # prompt content to complete
+    content: Content
+    model: PretrainedModel = PretrainedModel.llama3_8b
+    # default_factory builds a fresh SamplingParams per request. A bare
+    # SamplingParams() class-level default is a single shared mutable object,
+    # and dataclasses rejects it outright on Python 3.11+ (unhashable default).
+    sampling_params: SamplingParams = field(default_factory=SamplingParams)
+    max_tokens: int = 0
+    # when True the caller asks for a streamed response
+    stream: bool = False
+    # when True the caller asks for log-probabilities in the response
+    logprobs: bool = False
+
+
+# Request body for Inference.post_chat_completion. Documented with `#`
+# comments rather than a docstring because class docstrings on schema types
+# are emitted into the generated spec.
+@json_schema_type
+@dataclass
+class ChatCompletionRequest:
+    # the new message to respond to
+    message: Message
+    # earlier messages of the conversation. Defaults to an empty list (same
+    # convention as Message's list fields) instead of None, which contradicted
+    # the List[Message] annotation.
+    message_history: List[Message] = field(default_factory=list)
+    model: InstructModel = InstructModel.llama3_8b_chat
+    # default_factory builds a fresh SamplingParams per request. A bare
+    # SamplingParams() class-level default is a single shared mutable object,
+    # and dataclasses rejects it outright on Python 3.11+ (unhashable default).
+    sampling_params: SamplingParams = field(default_factory=SamplingParams)
+    max_tokens: int = 0
+    # when True the caller asks for a streamed response
+    stream: bool = False
+    # when True the caller asks for log-probabilities in the response
+    logprobs: bool = False
+
+
+class Inference(Protocol):
+    # Inference surface of the API. Neither method declares an explicit
+    # @webmethod route; in the committed openapi.html they appear as
+    # POST /completion and POST /chat_completion under the "Inference" tag.
+
+    # Text completion: returns either the normal or the streamed response type.
+    def post_completion(
+        self,
+        request: CompletionRequest,
+    ) -> Union[CompletionResponse, StreamedCompletionResponse]: ...
+
+    # Chat completion: returns either the normal or the streamed response type.
+    def post_chat_completion(
+        self,
+        request: ChatCompletionRequest,
+    ) -> Union[ChatCompletionResponse, StreamedChatCompletionResponse]: ...
+
+
+
+# Request payload intended for the agentic-system execute endpoint; it carries
+# the same message/model/sampling fields as ChatCompletionRequest, minus the
+# max_tokens/stream/logprobs switches. Documented with `#` comments because
+# class docstrings on schema types are emitted into the generated spec.
+@json_schema_type
+@dataclass
+class AgenticSystemExecuteRequest:
+    # the new message to act on
+    message: Message
+    # earlier messages of the conversation. Defaults to an empty list (same
+    # convention as Message's list fields) instead of None, which contradicted
+    # the List[Message] annotation.
+    message_history: List[Message] = field(default_factory=list)
+    model: InstructModel = InstructModel.llama3_8b_chat
+    # default_factory builds a fresh SamplingParams per request. A bare
+    # SamplingParams() class-level default is a single shared mutable object,
+    # and dataclasses rejects it outright on Python 3.11+ (unhashable default).
+    sampling_params: SamplingParams = field(default_factory=SamplingParams)
+
+class AgenticSystem(Protocol):
+    # Agentic-system surface of the API, exposed at an explicit route.
+    # In the committed openapi.html this renders as GET /agentic/system/execute
+    # with no parameters and a string response.
+    # NOTE(review): the method takes no request argument, so
+    # AgenticSystemExecuteRequest is never referenced — presumably it should be
+    # accepted here as `request: AgenticSystemExecuteRequest`; confirm intent.
+
+    @webmethod(route="/agentic/system/execute")
+    def create_agentic_system_execute(self,) -> str: ...
+
+
+# Union of every API surface; this is the class handed to Specification in
+# the __main__ block to generate the OpenAPI document.
+class Endpoint(Inference, AgenticSystem): ...
+
+
+if __name__ == "__main__":
+    # Script entry point: build the OpenAPI specification from the combined
+    # Endpoint protocol and write it to the current working directory, once as
+    # YAML and once as a standalone HTML page.
+    print("Converting the spec to YAML (openapi.yaml) and HTML (openapi.html)")
+    spec = Specification(
+        Endpoint,
+        Options(
+            server=Server(url="http://llama.meta.com"),
+            info=Info(
+                title="Llama Stack specification",
+                version="0.1",
+                description="This is the llama stack",
+            ),
+        ),
+    )
+    with open("openapi.yaml", "w", encoding="utf-8") as fp:
+        yaml.dump(spec.get_json(), fp, allow_unicode=True)
+
+    # Pin UTF-8 here as well: the generated page declares
+    # <meta charset="utf-8">, so falling back to the platform's default
+    # encoding could yield a mismatched (or unencodable) file on non-UTF-8
+    # locales.
+    with open("openapi.html", "w", encoding="utf-8") as fp:
+        spec.write_html(fp, pretty_print=True)
--- a/source/openapi.html
+++ b/source/openapi.html
@ -0,0 +1,842 @@
+<!DOCTYPE html>
+<html>
+
+<head>
+    <meta charset="utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>OpenAPI specification</title>
+    <link href="https://fonts.googleapis.com/css?family=Montserrat:300,400,700|Roboto:300,400,700" rel="stylesheet">
+    <style>
+        body {
+            margin: 0;
+            padding: 0;
+        }
+
+    </style>
+    <script defer="defer" src="https://cdn.redoc.ly/redoc/latest/bundles/redoc.standalone.js"></script>
+    <script defer="defer">
+        document.addEventListener("DOMContentLoaded", function () {
+            spec = {
+    "openapi": "3.1.0",
+    "info": {
+        "title": "Llama Stack specification",
+        "version": "0.1",
+        "description": "This is the llama stack"
+    },
+    "servers": [
+        {
+            "url": "http://llama.meta.com"
+        }
+    ],
+    "paths": {
+        "/agentic/system/execute": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "type": "string"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "AgenticSystem"
+                ],
+                "parameters": []
+            }
+        },
+        "/chat_completion": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "Normal chat completion response. **OR** Streamed chat completion response.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "oneOf": [
+                                        {
+                                            "$ref": "#/components/schemas/ChatCompletionResponse"
+                                        },
+                                        {
+                                            "$ref": "#/components/schemas/StreamedChatCompletionResponse"
+                                        }
+                                    ]
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Inference"
+                ],
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/ChatCompletionRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
+        "/completion": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "Normal completion response. **OR** streamed completion response.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "oneOf": [
+                                        {
+                                            "$ref": "#/components/schemas/CompletionResponse"
+                                        },
+                                        {
+                                            "$ref": "#/components/schemas/StreamedCompletionResponse"
+                                        }
+                                    ]
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Inference"
+                ],
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/CompletionRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        }
+    },
+    "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
+    "components": {
+        "schemas": {
+            "Attachment": {
+                "type": "object",
+                "properties": {
+                    "url": {
+                        "$ref": "#/components/schemas/URL"
+                    },
+                    "mime_type": {
+                        "type": "string"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "url",
+                    "mime_type"
+                ],
+                "title": "Attachments are used to refer to external resources, such as images, videos, audio, etc."
+            },
+            "ChatCompletionRequest": {
+                "type": "object",
+                "properties": {
+                    "message": {
+                        "$ref": "#/components/schemas/Message"
+                    },
+                    "message_history": {
+                        "type": "array",
+                        "items": {
+                            "$ref": "#/components/schemas/Message"
+                        }
+                    },
+                    "model": {
+                        "type": "string",
+                        "enum": [
+                            "llama3_8b_chat",
+                            "llama3_70b_chat"
+                        ],
+                        "default": "llama3_8b_chat"
+                    },
+                    "sampling_params": {
+                        "type": "object",
+                        "properties": {
+                            "temperature": {
+                                "type": "number",
+                                "default": 0.0
+                            },
+                            "strategy": {
+                                "type": "string",
+                                "default": "greedy"
+                            },
+                            "top_p": {
+                                "type": "number",
+                                "default": 0.95
+                            },
+                            "top_k": {
+                                "type": "integer",
+                                "default": 0
+                            }
+                        },
+                        "additionalProperties": false,
+                        "required": [
+                            "temperature",
+                            "strategy",
+                            "top_p",
+                            "top_k"
+                        ]
+                    },
+                    "max_tokens": {
+                        "type": "integer",
+                        "default": 0
+                    },
+                    "stream": {
+                        "type": "boolean",
+                        "default": false
+                    },
+                    "logprobs": {
+                        "type": "boolean",
+                        "default": false
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "message",
+                    "message_history",
+                    "model",
+                    "sampling_params",
+                    "max_tokens",
+                    "stream",
+                    "logprobs"
+                ]
+            },
+            "Message": {
+                "type": "object",
+                "properties": {
+                    "role": {
+                        "type": "string",
+                        "enum": [
+                            "system",
+                            "user",
+                            "assistant",
+                            "tool"
+                        ]
+                    },
+                    "content": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "$ref": "#/components/schemas/Attachment"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "oneOf": [
+                                        {
+                                            "type": "string"
+                                        },
+                                        {
+                                            "$ref": "#/components/schemas/Attachment"
+                                        }
+                                    ]
+                                }
+                            }
+                        ]
+                    },
+                    "tool_definitions": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "tool_name": {
+                                    "type": "string"
+                                },
+                                "parameters": {
+                                    "type": "object",
+                                    "additionalProperties": {
+                                        "oneOf": [
+                                            {
+                                                "type": "null"
+                                            },
+                                            {
+                                                "type": "boolean"
+                                            },
+                                            {
+                                                "type": "number"
+                                            },
+                                            {
+                                                "type": "string"
+                                            },
+                                            {
+                                                "type": "array"
+                                            },
+                                            {
+                                                "type": "object"
+                                            }
+                                        ]
+                                    }
+                                }
+                            },
+                            "additionalProperties": false,
+                            "required": [
+                                "tool_name",
+                                "parameters"
+                            ]
+                        }
+                    },
+                    "tool_calls": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "tool_name": {
+                                    "type": "string"
+                                },
+                                "arguments": {
+                                    "type": "object",
+                                    "additionalProperties": {
+                                        "oneOf": [
+                                            {
+                                                "type": "null"
+                                            },
+                                            {
+                                                "type": "boolean"
+                                            },
+                                            {
+                                                "type": "number"
+                                            },
+                                            {
+                                                "type": "string"
+                                            },
+                                            {
+                                                "type": "array"
+                                            },
+                                            {
+                                                "type": "object"
+                                            }
+                                        ]
+                                    }
+                                }
+                            },
+                            "additionalProperties": false,
+                            "required": [
+                                "tool_name",
+                                "arguments"
+                            ],
+                            "title": "A tool call is a request to a tool."
+                        }
+                    },
+                    "tool_responses": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "tool_name": {
+                                    "type": "string"
+                                },
+                                "response": {
+                                    "type": "string"
+                                }
+                            },
+                            "additionalProperties": false,
+                            "required": [
+                                "tool_name",
+                                "response"
+                            ]
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "role",
+                    "content",
+                    "tool_definitions",
+                    "tool_calls",
+                    "tool_responses"
+                ]
+            },
+            "URL": {
+                "type": "string",
+                "format": "uri",
+                "pattern": "^(https?://|file://|data:)"
+            },
+            "ChatCompletionResponse": {
+                "type": "object",
+                "properties": {
+                    "content": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "$ref": "#/components/schemas/Attachment"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "oneOf": [
+                                        {
+                                            "type": "string"
+                                        },
+                                        {
+                                            "$ref": "#/components/schemas/Attachment"
+                                        }
+                                    ]
+                                }
+                            }
+                        ]
+                    },
+                    "stop_reason": {
+                        "type": "string",
+                        "enum": [
+                            "not_stopped",
+                            "finished_ok",
+                            "max_tokens"
+                        ],
+                        "title": "Stop reasons are used to indicate why the model stopped generating text."
+                    },
+                    "tool_calls": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "tool_name": {
+                                    "type": "string"
+                                },
+                                "arguments": {
+                                    "type": "object",
+                                    "additionalProperties": {
+                                        "oneOf": [
+                                            {
+                                                "type": "null"
+                                            },
+                                            {
+                                                "type": "boolean"
+                                            },
+                                            {
+                                                "type": "number"
+                                            },
+                                            {
+                                                "type": "string"
+                                            },
+                                            {
+                                                "type": "array"
+                                            },
+                                            {
+                                                "type": "object"
+                                            }
+                                        ]
+                                    }
+                                }
+                            },
+                            "additionalProperties": false,
+                            "required": [
+                                "tool_name",
+                                "arguments"
+                            ],
+                            "title": "A tool call is a request to a tool."
+                        }
+                    },
+                    "logprobs": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "content",
+                    "stop_reason",
+                    "tool_calls"
+                ],
+                "title": "Normal chat completion response."
+            },
+            "StreamedChatCompletionResponse": {
+                "type": "object",
+                "properties": {
+                    "text_delta": {
+                        "type": "string"
+                    },
+                    "stop_reason": {
+                        "type": "string",
+                        "enum": [
+                            "not_stopped",
+                            "finished_ok",
+                            "max_tokens"
+                        ],
+                        "title": "Stop reasons are used to indicate why the model stopped generating text."
+                    },
+                    "tool_call": {
+                        "type": "object",
+                        "properties": {
+                            "tool_name": {
+                                "type": "string"
+                            },
+                            "arguments": {
+                                "type": "object",
+                                "additionalProperties": {
+                                    "oneOf": [
+                                        {
+                                            "type": "null"
+                                        },
+                                        {
+                                            "type": "boolean"
+                                        },
+                                        {
+                                            "type": "number"
+                                        },
+                                        {
+                                            "type": "string"
+                                        },
+                                        {
+                                            "type": "array"
+                                        },
+                                        {
+                                            "type": "object"
+                                        }
+                                    ]
+                                }
+                            }
+                        },
+                        "additionalProperties": false,
+                        "required": [
+                            "tool_name",
+                            "arguments"
+                        ],
+                        "title": "A tool call is a request to a tool."
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "text_delta",
+                    "stop_reason"
+                ],
+                "title": "Streamed chat completion response."
+            },
+            "CompletionRequest": {
+                "type": "object",
+                "properties": {
+                    "content": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "$ref": "#/components/schemas/Attachment"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "oneOf": [
+                                        {
+                                            "type": "string"
+                                        },
+                                        {
+                                            "$ref": "#/components/schemas/Attachment"
+                                        }
+                                    ]
+                                }
+                            }
+                        ]
+                    },
+                    "model": {
+                        "type": "string",
+                        "enum": [
+                            "llama3_8b",
+                            "llama3_70b"
+                        ],
+                        "default": "llama3_8b"
+                    },
+                    "sampling_params": {
+                        "type": "object",
+                        "properties": {
+                            "temperature": {
+                                "type": "number",
+                                "default": 0.0
+                            },
+                            "strategy": {
+                                "type": "string",
+                                "default": "greedy"
+                            },
+                            "top_p": {
+                                "type": "number",
+                                "default": 0.95
+                            },
+                            "top_k": {
+                                "type": "integer",
+                                "default": 0
+                            }
+                        },
+                        "additionalProperties": false,
+                        "required": [
+                            "temperature",
+                            "strategy",
+                            "top_p",
+                            "top_k"
+                        ]
+                    },
+                    "max_tokens": {
+                        "type": "integer",
+                        "default": 0
+                    },
+                    "stream": {
+                        "type": "boolean",
+                        "default": false
+                    },
+                    "logprobs": {
+                        "type": "boolean",
+                        "default": false
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "content",
+                    "model",
+                    "sampling_params",
+                    "max_tokens",
+                    "stream",
+                    "logprobs"
+                ]
+            },
+            "CompletionResponse": {
+                "type": "object",
+                "properties": {
+                    "content": {
+                        "oneOf": [
+                            {
+                                "type": "string"
+                            },
+                            {
+                                "$ref": "#/components/schemas/Attachment"
+                            },
+                            {
+                                "type": "array",
+                                "items": {
+                                    "oneOf": [
+                                        {
+                                            "type": "string"
+                                        },
+                                        {
+                                            "$ref": "#/components/schemas/Attachment"
+                                        }
+                                    ]
+                                }
+                            }
+                        ]
+                    },
+                    "stop_reason": {
+                        "type": "string",
+                        "enum": [
+                            "not_stopped",
+                            "finished_ok",
+                            "max_tokens"
+                        ],
+                        "title": "Stop reasons are used to indicate why the model stopped generating text."
+                    },
+                    "logprobs": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "content",
+                    "stop_reason"
+                ],
+                "title": "Normal completion response."
+            },
+            "StreamedCompletionResponse": {
+                "type": "object",
+                "properties": {
+                    "text_delta": {
+                        "type": "string"
+                    },
+                    "stop_reason": {
+                        "type": "string",
+                        "enum": [
+                            "not_stopped",
+                            "finished_ok",
+                            "max_tokens"
+                        ],
+                        "title": "Stop reasons are used to indicate why the model stopped generating text."
+                    },
+                    "logprobs": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "text_delta",
+                    "stop_reason"
+                ],
+                "title": "streamed completion response."
+            }
+        },
+        "responses": {}
+    },
+    "security": [
+        {
+            "Default": []
+        }
+    ],
+    "tags": [
+        {
+            "name": "AgenticSystem"
+        },
+        {
+            "name": "Inference"
+        },
+        {
+            "name": "Attachment",
+            "description": "Attachments are used to refer to external resources, such as images, videos, audio, etc.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/Attachment\" />"
+        },
+        {
+            "name": "ChatCompletionRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionRequest\" />"
+        },
+        {
+            "name": "Message",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/Message\" />"
+        },
+        {
+            "name": "URL",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/URL\" />"
+        },
+        {
+            "name": "ChatCompletionResponse",
+            "description": "Normal chat completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/ChatCompletionResponse\" />"
+        },
+        {
+            "name": "StreamedChatCompletionResponse",
+            "description": "Streamed chat completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/StreamedChatCompletionResponse\" />"
+        },
+        {
+            "name": "CompletionRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/CompletionRequest\" />"
+        },
+        {
+            "name": "CompletionResponse",
+            "description": "Normal completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/CompletionResponse\" />"
+        },
+        {
+            "name": "StreamedCompletionResponse",
+            "description": "streamed completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/StreamedCompletionResponse\" />"
+        }
+    ],
+    "x-tagGroups": [
+        {
+            "name": "Operations",
+            "tags": [
+                "AgenticSystem",
+                "Inference"
+            ]
+        },
+        {
+            "name": "Types",
+            "tags": [
+                "Attachment",
+                "ChatCompletionRequest",
+                "ChatCompletionResponse",
+                "CompletionRequest",
+                "CompletionResponse",
+                "Message",
+                "StreamedChatCompletionResponse",
+                "StreamedCompletionResponse",
+                "URL"
+            ]
+        }
+    ]
+};
+            options = {
+                downloadFileName: "openapi.json",
+                expandResponses: "200",
+                expandSingleSchemaField: true,
+                jsonSampleExpandLevel: "all",
+                schemaExpansionLevel: "all",
+            };
+            element = document.getElementById("openapi-container");
+            Redoc.init(spec, options, element);
+
+            if (spec.info && spec.info.title) {
+                document.title = spec.info.title;
+            }
+        });
+    </script>
+</head>
+
+<body>
+    <div id="openapi-container"></div>
+</body>
+
+</html>
--- a/source/openapi.yaml
+++ b/source/openapi.yaml
@ -0,0 +1,492 @@
+components:
+  responses: {}
+  schemas:
+    Attachment:
+      additionalProperties: false
+      properties:
+        mime_type:
+          type: string
+        url:
+          $ref: '#/components/schemas/URL'
+      required:
+      - url
+      - mime_type
+      title: Attachments are used to refer to external resources, such as images,
+        videos, audio, etc.
+      type: object
+    ChatCompletionRequest:
+      additionalProperties: false
+      properties:
+        logprobs:
+          default: false
+          type: boolean
+        max_tokens:
+          default: 0
+          type: integer
+        message:
+          $ref: '#/components/schemas/Message'
+        message_history:
+          items:
+            $ref: '#/components/schemas/Message'
+          type: array
+        model:
+          default: llama3_8b_chat
+          enum:
+          - llama3_8b_chat
+          - llama3_70b_chat
+          type: string
+        sampling_params:
+          additionalProperties: false
+          properties:
+            strategy:
+              default: greedy
+              type: string
+            temperature:
+              default: 0.0
+              type: number
+            top_k:
+              default: 0
+              type: integer
+            top_p:
+              default: 0.95
+              type: number
+          required:
+          - temperature
+          - strategy
+          - top_p
+          - top_k
+          type: object
+        stream:
+          default: false
+          type: boolean
+      required:
+      - message
+      - message_history
+      - model
+      - sampling_params
+      - max_tokens
+      - stream
+      - logprobs
+      type: object
+    ChatCompletionResponse:
+      additionalProperties: false
+      properties:
+        content:
+          oneOf:
+          - type: string
+          - $ref: '#/components/schemas/Attachment'
+          - items:
+              oneOf:
+              - type: string
+              - $ref: '#/components/schemas/Attachment'
+            type: array
+        logprobs:
+          additionalProperties:
+            oneOf:
+            - type: 'null'
+            - type: boolean
+            - type: number
+            - type: string
+            - type: array
+            - type: object
+          type: object
+        stop_reason:
+          enum:
+          - not_stopped
+          - finished_ok
+          - max_tokens
+          title: Stop reasons are used to indicate why the model stopped generating
+            text.
+          type: string
+        tool_calls:
+          items:
+            additionalProperties: false
+            properties:
+              arguments:
+                additionalProperties:
+                  oneOf:
+                  - type: 'null'
+                  - type: boolean
+                  - type: number
+                  - type: string
+                  - type: array
+                  - type: object
+                type: object
+              tool_name:
+                type: string
+            required:
+            - tool_name
+            - arguments
+            title: A tool call is a request to a tool.
+            type: object
+          type: array
+      required:
+      - content
+      - stop_reason
+      - tool_calls
+      title: Normal chat completion response.
+      type: object
+    CompletionRequest:
+      additionalProperties: false
+      properties:
+        content:
+          oneOf:
+          - type: string
+          - $ref: '#/components/schemas/Attachment'
+          - items:
+              oneOf:
+              - type: string
+              - $ref: '#/components/schemas/Attachment'
+            type: array
+        logprobs:
+          default: false
+          type: boolean
+        max_tokens:
+          default: 0
+          type: integer
+        model:
+          default: llama3_8b
+          enum:
+          - llama3_8b
+          - llama3_70b
+          type: string
+        sampling_params:
+          additionalProperties: false
+          properties:
+            strategy:
+              default: greedy
+              type: string
+            temperature:
+              default: 0.0
+              type: number
+            top_k:
+              default: 0
+              type: integer
+            top_p:
+              default: 0.95
+              type: number
+          required:
+          - temperature
+          - strategy
+          - top_p
+          - top_k
+          type: object
+        stream:
+          default: false
+          type: boolean
+      required:
+      - content
+      - model
+      - sampling_params
+      - max_tokens
+      - stream
+      - logprobs
+      type: object
+    CompletionResponse:
+      additionalProperties: false
+      properties:
+        content:
+          oneOf:
+          - type: string
+          - $ref: '#/components/schemas/Attachment'
+          - items:
+              oneOf:
+              - type: string
+              - $ref: '#/components/schemas/Attachment'
+            type: array
+        logprobs:
+          additionalProperties:
+            oneOf:
+            - type: 'null'
+            - type: boolean
+            - type: number
+            - type: string
+            - type: array
+            - type: object
+          type: object
+        stop_reason:
+          enum:
+          - not_stopped
+          - finished_ok
+          - max_tokens
+          title: Stop reasons are used to indicate why the model stopped generating
+            text.
+          type: string
+      required:
+      - content
+      - stop_reason
+      title: Normal completion response.
+      type: object
+    Message:
+      additionalProperties: false
+      properties:
+        content:
+          oneOf:
+          - type: string
+          - $ref: '#/components/schemas/Attachment'
+          - items:
+              oneOf:
+              - type: string
+              - $ref: '#/components/schemas/Attachment'
+            type: array
+        role:
+          enum:
+          - system
+          - user
+          - assistant
+          - tool
+          type: string
+        tool_calls:
+          items:
+            additionalProperties: false
+            properties:
+              arguments:
+                additionalProperties:
+                  oneOf:
+                  - type: 'null'
+                  - type: boolean
+                  - type: number
+                  - type: string
+                  - type: array
+                  - type: object
+                type: object
+              tool_name:
+                type: string
+            required:
+            - tool_name
+            - arguments
+            title: A tool call is a request to a tool.
+            type: object
+          type: array
+        tool_definitions:
+          items:
+            additionalProperties: false
+            properties:
+              parameters:
+                additionalProperties:
+                  oneOf:
+                  - type: 'null'
+                  - type: boolean
+                  - type: number
+                  - type: string
+                  - type: array
+                  - type: object
+                type: object
+              tool_name:
+                type: string
+            required:
+            - tool_name
+            - parameters
+            type: object
+          type: array
+        tool_responses:
+          items:
+            additionalProperties: false
+            properties:
+              response:
+                type: string
+              tool_name:
+                type: string
+            required:
+            - tool_name
+            - response
+            type: object
+          type: array
+      required:
+      - role
+      - content
+      - tool_definitions
+      - tool_calls
+      - tool_responses
+      type: object
+    StreamedChatCompletionResponse:
+      additionalProperties: false
+      properties:
+        stop_reason:
+          enum:
+          - not_stopped
+          - finished_ok
+          - max_tokens
+          title: Stop reasons are used to indicate why the model stopped generating
+            text.
+          type: string
+        text_delta:
+          type: string
+        tool_call:
+          additionalProperties: false
+          properties:
+            arguments:
+              additionalProperties:
+                oneOf:
+                - type: 'null'
+                - type: boolean
+                - type: number
+                - type: string
+                - type: array
+                - type: object
+              type: object
+            tool_name:
+              type: string
+          required:
+          - tool_name
+          - arguments
+          title: A tool call is a request to a tool.
+          type: object
+      required:
+      - text_delta
+      - stop_reason
+      title: Streamed chat completion response.
+      type: object
+    StreamedCompletionResponse:
+      additionalProperties: false
+      properties:
+        logprobs:
+          additionalProperties:
+            oneOf:
+            - type: 'null'
+            - type: boolean
+            - type: number
+            - type: string
+            - type: array
+            - type: object
+          type: object
+        stop_reason:
+          enum:
+          - not_stopped
+          - finished_ok
+          - max_tokens
+          title: Stop reasons are used to indicate why the model stopped generating
+            text.
+          type: string
+        text_delta:
+          type: string
+      required:
+      - text_delta
+      - stop_reason
+      title: Streamed completion response.
+      type: object
+    URL:
+      format: uri
+      pattern: ^(https?://|file://|data:)
+      type: string
+info:
+  description: This is the llama stack
+  title: Llama Stack specification
+  version: '0.1'
+jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
+openapi: 3.1.0
+paths:
+  /agentic/system/execute:
+    get:
+      parameters: []
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                type: string
+          description: OK
+      tags:
+      - AgenticSystem
+  /chat_completion:
+    post:
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/ChatCompletionRequest'
+        required: true
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                oneOf:
+                - $ref: '#/components/schemas/ChatCompletionResponse'
+                - $ref: '#/components/schemas/StreamedChatCompletionResponse'
+          description: Normal chat completion response. **OR** Streamed chat completion
+            response.
+      tags:
+      - Inference
+  /completion:
+    post:
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CompletionRequest'
+        required: true
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                oneOf:
+                - $ref: '#/components/schemas/CompletionResponse'
+                - $ref: '#/components/schemas/StreamedCompletionResponse'
+          description: Normal completion response. **OR** Streamed completion response.
+      tags:
+      - Inference
+security:
+- Default: []
+servers:
+- url: http://llama.meta.com
+tags:
+- name: AgenticSystem
+- name: Inference
+- description: 'Attachments are used to refer to external resources, such as images,
+    videos, audio, etc.
+
+
+    <SchemaDefinition schemaRef="#/components/schemas/Attachment" />'
+  name: Attachment
+- description: <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionRequest"
+    />
+  name: ChatCompletionRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/Message" />
+  name: Message
+- description: <SchemaDefinition schemaRef="#/components/schemas/URL" />
+  name: URL
+- description: 'Normal chat completion response.
+
+
+    <SchemaDefinition schemaRef="#/components/schemas/ChatCompletionResponse" />'
+  name: ChatCompletionResponse
+- description: 'Streamed chat completion response.
+
+
+    <SchemaDefinition schemaRef="#/components/schemas/StreamedChatCompletionResponse"
+    />'
+  name: StreamedChatCompletionResponse
+- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionRequest"
+    />
+  name: CompletionRequest
+- description: 'Normal completion response.
+
+
+    <SchemaDefinition schemaRef="#/components/schemas/CompletionResponse" />'
+  name: CompletionResponse
+- description: 'Streamed completion response.
+
+
+    <SchemaDefinition schemaRef="#/components/schemas/StreamedCompletionResponse"
+    />'
+  name: StreamedCompletionResponse
+x-tagGroups:
+- name: Operations
+  tags:
+  - AgenticSystem
+  - Inference
+- name: Types
+  tags:
+  - Attachment
+  - ChatCompletionRequest
+  - ChatCompletionResponse
+  - CompletionRequest
+  - CompletionResponse
+  - Message
+  - StreamedChatCompletionResponse
+  - StreamedCompletionResponse
+  - URL
--- a/source/run.sh
+++ b/source/run.sh
@ -0,0 +1,3 @@
+#!/bin/bash
+
+PYTHONPATH=. python3 defn.py