diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 000000000..e3d9f3f5c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+json-strong-typing
+python-openapi
diff --git a/source/defn.py b/source/defn.py
new file mode 100644
index 000000000..48289fd0e
--- /dev/null
+++ b/source/defn.py
@@ -0,0 +1,226 @@
+from dataclasses import dataclass, field
+from enum import Enum
+from typing import Any, Dict, List, Optional, Protocol, Union
+
+import yaml
+
+from pyopenapi import Info, Options, Server, Specification, webmethod
+from strong_typing.schema import json_schema_type
+
+
+@json_schema_type(
+    schema={"type": "string", "format": "uri", "pattern": "^(https?://|file://|data:)"}
+)
+@dataclass
+class URL:
+    url: str
+
+    def __str__(self) -> str:
+        return self.url
+
+
+@json_schema_type
+@dataclass
+class Attachment:
+    """
+    Attachments are used to refer to external resources, such as images, videos, audio, etc.
+
+    """
+
+    url: URL
+    mime_type: str
+
+
+Content = Union[
+    str,
+    Attachment,
+    List[Union[str, Attachment]],
+]
+
+
+class Role(Enum):
+    system = "system"
+    user = "user"
+    assistant = "assistant"
+    tool = "tool"
+
+
+class StopReason(Enum):
+    """
+    Stop reasons are used to indicate why the model stopped generating text.
+    """
+
+    not_stopped = "not_stopped"
+    finished_ok = "finished_ok"
+    max_tokens = "max_tokens"
+
+
+@dataclass
+class ToolCall:
+    """
+    A tool call is a request to a tool.
+    """
+
+    tool_name: str
+    arguments: Dict[str, Any]
+
+
+@dataclass
+class ToolResponse:
+    tool_name: str
+    response: str
+
+
+@dataclass
+class ToolDefinition:
+    tool_name: str
+    parameters: Dict[str, Any]
+
+
+@json_schema_type
+@dataclass
+class Message:
+    role: Role
+
+    # input to the model or output from the model
+    content: Content
+
+    # zero-shot tool definitions as input to the model
+    tool_definitions: List[ToolDefinition] = field(default_factory=list)
+
+    # output from the model
+    tool_calls: List[ToolCall] = field(default_factory=list)
+
+    # input to the model
+    tool_responses: List[ToolResponse] = field(default_factory=list)
+
+
+@json_schema_type
+@dataclass
+class CompletionResponse:
+    """Normal completion response."""
+    content: Content
+    stop_reason: StopReason
+    logprobs: Optional[Dict[str, Any]] = None
+
+
+@json_schema_type
+@dataclass
+class StreamedCompletionResponse:
+    """streamed completion response."""
+    text_delta: str
+    stop_reason: StopReason
+    logprobs: Optional[Dict[str, Any]] = None
+
+
+@json_schema_type
+@dataclass
+class ChatCompletionResponse:
+    """Normal chat completion response."""
+
+    content: Content
+    stop_reason: StopReason
+    tool_calls: List[ToolCall] = field(default_factory=list)
+    logprobs: Optional[Dict[str, Any]] = None
+
+
+@json_schema_type
+@dataclass
+class StreamedChatCompletionResponse:
+    """Streamed chat completion response."""
+
+    text_delta: str
+    stop_reason: StopReason
+    tool_call: Optional[ToolCall] = None
+
+
+@dataclass
+class SamplingParams:
+    temperature: float = 0.0
+    strategy: str = "greedy"
+    top_p: float = 0.95
+    top_k: int = 0
+
+
+class PretrainedModel(Enum):
+    llama3_8b = "llama3_8b"
+    llama3_70b = "llama3_70b"
+
+
+class InstructModel(Enum):
+    llama3_8b_chat = "llama3_8b_chat"
+    llama3_70b_chat = "llama3_70b_chat"
+
+
+@json_schema_type
+@dataclass
+class CompletionRequest:
+    content: Content
+    model: PretrainedModel = PretrainedModel.llama3_8b
+    sampling_params: SamplingParams = field(default_factory=SamplingParams)  # fresh instance per request, not shared
+    max_tokens: int = 0
+    stream: bool = False
+    logprobs: bool = False
+
+
+@json_schema_type
+@dataclass
+class ChatCompletionRequest:
+    message: Message
+    message_history: Optional[List[Message]] = None
+    model: InstructModel = InstructModel.llama3_8b_chat
+    sampling_params: SamplingParams = field(default_factory=SamplingParams)
+    max_tokens: int = 0
+    stream: bool = False
+    logprobs: bool = False
+
+
+class Inference(Protocol):
+
+    def post_completion(
+        self,
+        request: CompletionRequest,
+    ) -> Union[CompletionResponse, StreamedCompletionResponse]: ...
+
+    def post_chat_completion(
+        self,
+        request: ChatCompletionRequest,
+    ) -> Union[ChatCompletionResponse, StreamedChatCompletionResponse]: ...
+
+
+
+@json_schema_type
+@dataclass
+class AgenticSystemExecuteRequest:
+    message: Message
+    message_history: Optional[List[Message]] = None
+    model: InstructModel = InstructModel.llama3_8b_chat
+    sampling_params: SamplingParams = field(default_factory=SamplingParams)
+
+class AgenticSystem(Protocol):
+
+    @webmethod(route="/agentic/system/execute")
+    def create_agentic_system_execute(self,) -> str: ...
+
+
+class Endpoint(Inference, AgenticSystem): ...
+
+
+if __name__ == "__main__":
+    print("Converting the spec to YAML (openapi.yaml) and HTML (openapi.html)")
+    spec = Specification(
+        Endpoint,
+        Options(
+            server=Server(url="http://llama.meta.com"),
+            info=Info(
+                title="Llama Stack specification",
+                version="0.1",
+                description="This is the llama stack",
+            ),
+        ),
+    )
+    with open("openapi.yaml", "w", encoding="utf-8") as fp:
+        yaml.dump(spec.get_json(), fp, allow_unicode=True)
+
+    with open("openapi.html", "w") as fp:
+        spec.write_html(fp, pretty_print=True)
diff --git a/source/openapi.html b/source/openapi.html
new file mode 100644
index 000000000..c4290cb7f
--- /dev/null
+++ b/source/openapi.html
@@ -0,0 +1,842 @@
+
+
+
+
+
+
+ OpenAPI specification
+
+
+
+
+
+
+
+ + + diff --git a/source/openapi.yaml b/source/openapi.yaml new file mode 100644 index 000000000..4ec2c1e31 --- /dev/null +++ b/source/openapi.yaml @@ -0,0 +1,492 @@ +components: + responses: {} + schemas: + Attachment: + additionalProperties: false + properties: + mime_type: + type: string + url: + $ref: '#/components/schemas/URL' + required: + - url + - mime_type + title: Attachments are used to refer to external resources, such as images, + videos, audio, etc. + type: object + ChatCompletionRequest: + additionalProperties: false + properties: + logprobs: + default: false + type: boolean + max_tokens: + default: 0 + type: integer + message: + $ref: '#/components/schemas/Message' + message_history: + items: + $ref: '#/components/schemas/Message' + type: array + model: + default: llama3_8b_chat + enum: + - llama3_8b_chat + - llama3_70b_chat + type: string + sampling_params: + additionalProperties: false + properties: + strategy: + default: greedy + type: string + temperature: + default: 0.0 + type: number + top_k: + default: 0 + type: integer + top_p: + default: 0.95 + type: number + required: + - temperature + - strategy + - top_p + - top_k + type: object + stream: + default: false + type: boolean + required: + - message + - message_history + - model + - sampling_params + - max_tokens + - stream + - logprobs + type: object + ChatCompletionResponse: + additionalProperties: false + properties: + content: + oneOf: + - type: string + - $ref: '#/components/schemas/Attachment' + - items: + oneOf: + - type: string + - $ref: '#/components/schemas/Attachment' + type: array + logprobs: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + stop_reason: + enum: + - not_stopped + - finished_ok + - max_tokens + title: Stop reasons are used to indicate why the model stopped generating + text. 
+ type: string + tool_calls: + items: + additionalProperties: false + properties: + arguments: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + tool_name: + type: string + required: + - tool_name + - arguments + title: A tool call is a request to a tool. + type: object + type: array + required: + - content + - stop_reason + - tool_calls + title: Normal chat completion response. + type: object + CompletionRequest: + additionalProperties: false + properties: + content: + oneOf: + - type: string + - $ref: '#/components/schemas/Attachment' + - items: + oneOf: + - type: string + - $ref: '#/components/schemas/Attachment' + type: array + logprobs: + default: false + type: boolean + max_tokens: + default: 0 + type: integer + model: + default: llama3_8b + enum: + - llama3_8b + - llama3_70b + type: string + sampling_params: + additionalProperties: false + properties: + strategy: + default: greedy + type: string + temperature: + default: 0.0 + type: number + top_k: + default: 0 + type: integer + top_p: + default: 0.95 + type: number + required: + - temperature + - strategy + - top_p + - top_k + type: object + stream: + default: false + type: boolean + required: + - content + - model + - sampling_params + - max_tokens + - stream + - logprobs + type: object + CompletionResponse: + additionalProperties: false + properties: + content: + oneOf: + - type: string + - $ref: '#/components/schemas/Attachment' + - items: + oneOf: + - type: string + - $ref: '#/components/schemas/Attachment' + type: array + logprobs: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + stop_reason: + enum: + - not_stopped + - finished_ok + - max_tokens + title: Stop reasons are used to indicate why the model stopped generating + text. 
+ type: string + required: + - content + - stop_reason + title: Normal completion response. + type: object + Message: + additionalProperties: false + properties: + content: + oneOf: + - type: string + - $ref: '#/components/schemas/Attachment' + - items: + oneOf: + - type: string + - $ref: '#/components/schemas/Attachment' + type: array + role: + enum: + - system + - user + - assistant + - tool + type: string + tool_calls: + items: + additionalProperties: false + properties: + arguments: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + tool_name: + type: string + required: + - tool_name + - arguments + title: A tool call is a request to a tool. + type: object + type: array + tool_definitions: + items: + additionalProperties: false + properties: + parameters: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + tool_name: + type: string + required: + - tool_name + - parameters + type: object + type: array + tool_responses: + items: + additionalProperties: false + properties: + response: + type: string + tool_name: + type: string + required: + - tool_name + - response + type: object + type: array + required: + - role + - content + - tool_definitions + - tool_calls + - tool_responses + type: object + StreamedChatCompletionResponse: + additionalProperties: false + properties: + stop_reason: + enum: + - not_stopped + - finished_ok + - max_tokens + title: Stop reasons are used to indicate why the model stopped generating + text. 
+ type: string + text_delta: + type: string + tool_call: + additionalProperties: false + properties: + arguments: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + tool_name: + type: string + required: + - tool_name + - arguments + title: A tool call is a request to a tool. + type: object + required: + - text_delta + - stop_reason + title: Streamed chat completion response. + type: object + StreamedCompletionResponse: + additionalProperties: false + properties: + logprobs: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + stop_reason: + enum: + - not_stopped + - finished_ok + - max_tokens + title: Stop reasons are used to indicate why the model stopped generating + text. + type: string + text_delta: + type: string + required: + - text_delta + - stop_reason + title: streamed completion response. + type: object + URL: + format: uri + pattern: ^(https?://|file://|data:) + type: string +info: + description: This is the llama stack + title: Llama Stack specification + version: '0.1' +jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema +openapi: 3.1.0 +paths: + /agentic/system/execute: + get: + parameters: [] + responses: + '200': + content: + application/json: + schema: + type: string + description: OK + tags: + - AgenticSystem + /chat_completion: + post: + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/ChatCompletionRequest' + required: true + responses: + '200': + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/ChatCompletionResponse' + - $ref: '#/components/schemas/StreamedChatCompletionResponse' + description: Normal chat completion response. **OR** Streamed chat completion + response. 
+ tags: + - Inference + /completion: + post: + parameters: [] + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/CompletionRequest' + required: true + responses: + '200': + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/CompletionResponse' + - $ref: '#/components/schemas/StreamedCompletionResponse' + description: Normal completion response. **OR** streamed completion response. + tags: + - Inference +security: +- Default: [] +servers: +- url: http://llama.meta.com +tags: +- name: AgenticSystem +- name: Inference +- description: 'Attachments are used to refer to external resources, such as images, + videos, audio, etc. + + + ' + name: Attachment +- description: + name: ChatCompletionRequest +- description: + name: Message +- description: + name: URL +- description: 'Normal chat completion response. + + + ' + name: ChatCompletionResponse +- description: 'Streamed chat completion response. + + + ' + name: StreamedChatCompletionResponse +- description: + name: CompletionRequest +- description: 'Normal completion response. + + + ' + name: CompletionResponse +- description: 'streamed completion response. + + + ' + name: StreamedCompletionResponse +x-tagGroups: +- name: Operations + tags: + - AgenticSystem + - Inference +- name: Types + tags: + - Attachment + - ChatCompletionRequest + - ChatCompletionResponse + - CompletionRequest + - CompletionResponse + - Message + - StreamedChatCompletionResponse + - StreamedCompletionResponse + - URL diff --git a/source/run.sh b/source/run.sh new file mode 100644 index 000000000..980d979d2 --- /dev/null +++ b/source/run.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +PYTHONPATH=. python3 defn.py