Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-06-29 03:14:19 +00:00
more work on agent definitions
This commit is contained in:
parent 6e4586ba7a
commit 97f9b18aca

8 changed files with 1079 additions and 695 deletions
source/api_definitions.py (new file, 197 additions)

@@ -0,0 +1,197 @@
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, Protocol, Set, Union

import yaml

from model_types import (
    BuiltinTool,
    Content,
    InstructModel,
    Message,
    PretrainedModel,
    SamplingParams,
    StopReason,
    ToolCall,
    ToolDefinition,
    ToolResponse,
)
from agentic_system_types import (
    AgenticSystemTurn,
)

from pyopenapi import Info, Options, Server, Specification, webmethod
from strong_typing.schema import json_schema_type

@json_schema_type
@dataclass
class CompletionRequest:
    content: Content
    model: PretrainedModel
    sampling_params: SamplingParams = field(default_factory=SamplingParams)
    max_tokens: int = 0
    stream: bool = False
    logprobs: bool = False


@json_schema_type
@dataclass
class CompletionResponse:
    """Normal completion response."""

    content: Content
    stop_reason: Optional[StopReason] = None
    logprobs: Optional[Dict[str, Any]] = None


@json_schema_type
@dataclass
class CompletionResponseStreamChunk:
    """Streamed completion response."""

    text_delta: str
    stop_reason: Optional[StopReason] = None
    logprobs: Optional[Dict[str, Any]] = None


@json_schema_type
@dataclass
class ChatCompletionRequest:
    message: Message
    model: InstructModel
    message_history: Optional[List[Message]] = None
    sampling_params: SamplingParams = field(default_factory=SamplingParams)

    # zero-shot tool definitions as input to the model
    available_tools: List[Union[BuiltinTool, ToolDefinition]] = field(
        default_factory=list
    )

    max_tokens: int = 0
    stream: bool = False
    logprobs: bool = False

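# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original commit: constructing a
# streaming chat request that offers the model one zero-shot tool. The
# user_message, instruct_model, and weather_tool values are hypothetical,
# since Message, InstructModel, and ToolDefinition are defined in
# model_types, which is not shown here.
#
#   request = ChatCompletionRequest(
#       message=user_message,
#       model=instruct_model,
#       available_tools=[weather_tool],
#       stream=True,
#   )
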
@json_schema_type
@dataclass
class ChatCompletionResponse:
    """Normal chat completion response."""

    content: Content

    # note: multiple tool calls can be generated in a single response
    tool_calls: List[ToolCall] = field(default_factory=list)

    stop_reason: Optional[StopReason] = None
    logprobs: Optional[Dict[str, Any]] = None


@json_schema_type
@dataclass
class ChatCompletionResponseStreamChunk:
    """Streamed chat completion response. The actual response is a series of such objects."""

    text_delta: str
    stop_reason: Optional[StopReason] = None
    tool_call: Optional[ToolCall] = None

class Inference(Protocol):

    def post_completion(
        self,
        request: CompletionRequest,
    ) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ...

    def post_chat_completion(
        self,
        request: ChatCompletionRequest,
    ) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...

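# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original commit: a trivial concrete
# provider showing how the Inference protocol is satisfied. Because
# Inference is a typing.Protocol, EchoInference conforms structurally and
# needs no explicit base class. The .content attribute on Message is an
# assumption about model_types, which is not shown here.
class EchoInference:
    def post_completion(
        self,
        request: CompletionRequest,
    ) -> Union[CompletionResponse, CompletionResponseStreamChunk]:
        # Echo the prompt straight back as a non-streaming completion.
        return CompletionResponse(content=request.content)

    def post_chat_completion(
        self,
        request: ChatCompletionRequest,
    ) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]:
        # Echo the incoming message; no tool calls are produced.
        return ChatCompletionResponse(content=request.message.content)
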
@dataclass
class AgenticSystemCreateRequest:
    instructions: str
    model: InstructModel

    # zero-shot tool definitions as input to the model
    available_tools: List[Union[BuiltinTool, ToolDefinition]] = field(
        default_factory=list
    )

    executable_tools: Set[str] = field(default_factory=set)


@json_schema_type
@dataclass
class AgenticSystemCreateResponse:
    agent_id: str


@json_schema_type
@dataclass
class AgenticSystemExecuteRequest:
    agent_id: str
    messages: List[Message]
    turn_history: Optional[List[AgenticSystemTurn]] = None
    stream: bool = False

@json_schema_type
@dataclass
class AgenticSystemExecuteResponse:
    """Non-streaming response from the agentic system."""

    turn: AgenticSystemTurn


@json_schema_type
@dataclass
class AgenticSystemExecuteResponseStreamChunk:
    """Streamed agent execution response."""

    # TODO: make things streamable
    turn: AgenticSystemTurn

    stop_reason: Optional[StopReason] = None

class AgenticSystem(Protocol):

    @webmethod(route="/agentic_system/create")
    def create_agentic_system(
        self,
        request: AgenticSystemCreateRequest,
    ) -> AgenticSystemCreateResponse: ...

    @webmethod(route="/agentic_system/execute")
    def create_agentic_system_execute(
        self,
        request: AgenticSystemExecuteRequest,
    ) -> Union[
        AgenticSystemExecuteResponse, AgenticSystemExecuteResponseStreamChunk
    ]: ...

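# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original commit: the intended call
# sequence against some AgenticSystem implementation `system`. The
# instruct_model and user_message values are hypothetical, since
# InstructModel and Message are defined in model_types, not shown here.
#
#   created = system.create_agentic_system(
#       AgenticSystemCreateRequest(
#           instructions="You are a helpful assistant.",
#           model=instruct_model,
#       )
#   )
#   result = system.create_agentic_system_execute(
#       AgenticSystemExecuteRequest(
#           agent_id=created.agent_id,
#           messages=[user_message],
#       )
#   )
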
class LlamaStackEndpoints(Inference, AgenticSystem): ...


if __name__ == "__main__":
    print("Converting the spec to YAML (openapi.yaml) and HTML (openapi.html)")
    spec = Specification(
        LlamaStackEndpoints,
        Options(
            server=Server(url="http://llama.meta.com"),
            info=Info(
                title="Llama Stack specification",
                version="0.1",
                description="This is the llama stack",
            ),
        ),
    )

    with open("openapi.yaml", "w", encoding="utf-8") as fp:
        yaml.dump(spec.get_json(), fp, allow_unicode=True)

    with open("openapi.html", "w") as fp:
        spec.write_html(fp, pretty_print=True)
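Running the module (for example, python source/api_definitions.py from the repository root) writes openapi.yaml and openapi.html. A minimal sketch, not part of the commit, of inspecting the generated YAML; it assumes only PyYAML, which the module itself already uses:

    import yaml

    with open("openapi.yaml", encoding="utf-8") as fp:
        spec = yaml.safe_load(fp)

    # The routes registered above via @webmethod:
    # /agentic_system/create and /agentic_system/execute
    print(sorted(spec.get("paths", {})))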