updated chat completion

2025-12-03 09:53:45 +00:00 · 2024-06-26 15:25:52 -07:00 · 2024-06-26 15:25:52 -07:00 · eb81ad1ffd
commit eb81ad1ffd
parent 749e50b288
2 changed files with 177 additions and 122 deletions
--- a/chat_completion.yaml
+++ b/chat_completion.yaml
@ -1,107 +1,13 @@
 openapi: 3.0.0
 info:
-  title: Single Inference API (chat_completion)
+  title: Chat Completion API
  version: 0.0.1
 components:
  schemas:
    Tool:
      type: object
      description: A tool that can be used by an agent to perform specific tasks.
      properties:
        name:
          type: string
          description: The name of the tool.
        description:
          type: string
          description: A brief description of what the tool does and how it should be used.
        parameters:
          type: array
          items:
            $ref: '#/components/schemas/ToolParameter'
        returnValue:
          $ref: '#/components/schemas/ToolReturnValue'
    ToolParameter:
      type: object
      properties:
        type:
          type: string
          enum: [string, int, float, list, bool]
          description: The data type of the parameter.
        itemType:
          type: string
          description: The type of items in the parameter if it is a list.
        description:
          type: string
          description: Details about what the parameter is used for and any constraints.
    ToolReturnValue:
      type: object
      properties:
        type:
          type: string
          enum: [string, int, float, list, bool]
          description: The data type of the return value.
        itemType:
          type: string
          description: The type of items in the return value if it is a list.
        description:
          type: string
          description: Details about the significance of the return value.
    Attachment:
      type: object
      properties:
        uri:
          type: string
          description: URI of the attachment.
        mime-type:
          type: string
          description: MIME type of the attachment.
    Message:
      type: object
      properties:
        role:
          type: string
          description: Role of the entity in the message.
        text:
          type: string
          description: Text content of the message.
        attachments:
          type: array
          items:
            $ref: '#/components/schemas/Attachment'
    Completion:
      type: object
      properties:
        id:
          type: string
          description: Unique identifier for the completion.
        role:
          type: string
          description: Role of the entity generating the completion.
        text:
          type: string
          description: Text content of the completion.
        attachments:
          type: array
          items:
            $ref: '#/components/schemas/Attachment'
        tokens:
          type: array
          items:
            type: integer
        logprobs:
          type: array
          items:
            type: number
        finish_reason:
          type: string
          description: Reason for completion termination.
        # TODO: Add `tool_choice` --
        # e.g. "auto": use the model's guess; how to force the use of a particular tool; how to disable built-in tools
 paths:
  /chat_completion/:
    post:
      summary: Submit a chat completion request
-      description: Submit a chat completion request
+      description: |
        This endpoint allows clients to submit a chat completion request.
      requestBody:
        required: true
        content:
@ -115,31 +21,13 @@ paths:
                    $ref: '#/components/schemas/Message'
                model:
                  type: string
-                  description: Model identifier
+                options:
-                logprobs:
+                  $ref: '#/components/schemas/Options'
                  type: boolean
                  description: Whether to include log probabilities in the output
                max_tokens:
                  type: integer
                  description: Maximum number of tokens to generate
                n_completions:
                  type: integer
                  description: Number of completions to generate
                temperature:
                  type: number
                  format: float
                  description: Temperature setting for the generation
                top_p:
                  type: number
                  format: float
                  description: Top p setting for the generation
                tools:
                  type: array
                  items:
                    $ref: '#/components/schemas/Tool'
      responses:
        '200':
-          description: Chat completion request processed successfully
+          description: Successful response
          content:
            application/json:
              schema:
@ -147,11 +35,106 @@ paths:
                properties:
                  id:
                    type: string
-                    description: Unique identifier for the completion request
+                  candidates:
                  completions:
                    type: array
                    items:
                      $ref: '#/components/schemas/Completion'
-                  model:
+                  model_called:
                    type: string
-                    description: Model used for generating completions
+                  usage:
                    $ref: '#/components/schemas/TokenUsage'
 components:
  schemas:
    Message:
      type: object
      properties:
        role:
          type: string
        text:
          type: string
        attachments:
          type: array
          items:
            $ref: '#/components/schemas/MediaAttachment'
        eot:
          type: boolean
          description: "End of transmission flag."
        tool_call:
          type: boolean
          description: "Indicates if it's a tool call - builtin, custom, or ipython."
        is_complete:
          type: boolean
          description: "For streaming, indicates if the message is complete."
        is_header_complete:
          type: boolean
          description: "For streaming, indicates if the header of the message is complete."
        metadata:
          type: object
          additionalProperties: true
          description: "Additional metadata as JSON."
    MediaAttachment:
      type: object
      properties:
        attachment_type:
          $ref: '#/components/schemas/MediaAttachmentType'
        data_type:
          $ref: '#/components/schemas/MediaAttachmentDataType'
        data:
          type: string
    MediaAttachmentType:
      type: string
      enum:
        - image
        - video
        - audio
        - text
      description: "Type of media attachment."
    MediaAttachmentDataType:
      type: string
      enum:
        - raw_bytes
        - filepath
        - uri
      description: "Data type of the media attachment."
    Completion:
      type: object
      properties:
        id:
          type: string
        message:
          $ref: '#/components/schemas/Message'
        tokens:
          type: array
          items:
            type: integer
        logprobs:
          type: array
          items:
            type: number
        finish_reason:
          type: string
          enum:
            - stop
            - safety
            - max-length
          description: "Reason for completion termination."
    Options:
      type: object
      properties:
        logprobs:
          type: boolean
        max_tokens:
          type: integer
        temperature:
          type: number
        top_p:
          type: number
    TokenUsage:
      type: object
      properties:
        input_tokens:
          type: integer
        output_tokens:
          type: integer
        total_tokens:
          type: integer
--- a/simple_view/chat_completion.yml
+++ b/simple_view/chat_completion.yml
@ -0,0 +1,72 @@
 # Simple bullet form for ease of reading and iteration
 # Use LLMs to translate this to an OpenAPI spec.
 == Schema ==
 Message:
  role: str
  text: str
  attachments: List[MediaAttachment]
  eot: bool
  tool_call: bool  # if it's a tool call - builtin or custom or ipython
  # for streaming
  is_complete: bool
  is_header_complete: bool
  metadata: json
 MediaAttachment:
  attachment_type: MediaAttachmentType
  data_type: MediaAttachmentDataType
  data: str
 MediaAttachmentType: # enum [image, video, audio, text(or file)]
 MediaAttachmentDataType:  # enum [raw_bytes, filepath, uri]
 Completion:
  id: str
  message: Message
  tokens: List[int]
  logprobs: List[float]
  finish_reason: str  # Enum (stop, safety, max-length, etc)
 Options:
  logprobs: bool
  max_tokens: int
  temperature: float
  top_p: float
  #TODO: Get more options from metagen
 TokenUsage:
  input_tokens: int
  output_tokens: int
  total_tokens: int
 == Callsite ==
 callsite:
  chat_completion/
 request_type:
  post
 description:
  submit a chat completion request
 request:
  messages: List[Message]
  model: str
  options: Options
  n_completions: int
  # TODO: how to handle tooling control, if any?
  # Add `tools` and `tool_choice` --
  # e.g. "auto": use the model's guess
  # how to force the use of a particular tool
  # how to disable built-in tools
  # tools: List[Tool]
  # tool_choice: Any
 response:
  id: str
  candidates: List[Completion]  # a list to account for when n_completions > 1
  model_called: str  # info on the model that produced this result
  usage: TokenUsage
 # TODO
 # callsite:
 #   chat_completion_stream/