updates

2025-12-31 05:20:00 +00:00 · 2025-04-11 16:15:59 -07:00 · 2025-04-11 16:15:59 -07:00 · 73d927850e
commit 73d927850e
parent 0cfb2e2473
4 changed files with 43 additions and 316 deletions
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -69,35 +69,6 @@ paths:
            schema:
              $ref: '#/components/schemas/BatchChatCompletionRequest'
        required: true
-  /v1/batch-inference/chat-completion-inline:
-    post:
-      responses:
-        '200':
-          description: OK
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/BatchChatCompletionResponse'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - BatchInference (Coming Soon)
-      description: ''
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/BatchChatCompletionInlineRequest'
-        required: true
  /v1/inference/batch-completion:
    post:
      responses:
@@ -127,35 +98,6 @@ paths:
            schema:
              $ref: '#/components/schemas/BatchCompletionRequest'
        required: true
-  /v1/batch-inference/completion-inline:
-    post:
-      responses:
-        '200':
-          description: OK
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/BatchCompletionResponse'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - BatchInference (Coming Soon)
-      description: ''
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/BatchCompletionInlineRequest'
-        required: true
  /v1/post-training/job/cancel:
    post:
      responses:
@@ -206,7 +148,7 @@ paths:
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
-        - Inference
+        - BatchInference (Coming Soon)
      description: >-
        Generate a chat completion for the given messages using the specified model.
      parameters: []
@@ -241,7 +183,7 @@ paths:
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
-        - Inference
+        - BatchInference (Coming Soon)
      description: >-
        Generate a completion for the given content using the specified model.
      parameters: []
@@ -3346,42 +3288,6 @@ components:
        - logprobs_by_token
      title: TokenLogProbs
      description: Log probabilities for generated tokens.
-    BatchChatCompletionInlineRequest:
-      type: object
-      properties:
-        model:
-          type: string
-        messages_batch:
-          type: array
-          items:
-            type: array
-            items:
-              $ref: '#/components/schemas/Message'
-        sampling_params:
-          $ref: '#/components/schemas/SamplingParams'
-        tools:
-          type: array
-          items:
-            $ref: '#/components/schemas/ToolDefinition'
-        tool_config:
-          $ref: '#/components/schemas/ToolConfig'
-        response_format:
-          $ref: '#/components/schemas/ResponseFormat'
-        logprobs:
-          type: object
-          properties:
-            top_k:
-              type: integer
-              default: 0
-              description: >-
-                How many tokens (for each position) to return log probabilities for.
-          additionalProperties: false
-          title: LogProbConfig
-      additionalProperties: false
-      required:
-        - model
-        - messages_batch
-      title: BatchChatCompletionInlineRequest
    BatchCompletionRequest:
      type: object
      properties:
@@ -3450,34 +3356,6 @@ components:
        - stop_reason
      title: CompletionResponse
      description: Response from a completion request.
-    BatchCompletionInlineRequest:
-      type: object
-      properties:
-        model:
-          type: string
-        content_batch:
-          type: array
-          items:
-            $ref: '#/components/schemas/InterleavedContent'
-        sampling_params:
-          $ref: '#/components/schemas/SamplingParams'
-        response_format:
-          $ref: '#/components/schemas/ResponseFormat'
-        logprobs:
-          type: object
-          properties:
-            top_k:
-              type: integer
-              default: 0
-              description: >-
-                How many tokens (for each position) to return log probabilities for.
-          additionalProperties: false
-          title: LogProbConfig
-      additionalProperties: false
-      required:
-        - model
-        - content_batch
-      title: BatchCompletionInlineRequest
    CancelTrainingJobRequest:
      type: object
      properties:
@@ -7737,6 +7615,17 @@ tags:
    x-displayName: >-
      Agents API for creating and interacting with agentic systems.
  - name: BatchInference (Coming Soon)
+    description: >-
+      This is an asynchronous API. If the request is successful, the response will
+      be a job which can be polled for completion.
+
+
+      NOTE: This API is not yet implemented and is subject to change in concert with
+      other asynchronous APIs
+
+      including (post-training, evals, etc).
+    x-displayName: >-
+      Batch inference API for generating completions and chat completions.
  - name: Benchmarks
  - name: DatasetIO
  - name: Datasets