Make all methods async def again; add completion() for meta-reference (#270)

PR #201 made several changes while trying to get the stream=False branches of the inference and agents APIs working. As part of this, it made one slightly gratuitous change: making chat_completion() and brethren "def" instead of "async def". The rationale was that this allowed callers within llama-stack to write: `async for chunk in api.chat_completion(params)`. However, it caused unnecessary confusion for several people. Given that clients (e.g., llama-stack-apps) use the SDK methods anyway (which are completely isolated), this choice was not ideal. Let's revert it so the call now looks like: `async for chunk in await api.chat_completion(params)`. Bonus: Added a completion() implementation for the meta-reference provider. Technically this should have been another PR :)
2025-06-27 18:50:41 +00:00 · 2024-10-18 20:50:59 -07:00 · 2024-10-18 20:50:59 -07:00 · 2089427d60
commit 2089427d60
parent 95a96afe34
23 changed files with 330 additions and 213 deletions
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@ -21,7 +21,7 @@
    "info": {
        "title": "[DRAFT] Llama Stack Specification",
        "version": "0.0.1",
-        "description": "This is the specification of the llama stack that provides\n                a set of endpoints and their corresponding interfaces that are tailored to\n                best leverage Llama Models. The specification is still in draft and subject to change.\n                Generated at 2024-10-10 15:29:56.831109"
+        "description": "This is the specification of the llama stack that provides\n                a set of endpoints and their corresponding interfaces that are tailored to\n                best leverage Llama Models. The specification is still in draft and subject to change.\n                Generated at 2024-10-18 20:48:17.730988"
    },
    "servers": [
        {
@ -2830,8 +2830,11 @@
            "CompletionResponse": {
                "type": "object",
                "properties": {
-                    "completion_message": {
-                        "$ref": "#/components/schemas/CompletionMessage"
+                    "content": {
+                        "type": "string"
+                    },
+                    "stop_reason": {
+                        "$ref": "#/components/schemas/StopReason"
                    },
                    "logprobs": {
                        "type": "array",
@ -2842,7 +2845,8 @@
                },
                "additionalProperties": false,
                "required": [
-                    "completion_message"
+                    "content",
+                    "stop_reason"
                ],
                "title": "Completion response."
            },
@ -6075,49 +6079,49 @@
    ],
    "tags": [
        {
-            "name": "Evaluations"
-        },
-        {
-            "name": "Inspect"
+            "name": "Models"
        },
        {
            "name": "RewardScoring"
        },
        {
-            "name": "Datasets"
-        },
-        {
-            "name": "Models"
-        },
-        {
-            "name": "Telemetry"
-        },
-        {
-            "name": "PostTraining"
-        },
-        {
-            "name": "SyntheticDataGeneration"
-        },
-        {
-            "name": "BatchInference"
-        },
-        {
-            "name": "Inference"
-        },
-        {
-            "name": "Agents"
-        },
-        {
-            "name": "Memory"
-        },
-        {
-            "name": "Safety"
+            "name": "MemoryBanks"
        },
        {
            "name": "Shields"
        },
        {
-            "name": "MemoryBanks"
+            "name": "SyntheticDataGeneration"
+        },
+        {
+            "name": "Inference"
+        },
+        {
+            "name": "Inspect"
+        },
+        {
+            "name": "BatchInference"
+        },
+        {
+            "name": "Memory"
+        },
+        {
+            "name": "Datasets"
+        },
+        {
+            "name": "Agents"
+        },
+        {
+            "name": "PostTraining"
+        },
+        {
+            "name": "Telemetry"
+        },
+        {
+            "name": "Safety"
+        },
+        {
+            "name": "Evaluations"
        },
        {
            "name": "BuiltinTool",
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@ -501,14 +501,17 @@ components:
    CompletionResponse:
      additionalProperties: false
      properties:
-        completion_message:
-          $ref: '#/components/schemas/CompletionMessage'
+        content:
+          type: string
        logprobs:
          items:
            $ref: '#/components/schemas/TokenLogProbs'
          type: array
+        stop_reason:
+          $ref: '#/components/schemas/StopReason'
      required:
-      - completion_message
+      - content
+      - stop_reason
      title: Completion response.
      type: object
    CompletionResponseStreamChunk:
@ -2507,7 +2510,7 @@ info:
  description: "This is the specification of the llama stack that provides\n     \
    \           a set of endpoints and their corresponding interfaces that are tailored\
    \ to\n                best leverage Llama Models. The specification is still in\
-    \ draft and subject to change.\n                Generated at 2024-10-10 15:29:56.831109"
+    \ draft and subject to change.\n                Generated at 2024-10-18 20:48:17.730988"
  title: '[DRAFT] Llama Stack Specification'
  version: 0.0.1
 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@ -3712,21 +3715,21 @@ security:
 servers:
 - url: http://any-hosted-llama-stack.com
 tags:
- name: Evaluations
- name: Inspect
- name: RewardScoring
- name: Datasets
 - name: Models
- name: Telemetry
- name: PostTraining
- name: SyntheticDataGeneration
- name: BatchInference
- name: Inference
- name: Agents
- name: Memory
- name: Safety
- name: Shields
+- name: RewardScoring
 - name: MemoryBanks
+- name: Shields
+- name: SyntheticDataGeneration
+- name: Inference
+- name: Inspect
+- name: BatchInference
+- name: Memory
+- name: Datasets
+- name: Agents
+- name: PostTraining
+- name: Telemetry
+- name: Safety
+- name: Evaluations
 - description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
  name: BuiltinTool
 - description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"