full evals / full scoring flow

2025-12-12 20:12:33 +00:00 · 2024-10-15 10:17:45 -07:00 · 2024-10-15 10:17:45 -07:00 · be4f395032
commit be4f395032
parent cccd5be090
4 changed files with 88 additions and 109 deletions
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -21,7 +21,7 @@
    "info": {
        "title": "[DRAFT] Llama Stack Specification",
        "version": "0.0.1",
-        "description": "This is the specification of the llama stack that provides\n                a set of endpoints and their corresponding interfaces that are tailored to\n                best leverage Llama Models. The specification is still in draft and subject to change.\n                Generated at 2024-10-15 00:44:26.278642"
+        "description": "This is the specification of the llama stack that provides\n                a set of endpoints and their corresponding interfaces that are tailored to\n                best leverage Llama Models. The specification is still in draft and subject to change.\n                Generated at 2024-10-15 10:15:15.195382"
    },
    "servers": [
        {
@@ -5805,23 +5805,13 @@
            "RunEvalTaskRequest": {
                "type": "object",
                "properties": {
-                    "model": {
-                        "type": "string"
-                    },
-                    "task": {
-                        "type": "string"
-                    },
-                    "dataset": {
-                        "type": "string"
-                    },
                    "eval_task_config": {
                        "$ref": "#/components/schemas/EvaluateTaskConfig"
                    }
                },
                "additionalProperties": false,
                "required": [
-                    "model",
-                    "task"
+                    "eval_task_config"
                ]
            },
            "EvalResult": {
@@ -6238,49 +6228,49 @@
    ],
    "tags": [
        {
-            "name": "Models"
-        },
-        {
-            "name": "BatchInference"
-        },
-        {
-            "name": "Inspect"
-        },
-        {
-            "name": "Evals"
-        },
-        {
-            "name": "Safety"
-        },
-        {
-            "name": "Shields"
-        },
-        {
-            "name": "Telemetry"
-        },
-        {
-            "name": "Agents"
-        },
-        {
-            "name": "Memory"
-        },
-        {
-            "name": "SyntheticDataGeneration"
+            "name": "Inference"
        },
        {
            "name": "PostTraining"
        },
        {
-            "name": "Datasets"
+            "name": "Agents"
        },
        {
            "name": "MemoryBanks"
        },
+        {
+            "name": "Inspect"
+        },
+        {
+            "name": "Models"
+        },
+        {
+            "name": "Safety"
+        },
+        {
+            "name": "Evals"
+        },
+        {
+            "name": "BatchInference"
+        },
+        {
+            "name": "Shields"
+        },
+        {
+            "name": "SyntheticDataGeneration"
+        },
+        {
+            "name": "Telemetry"
+        },
        {
            "name": "RewardScoring"
        },
        {
-            "name": "Inference"
+            "name": "Datasets"
+        },
+        {
+            "name": "Memory"
        },
        {
            "name": "BuiltinTool",
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -1785,17 +1785,10 @@ components:
    RunEvalTaskRequest:
      additionalProperties: false
      properties:
-        dataset:
-          type: string
        eval_task_config:
          $ref: '#/components/schemas/EvaluateTaskConfig'
-        model:
-          type: string
-        task:
-          type: string
      required:
-      - model
-      - task
+      - eval_task_config
      type: object
    RunScorerRequest:
      additionalProperties: false
@@ -2686,7 +2679,7 @@ info:
  description: "This is the specification of the llama stack that provides\n     \
    \           a set of endpoints and their corresponding interfaces that are tailored\
    \ to\n                best leverage Llama Models. The specification is still in\
-    \ draft and subject to change.\n                Generated at 2024-10-15 00:44:26.278642"
+    \ draft and subject to change.\n                Generated at 2024-10-15 10:15:15.195382"
  title: '[DRAFT] Llama Stack Specification'
  version: 0.0.1
 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -3787,21 +3780,21 @@ security:
 servers:
 - url: http://any-hosted-llama-stack.com
 tags:
-- name: Models
-- name: BatchInference
-- name: Inspect
-- name: Evals
-- name: Safety
-- name: Shields
-- name: Telemetry
-- name: Agents
-- name: Memory
-- name: SyntheticDataGeneration
-- name: PostTraining
-- name: Datasets
-- name: MemoryBanks
-- name: RewardScoring
 - name: Inference
+- name: PostTraining
+- name: Agents
+- name: MemoryBanks
+- name: Inspect
+- name: Models
+- name: Safety
+- name: Evals
+- name: BatchInference
+- name: Shields
+- name: SyntheticDataGeneration
+- name: Telemetry
+- name: RewardScoring
+- name: Datasets
+- name: Memory
 - description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
  name: BuiltinTool
 - description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"