Support model resource updates and deletes (#452)

# What does this PR do?

* Changes the registry to store only one RoutableObject per identifier. Previously it stored a list, which is not needed.
* Adds implementations for model updates and deletes.
* Updates the routing table to handle updates correctly.

## Test Plan

```
❯ llama-stack-client models list
+------------------------+---------------+------------------------------------+------------+
| identifier             | provider_id   | provider_resource_id               | metadata   |
+========================+===============+====================================+============+
| Llama3.1-405B-Instruct | fireworks-0   | fireworks/llama-v3p1-405b-instruct | {}         |
+------------------------+---------------+------------------------------------+------------+
| Llama3.1-8B-Instruct   | fireworks-0   | fireworks/llama-v3p1-8b-instruct   | {}         |
+------------------------+---------------+------------------------------------+------------+
| Llama3.2-3B-Instruct   | fireworks-0   | fireworks/llama-v3p2-1b-instruct   | {}         |
+------------------------+---------------+------------------------------------+------------+

❯ llama-stack-client models register dineshyv-model --provider-model-id=fireworks/llama-v3p1-70b-instruct
Successfully registered model dineshyv-model

❯ llama-stack-client models list
+------------------------+---------------+------------------------------------+------------+
| identifier             | provider_id   | provider_resource_id               | metadata   |
+========================+===============+====================================+============+
| Llama3.1-405B-Instruct | fireworks-0   | fireworks/llama-v3p1-405b-instruct | {}         |
+------------------------+---------------+------------------------------------+------------+
| Llama3.1-8B-Instruct   | fireworks-0   | fireworks/llama-v3p1-8b-instruct   | {}         |
+------------------------+---------------+------------------------------------+------------+
| Llama3.2-3B-Instruct   | fireworks-0   | fireworks/llama-v3p2-1b-instruct   | {}         |
+------------------------+---------------+------------------------------------+------------+
| dineshyv-model         | fireworks-0   | fireworks/llama-v3p1-70b-instruct  | {}         |
+------------------------+---------------+------------------------------------+------------+

❯ llama-stack-client models update dineshyv-model --provider-model-id=fireworks/llama-v3p1-405b-instruct
Successfully updated model dineshyv-model

❯ llama-stack-client models list
+------------------------+---------------+------------------------------------+------------+
| identifier             | provider_id   | provider_resource_id               | metadata   |
+========================+===============+====================================+============+
| Llama3.1-405B-Instruct | fireworks-0   | fireworks/llama-v3p1-405b-instruct | {}         |
+------------------------+---------------+------------------------------------+------------+
| Llama3.1-8B-Instruct   | fireworks-0   | fireworks/llama-v3p1-8b-instruct   | {}         |
+------------------------+---------------+------------------------------------+------------+
| Llama3.2-3B-Instruct   | fireworks-0   | fireworks/llama-v3p2-1b-instruct   | {}         |
+------------------------+---------------+------------------------------------+------------+
| dineshyv-model         | fireworks-0   | fireworks/llama-v3p1-405b-instruct | {}         |
+------------------------+---------------+------------------------------------+------------+

llama-stack-client models delete dineshyv-model

❯ llama-stack-client models list
+------------------------+---------------+------------------------------------+------------+
| identifier             | provider_id   | provider_resource_id               | metadata   |
+========================+===============+====================================+============+
| Llama3.1-405B-Instruct | fireworks-0   | fireworks/llama-v3p1-405b-instruct | {}         |
+------------------------+---------------+------------------------------------+------------+
| Llama3.1-8B-Instruct   | fireworks-0   | fireworks/llama-v3p1-8b-instruct   | {}         |
+------------------------+---------------+------------------------------------+------------+
| Llama3.2-3B-Instruct   | fireworks-0   | fireworks/llama-v3p2-1b-instruct   | {}         |
+------------------------+---------------+------------------------------------+------------+
```

---------

Co-authored-by: Dinesh Yeduguru <dineshyv@fb.com>
2025-06-27 18:50:41 +00:00 · 2024-11-13 21:55:41 -08:00 · 2024-11-13 21:55:41 -08:00 · efe791bab7
commit efe791bab7
parent 4253cfcd7f
7 changed files with 447 additions and 129 deletions
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@ -21,7 +21,7 @@
    "info": {
        "title": "[DRAFT] Llama Stack Specification",
        "version": "0.0.1",
-        "description": "This is the specification of the llama stack that provides\n                a set of endpoints and their corresponding interfaces that are tailored to\n                best leverage Llama Models. The specification is still in draft and subject to change.\n                Generated at 2024-11-13 11:02:50.081698"
+        "description": "This is the specification of the llama stack that provides\n                a set of endpoints and their corresponding interfaces that are tailored to\n                best leverage Llama Models. The specification is still in draft and subject to change.\n                Generated at 2024-11-13 21:05:58.323310"
    },
    "servers": [
        {
@ -429,6 +429,39 @@
                }
            }
        },
+        "/models/delete": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK"
+                    }
+                },
+                "tags": [
+                    "Models"
+                ],
+                "parameters": [
+                    {
+                        "name": "X-LlamaStack-ProviderData",
+                        "in": "header",
+                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/DeleteModelRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
        "/inference/embeddings": {
            "post": {
                "responses": {
@ -2225,6 +2258,46 @@
                    "required": true
                }
            }
+        },
+        "/models/update": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Model"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "Models"
+                ],
+                "parameters": [
+                    {
+                        "name": "X-LlamaStack-ProviderData",
+                        "in": "header",
+                        "description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/UpdateModelRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
        }
    },
    "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
@ -4549,6 +4622,18 @@
                    "session_id"
                ]
            },
+            "DeleteModelRequest": {
+                "type": "object",
+                "properties": {
+                    "model_id": {
+                        "type": "string"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "model_id"
+                ]
+            },
            "EmbeddingsRequest": {
                "type": "object",
                "properties": {
@ -7826,6 +7911,49 @@
                    "synthetic_data"
                ],
                "title": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
+            },
+            "UpdateModelRequest": {
+                "type": "object",
+                "properties": {
+                    "model_id": {
+                        "type": "string"
+                    },
+                    "provider_model_id": {
+                        "type": "string"
+                    },
+                    "provider_id": {
+                        "type": "string"
+                    },
+                    "metadata": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "oneOf": [
+                                {
+                                    "type": "null"
+                                },
+                                {
+                                    "type": "boolean"
+                                },
+                                {
+                                    "type": "number"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "array"
+                                },
+                                {
+                                    "type": "object"
+                                }
+                            ]
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "model_id"
+                ]
            }
        },
        "responses": {}
@ -7837,23 +7965,20 @@
    ],
    "tags": [
        {
-            "name": "Inspect"
+            "name": "Agents"
+        },
+        {
+            "name": "DatasetIO"
        },
        {
            "name": "Models"
        },
-        {
-            "name": "Eval"
-        },
-        {
-            "name": "EvalTasks"
-        },
-        {
-            "name": "Scoring"
-        },
        {
            "name": "Inference"
        },
+        {
+            "name": "BatchInference"
+        },
        {
            "name": "Memory"
        },
@ -7861,35 +7986,38 @@
            "name": "Safety"
        },
        {
-            "name": "PostTraining"
+            "name": "Inspect"
        },
        {
-            "name": "ScoringFunctions"
+            "name": "EvalTasks"
        },
        {
-            "name": "Telemetry"
-        },
-        {
-            "name": "Shields"
-        },
-        {
-            "name": "BatchInference"
-        },
-        {
-            "name": "MemoryBanks"
+            "name": "Scoring"
        },
        {
            "name": "Datasets"
        },
+        {
+            "name": "PostTraining"
+        },
+        {
+            "name": "Eval"
+        },
+        {
+            "name": "Shields"
+        },
+        {
+            "name": "Telemetry"
+        },
+        {
+            "name": "ScoringFunctions"
+        },
+        {
+            "name": "MemoryBanks"
+        },
        {
            "name": "SyntheticDataGeneration"
        },
-        {
-            "name": "DatasetIO"
-        },
-        {
-            "name": "Agents"
-        },
        {
            "name": "BuiltinTool",
            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/BuiltinTool\" />"
@ -8142,6 +8270,10 @@
            "name": "DeleteAgentsSessionRequest",
            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/DeleteAgentsSessionRequest\" />"
        },
+        {
+            "name": "DeleteModelRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/DeleteModelRequest\" />"
+        },
        {
            "name": "EmbeddingsRequest",
            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/EmbeddingsRequest\" />"
@ -8453,6 +8585,10 @@
        {
            "name": "SyntheticDataGenerationResponse",
            "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/SyntheticDataGenerationResponse\" />"
+        },
+        {
+            "name": "UpdateModelRequest",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/UpdateModelRequest\" />"
        }
    ],
    "x-tagGroups": [
@ -8521,6 +8657,7 @@
                "Dataset",
                "DeleteAgentsRequest",
                "DeleteAgentsSessionRequest",
+                "DeleteModelRequest",
                "DoraFinetuningConfig",
                "EmbeddingsRequest",
                "EmbeddingsResponse",
@ -8618,6 +8755,7 @@
                "Turn",
                "URL",
                "UnstructuredLogEvent",
+                "UpdateModelRequest",
                "UserMessage",
                "VectorMemoryBank",
                "VectorMemoryBankParams",
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@ -867,6 +867,14 @@ components:
      - agent_id
      - session_id
      type: object
+    DeleteModelRequest:
+      additionalProperties: false
+      properties:
+        model_id:
+          type: string
+      required:
+      - model_id
+      type: object
    DoraFinetuningConfig:
      additionalProperties: false
      properties:
@ -3272,6 +3280,28 @@ components:
      - message
      - severity
      type: object
+    UpdateModelRequest:
+      additionalProperties: false
+      properties:
+        metadata:
+          additionalProperties:
+            oneOf:
+            - type: 'null'
+            - type: boolean
+            - type: number
+            - type: string
+            - type: array
+            - type: object
+          type: object
+        model_id:
+          type: string
+        provider_id:
+          type: string
+        provider_model_id:
+          type: string
+      required:
+      - model_id
+      type: object
    UserMessage:
      additionalProperties: false
      properties:
@ -3384,7 +3414,7 @@ info:
  description: "This is the specification of the llama stack that provides\n     \
    \           a set of endpoints and their corresponding interfaces that are tailored\
    \ to\n                best leverage Llama Models. The specification is still in\
-    \ draft and subject to change.\n                Generated at 2024-11-13 11:02:50.081698"
+    \ draft and subject to change.\n                Generated at 2024-11-13 21:05:58.323310"
  title: '[DRAFT] Llama Stack Specification'
  version: 0.0.1
 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@ -4186,6 +4216,27 @@ paths:
      responses: {}
      tags:
      - MemoryBanks
+  /models/delete:
+    post:
+      parameters:
+      - description: JSON-encoded provider data which will be made available to the
+          adapter servicing the API
+        in: header
+        name: X-LlamaStack-ProviderData
+        required: false
+        schema:
+          type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/DeleteModelRequest'
+        required: true
+      responses:
+        '200':
+          description: OK
+      tags:
+      - Models
  /models/get:
    get:
      parameters:
@ -4256,6 +4307,31 @@ paths:
          description: OK
      tags:
      - Models
+  /models/update:
+    post:
+      parameters:
+      - description: JSON-encoded provider data which will be made available to the
+          adapter servicing the API
+        in: header
+        name: X-LlamaStack-ProviderData
+        required: false
+        schema:
+          type: string
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/UpdateModelRequest'
+        required: true
+      responses:
+        '200':
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Model'
+          description: OK
+      tags:
+      - Models
  /post_training/job/artifacts:
    get:
      parameters:
@ -4748,24 +4824,24 @@ security:
 servers:
 - url: http://any-hosted-llama-stack.com
 tags:
- name: Inspect
+- name: Agents
+- name: DatasetIO
 - name: Models
- name: Eval
- name: EvalTasks
- name: Scoring
 - name: Inference
+- name: BatchInference
 - name: Memory
 - name: Safety
- name: PostTraining
- name: ScoringFunctions
- name: Telemetry
- name: Shields
- name: BatchInference
- name: MemoryBanks
+- name: Inspect
+- name: EvalTasks
+- name: Scoring
 - name: Datasets
+- name: PostTraining
+- name: Eval
+- name: Shields
+- name: Telemetry
+- name: ScoringFunctions
+- name: MemoryBanks
 - name: SyntheticDataGeneration
- name: DatasetIO
- name: Agents
 - description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
  name: BuiltinTool
 - description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
@ -4964,6 +5040,9 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/DeleteAgentsSessionRequest"
    />
  name: DeleteAgentsSessionRequest
+- description: <SchemaDefinition schemaRef="#/components/schemas/DeleteModelRequest"
+    />
+  name: DeleteModelRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/EmbeddingsRequest"
    />
  name: EmbeddingsRequest
@ -5194,6 +5273,9 @@ tags:
    <SchemaDefinition schemaRef="#/components/schemas/SyntheticDataGenerationResponse"
    />'
  name: SyntheticDataGenerationResponse
+- description: <SchemaDefinition schemaRef="#/components/schemas/UpdateModelRequest"
+    />
+  name: UpdateModelRequest
 x-tagGroups:
 - name: Operations
  tags:
@ -5256,6 +5338,7 @@ x-tagGroups:
  - Dataset
  - DeleteAgentsRequest
  - DeleteAgentsSessionRequest
+  - DeleteModelRequest
  - DoraFinetuningConfig
  - EmbeddingsRequest
  - EmbeddingsResponse
@ -5353,6 +5436,7 @@ x-tagGroups:
  - Turn
  - URL
  - UnstructuredLogEvent
+  - UpdateModelRequest
  - UserMessage
  - VectorMemoryBank
  - VectorMemoryBankParams