add model update and delete

This commit is contained in:
Dinesh Yeduguru 2024-11-13 15:30:17 -08:00
parent 4253cfcd7f
commit 4b1b196251
6 changed files with 356 additions and 49 deletions

View file

@ -21,7 +21,7 @@
"info": {
"title": "[DRAFT] Llama Stack Specification",
"version": "0.0.1",
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-11-13 11:02:50.081698"
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-11-13 15:29:27.077633"
},
"servers": [
{
@ -429,6 +429,39 @@
}
}
},
"/models/delete": {
"post": {
"responses": {
"200": {
"description": "OK"
}
},
"tags": [
"Models"
],
"parameters": [
{
"name": "X-LlamaStack-ProviderData",
"in": "header",
"description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
"required": false,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/DeleteModelRequest"
}
}
},
"required": true
}
}
},
"/inference/embeddings": {
"post": {
"responses": {
@ -2225,6 +2258,46 @@
"required": true
}
}
},
"/models/update": {
"post": {
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Model"
}
}
}
}
},
"tags": [
"Models"
],
"parameters": [
{
"name": "X-LlamaStack-ProviderData",
"in": "header",
"description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
"required": false,
"schema": {
"type": "string"
}
}
],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/UpdateModelRequest"
}
}
},
"required": true
}
}
}
},
"jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
@ -4549,6 +4622,18 @@
"session_id"
]
},
"DeleteModelRequest": {
"type": "object",
"properties": {
"model_id": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"model_id"
]
},
"EmbeddingsRequest": {
"type": "object",
"properties": {
@ -7826,6 +7911,49 @@
"synthetic_data"
],
"title": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
},
"UpdateModelRequest": {
"type": "object",
"properties": {
"model_id": {
"type": "string"
},
"provider_model_id": {
"type": "string"
},
"provider_id": {
"type": "string"
},
"metadata": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"additionalProperties": false,
"required": [
"model_id"
]
}
},
"responses": {}
@ -7837,53 +7965,53 @@
],
"tags": [
{
"name": "Inspect"
},
{
"name": "Models"
},
{
"name": "Eval"
},
{
"name": "EvalTasks"
},
{
"name": "Scoring"
"name": "Datasets"
},
{
"name": "Inference"
},
{
"name": "Memory"
},
{
"name": "Safety"
},
{
"name": "PostTraining"
},
{
"name": "ScoringFunctions"
},
{
"name": "Telemetry"
},
{
"name": "Shields"
},
{
"name": "BatchInference"
},
{
"name": "MemoryBanks"
},
{
"name": "Datasets"
"name": "Telemetry"
},
{
"name": "PostTraining"
},
{
"name": "Models"
},
{
"name": "Inspect"
},
{
"name": "Safety"
},
{
"name": "Scoring"
},
{
"name": "BatchInference"
},
{
"name": "Eval"
},
{
"name": "SyntheticDataGeneration"
},
{
"name": "EvalTasks"
},
{
"name": "Shields"
},
{
"name": "Memory"
},
{
"name": "DatasetIO"
},
@ -8142,6 +8270,10 @@
"name": "DeleteAgentsSessionRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/DeleteAgentsSessionRequest\" />"
},
{
"name": "DeleteModelRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/DeleteModelRequest\" />"
},
{
"name": "EmbeddingsRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/EmbeddingsRequest\" />"
@ -8453,6 +8585,10 @@
{
"name": "SyntheticDataGenerationResponse",
"description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/SyntheticDataGenerationResponse\" />"
},
{
"name": "UpdateModelRequest",
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/UpdateModelRequest\" />"
}
],
"x-tagGroups": [
@ -8521,6 +8657,7 @@
"Dataset",
"DeleteAgentsRequest",
"DeleteAgentsSessionRequest",
"DeleteModelRequest",
"DoraFinetuningConfig",
"EmbeddingsRequest",
"EmbeddingsResponse",
@ -8618,6 +8755,7 @@
"Turn",
"URL",
"UnstructuredLogEvent",
"UpdateModelRequest",
"UserMessage",
"VectorMemoryBank",
"VectorMemoryBankParams",

View file

@ -867,6 +867,14 @@ components:
- agent_id
- session_id
type: object
DeleteModelRequest:
additionalProperties: false
properties:
model_id:
type: string
required:
- model_id
type: object
DoraFinetuningConfig:
additionalProperties: false
properties:
@ -3272,6 +3280,28 @@ components:
- message
- severity
type: object
UpdateModelRequest:
additionalProperties: false
properties:
metadata:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
model_id:
type: string
provider_id:
type: string
provider_model_id:
type: string
required:
- model_id
type: object
UserMessage:
additionalProperties: false
properties:
@ -3384,7 +3414,7 @@ info:
description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\
\ draft and subject to change.\n Generated at 2024-11-13 11:02:50.081698"
\ draft and subject to change.\n Generated at 2024-11-13 15:29:27.077633"
title: '[DRAFT] Llama Stack Specification'
version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@ -4186,6 +4216,27 @@ paths:
responses: {}
tags:
- MemoryBanks
/models/delete:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
adapter servicing the API
in: header
name: X-LlamaStack-ProviderData
required: false
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/DeleteModelRequest'
required: true
responses:
'200':
description: OK
tags:
- Models
/models/get:
get:
parameters:
@ -4256,6 +4307,31 @@ paths:
description: OK
tags:
- Models
/models/update:
post:
parameters:
- description: JSON-encoded provider data which will be made available to the
adapter servicing the API
in: header
name: X-LlamaStack-ProviderData
required: false
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/UpdateModelRequest'
required: true
responses:
'200':
content:
application/json:
schema:
$ref: '#/components/schemas/Model'
description: OK
tags:
- Models
/post_training/job/artifacts:
get:
parameters:
@ -4748,22 +4824,22 @@ security:
servers:
- url: http://any-hosted-llama-stack.com
tags:
- name: Inspect
- name: Models
- name: Eval
- name: EvalTasks
- name: Scoring
- name: Inference
- name: Memory
- name: Safety
- name: PostTraining
- name: ScoringFunctions
- name: Telemetry
- name: Shields
- name: BatchInference
- name: MemoryBanks
- name: Datasets
- name: Inference
- name: ScoringFunctions
- name: MemoryBanks
- name: Telemetry
- name: PostTraining
- name: Models
- name: Inspect
- name: Safety
- name: Scoring
- name: BatchInference
- name: Eval
- name: SyntheticDataGeneration
- name: EvalTasks
- name: Shields
- name: Memory
- name: DatasetIO
- name: Agents
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
@ -4964,6 +5040,9 @@ tags:
- description: <SchemaDefinition schemaRef="#/components/schemas/DeleteAgentsSessionRequest"
/>
name: DeleteAgentsSessionRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/DeleteModelRequest"
/>
name: DeleteModelRequest
- description: <SchemaDefinition schemaRef="#/components/schemas/EmbeddingsRequest"
/>
name: EmbeddingsRequest
@ -5194,6 +5273,9 @@ tags:
<SchemaDefinition schemaRef="#/components/schemas/SyntheticDataGenerationResponse"
/>'
name: SyntheticDataGenerationResponse
- description: <SchemaDefinition schemaRef="#/components/schemas/UpdateModelRequest"
/>
name: UpdateModelRequest
x-tagGroups:
- name: Operations
tags:
@ -5256,6 +5338,7 @@ x-tagGroups:
- Dataset
- DeleteAgentsRequest
- DeleteAgentsSessionRequest
- DeleteModelRequest
- DoraFinetuningConfig
- EmbeddingsRequest
- EmbeddingsResponse
@ -5353,6 +5436,7 @@ x-tagGroups:
- Turn
- URL
- UnstructuredLogEvent
- UpdateModelRequest
- UserMessage
- VectorMemoryBank
- VectorMemoryBankParams

View file

@ -7,7 +7,7 @@
import asyncio
import json
from typing import List, Optional
from typing import Any, Dict, List, Optional
import fire
import httpx
@ -61,6 +61,36 @@ class ModelsClient(Models):
return None
return Model(**j)
async def update_model(
    self,
    model_id: str,
    provider_model_id: Optional[str] = None,
    provider_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Model:
    """Ask the server to update ``model_id`` and return the resulting model.

    Sends a PUT request to ``{base_url}/models/update`` whose JSON body
    always carries all four fields; optional arguments that were not
    supplied are sent as ``None`` (JSON ``null``).

    Returns:
        A ``Model`` built from the JSON object in the response body.

    Raises:
        httpx.HTTPStatusError: via ``raise_for_status()`` when the server
            replies with an error status.
    """
    endpoint = f"{self.base_url}/models/update"
    payload = {
        "model_id": model_id,
        "provider_model_id": provider_model_id,
        "provider_id": provider_id,
        "metadata": metadata,
    }
    json_headers = {"Content-Type": "application/json"}

    async with httpx.AsyncClient() as http:
        reply = await http.put(endpoint, json=payload, headers=json_headers)
        reply.raise_for_status()
        return Model(**reply.json())
async def delete_model(self, model_id: str) -> None:
    """Ask the server to delete the model identified by ``model_id``.

    Sends a DELETE request to ``{base_url}/models/delete`` with
    ``model_id`` passed as a query parameter.

    Raises:
        httpx.HTTPStatusError: via ``raise_for_status()`` when the server
            replies with an error status.
    """
    endpoint = f"{self.base_url}/models/delete"
    query = {"model_id": model_id}
    json_headers = {"Content-Type": "application/json"}

    async with httpx.AsyncClient() as http:
        reply = await http.delete(endpoint, params=query, headers=json_headers)
        reply.raise_for_status()
async def run_main(host: str, port: int, stream: bool):
client = ModelsClient(f"http://{host}:{port}")

View file

@ -54,3 +54,15 @@ class Models(Protocol):
provider_id: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> Model: ...
# Update the model identified by `model_id` and return the resulting Model.
# Declared as PUT /models/update. Only `model_id` is required; the other
# arguments default to None.
# NOTE(review): the OpenAPI spec hunks in this same change list
# /models/update under `post` -- confirm the spec was regenerated after the
# method here was set to PUT.
@webmethod(route="/models/update", method="PUT")
async def update_model(
self,
model_id: str,
provider_model_id: Optional[str] = None,
provider_id: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> Model: ...
# Delete the model identified by `model_id`; returns nothing.
# Declared as DELETE /models/delete.
# NOTE(review): the OpenAPI spec hunks in this same change list
# /models/delete under `post` with a JSON request body -- confirm the spec
# was regenerated after the method here was set to DELETE.
@webmethod(route="/models/delete", method="DELETE")
async def delete_model(self, model_id: str) -> None: ...

View file

@ -152,6 +152,10 @@ class CommonRoutingTableImpl(RoutingTable):
assert len(objects) == 1
return objects[0]
async def delete_object(self, obj: RoutableObjectWithProvider) -> None:
    """Remove ``obj`` from the distribution registry.

    The registry entry is addressed by the object's ``type`` and
    ``identifier`` attributes. Only the registry is touched here.
    """
    kind, ident = obj.type, obj.identifier
    await self.dist_registry.delete(kind, ident)
    # TODO: delete from provider
async def register_object(
self, obj: RoutableObjectWithProvider
) -> RoutableObjectWithProvider:
@ -225,6 +229,33 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
registered_model = await self.register_object(model)
return registered_model
async def update_model(
    self,
    model_id: str,
    provider_model_id: Optional[str] = None,
    provider_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Model:
    """Re-register an existing model with updated fields.

    Looks up ``model_id``, builds a new ``Model`` that combines the supplied
    values with the existing ones, and passes it to ``register_object``.

    Args:
        model_id: Identifier of the model to update.
        provider_model_id: New provider resource id; when omitted (or empty)
            the existing ``provider_resource_id`` is kept.
        provider_id: New provider id; when omitted (or empty) the existing
            ``provider_id`` is kept.
        metadata: New metadata. ``None`` keeps the existing metadata, while
            an explicit empty dict replaces it (i.e. clears it).

    Returns:
        Whatever ``register_object`` returns for the updated model.

    Raises:
        ValueError: if no model with ``model_id`` is found.
    """
    existing_model = await self.get_model(model_id)
    if existing_model is None:
        raise ValueError(f"Model {model_id} not found")

    # Only None means "not provided" for metadata: `metadata or existing`
    # would silently ignore an explicit `{}` and make it impossible to
    # clear a model's metadata.
    new_metadata = existing_model.metadata if metadata is None else metadata

    updated_model = Model(
        identifier=model_id,
        # An empty string is not a usable id, so the truthiness fallback is
        # kept for the two string fields.
        provider_resource_id=provider_model_id
        or existing_model.provider_resource_id,
        provider_id=provider_id or existing_model.provider_id,
        metadata=new_metadata,
    )
    return await self.register_object(updated_model)
async def delete_model(self, model_id: str) -> None:
    """Delete the model identified by ``model_id``.

    The model is resolved through ``get_model`` and then handed to
    ``delete_object``.

    Raises:
        ValueError: if ``get_model`` returns ``None`` for ``model_id``.
    """
    target = await self.get_model(model_id)
    if target is None:
        raise ValueError(f"Model {model_id} not found")

    await self.delete_object(target)
class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
async def list_shields(self) -> List[Shield]:

View file

@ -36,6 +36,8 @@ class DistributionRegistry(Protocol):
# The current approach could lead to inconsistencies if the same logical object has different data across providers.
async def register(self, obj: RoutableObjectWithProvider) -> bool: ...
# Remove the registry entry addressed by (type, identifier); returns nothing.
# DiskDistributionRegistry implements this by deleting the matching kvstore key.
async def delete(self, type: str, identifier: str) -> None: ...
REGISTER_PREFIX = "distributions:registry"
KEY_VERSION = "v1"
@ -120,6 +122,9 @@ class DiskDistributionRegistry(DistributionRegistry):
)
return True
async def delete(self, type: str, identifier: str) -> None:
    """Delete the kvstore entry for the object with this type and identifier.

    The key is produced by filling ``KEY_FORMAT`` with ``type`` and
    ``identifier``.
    """
    storage_key = KEY_FORMAT.format(type=type, identifier=identifier)
    await self.kvstore.delete(storage_key)
class CachedDiskDistributionRegistry(DiskDistributionRegistry):
def __init__(self, kvstore: KVStore):
@ -206,6 +211,13 @@ class CachedDiskDistributionRegistry(DiskDistributionRegistry):
return success
async def delete(self, type: str, identifier: str) -> None:
    """Delete the entry from the backing store, then drop it from the cache.

    The parent class's ``delete`` runs first; afterwards the
    ``(type, identifier)`` key is removed from the cache while holding the
    cache lock. A key that is not cached is simply ignored.
    """
    await super().delete(type, identifier)

    async with self._locked_cache() as cache:
        # pop() with a default is a no-op for a missing key, matching the
        # "delete only if present" behaviour (assumes a dict-like cache).
        cache.pop((type, identifier), None)
async def create_dist_registry(
metadata_store: Optional[KVStoreConfig],