forked from phoenix-oss/llama-stack-mirror
Support model resource updates and deletes (#452)
# What does this PR do? * Changes the registry to store only one RoutableObject per identifier. Before it was a list, which is not really required. * Adds impl for updates and deletes * Updates routing table to handle updates correctly ## Test Plan ``` ❯ llama-stack-client models list +------------------------+---------------+------------------------------------+------------+ | identifier | provider_id | provider_resource_id | metadata | +========================+===============+====================================+============+ | Llama3.1-405B-Instruct | fireworks-0 | fireworks/llama-v3p1-405b-instruct | {} | +------------------------+---------------+------------------------------------+------------+ | Llama3.1-8B-Instruct | fireworks-0 | fireworks/llama-v3p1-8b-instruct | {} | +------------------------+---------------+------------------------------------+------------+ | Llama3.2-3B-Instruct | fireworks-0 | fireworks/llama-v3p2-1b-instruct | {} | +------------------------+---------------+------------------------------------+------------+ ❯ llama-stack-client models register dineshyv-model --provider-model-id=fireworks/llama-v3p1-70b-instruct Successfully registered model dineshyv-model ❯ llama-stack-client models list +------------------------+---------------+------------------------------------+------------+ | identifier | provider_id | provider_resource_id | metadata | +========================+===============+====================================+============+ | Llama3.1-405B-Instruct | fireworks-0 | fireworks/llama-v3p1-405b-instruct | {} | +------------------------+---------------+------------------------------------+------------+ | Llama3.1-8B-Instruct | fireworks-0 | fireworks/llama-v3p1-8b-instruct | {} | +------------------------+---------------+------------------------------------+------------+ | Llama3.2-3B-Instruct | fireworks-0 | fireworks/llama-v3p2-1b-instruct | {} | 
+------------------------+---------------+------------------------------------+------------+ | dineshyv-model | fireworks-0 | fireworks/llama-v3p1-70b-instruct | {} | +------------------------+---------------+------------------------------------+------------+ ❯ llama-stack-client models update dineshyv-model --provider-model-id=fireworks/llama-v3p1-405b-instruct Successfully updated model dineshyv-model ❯ llama-stack-client models list +------------------------+---------------+------------------------------------+------------+ | identifier | provider_id | provider_resource_id | metadata | +========================+===============+====================================+============+ | Llama3.1-405B-Instruct | fireworks-0 | fireworks/llama-v3p1-405b-instruct | {} | +------------------------+---------------+------------------------------------+------------+ | Llama3.1-8B-Instruct | fireworks-0 | fireworks/llama-v3p1-8b-instruct | {} | +------------------------+---------------+------------------------------------+------------+ | Llama3.2-3B-Instruct | fireworks-0 | fireworks/llama-v3p2-1b-instruct | {} | +------------------------+---------------+------------------------------------+------------+ | dineshyv-model | fireworks-0 | fireworks/llama-v3p1-405b-instruct | {} | +------------------------+---------------+------------------------------------+------------+ llama-stack-client models delete dineshyv-model ❯ llama-stack-client models list +------------------------+---------------+------------------------------------+------------+ | identifier | provider_id | provider_resource_id | metadata | +========================+===============+====================================+============+ | Llama3.1-405B-Instruct | fireworks-0 | fireworks/llama-v3p1-405b-instruct | {} | +------------------------+---------------+------------------------------------+------------+ | Llama3.1-8B-Instruct | fireworks-0 | fireworks/llama-v3p1-8b-instruct | {} | 
+------------------------+---------------+------------------------------------+------------+ | Llama3.2-3B-Instruct | fireworks-0 | fireworks/llama-v3p2-1b-instruct | {} | +------------------------+---------------+------------------------------------+------------+ ``` --------- Co-authored-by: Dinesh Yeduguru <dineshyv@fb.com>
This commit is contained in:
parent
4253cfcd7f
commit
efe791bab7
7 changed files with 447 additions and 129 deletions
|
@ -21,7 +21,7 @@
|
||||||
"info": {
|
"info": {
|
||||||
"title": "[DRAFT] Llama Stack Specification",
|
"title": "[DRAFT] Llama Stack Specification",
|
||||||
"version": "0.0.1",
|
"version": "0.0.1",
|
||||||
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-11-13 11:02:50.081698"
|
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-11-13 21:05:58.323310"
|
||||||
},
|
},
|
||||||
"servers": [
|
"servers": [
|
||||||
{
|
{
|
||||||
|
@ -429,6 +429,39 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"/models/delete": {
|
||||||
|
"post": {
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "OK"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"Models"
|
||||||
|
],
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "X-LlamaStack-ProviderData",
|
||||||
|
"in": "header",
|
||||||
|
"description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
|
||||||
|
"required": false,
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"requestBody": {
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/DeleteModelRequest"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
"/inference/embeddings": {
|
"/inference/embeddings": {
|
||||||
"post": {
|
"post": {
|
||||||
"responses": {
|
"responses": {
|
||||||
|
@ -2225,6 +2258,46 @@
|
||||||
"required": true
|
"required": true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"/models/update": {
|
||||||
|
"post": {
|
||||||
|
"responses": {
|
||||||
|
"200": {
|
||||||
|
"description": "OK",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/Model"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"tags": [
|
||||||
|
"Models"
|
||||||
|
],
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "X-LlamaStack-ProviderData",
|
||||||
|
"in": "header",
|
||||||
|
"description": "JSON-encoded provider data which will be made available to the adapter servicing the API",
|
||||||
|
"required": false,
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"requestBody": {
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/UpdateModelRequest"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": true
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
|
"jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
|
||||||
|
@ -4549,6 +4622,18 @@
|
||||||
"session_id"
|
"session_id"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
"DeleteModelRequest": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"model_id": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"model_id"
|
||||||
|
]
|
||||||
|
},
|
||||||
"EmbeddingsRequest": {
|
"EmbeddingsRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
@ -7826,6 +7911,49 @@
|
||||||
"synthetic_data"
|
"synthetic_data"
|
||||||
],
|
],
|
||||||
"title": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
|
"title": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
|
||||||
|
},
|
||||||
|
"UpdateModelRequest": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"model_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"provider_model_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"provider_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"oneOf": [
|
||||||
|
{
|
||||||
|
"type": "null"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "number"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "array"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "object"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"model_id"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"responses": {}
|
"responses": {}
|
||||||
|
@ -7837,23 +7965,20 @@
|
||||||
],
|
],
|
||||||
"tags": [
|
"tags": [
|
||||||
{
|
{
|
||||||
"name": "Inspect"
|
"name": "Agents"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "DatasetIO"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Models"
|
"name": "Models"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"name": "Eval"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "EvalTasks"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Scoring"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"name": "Inference"
|
"name": "Inference"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "BatchInference"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "Memory"
|
"name": "Memory"
|
||||||
},
|
},
|
||||||
|
@ -7861,35 +7986,38 @@
|
||||||
"name": "Safety"
|
"name": "Safety"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "PostTraining"
|
"name": "Inspect"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "ScoringFunctions"
|
"name": "EvalTasks"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Telemetry"
|
"name": "Scoring"
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Shields"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "BatchInference"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "MemoryBanks"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "Datasets"
|
"name": "Datasets"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "PostTraining"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Eval"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Shields"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Telemetry"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "ScoringFunctions"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "MemoryBanks"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "SyntheticDataGeneration"
|
"name": "SyntheticDataGeneration"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"name": "DatasetIO"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Agents"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"name": "BuiltinTool",
|
"name": "BuiltinTool",
|
||||||
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/BuiltinTool\" />"
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/BuiltinTool\" />"
|
||||||
|
@ -8142,6 +8270,10 @@
|
||||||
"name": "DeleteAgentsSessionRequest",
|
"name": "DeleteAgentsSessionRequest",
|
||||||
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/DeleteAgentsSessionRequest\" />"
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/DeleteAgentsSessionRequest\" />"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "DeleteModelRequest",
|
||||||
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/DeleteModelRequest\" />"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "EmbeddingsRequest",
|
"name": "EmbeddingsRequest",
|
||||||
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/EmbeddingsRequest\" />"
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/EmbeddingsRequest\" />"
|
||||||
|
@ -8453,6 +8585,10 @@
|
||||||
{
|
{
|
||||||
"name": "SyntheticDataGenerationResponse",
|
"name": "SyntheticDataGenerationResponse",
|
||||||
"description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/SyntheticDataGenerationResponse\" />"
|
"description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/SyntheticDataGenerationResponse\" />"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "UpdateModelRequest",
|
||||||
|
"description": "<SchemaDefinition schemaRef=\"#/components/schemas/UpdateModelRequest\" />"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"x-tagGroups": [
|
"x-tagGroups": [
|
||||||
|
@ -8521,6 +8657,7 @@
|
||||||
"Dataset",
|
"Dataset",
|
||||||
"DeleteAgentsRequest",
|
"DeleteAgentsRequest",
|
||||||
"DeleteAgentsSessionRequest",
|
"DeleteAgentsSessionRequest",
|
||||||
|
"DeleteModelRequest",
|
||||||
"DoraFinetuningConfig",
|
"DoraFinetuningConfig",
|
||||||
"EmbeddingsRequest",
|
"EmbeddingsRequest",
|
||||||
"EmbeddingsResponse",
|
"EmbeddingsResponse",
|
||||||
|
@ -8618,6 +8755,7 @@
|
||||||
"Turn",
|
"Turn",
|
||||||
"URL",
|
"URL",
|
||||||
"UnstructuredLogEvent",
|
"UnstructuredLogEvent",
|
||||||
|
"UpdateModelRequest",
|
||||||
"UserMessage",
|
"UserMessage",
|
||||||
"VectorMemoryBank",
|
"VectorMemoryBank",
|
||||||
"VectorMemoryBankParams",
|
"VectorMemoryBankParams",
|
||||||
|
|
|
@ -867,6 +867,14 @@ components:
|
||||||
- agent_id
|
- agent_id
|
||||||
- session_id
|
- session_id
|
||||||
type: object
|
type: object
|
||||||
|
DeleteModelRequest:
|
||||||
|
additionalProperties: false
|
||||||
|
properties:
|
||||||
|
model_id:
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- model_id
|
||||||
|
type: object
|
||||||
DoraFinetuningConfig:
|
DoraFinetuningConfig:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
properties:
|
properties:
|
||||||
|
@ -3272,6 +3280,28 @@ components:
|
||||||
- message
|
- message
|
||||||
- severity
|
- severity
|
||||||
type: object
|
type: object
|
||||||
|
UpdateModelRequest:
|
||||||
|
additionalProperties: false
|
||||||
|
properties:
|
||||||
|
metadata:
|
||||||
|
additionalProperties:
|
||||||
|
oneOf:
|
||||||
|
- type: 'null'
|
||||||
|
- type: boolean
|
||||||
|
- type: number
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
- type: object
|
||||||
|
type: object
|
||||||
|
model_id:
|
||||||
|
type: string
|
||||||
|
provider_id:
|
||||||
|
type: string
|
||||||
|
provider_model_id:
|
||||||
|
type: string
|
||||||
|
required:
|
||||||
|
- model_id
|
||||||
|
type: object
|
||||||
UserMessage:
|
UserMessage:
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
properties:
|
properties:
|
||||||
|
@ -3384,7 +3414,7 @@ info:
|
||||||
description: "This is the specification of the llama stack that provides\n \
|
description: "This is the specification of the llama stack that provides\n \
|
||||||
\ a set of endpoints and their corresponding interfaces that are tailored\
|
\ a set of endpoints and their corresponding interfaces that are tailored\
|
||||||
\ to\n best leverage Llama Models. The specification is still in\
|
\ to\n best leverage Llama Models. The specification is still in\
|
||||||
\ draft and subject to change.\n Generated at 2024-11-13 11:02:50.081698"
|
\ draft and subject to change.\n Generated at 2024-11-13 21:05:58.323310"
|
||||||
title: '[DRAFT] Llama Stack Specification'
|
title: '[DRAFT] Llama Stack Specification'
|
||||||
version: 0.0.1
|
version: 0.0.1
|
||||||
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
|
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
|
||||||
|
@ -4186,6 +4216,27 @@ paths:
|
||||||
responses: {}
|
responses: {}
|
||||||
tags:
|
tags:
|
||||||
- MemoryBanks
|
- MemoryBanks
|
||||||
|
/models/delete:
|
||||||
|
post:
|
||||||
|
parameters:
|
||||||
|
- description: JSON-encoded provider data which will be made available to the
|
||||||
|
adapter servicing the API
|
||||||
|
in: header
|
||||||
|
name: X-LlamaStack-ProviderData
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/DeleteModelRequest'
|
||||||
|
required: true
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: OK
|
||||||
|
tags:
|
||||||
|
- Models
|
||||||
/models/get:
|
/models/get:
|
||||||
get:
|
get:
|
||||||
parameters:
|
parameters:
|
||||||
|
@ -4256,6 +4307,31 @@ paths:
|
||||||
description: OK
|
description: OK
|
||||||
tags:
|
tags:
|
||||||
- Models
|
- Models
|
||||||
|
/models/update:
|
||||||
|
post:
|
||||||
|
parameters:
|
||||||
|
- description: JSON-encoded provider data which will be made available to the
|
||||||
|
adapter servicing the API
|
||||||
|
in: header
|
||||||
|
name: X-LlamaStack-ProviderData
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/UpdateModelRequest'
|
||||||
|
required: true
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/Model'
|
||||||
|
description: OK
|
||||||
|
tags:
|
||||||
|
- Models
|
||||||
/post_training/job/artifacts:
|
/post_training/job/artifacts:
|
||||||
get:
|
get:
|
||||||
parameters:
|
parameters:
|
||||||
|
@ -4748,24 +4824,24 @@ security:
|
||||||
servers:
|
servers:
|
||||||
- url: http://any-hosted-llama-stack.com
|
- url: http://any-hosted-llama-stack.com
|
||||||
tags:
|
tags:
|
||||||
- name: Inspect
|
- name: Agents
|
||||||
|
- name: DatasetIO
|
||||||
- name: Models
|
- name: Models
|
||||||
- name: Eval
|
|
||||||
- name: EvalTasks
|
|
||||||
- name: Scoring
|
|
||||||
- name: Inference
|
- name: Inference
|
||||||
|
- name: BatchInference
|
||||||
- name: Memory
|
- name: Memory
|
||||||
- name: Safety
|
- name: Safety
|
||||||
- name: PostTraining
|
- name: Inspect
|
||||||
- name: ScoringFunctions
|
- name: EvalTasks
|
||||||
- name: Telemetry
|
- name: Scoring
|
||||||
- name: Shields
|
|
||||||
- name: BatchInference
|
|
||||||
- name: MemoryBanks
|
|
||||||
- name: Datasets
|
- name: Datasets
|
||||||
|
- name: PostTraining
|
||||||
|
- name: Eval
|
||||||
|
- name: Shields
|
||||||
|
- name: Telemetry
|
||||||
|
- name: ScoringFunctions
|
||||||
|
- name: MemoryBanks
|
||||||
- name: SyntheticDataGeneration
|
- name: SyntheticDataGeneration
|
||||||
- name: DatasetIO
|
|
||||||
- name: Agents
|
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
|
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
|
||||||
name: BuiltinTool
|
name: BuiltinTool
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
|
||||||
|
@ -4964,6 +5040,9 @@ tags:
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/DeleteAgentsSessionRequest"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/DeleteAgentsSessionRequest"
|
||||||
/>
|
/>
|
||||||
name: DeleteAgentsSessionRequest
|
name: DeleteAgentsSessionRequest
|
||||||
|
- description: <SchemaDefinition schemaRef="#/components/schemas/DeleteModelRequest"
|
||||||
|
/>
|
||||||
|
name: DeleteModelRequest
|
||||||
- description: <SchemaDefinition schemaRef="#/components/schemas/EmbeddingsRequest"
|
- description: <SchemaDefinition schemaRef="#/components/schemas/EmbeddingsRequest"
|
||||||
/>
|
/>
|
||||||
name: EmbeddingsRequest
|
name: EmbeddingsRequest
|
||||||
|
@ -5194,6 +5273,9 @@ tags:
|
||||||
<SchemaDefinition schemaRef="#/components/schemas/SyntheticDataGenerationResponse"
|
<SchemaDefinition schemaRef="#/components/schemas/SyntheticDataGenerationResponse"
|
||||||
/>'
|
/>'
|
||||||
name: SyntheticDataGenerationResponse
|
name: SyntheticDataGenerationResponse
|
||||||
|
- description: <SchemaDefinition schemaRef="#/components/schemas/UpdateModelRequest"
|
||||||
|
/>
|
||||||
|
name: UpdateModelRequest
|
||||||
x-tagGroups:
|
x-tagGroups:
|
||||||
- name: Operations
|
- name: Operations
|
||||||
tags:
|
tags:
|
||||||
|
@ -5256,6 +5338,7 @@ x-tagGroups:
|
||||||
- Dataset
|
- Dataset
|
||||||
- DeleteAgentsRequest
|
- DeleteAgentsRequest
|
||||||
- DeleteAgentsSessionRequest
|
- DeleteAgentsSessionRequest
|
||||||
|
- DeleteModelRequest
|
||||||
- DoraFinetuningConfig
|
- DoraFinetuningConfig
|
||||||
- EmbeddingsRequest
|
- EmbeddingsRequest
|
||||||
- EmbeddingsResponse
|
- EmbeddingsResponse
|
||||||
|
@ -5353,6 +5436,7 @@ x-tagGroups:
|
||||||
- Turn
|
- Turn
|
||||||
- URL
|
- URL
|
||||||
- UnstructuredLogEvent
|
- UnstructuredLogEvent
|
||||||
|
- UpdateModelRequest
|
||||||
- UserMessage
|
- UserMessage
|
||||||
- VectorMemoryBank
|
- VectorMemoryBank
|
||||||
- VectorMemoryBankParams
|
- VectorMemoryBankParams
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from typing import List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
import fire
|
import fire
|
||||||
import httpx
|
import httpx
|
||||||
|
@ -61,6 +61,36 @@ class ModelsClient(Models):
|
||||||
return None
|
return None
|
||||||
return Model(**j)
|
return Model(**j)
|
||||||
|
|
||||||
|
async def update_model(
    self,
    model_id: str,
    provider_model_id: Optional[str] = None,
    provider_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Model:
    """Ask the server to update a registered model and return the result.

    Args:
        model_id: Identifier of the model to update.
        provider_model_id: New provider-side model id; omitted from the
            request when None.
        provider_id: New provider id; omitted from the request when None.
        metadata: New metadata mapping; omitted from the request when None.

    Returns:
        A ``Model`` built from the JSON body of the response.

    Raises:
        httpx.HTTPStatusError: if the server answers with a 4xx/5xx status.
    """
    # Only send the fields the caller actually set: the request schema types
    # the optional fields as plain string/object, so explicit nulls are avoided.
    payload: Dict[str, Any] = {"model_id": model_id}
    optional_fields = {
        "provider_model_id": provider_model_id,
        "provider_id": provider_id,
        "metadata": metadata,
    }
    payload.update(
        {key: value for key, value in optional_fields.items() if value is not None}
    )

    async with httpx.AsyncClient() as client:
        # The /models/update route is exposed as POST, so PUT would not match it.
        response = await client.post(
            f"{self.base_url}/models/update",
            json=payload,
            headers={"Content-Type": "application/json"},
        )
        response.raise_for_status()
        return Model(**response.json())
|
||||||
|
|
||||||
|
async def delete_model(self, model_id: str) -> None:
    """Ask the server to delete the model registered under ``model_id``.

    Args:
        model_id: Identifier of the model to delete.

    Raises:
        httpx.HTTPStatusError: if the server answers with a 4xx/5xx status.
    """
    async with httpx.AsyncClient() as client:
        # The /models/delete route is exposed as POST with a JSON request body,
        # so the id goes in the body rather than in a DELETE query string.
        response = await client.post(
            f"{self.base_url}/models/delete",
            json={"model_id": model_id},
            headers={"Content-Type": "application/json"},
        )
        response.raise_for_status()
|
||||||
|
|
||||||
|
|
||||||
async def run_main(host: str, port: int, stream: bool):
|
async def run_main(host: str, port: int, stream: bool):
|
||||||
client = ModelsClient(f"http://{host}:{port}")
|
client = ModelsClient(f"http://{host}:{port}")
|
||||||
|
|
|
@ -54,3 +54,15 @@ class Models(Protocol):
|
||||||
provider_id: Optional[str] = None,
|
provider_id: Optional[str] = None,
|
||||||
metadata: Optional[Dict[str, Any]] = None,
|
metadata: Optional[Dict[str, Any]] = None,
|
||||||
) -> Model: ...
|
) -> Model: ...
|
||||||
|
|
||||||
|
# Protocol stub: update an existing model. Only ``model_id`` is required;
# the remaining arguments default to None.
@webmethod(route="/models/update", method="POST")
async def update_model(
    self,
    model_id: str,
    provider_model_id: Optional[str] = None,
    provider_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Model:
    ...
|
||||||
|
|
||||||
|
# Protocol stub: delete the model identified by ``model_id``; returns nothing.
@webmethod(route="/models/delete", method="POST")
async def delete_model(
    self,
    model_id: str,
) -> None:
    ...
|
||||||
|
|
|
@ -124,8 +124,8 @@ class CommonRoutingTableImpl(RoutingTable):
|
||||||
apiname, objtype = apiname_object()
|
apiname, objtype = apiname_object()
|
||||||
|
|
||||||
# Get objects from disk registry
|
# Look up the object in the registry cache
|
||||||
objects = self.dist_registry.get_cached(objtype, routing_key)
|
obj = self.dist_registry.get_cached(objtype, routing_key)
|
||||||
if not objects:
|
if not obj:
|
||||||
provider_ids = list(self.impls_by_provider_id.keys())
|
provider_ids = list(self.impls_by_provider_id.keys())
|
||||||
if len(provider_ids) > 1:
|
if len(provider_ids) > 1:
|
||||||
provider_ids_str = f"any of the providers: {', '.join(provider_ids)}"
|
provider_ids_str = f"any of the providers: {', '.join(provider_ids)}"
|
||||||
|
@ -135,9 +135,8 @@ class CommonRoutingTableImpl(RoutingTable):
|
||||||
f"{objtype.capitalize()} `{routing_key}` not served by {provider_ids_str}. Make sure there is an {apiname} provider serving this {objtype}."
|
f"{objtype.capitalize()} `{routing_key}` not served by {provider_ids_str}. Make sure there is an {apiname} provider serving this {objtype}."
|
||||||
)
|
)
|
||||||
|
|
||||||
for obj in objects:
|
if not provider_id or provider_id == obj.provider_id:
|
||||||
if not provider_id or provider_id == obj.provider_id:
|
return self.impls_by_provider_id[obj.provider_id]
|
||||||
return self.impls_by_provider_id[obj.provider_id]
|
|
||||||
|
|
||||||
raise ValueError(f"Provider not found for `{routing_key}`")
|
raise ValueError(f"Provider not found for `{routing_key}`")
|
||||||
|
|
||||||
|
@ -145,26 +144,36 @@ class CommonRoutingTableImpl(RoutingTable):
|
||||||
self, type: str, identifier: str
|
self, type: str, identifier: str
|
||||||
) -> Optional[RoutableObjectWithProvider]:
|
) -> Optional[RoutableObjectWithProvider]:
|
||||||
# Get from disk registry
|
# Get from disk registry
|
||||||
objects = await self.dist_registry.get(type, identifier)
|
obj = await self.dist_registry.get(type, identifier)
|
||||||
if not objects:
|
if not obj:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
assert len(objects) == 1
|
return obj
|
||||||
return objects[0]
|
|
||||||
|
async def delete_object(self, obj: RoutableObjectWithProvider) -> None:
    """Remove ``obj`` from the distribution registry.

    The registry entry is addressed by the object's ``type`` and
    ``identifier``. Nothing is sent to the provider here.
    """
    object_type, object_id = obj.type, obj.identifier
    await self.dist_registry.delete(object_type, object_id)
    # TODO: delete from provider
|
||||||
|
|
||||||
|
async def update_object(
    self, obj: RoutableObjectWithProvider
) -> RoutableObjectWithProvider:
    """Register ``obj`` with its provider, then store it in the registry.

    The provider implementation is looked up by ``obj.provider_id``; an
    unknown provider id raises ``KeyError`` from that lookup.

    Returns:
        Whatever ``self.dist_registry.update`` returns for the registered object.
    """
    provider_impl = self.impls_by_provider_id[obj.provider_id]
    registered = await register_object_with_provider(obj, provider_impl)
    stored = await self.dist_registry.update(registered)
    return stored
|
||||||
|
|
||||||
async def register_object(
|
async def register_object(
|
||||||
self, obj: RoutableObjectWithProvider
|
self, obj: RoutableObjectWithProvider
|
||||||
) -> RoutableObjectWithProvider:
|
) -> RoutableObjectWithProvider:
|
||||||
# Get existing objects from registry
|
# Get the existing object, if any, from the registry
|
||||||
existing_objects = await self.dist_registry.get(obj.type, obj.identifier)
|
existing_obj = await self.dist_registry.get(obj.type, obj.identifier)
|
||||||
|
|
||||||
# Check for existing registration
|
# Check for existing registration
|
||||||
for existing_obj in existing_objects:
|
if existing_obj and existing_obj.provider_id == obj.provider_id:
|
||||||
if existing_obj.provider_id == obj.provider_id or not obj.provider_id:
|
print(
|
||||||
print(
|
f"`{obj.identifier}` already registered with `{existing_obj.provider_id}`"
|
||||||
f"`{obj.identifier}` already registered with `{existing_obj.provider_id}`"
|
)
|
||||||
)
|
return existing_obj
|
||||||
return existing_obj
|
|
||||||
|
|
||||||
# if provider_id is not specified, pick an arbitrary one from existing entries
|
# if provider_id is not specified, pick an arbitrary one from existing entries
|
||||||
if not obj.provider_id and len(self.impls_by_provider_id) > 0:
|
if not obj.provider_id and len(self.impls_by_provider_id) > 0:
|
||||||
|
@ -225,6 +234,33 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
|
||||||
registered_model = await self.register_object(model)
|
registered_model = await self.register_object(model)
|
||||||
return registered_model
|
return registered_model
|
||||||
|
|
||||||
|
async def update_model(
    self,
    model_id: str,
    provider_model_id: Optional[str] = None,
    provider_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Model:
    """Update a registered model, keeping stored values for unset fields.

    Args:
        model_id: Identifier of the model to update.
        provider_model_id: New provider resource id; the stored value is kept
            when this is None or empty.
        provider_id: New provider id; the stored value is kept when this is
            None or empty.
        metadata: New metadata; the stored value is kept only when this is
            None, so an explicit empty dict clears the metadata.

    Returns:
        The object returned by ``self.update_object`` for the updated model.

    Raises:
        ValueError: if no model is registered under ``model_id``.
    """
    existing_model = await self.get_model(model_id)
    if existing_model is None:
        raise ValueError(f"Model {model_id} not found")

    # Compare metadata against None rather than truthiness: ``metadata or ...``
    # would silently discard an explicit ``{}`` and make clearing impossible.
    new_metadata = metadata if metadata is not None else existing_model.metadata

    updated_model = Model(
        identifier=model_id,
        provider_resource_id=provider_model_id
        or existing_model.provider_resource_id,
        provider_id=provider_id or existing_model.provider_id,
        metadata=new_metadata,
    )
    return await self.update_object(updated_model)
|
||||||
|
|
||||||
|
async def delete_model(self, model_id: str) -> None:
    """Delete the model registered under ``model_id``.

    Raises:
        ValueError: if ``self.get_model`` finds no model for ``model_id``.
    """
    model = await self.get_model(model_id)
    if model is not None:
        await self.delete_object(model)
        return
    raise ValueError(f"Model {model_id} not found")
|
||||||
|
|
||||||
|
|
||||||
class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
|
class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
|
||||||
async def list_shields(self) -> List[Shield]:
|
async def list_shields(self) -> List[Shield]:
|
||||||
|
|
|
@ -26,19 +26,21 @@ class DistributionRegistry(Protocol):
|
||||||
|
|
||||||
async def initialize(self) -> None: ...
|
async def initialize(self) -> None: ...
|
||||||
|
|
||||||
async def get(self, identifier: str) -> List[RoutableObjectWithProvider]: ...
|
async def get(self, identifier: str) -> Optional[RoutableObjectWithProvider]: ...
|
||||||
|
|
||||||
def get_cached(self, identifier: str) -> List[RoutableObjectWithProvider]: ...
|
def get_cached(self, identifier: str) -> Optional[RoutableObjectWithProvider]: ...
|
||||||
|
|
||||||
|
async def update(
|
||||||
|
self, obj: RoutableObjectWithProvider
|
||||||
|
) -> RoutableObjectWithProvider: ...
|
||||||
|
|
||||||
# The current data structure allows multiple objects with the same identifier but different providers.
|
|
||||||
# This is not ideal - we should have a single object that can be served by multiple providers,
|
|
||||||
# suggesting a data structure like (obj: Obj, providers: List[str]) rather than List[RoutableObjectWithProvider].
|
|
||||||
# The current approach could lead to inconsistencies if the same logical object has different data across providers.
|
|
||||||
async def register(self, obj: RoutableObjectWithProvider) -> bool: ...
|
async def register(self, obj: RoutableObjectWithProvider) -> bool: ...
|
||||||
|
|
||||||
|
async def delete(self, type: str, identifier: str) -> None: ...
|
||||||
|
|
||||||
|
|
||||||
REGISTER_PREFIX = "distributions:registry"
|
REGISTER_PREFIX = "distributions:registry"
|
||||||
KEY_VERSION = "v1"
|
KEY_VERSION = "v2"
|
||||||
KEY_FORMAT = f"{REGISTER_PREFIX}:{KEY_VERSION}::" + "{type}:{identifier}"
|
KEY_FORMAT = f"{REGISTER_PREFIX}:{KEY_VERSION}::" + "{type}:{identifier}"
|
||||||
|
|
||||||
|
|
||||||
|
@ -52,19 +54,11 @@ def _parse_registry_values(values: List[str]) -> List[RoutableObjectWithProvider
|
||||||
"""Utility function to parse registry values into RoutableObjectWithProvider objects."""
|
"""Utility function to parse registry values into RoutableObjectWithProvider objects."""
|
||||||
all_objects = []
|
all_objects = []
|
||||||
for value in values:
|
for value in values:
|
||||||
try:
|
obj = pydantic.parse_obj_as(
|
||||||
objects_data = json.loads(value)
|
RoutableObjectWithProvider,
|
||||||
objects = [
|
json.loads(value),
|
||||||
pydantic.parse_obj_as(
|
)
|
||||||
RoutableObjectWithProvider,
|
all_objects.append(obj)
|
||||||
json.loads(obj_str),
|
|
||||||
)
|
|
||||||
for obj_str in objects_data
|
|
||||||
]
|
|
||||||
all_objects.extend(objects)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error parsing value: {e}")
|
|
||||||
traceback.print_exc()
|
|
||||||
return all_objects
|
return all_objects
|
||||||
|
|
||||||
|
|
||||||
|
@ -77,54 +71,60 @@ class DiskDistributionRegistry(DistributionRegistry):
|
||||||
|
|
||||||
def get_cached(
|
def get_cached(
|
||||||
self, type: str, identifier: str
|
self, type: str, identifier: str
|
||||||
) -> List[RoutableObjectWithProvider]:
|
) -> Optional[RoutableObjectWithProvider]:
|
||||||
# Disk registry does not have a cache
|
# Disk registry does not have a cache
|
||||||
return []
|
raise NotImplementedError("Disk registry does not have a cache")
|
||||||
|
|
||||||
async def get_all(self) -> List[RoutableObjectWithProvider]:
|
async def get_all(self) -> List[RoutableObjectWithProvider]:
|
||||||
start_key, end_key = _get_registry_key_range()
|
start_key, end_key = _get_registry_key_range()
|
||||||
values = await self.kvstore.range(start_key, end_key)
|
values = await self.kvstore.range(start_key, end_key)
|
||||||
return _parse_registry_values(values)
|
return _parse_registry_values(values)
|
||||||
|
|
||||||
async def get(self, type: str, identifier: str) -> List[RoutableObjectWithProvider]:
|
async def get(
|
||||||
|
self, type: str, identifier: str
|
||||||
|
) -> Optional[RoutableObjectWithProvider]:
|
||||||
json_str = await self.kvstore.get(
|
json_str = await self.kvstore.get(
|
||||||
KEY_FORMAT.format(type=type, identifier=identifier)
|
KEY_FORMAT.format(type=type, identifier=identifier)
|
||||||
)
|
)
|
||||||
if not json_str:
|
if not json_str:
|
||||||
return []
|
return None
|
||||||
|
|
||||||
objects_data = json.loads(json_str)
|
objects_data = json.loads(json_str)
|
||||||
return [
|
# Return only the first object if any exist
|
||||||
pydantic.parse_obj_as(
|
if objects_data:
|
||||||
|
return pydantic.parse_obj_as(
|
||||||
RoutableObjectWithProvider,
|
RoutableObjectWithProvider,
|
||||||
json.loads(obj_str),
|
json.loads(objects_data),
|
||||||
)
|
)
|
||||||
for obj_str in objects_data
|
return None
|
||||||
]
|
|
||||||
|
|
||||||
async def register(self, obj: RoutableObjectWithProvider) -> bool:
|
async def update(self, obj: RoutableObjectWithProvider) -> None:
|
||||||
existing_objects = await self.get(obj.type, obj.identifier)
|
|
||||||
# dont register if the object's providerid already exists
|
|
||||||
for eobj in existing_objects:
|
|
||||||
if eobj.provider_id == obj.provider_id:
|
|
||||||
return False
|
|
||||||
|
|
||||||
existing_objects.append(obj)
|
|
||||||
|
|
||||||
objects_json = [
|
|
||||||
obj.model_dump_json() for obj in existing_objects
|
|
||||||
] # Fixed variable name
|
|
||||||
await self.kvstore.set(
|
await self.kvstore.set(
|
||||||
KEY_FORMAT.format(type=obj.type, identifier=obj.identifier),
|
KEY_FORMAT.format(type=obj.type, identifier=obj.identifier),
|
||||||
json.dumps(objects_json),
|
obj.model_dump_json(),
|
||||||
|
)
|
||||||
|
return obj
|
||||||
|
|
||||||
|
async def register(self, obj: RoutableObjectWithProvider) -> bool:
|
||||||
|
existing_obj = await self.get(obj.type, obj.identifier)
|
||||||
|
# dont register if the object's providerid already exists
|
||||||
|
if existing_obj and existing_obj.provider_id == obj.provider_id:
|
||||||
|
return False
|
||||||
|
|
||||||
|
await self.kvstore.set(
|
||||||
|
KEY_FORMAT.format(type=obj.type, identifier=obj.identifier),
|
||||||
|
obj.model_dump_json(),
|
||||||
)
|
)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
async def delete(self, type: str, identifier: str) -> None:
|
||||||
|
await self.kvstore.delete(KEY_FORMAT.format(type=type, identifier=identifier))
|
||||||
|
|
||||||
|
|
||||||
class CachedDiskDistributionRegistry(DiskDistributionRegistry):
|
class CachedDiskDistributionRegistry(DiskDistributionRegistry):
|
||||||
def __init__(self, kvstore: KVStore):
|
def __init__(self, kvstore: KVStore):
|
||||||
super().__init__(kvstore)
|
super().__init__(kvstore)
|
||||||
self.cache: Dict[Tuple[str, str], List[RoutableObjectWithProvider]] = {}
|
self.cache: Dict[Tuple[str, str], RoutableObjectWithProvider] = {}
|
||||||
self._initialized = False
|
self._initialized = False
|
||||||
self._initialize_lock = asyncio.Lock()
|
self._initialize_lock = asyncio.Lock()
|
||||||
self._cache_lock = asyncio.Lock()
|
self._cache_lock = asyncio.Lock()
|
||||||
|
@ -151,13 +151,7 @@ class CachedDiskDistributionRegistry(DiskDistributionRegistry):
|
||||||
async with self._locked_cache() as cache:
|
async with self._locked_cache() as cache:
|
||||||
for obj in objects:
|
for obj in objects:
|
||||||
cache_key = (obj.type, obj.identifier)
|
cache_key = (obj.type, obj.identifier)
|
||||||
if cache_key not in cache:
|
cache[cache_key] = obj
|
||||||
cache[cache_key] = []
|
|
||||||
if not any(
|
|
||||||
cached_obj.provider_id == obj.provider_id
|
|
||||||
for cached_obj in cache[cache_key]
|
|
||||||
):
|
|
||||||
cache[cache_key].append(obj)
|
|
||||||
|
|
||||||
self._initialized = True
|
self._initialized = True
|
||||||
|
|
||||||
|
@ -166,28 +160,22 @@ class CachedDiskDistributionRegistry(DiskDistributionRegistry):
|
||||||
|
|
||||||
def get_cached(
|
def get_cached(
|
||||||
self, type: str, identifier: str
|
self, type: str, identifier: str
|
||||||
) -> List[RoutableObjectWithProvider]:
|
) -> Optional[RoutableObjectWithProvider]:
|
||||||
return self.cache.get((type, identifier), [])[:] # Return a copy
|
return self.cache.get((type, identifier), None)
|
||||||
|
|
||||||
async def get_all(self) -> List[RoutableObjectWithProvider]:
|
async def get_all(self) -> List[RoutableObjectWithProvider]:
|
||||||
await self._ensure_initialized()
|
await self._ensure_initialized()
|
||||||
async with self._locked_cache() as cache:
|
async with self._locked_cache() as cache:
|
||||||
return [item for sublist in cache.values() for item in sublist]
|
return list(cache.values())
|
||||||
|
|
||||||
async def get(self, type: str, identifier: str) -> List[RoutableObjectWithProvider]:
|
async def get(
|
||||||
|
self, type: str, identifier: str
|
||||||
|
) -> Optional[RoutableObjectWithProvider]:
|
||||||
await self._ensure_initialized()
|
await self._ensure_initialized()
|
||||||
cache_key = (type, identifier)
|
cache_key = (type, identifier)
|
||||||
|
|
||||||
async with self._locked_cache() as cache:
|
async with self._locked_cache() as cache:
|
||||||
if cache_key in cache:
|
return cache.get(cache_key, None)
|
||||||
return cache[cache_key][:]
|
|
||||||
|
|
||||||
objects = await super().get(type, identifier)
|
|
||||||
if objects:
|
|
||||||
async with self._locked_cache() as cache:
|
|
||||||
cache[cache_key] = objects
|
|
||||||
|
|
||||||
return objects
|
|
||||||
|
|
||||||
async def register(self, obj: RoutableObjectWithProvider) -> bool:
|
async def register(self, obj: RoutableObjectWithProvider) -> bool:
|
||||||
await self._ensure_initialized()
|
await self._ensure_initialized()
|
||||||
|
@ -196,16 +184,24 @@ class CachedDiskDistributionRegistry(DiskDistributionRegistry):
|
||||||
if success:
|
if success:
|
||||||
cache_key = (obj.type, obj.identifier)
|
cache_key = (obj.type, obj.identifier)
|
||||||
async with self._locked_cache() as cache:
|
async with self._locked_cache() as cache:
|
||||||
if cache_key not in cache:
|
cache[cache_key] = obj
|
||||||
cache[cache_key] = []
|
|
||||||
if not any(
|
|
||||||
cached_obj.provider_id == obj.provider_id
|
|
||||||
for cached_obj in cache[cache_key]
|
|
||||||
):
|
|
||||||
cache[cache_key].append(obj)
|
|
||||||
|
|
||||||
return success
|
return success
|
||||||
|
|
||||||
|
async def update(self, obj: RoutableObjectWithProvider) -> None:
|
||||||
|
await super().update(obj)
|
||||||
|
cache_key = (obj.type, obj.identifier)
|
||||||
|
async with self._locked_cache() as cache:
|
||||||
|
cache[cache_key] = obj
|
||||||
|
return obj
|
||||||
|
|
||||||
|
async def delete(self, type: str, identifier: str) -> None:
|
||||||
|
await super().delete(type, identifier)
|
||||||
|
cache_key = (type, identifier)
|
||||||
|
async with self._locked_cache() as cache:
|
||||||
|
if cache_key in cache:
|
||||||
|
del cache[cache_key]
|
||||||
|
|
||||||
|
|
||||||
async def create_dist_registry(
|
async def create_dist_registry(
|
||||||
metadata_store: Optional[KVStoreConfig],
|
metadata_store: Optional[KVStoreConfig],
|
||||||
|
|
|
@ -6,6 +6,8 @@
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from llama_models.datatypes import CoreModelId
|
||||||
|
|
||||||
# How to run this test:
|
# How to run this test:
|
||||||
#
|
#
|
||||||
# pytest -v -s llama_stack/providers/tests/inference/test_model_registration.py
|
# pytest -v -s llama_stack/providers/tests/inference/test_model_registration.py
|
||||||
|
@ -33,3 +35,23 @@ class TestModelRegistration:
|
||||||
await models_impl.register_model(
|
await models_impl.register_model(
|
||||||
model_id="Llama3-NonExistent-Model",
|
model_id="Llama3-NonExistent-Model",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_update_model(self, inference_stack):
|
||||||
|
_, models_impl = inference_stack
|
||||||
|
|
||||||
|
# Register a model to update
|
||||||
|
model_id = CoreModelId.llama3_1_8b_instruct.value
|
||||||
|
old_model = await models_impl.register_model(model_id=model_id)
|
||||||
|
|
||||||
|
# Update the model
|
||||||
|
new_model_id = CoreModelId.llama3_2_3b_instruct.value
|
||||||
|
updated_model = await models_impl.update_model(
|
||||||
|
model_id=model_id, provider_model_id=new_model_id
|
||||||
|
)
|
||||||
|
|
||||||
|
# Retrieve the updated model to verify changes
|
||||||
|
assert updated_model.provider_resource_id != old_model.provider_resource_id
|
||||||
|
|
||||||
|
# Cleanup
|
||||||
|
await models_impl.delete_model(model_id=model_id)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue