forked from phoenix-oss/llama-stack-mirror
Inference to use provider resource id to register and validate (#428)
This PR changes how a model ID is translated into the final model name that is passed to the provider. Major changes include: 1) Providers are responsible for registering an object and, as part of the registration, returning the object with the correct provider-specific name of the model (provider_resource_id). 2) To help with common lookups across different names, a new ModelLookup class is created. Tested all inference providers, including together, fireworks, vllm, ollama, meta reference, and bedrock.
This commit is contained in:
parent
e51107e019
commit
fdff24e77a
21 changed files with 460 additions and 290 deletions
|
@ -21,7 +21,7 @@
|
|||
"info": {
|
||||
"title": "[DRAFT] Llama Stack Specification",
|
||||
"version": "0.0.1",
|
||||
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-11-12 11:39:48.665782"
|
||||
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-11-12 15:47:15.607543"
|
||||
},
|
||||
"servers": [
|
||||
{
|
||||
|
@ -2856,7 +2856,7 @@
|
|||
"ChatCompletionRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"model": {
|
||||
"model_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"messages": {
|
||||
|
@ -2993,7 +2993,7 @@
|
|||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"model",
|
||||
"model_id",
|
||||
"messages"
|
||||
]
|
||||
},
|
||||
|
@ -3120,7 +3120,7 @@
|
|||
"CompletionRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"model": {
|
||||
"model_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"content": {
|
||||
|
@ -3249,7 +3249,7 @@
|
|||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"model",
|
||||
"model_id",
|
||||
"content"
|
||||
]
|
||||
},
|
||||
|
@ -4552,7 +4552,7 @@
|
|||
"EmbeddingsRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"model": {
|
||||
"model_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"contents": {
|
||||
|
@ -4584,7 +4584,7 @@
|
|||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"model",
|
||||
"model_id",
|
||||
"contents"
|
||||
]
|
||||
},
|
||||
|
@ -7837,34 +7837,10 @@
|
|||
],
|
||||
"tags": [
|
||||
{
|
||||
"name": "MemoryBanks"
|
||||
"name": "Safety"
|
||||
},
|
||||
{
|
||||
"name": "BatchInference"
|
||||
},
|
||||
{
|
||||
"name": "Agents"
|
||||
},
|
||||
{
|
||||
"name": "Inference"
|
||||
},
|
||||
{
|
||||
"name": "DatasetIO"
|
||||
},
|
||||
{
|
||||
"name": "Eval"
|
||||
},
|
||||
{
|
||||
"name": "Models"
|
||||
},
|
||||
{
|
||||
"name": "PostTraining"
|
||||
},
|
||||
{
|
||||
"name": "ScoringFunctions"
|
||||
},
|
||||
{
|
||||
"name": "Datasets"
|
||||
"name": "EvalTasks"
|
||||
},
|
||||
{
|
||||
"name": "Shields"
|
||||
|
@ -7872,15 +7848,6 @@
|
|||
{
|
||||
"name": "Telemetry"
|
||||
},
|
||||
{
|
||||
"name": "Inspect"
|
||||
},
|
||||
{
|
||||
"name": "Safety"
|
||||
},
|
||||
{
|
||||
"name": "SyntheticDataGeneration"
|
||||
},
|
||||
{
|
||||
"name": "Memory"
|
||||
},
|
||||
|
@ -7888,7 +7855,40 @@
|
|||
"name": "Scoring"
|
||||
},
|
||||
{
|
||||
"name": "EvalTasks"
|
||||
"name": "ScoringFunctions"
|
||||
},
|
||||
{
|
||||
"name": "SyntheticDataGeneration"
|
||||
},
|
||||
{
|
||||
"name": "Models"
|
||||
},
|
||||
{
|
||||
"name": "Agents"
|
||||
},
|
||||
{
|
||||
"name": "MemoryBanks"
|
||||
},
|
||||
{
|
||||
"name": "DatasetIO"
|
||||
},
|
||||
{
|
||||
"name": "Inference"
|
||||
},
|
||||
{
|
||||
"name": "Datasets"
|
||||
},
|
||||
{
|
||||
"name": "PostTraining"
|
||||
},
|
||||
{
|
||||
"name": "BatchInference"
|
||||
},
|
||||
{
|
||||
"name": "Eval"
|
||||
},
|
||||
{
|
||||
"name": "Inspect"
|
||||
},
|
||||
{
|
||||
"name": "BuiltinTool",
|
||||
|
|
|
@ -396,7 +396,7 @@ components:
|
|||
- $ref: '#/components/schemas/ToolResponseMessage'
|
||||
- $ref: '#/components/schemas/CompletionMessage'
|
||||
type: array
|
||||
model:
|
||||
model_id:
|
||||
type: string
|
||||
response_format:
|
||||
oneOf:
|
||||
|
@ -453,7 +453,7 @@ components:
|
|||
$ref: '#/components/schemas/ToolDefinition'
|
||||
type: array
|
||||
required:
|
||||
- model
|
||||
- model_id
|
||||
- messages
|
||||
type: object
|
||||
ChatCompletionResponse:
|
||||
|
@ -577,7 +577,7 @@ components:
|
|||
default: 0
|
||||
type: integer
|
||||
type: object
|
||||
model:
|
||||
model_id:
|
||||
type: string
|
||||
response_format:
|
||||
oneOf:
|
||||
|
@ -626,7 +626,7 @@ components:
|
|||
stream:
|
||||
type: boolean
|
||||
required:
|
||||
- model
|
||||
- model_id
|
||||
- content
|
||||
type: object
|
||||
CompletionResponse:
|
||||
|
@ -903,10 +903,10 @@ components:
|
|||
- $ref: '#/components/schemas/ImageMedia'
|
||||
type: array
|
||||
type: array
|
||||
model:
|
||||
model_id:
|
||||
type: string
|
||||
required:
|
||||
- model
|
||||
- model_id
|
||||
- contents
|
||||
type: object
|
||||
EmbeddingsResponse:
|
||||
|
@ -3384,7 +3384,7 @@ info:
|
|||
description: "This is the specification of the llama stack that provides\n \
|
||||
\ a set of endpoints and their corresponding interfaces that are tailored\
|
||||
\ to\n best leverage Llama Models. The specification is still in\
|
||||
\ draft and subject to change.\n Generated at 2024-11-12 11:39:48.665782"
|
||||
\ draft and subject to change.\n Generated at 2024-11-12 15:47:15.607543"
|
||||
title: '[DRAFT] Llama Stack Specification'
|
||||
version: 0.0.1
|
||||
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
|
||||
|
@ -4748,24 +4748,24 @@ security:
|
|||
servers:
|
||||
- url: http://any-hosted-llama-stack.com
|
||||
tags:
|
||||
- name: MemoryBanks
|
||||
- name: BatchInference
|
||||
- name: Agents
|
||||
- name: Inference
|
||||
- name: DatasetIO
|
||||
- name: Eval
|
||||
- name: Models
|
||||
- name: PostTraining
|
||||
- name: ScoringFunctions
|
||||
- name: Datasets
|
||||
- name: Safety
|
||||
- name: EvalTasks
|
||||
- name: Shields
|
||||
- name: Telemetry
|
||||
- name: Inspect
|
||||
- name: Safety
|
||||
- name: SyntheticDataGeneration
|
||||
- name: Memory
|
||||
- name: Scoring
|
||||
- name: EvalTasks
|
||||
- name: ScoringFunctions
|
||||
- name: SyntheticDataGeneration
|
||||
- name: Models
|
||||
- name: Agents
|
||||
- name: MemoryBanks
|
||||
- name: DatasetIO
|
||||
- name: Inference
|
||||
- name: Datasets
|
||||
- name: PostTraining
|
||||
- name: BatchInference
|
||||
- name: Eval
|
||||
- name: Inspect
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
|
||||
name: BuiltinTool
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue