Inference to use provider resource id to register and validate (#428)

This PR changes the way a model id gets translated into the final model name that is passed to the provider.
Major changes include:
1) Providers are responsible for registering an object and, as part of that registration, returning the object with the correct provider-specific model name set as provider_resource_id.
2) To help with common lookups by the different names, a new ModelLookup
class is created (a rough sketch of both changes follows below).
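
To make (1) and (2) concrete, here is a minimal sketch of the intended flow. The names used here (Model, OllamaInferenceAdapter, register_model, the alias table) are assumptions made for illustration and are not taken from this diff; only provider_resource_id and ModelLookup are named in the description above.

# Illustrative sketch only -- class/method names are assumed; only
# provider_resource_id and ModelLookup come from the PR description.
from dataclasses import dataclass
from typing import Dict, Optional


@dataclass
class Model:
    identifier: str                               # user-facing model id / alias
    provider_resource_id: Optional[str] = None    # provider-native name, set at registration


class OllamaInferenceAdapter:
    # Assumed alias table mapping stack-level ids to provider-native names.
    ALIASES: Dict[str, str] = {"Llama3.1-8B-Instruct": "llama3.1:8b-instruct-fp16"}

    def register_model(self, model: Model) -> Model:
        # The provider, not the caller, decides the provider-specific name and
        # returns the registered object with provider_resource_id filled in.
        model.provider_resource_id = self.ALIASES.get(model.identifier, model.identifier)
        return model


class ModelLookup:
    """Assumed shape of the lookup helper: index a model under every known name."""

    def __init__(self) -> None:
        self._by_name: Dict[str, Model] = {}

    def add(self, model: Model) -> None:
        self._by_name[model.identifier] = model
        if model.provider_resource_id:
            self._by_name[model.provider_resource_id] = model

    def get(self, name: str) -> Model:
        if name not in self._by_name:
            raise ValueError(f"Unknown model: {name}")
        return self._by_name[name]


# Usage: register through the provider, then resolve either name to the same object.
adapter = OllamaInferenceAdapter()
registered = adapter.register_model(Model(identifier="Llama3.1-8B-Instruct"))
lookup = ModelLookup()
lookup.add(registered)
assert lookup.get("Llama3.1-8B-Instruct") is lookup.get("llama3.1:8b-instruct-fp16")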



Tested all inference providers, including together, fireworks, vllm,
ollama, meta reference, and bedrock.
Dinesh Yeduguru 2024-11-12 20:02:00 -08:00 committed by GitHub
parent e51107e019
commit fdff24e77a
21 changed files with 460 additions and 290 deletions


@@ -21,7 +21,7 @@
"info": {
"title": "[DRAFT] Llama Stack Specification",
"version": "0.0.1",
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-11-12 11:39:48.665782"
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-11-12 15:47:15.607543"
},
"servers": [
{
@@ -2856,7 +2856,7 @@
"ChatCompletionRequest": {
"type": "object",
"properties": {
"model": {
"model_id": {
"type": "string"
},
"messages": {
@@ -2993,7 +2993,7 @@
},
"additionalProperties": false,
"required": [
"model",
"model_id",
"messages"
]
},
@@ -3120,7 +3120,7 @@
"CompletionRequest": {
"type": "object",
"properties": {
"model": {
"model_id": {
"type": "string"
},
"content": {
@@ -3249,7 +3249,7 @@
},
"additionalProperties": false,
"required": [
"model",
"model_id",
"content"
]
},
@@ -4552,7 +4552,7 @@
"EmbeddingsRequest": {
"type": "object",
"properties": {
"model": {
"model_id": {
"type": "string"
},
"contents": {
@@ -4584,7 +4584,7 @@
},
"additionalProperties": false,
"required": [
"model",
"model_id",
"contents"
]
},
@@ -7837,34 +7837,10 @@
],
"tags": [
{
"name": "MemoryBanks"
"name": "Safety"
},
{
"name": "BatchInference"
},
{
"name": "Agents"
},
{
"name": "Inference"
},
{
"name": "DatasetIO"
},
{
"name": "Eval"
},
{
"name": "Models"
},
{
"name": "PostTraining"
},
{
"name": "ScoringFunctions"
},
{
"name": "Datasets"
"name": "EvalTasks"
},
{
"name": "Shields"
@@ -7872,15 +7848,6 @@
{
"name": "Telemetry"
},
{
"name": "Inspect"
},
{
"name": "Safety"
},
{
"name": "SyntheticDataGeneration"
},
{
"name": "Memory"
},
@@ -7888,7 +7855,40 @@
"name": "Scoring"
},
{
"name": "EvalTasks"
"name": "ScoringFunctions"
},
{
"name": "SyntheticDataGeneration"
},
{
"name": "Models"
},
{
"name": "Agents"
},
{
"name": "MemoryBanks"
},
{
"name": "DatasetIO"
},
{
"name": "Inference"
},
{
"name": "Datasets"
},
{
"name": "PostTraining"
},
{
"name": "BatchInference"
},
{
"name": "Eval"
},
{
"name": "Inspect"
},
{
"name": "BuiltinTool",

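The schema hunks above rename the request field from model to model_id in ChatCompletionRequest, CompletionRequest, and EmbeddingsRequest. A minimal client-side sketch of the new payload shape follows; the server URL and the /inference/chat_completion route are assumptions, since the paths section of the spec is not part of this excerpt.

# Sketch of the renamed request body; URL and route are assumptions, and the
# message shape follows the ChatCompletionRequest schema above.
import requests

BASE_URL = "http://localhost:5000"  # assumed local server

payload = {
    "model_id": "Llama3.1-8B-Instruct",  # this field was previously named "model"
    "messages": [{"role": "user", "content": "Hello!"}],
    "stream": False,
}

response = requests.post(f"{BASE_URL}/inference/chat_completion", json=payload)  # assumed route
response.raise_for_status()
print(response.json())

The YAML hunks below mirror the same rename in the YAML rendering of the spec.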

@@ -396,7 +396,7 @@ components:
- $ref: '#/components/schemas/ToolResponseMessage'
- $ref: '#/components/schemas/CompletionMessage'
type: array
model:
model_id:
type: string
response_format:
oneOf:
@@ -453,7 +453,7 @@ components:
$ref: '#/components/schemas/ToolDefinition'
type: array
required:
- model
- model_id
- messages
type: object
ChatCompletionResponse:
@@ -577,7 +577,7 @@ components:
default: 0
type: integer
type: object
model:
model_id:
type: string
response_format:
oneOf:
@@ -626,7 +626,7 @@ components:
stream:
type: boolean
required:
- model
- model_id
- content
type: object
CompletionResponse:
@@ -903,10 +903,10 @@ components:
- $ref: '#/components/schemas/ImageMedia'
type: array
type: array
model:
model_id:
type: string
required:
- model
- model_id
- contents
type: object
EmbeddingsResponse:
@@ -3384,7 +3384,7 @@ info:
description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\
\ draft and subject to change.\n Generated at 2024-11-12 11:39:48.665782"
\ draft and subject to change.\n Generated at 2024-11-12 15:47:15.607543"
title: '[DRAFT] Llama Stack Specification'
version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -4748,24 +4748,24 @@ security:
servers:
- url: http://any-hosted-llama-stack.com
tags:
- name: MemoryBanks
- name: BatchInference
- name: Agents
- name: Inference
- name: DatasetIO
- name: Eval
- name: Models
- name: PostTraining
- name: ScoringFunctions
- name: Datasets
- name: Safety
- name: EvalTasks
- name: Shields
- name: Telemetry
- name: Inspect
- name: Safety
- name: SyntheticDataGeneration
- name: Memory
- name: Scoring
- name: EvalTasks
- name: ScoringFunctions
- name: SyntheticDataGeneration
- name: Models
- name: Agents
- name: MemoryBanks
- name: DatasetIO
- name: Inference
- name: Datasets
- name: PostTraining
- name: BatchInference
- name: Eval
- name: Inspect
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
name: BuiltinTool
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"