Inference to use provider resource id to register and validate (#428)

This PR changes the way a model id gets translated into the final model name that is passed to the provider.
Major changes include:
1) Providers are responsible for registering an object and, as part of that registration, returning the object with the correct provider-specific model name set as provider_resource_id.
2) To help with common lookups by the different names, a new ModelLookup
class is created (a rough sketch of both changes follows below).
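
To make (1) and (2) concrete, here is a minimal sketch of the intended flow. The names used here (Model, OllamaInferenceAdapter, register_model, the alias table) are assumptions made for illustration and are not taken from this diff; only provider_resource_id and ModelLookup are named in the description above.

# Illustrative sketch only -- class/method names are assumed; only
# provider_resource_id and ModelLookup come from the PR description.
from dataclasses import dataclass
from typing import Dict, Optional


@dataclass
class Model:
    identifier: str                               # user-facing model id / alias
    provider_resource_id: Optional[str] = None    # provider-native name, set at registration


class OllamaInferenceAdapter:
    # Assumed alias table mapping stack-level ids to provider-native names.
    ALIASES: Dict[str, str] = {"Llama3.1-8B-Instruct": "llama3.1:8b-instruct-fp16"}

    def register_model(self, model: Model) -> Model:
        # The provider, not the caller, decides the provider-specific name and
        # returns the registered object with provider_resource_id filled in.
        model.provider_resource_id = self.ALIASES.get(model.identifier, model.identifier)
        return model


class ModelLookup:
    """Assumed shape of the lookup helper: index a model under every known name."""

    def __init__(self) -> None:
        self._by_name: Dict[str, Model] = {}

    def add(self, model: Model) -> None:
        self._by_name[model.identifier] = model
        if model.provider_resource_id:
            self._by_name[model.provider_resource_id] = model

    def get(self, name: str) -> Model:
        if name not in self._by_name:
            raise ValueError(f"Unknown model: {name}")
        return self._by_name[name]


# Usage: register through the provider, then resolve either name to the same object.
adapter = OllamaInferenceAdapter()
registered = adapter.register_model(Model(identifier="Llama3.1-8B-Instruct"))
lookup = ModelLookup()
lookup.add(registered)
assert lookup.get("Llama3.1-8B-Instruct") is lookup.get("llama3.1:8b-instruct-fp16")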



Tested all inference providers, including together, fireworks, vllm,
ollama, meta reference, and bedrock.
Dinesh Yeduguru 2024-11-12 20:02:00 -08:00 committed by GitHub
parent e51107e019
commit fdff24e77a
21 changed files with 460 additions and 290 deletions


@@ -21,7 +21,7 @@
"info": {
"title": "[DRAFT] Llama Stack Specification",
"version": "0.0.1",
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-11-12 11:39:48.665782"
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-11-12 15:47:15.607543"
},
"servers": [
{
@@ -2856,7 +2856,7 @@
"ChatCompletionRequest": {
"type": "object",
"properties": {
"model": {
"model_id": {
"type": "string"
},
"messages": {
@@ -2993,7 +2993,7 @@
},
"additionalProperties": false,
"required": [
"model",
"model_id",
"messages"
]
},
@@ -3120,7 +3120,7 @@
"CompletionRequest": {
"type": "object",
"properties": {
"model": {
"model_id": {
"type": "string"
},
"content": {
@@ -3249,7 +3249,7 @@
},
"additionalProperties": false,
"required": [
"model",
"model_id",
"content"
]
},
@@ -4552,7 +4552,7 @@
"EmbeddingsRequest": {
"type": "object",
"properties": {
"model": {
"model_id": {
"type": "string"
},
"contents": {
@@ -4584,7 +4584,7 @@
},
"additionalProperties": false,
"required": [
"model",
"model_id",
"contents"
]
},
@@ -7837,34 +7837,10 @@
],
"tags": [
{
"name": "MemoryBanks"
"name": "Safety"
},
{
"name": "BatchInference"
},
{
"name": "Agents"
},
{
"name": "Inference"
},
{
"name": "DatasetIO"
},
{
"name": "Eval"
},
{
"name": "Models"
},
{
"name": "PostTraining"
},
{
"name": "ScoringFunctions"
},
{
"name": "Datasets"
"name": "EvalTasks"
},
{
"name": "Shields"
@@ -7872,15 +7848,6 @@
{
"name": "Telemetry"
},
{
"name": "Inspect"
},
{
"name": "Safety"
},
{
"name": "SyntheticDataGeneration"
},
{
"name": "Memory"
},
@@ -7888,7 +7855,40 @@
"name": "Scoring"
},
{
"name": "EvalTasks"
"name": "ScoringFunctions"
},
{
"name": "SyntheticDataGeneration"
},
{
"name": "Models"
},
{
"name": "Agents"
},
{
"name": "MemoryBanks"
},
{
"name": "DatasetIO"
},
{
"name": "Inference"
},
{
"name": "Datasets"
},
{
"name": "PostTraining"
},
{
"name": "BatchInference"
},
{
"name": "Eval"
},
{
"name": "Inspect"
},
{
"name": "BuiltinTool",

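The schema hunks above rename the request field from model to model_id in ChatCompletionRequest, CompletionRequest, and EmbeddingsRequest. A minimal client-side sketch of the new payload shape follows; the server URL and the /inference/chat_completion route are assumptions, since the paths section of the spec is not part of this excerpt.

# Sketch of the renamed request body; URL and route are assumptions, and the
# message shape follows the ChatCompletionRequest schema above.
import requests

BASE_URL = "http://localhost:5000"  # assumed local server

payload = {
    "model_id": "Llama3.1-8B-Instruct",  # this field was previously named "model"
    "messages": [{"role": "user", "content": "Hello!"}],
    "stream": False,
}

response = requests.post(f"{BASE_URL}/inference/chat_completion", json=payload)  # assumed route
response.raise_for_status()
print(response.json())

The YAML hunks below mirror the same rename in the YAML rendering of the spec.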

@@ -396,7 +396,7 @@ components:
- $ref: '#/components/schemas/ToolResponseMessage'
- $ref: '#/components/schemas/CompletionMessage'
type: array
model:
model_id:
type: string
response_format:
oneOf:
@@ -453,7 +453,7 @@ components:
$ref: '#/components/schemas/ToolDefinition'
type: array
required:
- model
- model_id
- messages
type: object
ChatCompletionResponse:
@@ -577,7 +577,7 @@ components:
default: 0
type: integer
type: object
model:
model_id:
type: string
response_format:
oneOf:
@@ -626,7 +626,7 @@ components:
stream:
type: boolean
required:
- model
- model_id
- content
type: object
CompletionResponse:
@@ -903,10 +903,10 @@ components:
- $ref: '#/components/schemas/ImageMedia'
type: array
type: array
model:
model_id:
type: string
required:
- model
- model_id
- contents
type: object
EmbeddingsResponse:
@@ -3384,7 +3384,7 @@ info:
description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\
\ draft and subject to change.\n Generated at 2024-11-12 11:39:48.665782"
\ draft and subject to change.\n Generated at 2024-11-12 15:47:15.607543"
title: '[DRAFT] Llama Stack Specification'
version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -4748,24 +4748,24 @@ security:
servers:
- url: http://any-hosted-llama-stack.com
tags:
- name: MemoryBanks
- name: BatchInference
- name: Agents
- name: Inference
- name: DatasetIO
- name: Eval
- name: Models
- name: PostTraining
- name: ScoringFunctions
- name: Datasets
- name: Safety
- name: EvalTasks
- name: Shields
- name: Telemetry
- name: Inspect
- name: Safety
- name: SyntheticDataGeneration
- name: Memory
- name: Scoring
- name: EvalTasks
- name: ScoringFunctions
- name: SyntheticDataGeneration
- name: Models
- name: Agents
- name: MemoryBanks
- name: DatasetIO
- name: Inference
- name: Datasets
- name: PostTraining
- name: BatchInference
- name: Eval
- name: Inspect
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
name: BuiltinTool
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"