[api_updates_3] fix CLI for routing_table, bug fixes for memory & safety (#90)

* fix llama stack build
* fix configure
* fix configure for simple case
* configure w/ routing
* move examples config
* fix memory router naming
* issue w/ safety
* fix config w/ safety
* update memory endpoints
* allow providers in api_providers
* configure script works
* all endpoints w/ build->configure->run simple local works
* new example run.yaml
* run openapi generator
2025-12-04 02:03:44 +00:00 · 2024-09-23 08:46:33 -07:00 · 2024-09-23 08:46:33 -07:00 · ddebf9b6e7
commit ddebf9b6e7
parent 8cf634e615
18 changed files with 725 additions and 605 deletions
--- a/docs/resources/llama-stack-spec.html
+++ b/docs/resources/llama-stack-spec.html
@@ -21,7 +21,7 @@
    "info": {
        "title": "[DRAFT] Llama Stack Specification",
        "version": "0.0.1",
-        "description": "This is the specification of the llama stack that provides\n                a set of endpoints and their corresponding interfaces that are tailored to\n                best leverage Llama Models. The specification is still in draft and subject to change.\n                Generated at 2024-09-20 14:53:17.090953"
+        "description": "This is the specification of the llama stack that provides\n                a set of endpoints and their corresponding interfaces that are tailored to\n                best leverage Llama Models. The specification is still in draft and subject to change.\n                Generated at 2024-09-23 01:08:55.758597"
    },
    "servers": [
        {
@@ -422,7 +422,7 @@
                }
            }
        },
-        "/memory_banks/create": {
+        "/memory/create": {
            "post": {
                "responses": {
                    "200": {
@@ -561,7 +561,7 @@
                }
            }
        },
-        "/memory_bank/documents/delete": {
+        "/memory/documents/delete": {
            "post": {
                "responses": {
                    "200": {
@@ -594,7 +594,7 @@
                }
            }
        },
-        "/memory_banks/drop": {
+        "/memory/drop": {
            "post": {
                "responses": {
                    "200": {
@@ -988,7 +988,7 @@
                ]
            }
        },
-        "/memory_bank/documents/get": {
+        "/memory/documents/get": {
            "post": {
                "responses": {
                    "200": {
@@ -1180,7 +1180,7 @@
                ]
            }
        },
-        "/memory_banks/get": {
+        "/memory/get": {
            "get": {
                "responses": {
                    "200": {
@@ -1407,7 +1407,7 @@
                ]
            }
        },
-        "/memory_bank/insert": {
+        "/memory/insert": {
            "post": {
                "responses": {
                    "200": {
@@ -1440,7 +1440,7 @@
                }
            }
        },
-        "/memory_banks/list": {
+        "/memory/list": {
            "get": {
                "responses": {
                    "200": {
@@ -1543,7 +1543,7 @@
                }
            }
        },
-        "/memory_bank/query": {
+        "/memory/query": {
            "post": {
                "responses": {
                    "200": {
@@ -1743,7 +1743,7 @@
                }
            }
        },
-        "/memory_bank/update": {
+        "/memory/update": {
            "post": {
                "responses": {
                    "200": {
@@ -2584,183 +2584,7 @@
                                    "$ref": "#/components/schemas/FunctionCallToolDefinition"
                                },
                                {
-                                    "type": "object",
+                                    "$ref": "#/components/schemas/MemoryToolDefinition"
                                    "properties": {
                                        "input_shields": {
                                            "type": "array",
                                            "items": {
                                                "type": "string"
                                            }
                                        },
                                        "output_shields": {
                                            "type": "array",
                                            "items": {
                                                "type": "string"
                                            }
                                        },
                                        "type": {
                                            "type": "string",
                                            "const": "memory"
                                        },
                                        "memory_bank_configs": {
                                            "type": "array",
                                            "items": {
                                                "oneOf": [
                                                    {
                                                        "type": "object",
                                                        "properties": {
                                                            "bank_id": {
                                                                "type": "string"
                                                            },
                                                            "type": {
                                                                "type": "string",
                                                                "const": "vector"
                                                            }
                                                        },
                                                        "additionalProperties": false,
                                                        "required": [
                                                            "bank_id",
                                                            "type"
                                                        ]
                                                    },
                                                    {
                                                        "type": "object",
                                                        "properties": {
                                                            "bank_id": {
                                                                "type": "string"
                                                            },
                                                            "type": {
                                                                "type": "string",
                                                                "const": "keyvalue"
                                                            },
                                                            "keys": {
                                                                "type": "array",
                                                                "items": {
                                                                    "type": "string"
                                                                }
                                                            }
                                                        },
                                                        "additionalProperties": false,
                                                        "required": [
                                                            "bank_id",
                                                            "type",
                                                            "keys"
                                                        ]
                                                    },
                                                    {
                                                        "type": "object",
                                                        "properties": {
                                                            "bank_id": {
                                                                "type": "string"
                                                            },
                                                            "type": {
                                                                "type": "string",
                                                                "const": "keyword"
                                                            }
                                                        },
                                                        "additionalProperties": false,
                                                        "required": [
                                                            "bank_id",
                                                            "type"
                                                        ]
                                                    },
                                                    {
                                                        "type": "object",
                                                        "properties": {
                                                            "bank_id": {
                                                                "type": "string"
                                                            },
                                                            "type": {
                                                                "type": "string",
                                                                "const": "graph"
                                                            },
                                                            "entities": {
                                                                "type": "array",
                                                                "items": {
                                                                    "type": "string"
                                                                }
                                                            }
                                                        },
                                                        "additionalProperties": false,
                                                        "required": [
                                                            "bank_id",
                                                            "type",
                                                            "entities"
                                                        ]
                                                    }
                                                ]
                                            }
                                        },
                                        "query_generator_config": {
                                            "oneOf": [
                                                {
                                                    "type": "object",
                                                    "properties": {
                                                        "type": {
                                                            "type": "string",
                                                            "const": "default"
                                                        },
                                                        "sep": {
                                                            "type": "string"
                                                        }
                                                    },
                                                    "additionalProperties": false,
                                                    "required": [
                                                        "type",
                                                        "sep"
                                                    ]
                                                },
                                                {
                                                    "type": "object",
                                                    "properties": {
                                                        "type": {
                                                            "type": "string",
                                                            "const": "llm"
                                                        },
                                                        "model": {
                                                            "type": "string"
                                                        },
                                                        "template": {
                                                            "type": "string"
                                                        }
                                                    },
                                                    "additionalProperties": false,
                                                    "required": [
                                                        "type",
                                                        "model",
                                                        "template"
                                                    ]
                                                },
                                                {
                                                    "type": "object",
                                                    "properties": {
                                                        "type": {
                                                            "type": "string",
                                                            "const": "custom"
                                                        }
                                                    },
                                                    "additionalProperties": false,
                                                    "required": [
                                                        "type"
                                                    ]
                                                }
                                            ]
                                        },
                                        "max_tokens_in_context": {
                                            "type": "integer"
                                        },
                                        "max_chunks": {
                                            "type": "integer"
                                        }
                                    },
                                    "additionalProperties": false,
                                    "required": [
                                        "type",
                                        "memory_bank_configs",
                                        "query_generator_config",
                                        "max_tokens_in_context",
                                        "max_chunks"
                                    ]
                                }
                            ]
                        }
@@ -2771,17 +2595,25 @@
                    "tool_prompt_format": {
                        "$ref": "#/components/schemas/ToolPromptFormat"
                    },
                    "max_infer_iters": {
                        "type": "integer"
                    },
                    "model": {
                        "type": "string"
                    },
                    "instructions": {
                        "type": "string"
                    },
                    "enable_session_persistence": {
                        "type": "boolean"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "max_infer_iters",
                    "model",
-                    "instructions"
+                    "instructions",
                    "enable_session_persistence"
                ]
            },
            "CodeInterpreterToolDefinition": {
@@ -2859,6 +2691,185 @@
                    "parameters"
                ]
            },
            "MemoryToolDefinition": {
                "type": "object",
                "properties": {
                    "input_shields": {
                        "type": "array",
                        "items": {
                            "type": "string"
                        }
                    },
                    "output_shields": {
                        "type": "array",
                        "items": {
                            "type": "string"
                        }
                    },
                    "type": {
                        "type": "string",
                        "const": "memory"
                    },
                    "memory_bank_configs": {
                        "type": "array",
                        "items": {
                            "oneOf": [
                                {
                                    "type": "object",
                                    "properties": {
                                        "bank_id": {
                                            "type": "string"
                                        },
                                        "type": {
                                            "type": "string",
                                            "const": "vector"
                                        }
                                    },
                                    "additionalProperties": false,
                                    "required": [
                                        "bank_id",
                                        "type"
                                    ]
                                },
                                {
                                    "type": "object",
                                    "properties": {
                                        "bank_id": {
                                            "type": "string"
                                        },
                                        "type": {
                                            "type": "string",
                                            "const": "keyvalue"
                                        },
                                        "keys": {
                                            "type": "array",
                                            "items": {
                                                "type": "string"
                                            }
                                        }
                                    },
                                    "additionalProperties": false,
                                    "required": [
                                        "bank_id",
                                        "type",
                                        "keys"
                                    ]
                                },
                                {
                                    "type": "object",
                                    "properties": {
                                        "bank_id": {
                                            "type": "string"
                                        },
                                        "type": {
                                            "type": "string",
                                            "const": "keyword"
                                        }
                                    },
                                    "additionalProperties": false,
                                    "required": [
                                        "bank_id",
                                        "type"
                                    ]
                                },
                                {
                                    "type": "object",
                                    "properties": {
                                        "bank_id": {
                                            "type": "string"
                                        },
                                        "type": {
                                            "type": "string",
                                            "const": "graph"
                                        },
                                        "entities": {
                                            "type": "array",
                                            "items": {
                                                "type": "string"
                                            }
                                        }
                                    },
                                    "additionalProperties": false,
                                    "required": [
                                        "bank_id",
                                        "type",
                                        "entities"
                                    ]
                                }
                            ]
                        }
                    },
                    "query_generator_config": {
                        "oneOf": [
                            {
                                "type": "object",
                                "properties": {
                                    "type": {
                                        "type": "string",
                                        "const": "default"
                                    },
                                    "sep": {
                                        "type": "string"
                                    }
                                },
                                "additionalProperties": false,
                                "required": [
                                    "type",
                                    "sep"
                                ]
                            },
                            {
                                "type": "object",
                                "properties": {
                                    "type": {
                                        "type": "string",
                                        "const": "llm"
                                    },
                                    "model": {
                                        "type": "string"
                                    },
                                    "template": {
                                        "type": "string"
                                    }
                                },
                                "additionalProperties": false,
                                "required": [
                                    "type",
                                    "model",
                                    "template"
                                ]
                            },
                            {
                                "type": "object",
                                "properties": {
                                    "type": {
                                        "type": "string",
                                        "const": "custom"
                                    }
                                },
                                "additionalProperties": false,
                                "required": [
                                    "type"
                                ]
                            }
                        ]
                    },
                    "max_tokens_in_context": {
                        "type": "integer"
                    },
                    "max_chunks": {
                        "type": "integer"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type",
                    "memory_bank_configs",
                    "query_generator_config",
                    "max_tokens_in_context",
                    "max_chunks"
                ]
            },
            "PhotogenToolDefinition": {
                "type": "object",
                "properties": {
@@ -5569,31 +5580,28 @@
    ],
    "tags": [
        {
-            "name": "Agents"
+            "name": "PostTraining"
        },
        {
            "name": "RewardScoring"
        },
        {
            "name": "Evaluations"
        },
        {
            "name": "Safety"
        },
        {
-            "name": "Telemetry"
+            "name": "SyntheticDataGeneration"
        },
        {
            "name": "PostTraining"
        },
        {
            "name": "Datasets"
        },
        {
-            "name": "Inference"
+            "name": "Telemetry"
        },
        {
-            "name": "SyntheticDataGeneration"
+            "name": "Evaluations"
        },
        {
            "name": "RewardScoring"
        },
        {
            "name": "Agents"
        },
        {
            "name": "Memory"
@@ -5601,6 +5609,9 @@
        {
            "name": "BatchInference"
        },
+        {
+            "name": "Inference"
+        },
        {
            "name": "BuiltinTool",
            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/BuiltinTool\" />"
@@ -5733,6 +5744,10 @@
            "name": "FunctionCallToolDefinition",
            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/FunctionCallToolDefinition\" />"
        },
+        {
+            "name": "MemoryToolDefinition",
+            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/MemoryToolDefinition\" />"
+        },
        {
            "name": "PhotogenToolDefinition",
            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/PhotogenToolDefinition\" />"
@@ -6174,6 +6189,7 @@
                "MemoryBank",
                "MemoryBankDocument",
                "MemoryRetrievalStep",
+                "MemoryToolDefinition",
                "MetricEvent",
                "OptimizerConfig",
                "PhotogenToolDefinition",
--- a/docs/resources/llama-stack-spec.yaml
+++ b/docs/resources/llama-stack-spec.yaml
@@ -4,12 +4,16 @@ components:
    AgentConfig:
      additionalProperties: false
      properties:
        enable_session_persistence:
          type: boolean
        input_shields:
          items:
            type: string
          type: array
        instructions:
          type: string
        max_infer_iters:
          type: integer
        model:
          type: string
        output_shields:
@@ -30,127 +34,13 @@ components:
            - $ref: '#/components/schemas/PhotogenToolDefinition'
            - $ref: '#/components/schemas/CodeInterpreterToolDefinition'
            - $ref: '#/components/schemas/FunctionCallToolDefinition'
-            - additionalProperties: false
+            - $ref: '#/components/schemas/MemoryToolDefinition'
              properties:
                input_shields:
                  items:
                    type: string
                  type: array
                max_chunks:
                  type: integer
                max_tokens_in_context:
                  type: integer
                memory_bank_configs:
                  items:
                    oneOf:
                    - additionalProperties: false
                      properties:
                        bank_id:
                          type: string
                        type:
                          const: vector
                          type: string
                      required:
                      - bank_id
                      - type
                      type: object
                    - additionalProperties: false
                      properties:
                        bank_id:
                          type: string
                        keys:
                          items:
                            type: string
                          type: array
                        type:
                          const: keyvalue
                          type: string
                      required:
                      - bank_id
                      - type
                      - keys
                      type: object
                    - additionalProperties: false
                      properties:
                        bank_id:
                          type: string
                        type:
                          const: keyword
                          type: string
                      required:
                      - bank_id
                      - type
                      type: object
                    - additionalProperties: false
                      properties:
                        bank_id:
                          type: string
                        entities:
                          items:
                            type: string
                          type: array
                        type:
                          const: graph
                          type: string
                      required:
                      - bank_id
                      - type
                      - entities
                      type: object
                  type: array
                output_shields:
                  items:
                    type: string
                  type: array
                query_generator_config:
                  oneOf:
                  - additionalProperties: false
                    properties:
                      sep:
                        type: string
                      type:
                        const: default
                        type: string
                    required:
                    - type
                    - sep
                    type: object
                  - additionalProperties: false
                    properties:
                      model:
                        type: string
                      template:
                        type: string
                      type:
                        const: llm
                        type: string
                    required:
                    - type
                    - model
                    - template
                    type: object
                  - additionalProperties: false
                    properties:
                      type:
                        const: custom
                        type: string
                    required:
                    - type
                    type: object
                type:
                  const: memory
                  type: string
              required:
              - type
              - memory_bank_configs
              - query_generator_config
              - max_tokens_in_context
              - max_chunks
              type: object
          type: array
      required:
      - max_infer_iters
      - model
      - instructions
      - enable_session_persistence
      type: object
    AgentCreateResponse:
      additionalProperties: false
@ -1182,6 +1072,124 @@ components:
      - memory_bank_ids
      - inserted_context
      type: object
    MemoryToolDefinition:
      additionalProperties: false
      properties:
        input_shields:
          items:
            type: string
          type: array
        max_chunks:
          type: integer
        max_tokens_in_context:
          type: integer
        memory_bank_configs:
          items:
            oneOf:
            - additionalProperties: false
              properties:
                bank_id:
                  type: string
                type:
                  const: vector
                  type: string
              required:
              - bank_id
              - type
              type: object
            - additionalProperties: false
              properties:
                bank_id:
                  type: string
                keys:
                  items:
                    type: string
                  type: array
                type:
                  const: keyvalue
                  type: string
              required:
              - bank_id
              - type
              - keys
              type: object
            - additionalProperties: false
              properties:
                bank_id:
                  type: string
                type:
                  const: keyword
                  type: string
              required:
              - bank_id
              - type
              type: object
            - additionalProperties: false
              properties:
                bank_id:
                  type: string
                entities:
                  items:
                    type: string
                  type: array
                type:
                  const: graph
                  type: string
              required:
              - bank_id
              - type
              - entities
              type: object
          type: array
        output_shields:
          items:
            type: string
          type: array
        query_generator_config:
          oneOf:
          - additionalProperties: false
            properties:
              sep:
                type: string
              type:
                const: default
                type: string
            required:
            - type
            - sep
            type: object
          - additionalProperties: false
            properties:
              model:
                type: string
              template:
                type: string
              type:
                const: llm
                type: string
            required:
            - type
            - model
            - template
            type: object
          - additionalProperties: false
            properties:
              type:
                const: custom
                type: string
            required:
            - type
            type: object
        type:
          const: memory
          type: string
      required:
      - type
      - memory_bank_configs
      - query_generator_config
      - max_tokens_in_context
      - max_chunks
      type: object
    MetricEvent:
      additionalProperties: false
      properties:
@ -2341,7 +2349,7 @@ info:
  description: "This is the specification of the llama stack that provides\n     \
    \           a set of endpoints and their corresponding interfaces that are tailored\
    \ to\n                best leverage Llama Models. The specification is still in\
-    \ draft and subject to change.\n                Generated at 2024-09-20 14:53:17.090953"
+    \ draft and subject to change.\n                Generated at 2024-09-23 01:08:55.758597"
  title: '[DRAFT] Llama Stack Specification'
  version: 0.0.1
 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@ -2944,7 +2952,32 @@ paths:
          description: OK
      tags:
      - Inference
-  /memory_bank/documents/delete:
+  /memory/create:
    post:
      parameters:
      - description: JSON-encoded provider data which will be made available to the
          adapter servicing the API
        in: header
        name: X-LlamaStack-ProviderData
        required: false
        schema:
          type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateMemoryBankRequest'
        required: true
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/MemoryBank'
          description: OK
      tags:
      - Memory
  /memory/documents/delete:
    post:
      parameters:
      - description: JSON-encoded provider data which will be made available to the
@ -2965,7 +2998,7 @@ paths:
          description: OK
      tags:
      - Memory
-  /memory_bank/documents/get:
+  /memory/documents/get:
    post:
      parameters:
      - in: query
@ -2995,99 +3028,7 @@ paths:
          description: OK
      tags:
      - Memory
-  /memory_bank/insert:
+  /memory/drop:
    post:
      parameters:
      - description: JSON-encoded provider data which will be made available to the
          adapter servicing the API
        in: header
        name: X-LlamaStack-ProviderData
        required: false
        schema:
          type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/InsertDocumentsRequest'
        required: true
      responses:
        '200':
          description: OK
      tags:
      - Memory
  /memory_bank/query:
    post:
      parameters:
      - description: JSON-encoded provider data which will be made available to the
          adapter servicing the API
        in: header
        name: X-LlamaStack-ProviderData
        required: false
        schema:
          type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/QueryDocumentsRequest'
        required: true
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueryDocumentsResponse'
          description: OK
      tags:
      - Memory
  /memory_bank/update:
    post:
      parameters:
      - description: JSON-encoded provider data which will be made available to the
          adapter servicing the API
        in: header
        name: X-LlamaStack-ProviderData
        required: false
        schema:
          type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/UpdateDocumentsRequest'
        required: true
      responses:
        '200':
          description: OK
      tags:
      - Memory
  /memory_banks/create:
    post:
      parameters:
      - description: JSON-encoded provider data which will be made available to the
          adapter servicing the API
        in: header
        name: X-LlamaStack-ProviderData
        required: false
        schema:
          type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateMemoryBankRequest'
        required: true
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/MemoryBank'
          description: OK
      tags:
      - Memory
  /memory_banks/drop:
    post:
      parameters:
      - description: JSON-encoded provider data which will be made available to the
@ -3112,7 +3053,7 @@ paths:
          description: OK
      tags:
      - Memory
-  /memory_banks/get:
+  /memory/get:
    get:
      parameters:
      - in: query
@ -3138,7 +3079,28 @@ paths:
          description: OK
      tags:
      - Memory
-  /memory_banks/list:
+  /memory/insert:
    post:
      parameters:
      - description: JSON-encoded provider data which will be made available to the
          adapter servicing the API
        in: header
        name: X-LlamaStack-ProviderData
        required: false
        schema:
          type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/InsertDocumentsRequest'
        required: true
      responses:
        '200':
          description: OK
      tags:
      - Memory
  /memory/list:
    get:
      parameters:
      - description: JSON-encoded provider data which will be made available to the
@ -3157,6 +3119,52 @@ paths:
          description: OK
      tags:
      - Memory
  /memory/query:
    post:
      parameters:
      - description: JSON-encoded provider data which will be made available to the
          adapter servicing the API
        in: header
        name: X-LlamaStack-ProviderData
        required: false
        schema:
          type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/QueryDocumentsRequest'
        required: true
      responses:
        '200':
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/QueryDocumentsResponse'
          description: OK
      tags:
      - Memory
  /memory/update:
    post:
      parameters:
      - description: JSON-encoded provider data which will be made available to the
          adapter servicing the API
        in: header
        name: X-LlamaStack-ProviderData
        required: false
        schema:
          type: string
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/UpdateDocumentsRequest'
        required: true
      responses:
        '200':
          description: OK
      tags:
      - Memory
  /post_training/job/artifacts:
    get:
      parameters:
@ -3444,17 +3452,17 @@ security:
 servers:
 - url: http://any-hosted-llama-stack.com
 tags:
 - name: Agents
 - name: RewardScoring
 - name: Evaluations
 - name: Safety
 - name: Telemetry
 - name: PostTraining
- name: Datasets
+- name: Safety
 - name: Inference
 - name: SyntheticDataGeneration
 - name: Datasets
 - name: Telemetry
 - name: Evaluations
 - name: RewardScoring
 - name: Agents
 - name: Memory
 - name: BatchInference
 - name: Inference
 - description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
  name: BuiltinTool
 - description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
@ -3564,6 +3572,9 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/FunctionCallToolDefinition"
    />
  name: FunctionCallToolDefinition
 - description: <SchemaDefinition schemaRef="#/components/schemas/MemoryToolDefinition"
    />
  name: MemoryToolDefinition
 - description: <SchemaDefinition schemaRef="#/components/schemas/PhotogenToolDefinition"
    />
  name: PhotogenToolDefinition
@ -3922,6 +3933,7 @@ x-tagGroups:
  - MemoryBank
  - MemoryBankDocument
  - MemoryRetrievalStep
  - MemoryToolDefinition
  - MetricEvent
  - OptimizerConfig
  - PhotogenToolDefinition
--- a/llama_stack/apis/memory/client.py
+++ b/llama_stack/apis/memory/client.py
@ -38,7 +38,7 @@ class MemoryClient(Memory):
    async def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]:
        async with httpx.AsyncClient() as client:
            r = await client.get(
-                f"{self.base_url}/memory_banks/get",
+                f"{self.base_url}/memory/get",
                params={
                    "bank_id": bank_id,
                },
@ -59,7 +59,7 @@ class MemoryClient(Memory):
    ) -> MemoryBank:
        async with httpx.AsyncClient() as client:
            r = await client.post(
-                f"{self.base_url}/memory_banks/create",
+                f"{self.base_url}/memory/create",
                json={
                    "name": name,
                    "config": config.dict(),
@ -81,7 +81,7 @@ class MemoryClient(Memory):
    ) -> None:
        async with httpx.AsyncClient() as client:
            r = await client.post(
-                f"{self.base_url}/memory_bank/insert",
+                f"{self.base_url}/memory/insert",
                json={
                    "bank_id": bank_id,
                    "documents": [d.dict() for d in documents],
@ -99,7 +99,7 @@ class MemoryClient(Memory):
    ) -> QueryDocumentsResponse:
        async with httpx.AsyncClient() as client:
            r = await client.post(
-                f"{self.base_url}/memory_bank/query",
+                f"{self.base_url}/memory/query",
                json={
                    "bank_id": bank_id,
                    "query": query,
--- a/llama_stack/apis/memory/memory.py
+++ b/llama_stack/apis/memory/memory.py
@ -96,7 +96,7 @@ class MemoryBank(BaseModel):
 class Memory(Protocol):
-    @webmethod(route="/memory_banks/create")
+    @webmethod(route="/memory/create")
    async def create_memory_bank(
        self,
        name: str,
@ -104,13 +104,13 @@ class Memory(Protocol):
        url: Optional[URL] = None,
    ) -> MemoryBank: ...
-    @webmethod(route="/memory_banks/list", method="GET")
+    @webmethod(route="/memory/list", method="GET")
    async def list_memory_banks(self) -> List[MemoryBank]: ...
-    @webmethod(route="/memory_banks/get", method="GET")
+    @webmethod(route="/memory/get", method="GET")
    async def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]: ...
-    @webmethod(route="/memory_banks/drop", method="DELETE")
+    @webmethod(route="/memory/drop", method="DELETE")
    async def drop_memory_bank(
        self,
        bank_id: str,
@ -118,7 +118,7 @@ class Memory(Protocol):
    # this will just block now until documents are inserted, but it should
    # probably return a Job instance which can be polled for completion
-    @webmethod(route="/memory_bank/insert")
+    @webmethod(route="/memory/insert")
    async def insert_documents(
        self,
        bank_id: str,
@ -126,14 +126,14 @@ class Memory(Protocol):
        ttl_seconds: Optional[int] = None,
    ) -> None: ...
-    @webmethod(route="/memory_bank/update")
+    @webmethod(route="/memory/update")
    async def update_documents(
        self,
        bank_id: str,
        documents: List[MemoryBankDocument],
    ) -> None: ...
-    @webmethod(route="/memory_bank/query")
+    @webmethod(route="/memory/query")
    async def query_documents(
        self,
        bank_id: str,
@ -141,14 +141,14 @@ class Memory(Protocol):
        params: Optional[Dict[str, Any]] = None,
    ) -> QueryDocumentsResponse: ...
-    @webmethod(route="/memory_bank/documents/get", method="GET")
+    @webmethod(route="/memory/documents/get", method="GET")
    async def get_documents(
        self,
        bank_id: str,
        document_ids: List[str],
    ) -> List[MemoryBankDocument]: ...
-    @webmethod(route="/memory_bank/documents/delete", method="DELETE")
+    @webmethod(route="/memory/documents/delete", method="DELETE")
    async def delete_documents(
        self,
        bank_id: str,
--- a/llama_stack/apis/memory_banks/memory_banks.py
+++ b/llama_stack/apis/memory_banks/memory_banks.py
@ -7,11 +7,11 @@
 from typing import List, Optional, Protocol
 from llama_models.schema_utils import json_schema_type, webmethod
 from pydantic import BaseModel, Field
 from llama_stack.apis.memory import MemoryBankType
 from llama_stack.distribution.datatypes import GenericProviderConfig
 from pydantic import BaseModel, Field
@json_schema_type
--- a/llama_stack/cli/stack/build.py
+++ b/llama_stack/cli/stack/build.py
@ -160,7 +160,11 @@ class StackBuild(Subcommand):
    def _run_stack_build_command(self, args: argparse.Namespace) -> None:
        import yaml
-        from llama_stack.distribution.distribution import Api, api_providers
+        from llama_stack.distribution.distribution import (
            Api,
            api_providers,
            builtin_automatically_routed_apis,
        )
        from llama_stack.distribution.utils.dynamic import instantiate_class_type
        from prompt_toolkit import prompt
        from prompt_toolkit.validation import Validator
@ -213,8 +217,15 @@ class StackBuild(Subcommand):
            )
            providers = dict()
            all_providers = api_providers()
            routing_table_apis = set(
                x.routing_table_api for x in builtin_automatically_routed_apis()
            )
            for api in Api:
-                all_providers = api_providers()
+                if api in routing_table_apis:
                    continue
                providers_for_api = all_providers[api]
                api_provider = prompt(
--- a/llama_stack/cli/stack/configure.py
+++ b/llama_stack/cli/stack/configure.py
@ -145,7 +145,7 @@ class StackConfigure(Subcommand):
                built_at=datetime.now(),
                image_name=image_name,
                apis_to_serve=[],
-                provider_map={},
+                api_providers={},
            )
        config = configure_api_providers(config, build_config.distribution_spec)
--- a/llama_stack/distribution/configure.py
+++ b/llama_stack/distribution/configure.py
@ -9,12 +9,21 @@ from typing import Any
 from pydantic import BaseModel
 from llama_stack.distribution.datatypes import *  # noqa: F403
-from termcolor import cprint
+from llama_stack.apis.memory.memory import MemoryBankType
-
+from llama_stack.distribution.distribution import (
-from llama_stack.distribution.distribution import api_providers, stack_apis
+    api_providers,
    builtin_automatically_routed_apis,
    stack_apis,
 )
 from llama_stack.distribution.utils.dynamic import instantiate_class_type
 from llama_stack.distribution.utils.prompt_for_config import prompt_for_config
 from llama_stack.providers.impls.meta_reference.safety.config import (
    MetaReferenceShieldType,
 )
 from prompt_toolkit import prompt
 from prompt_toolkit.validation import Validator
 from termcolor import cprint
 def make_routing_entry_type(config_class: Any):
@ -25,71 +34,139 @@ def make_routing_entry_type(config_class: Any):
    return BaseModelWithConfig
 def get_builtin_apis(provider_backed_apis: List[str]) -> List[str]:
    """Get corresponding builtin APIs given provider backed APIs"""
    res = []
    for inf in builtin_automatically_routed_apis():
        if inf.router_api.value in provider_backed_apis:
            res.append(inf.routing_table_api.value)
    return res
 # TODO: make sure we can deal with existing configuration values correctly
 # instead of just overwriting them
 def configure_api_providers(
    config: StackRunConfig, spec: DistributionSpec
 ) -> StackRunConfig:
    apis = config.apis_to_serve or list(spec.providers.keys())
-    config.apis_to_serve = [a for a in apis if a != "telemetry"]
+    # append the builtin routing APIs
    apis += get_builtin_apis(apis)
    router_api2builtin_api = {
        inf.router_api.value: inf.routing_table_api.value
        for inf in builtin_automatically_routed_apis()
    }
    config.apis_to_serve = list(set([a for a in apis if a != "telemetry"]))
    apis = [v.value for v in stack_apis()]
    all_providers = api_providers()
    # configure the simple case: non-routing providers go into api_providers
    for api_str in spec.providers.keys():
        if api_str not in apis:
            raise ValueError(f"Unknown API `{api_str}`")
-        cprint(f"Configuring API `{api_str}`...\n", "white", attrs=["bold"])
+        cprint(f"Configuring API `{api_str}`...", "green", attrs=["bold"])
        api = Api(api_str)
-        provider_or_providers = spec.providers[api_str]
+        p = spec.providers[api_str]
-        if isinstance(provider_or_providers, list) and len(provider_or_providers) > 1:
+        cprint(f"=== Configuring provider `{p}` for API {api_str}...", "green")
-            print(
+
-                "You have specified multiple providers for this API. We will configure a routing table now. For each provider, provide a routing key followed by provider configuration.\n"
+        if isinstance(p, list):
            cprint(
                f"[WARN] Interactive configuration of multiple providers {p} is not supported, configuring {p[0]} only, please manually configure {p[1:]} in routing_table of run.yaml",
                "yellow",
            )
            p = p[0]
        provider_spec = all_providers[api][p]
        config_type = instantiate_class_type(provider_spec.config_class)
        try:
            provider_config = config.api_providers.get(api_str)
            if provider_config:
                existing = config_type(**provider_config.config)
            else:
                existing = None
        except Exception:
            existing = None
        cfg = prompt_for_config(config_type, existing)
        if api_str in router_api2builtin_api:
            # a routing api, we need to infer and assign it a routing_key and put it in the routing_table
            routing_key = "<PLEASE_FILL_ROUTING_KEY>"
            routing_entries = []
-            for p in provider_or_providers:
+            if api_str == "inference":
-                print(f"Configuring provider `{p}`...")
+                if hasattr(cfg, "model"):
-                provider_spec = all_providers[api][p]
+                    routing_key = cfg.model
-                config_type = instantiate_class_type(provider_spec.config_class)
+                else:
-
+                    routing_key = prompt(
-                # TODO: we need to validate the routing keys, and
+                        "> Please enter the supported model your provider has for inference: ",
-                # perhaps it is better if we break this out into asking
+                        default="Meta-Llama3.1-8B-Instruct",
-                # for a routing key separately from the associated config
+                    )
                wrapper_type = make_routing_entry_type(config_type)
                rt_entry = prompt_for_config(wrapper_type, None)
                routing_entries.append(
-                    ProviderRoutingEntry(
+                    RoutableProviderConfig(
                        routing_key=routing_key,
                        provider_id=p,
-                        routing_key=rt_entry.routing_key,
+                        config=cfg.dict(),
                        config=rt_entry.config.dict(),
                    )
                )
-            config.provider_map[api_str] = routing_entries
+
-        else:
+            if api_str == "safety":
-            p = (
+                # TODO: add support for other safety providers, and simplify safety provider config
-                provider_or_providers[0]
+                if p == "meta-reference":
-                if isinstance(provider_or_providers, list)
+                    for shield_type in MetaReferenceShieldType:
-                else provider_or_providers
+                        routing_entries.append(
-            )
+                            RoutableProviderConfig(
-            print(f"Configuring provider `{p}`...")
+                                routing_key=shield_type.value,
-            provider_spec = all_providers[api][p]
+                                provider_id=p,
-            config_type = instantiate_class_type(provider_spec.config_class)
+                                config=cfg.dict(),
-            try:
+                            )
-                provider_config = config.provider_map.get(api_str)
+                        )
                if provider_config:
                    existing = config_type(**provider_config.config)
                else:
-                    existing = None
+                    cprint(
-            except Exception:
+                        f"[WARN] Interactive configuration of safety provider {p} is not supported, please manually configure safety shields types in routing_table of run.yaml",
-                existing = None
+                        "yellow",
-            cfg = prompt_for_config(config_type, existing)
+                    )
-            config.provider_map[api_str] = GenericProviderConfig(
+                    routing_entries.append(
                        RoutableProviderConfig(
                            routing_key=routing_key,
                            provider_id=p,
                            config=cfg.dict(),
                        )
                    )
            if api_str == "memory":
                bank_types = list([x.value for x in MemoryBankType])
                routing_key = prompt(
                    "> Please enter the supported memory bank type your provider has for memory: ",
                    default="vector",
                    validator=Validator.from_callable(
                        lambda x: x in bank_types,
                        error_message="Invalid provider, please enter one of the following: {}".format(
                            bank_types
                        ),
                    ),
                )
                routing_entries.append(
                    RoutableProviderConfig(
                        routing_key=routing_key,
                        provider_id=p,
                        config=cfg.dict(),
                    )
                )
            config.routing_table[api_str] = routing_entries
            config.api_providers[api_str] = PlaceholderProviderConfig(
                providers=p if isinstance(p, list) else [p]
            )
        else:
            config.api_providers[api_str] = GenericProviderConfig(
                provider_id=p,
                config=cfg.dict(),
            )
        print("")
    return config
--- a/llama_stack/distribution/datatypes.py
+++ b/llama_stack/distribution/datatypes.py
@ -59,17 +59,16 @@ class GenericProviderConfig(BaseModel):
    config: Dict[str, Any]
 class PlaceholderProviderConfig(BaseModel):
    """Placeholder provider config for APIs whose providers are defined in routing_table"""
    providers: List[str]
 class RoutableProviderConfig(GenericProviderConfig):
    routing_key: str
 class RoutingTableConfig(BaseModel):
    entries: List[RoutableProviderConfig] = Field(...)
    keys: Optional[List[str]] = Field(
        default=None,
    )
 # Example: /inference, /safety
@json_schema_type
 class AutoRoutedProviderSpec(ProviderSpec):
@ -270,12 +269,14 @@ this could be just a hash
 The list of APIs to serve. If not specified, all APIs specified in the provider_map will be served""",
    )
-    api_providers: Dict[str, GenericProviderConfig] = Field(
+    api_providers: Dict[
        str, Union[GenericProviderConfig, PlaceholderProviderConfig]
    ] = Field(
        description="""
 Provider configurations for each of the APIs provided by this package.
 """,
    )
-    routing_tables: Dict[str, RoutingTableConfig] = Field(
+    routing_table: Dict[str, List[RoutableProviderConfig]] = Field(
        default_factory=dict,
        description="""
--- a/llama_stack/distribution/distribution.py
+++ b/llama_stack/distribution/distribution.py
@ -8,8 +8,6 @@ import importlib
 import inspect
 from typing import Dict, List
 from pydantic import BaseModel
 from llama_stack.apis.agents import Agents
 from llama_stack.apis.inference import Inference
 from llama_stack.apis.memory import Memory
@ -19,6 +17,8 @@ from llama_stack.apis.safety import Safety
 from llama_stack.apis.shields import Shields
 from llama_stack.apis.telemetry import Telemetry
 from pydantic import BaseModel
 from .datatypes import Api, ApiEndpoint, ProviderSpec, remote_provider_spec
 # These are the dependencies needed by the distribution server.
--- a/llama_stack/distribution/routers/init.py
+++ b/llama_stack/distribution/routers/init.py
@ -12,7 +12,7 @@ from llama_stack.distribution.datatypes import *  # noqa: F403
 async def get_routing_table_impl(
    api: Api,
    inner_impls: List[Tuple[str, Any]],
-    routing_table_config: RoutingTableConfig,
+    routing_table_config: Dict[str, List[RoutableProviderConfig]],
    _deps,
 ) -> Any:
    from .routing_tables import (
--- a/llama_stack/distribution/routers/routers.py
+++ b/llama_stack/distribution/routers/routers.py
@ -46,9 +46,9 @@ class MemoryRouter(Memory):
        url: Optional[URL] = None,
    ) -> MemoryBank:
        bank_type = config.type
-        provider = await self.routing_table.get_provider_impl(
+        bank = await self.routing_table.get_provider_impl(bank_type).create_memory_bank(
-            bank_type
+            name, config, url
-        ).create_memory_bank(name, config, url)
+        )
        self.bank_id_to_type[bank.bank_id] = bank_type
        return bank
@ -162,6 +162,7 @@ class SafetyRouter(Safety):
        messages: List[Message],
        params: Dict[str, Any] = None,
    ) -> RunShieldResponse:
        print(f"Running shield {shield_type}")
        return await self.routing_table.get_provider_impl(shield_type).run_shield(
            shield_type=shield_type,
            messages=messages,
--- a/llama_stack/distribution/routers/routing_tables.py
+++ b/llama_stack/distribution/routers/routing_tables.py
@ -20,7 +20,7 @@ class CommonRoutingTableImpl(RoutingTable):
    def __init__(
        self,
        inner_impls: List[Tuple[str, Any]],
-        routing_table_config: RoutingTableConfig,
+        routing_table_config: Dict[str, List[RoutableProviderConfig]],
    ) -> None:
        self.providers = {k: v for k, v in inner_impls}
        self.routing_keys = list(self.providers.keys())
@ -40,7 +40,7 @@ class CommonRoutingTableImpl(RoutingTable):
        return self.routing_keys
    def get_provider_config(self, routing_key: str) -> Optional[GenericProviderConfig]:
-        for entry in self.routing_table_config.entries:
+        for entry in self.routing_table_config:
            if entry.routing_key == routing_key:
                return entry
        return None
@ -50,7 +50,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
    async def list_models(self) -> List[ModelServingSpec]:
        specs = []
-        for entry in self.routing_table_config.entries:
+        for entry in self.routing_table_config:
            model_id = entry.routing_key
            specs.append(
                ModelServingSpec(
@ -61,7 +61,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
        return specs
    async def get_model(self, core_model_id: str) -> Optional[ModelServingSpec]:
-        for entry in self.routing_table_config.entries:
+        for entry in self.routing_table_config:
            if entry.routing_key == core_model_id:
                return ModelServingSpec(
                    llama_model=resolve_model(core_model_id),
@ -74,7 +74,7 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
    async def list_shields(self) -> List[ShieldSpec]:
        specs = []
-        for entry in self.routing_table_config.entries:
+        for entry in self.routing_table_config:
            specs.append(
                ShieldSpec(
                    shield_type=entry.routing_key,
@ -84,7 +84,7 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
        return specs
    async def get_shield(self, shield_type: str) -> Optional[ShieldSpec]:
-        for entry in self.routing_table_config.entries:
+        for entry in self.routing_table_config:
            if entry.routing_key == shield_type:
                return ShieldSpec(
                    shield_type=entry.routing_key,
@ -97,7 +97,7 @@ class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks):
    async def list_memory_banks(self) -> List[MemoryBankSpec]:
        specs = []
-        for entry in self.routing_table_config.entries:
+        for entry in self.routing_table_config:
            specs.append(
                MemoryBankSpec(
                    bank_type=entry.routing_key,
@ -107,7 +107,7 @@ class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks):
        return specs
    async def get_memory_bank(self, bank_type: str) -> Optional[MemoryBankSpec]:
-        for entry in self.routing_table_config.entries:
+        for entry in self.routing_table_config:
            if entry.routing_key == bank_type:
                return MemoryBankSpec(
                    bank_type=entry.routing_key,
--- a/llama_stack/distribution/server/server.py
+++ b/llama_stack/distribution/server/server.py
@ -35,9 +35,6 @@ from fastapi import Body, FastAPI, HTTPException, Request, Response
 from fastapi.exceptions import RequestValidationError
 from fastapi.responses import JSONResponse, StreamingResponse
 from fastapi.routing import APIRoute
 from pydantic import BaseModel, ValidationError
 from termcolor import cprint
 from typing_extensions import Annotated
 from llama_stack.providers.utils.telemetry.tracing import (
    end_trace,
@ -45,6 +42,9 @@ from llama_stack.providers.utils.telemetry.tracing import (
    SpanStatus,
    start_trace,
 )
 from pydantic import BaseModel, ValidationError
 from termcolor import cprint
 from typing_extensions import Annotated
 from llama_stack.distribution.datatypes import *  # noqa: F403
 from llama_stack.distribution.distribution import (
@ -307,6 +307,10 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An
        # TODO: check that these APIs are not in the routing table part of the config
        providers = all_providers[api]
        # skip checks for API whose provider config is specified in routing_table
        if isinstance(config, PlaceholderProviderConfig):
            continue
        if config.provider_id not in providers:
            raise ValueError(
                f"Unknown provider `{config.provider_id}` is not available for API `{api}`"
@ -315,9 +319,8 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An
        configs[api] = config
    apis_to_serve = run_config.apis_to_serve or set(
-        list(specs.keys()) + list(run_config.routing_tables.keys())
+        list(specs.keys()) + list(run_config.routing_table.keys())
    )
    print("apis_to_serve", apis_to_serve)
    for info in builtin_automatically_routed_apis():
        source_api = info.routing_table_api
@ -331,15 +334,16 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An
        if info.router_api.value not in apis_to_serve:
            continue
-        if source_api.value not in run_config.routing_tables:
+        print("router_api", info.router_api)
        if info.router_api.value not in run_config.routing_table:
            raise ValueError(f"Routing table for `{source_api.value}` is not provided?")
-        routing_table = run_config.routing_tables[source_api.value]
+        routing_table = run_config.routing_table[info.router_api.value]
        providers = all_providers[info.router_api]
        inner_specs = []
-        for rt_entry in routing_table.entries:
+        for rt_entry in routing_table:
            if rt_entry.provider_id not in providers:
                raise ValueError(
                    f"Unknown provider `{rt_entry.provider_id}` is not available for API `{api}`"
--- a/llama_stack/distribution/utils/dynamic.py
+++ b/llama_stack/distribution/utils/dynamic.py
@ -8,6 +8,7 @@ import importlib
 from typing import Any, Dict
 from llama_stack.distribution.datatypes import *  # noqa: F403
 from termcolor import cprint
 def instantiate_class_type(fully_qualified_name):
@ -43,12 +44,12 @@ async def instantiate_provider(
    elif isinstance(provider_spec, RoutingTableProviderSpec):
        method = "get_routing_table_impl"
-        assert isinstance(provider_config, RoutingTableConfig)
+        assert isinstance(provider_config, List)
        routing_table = provider_config
        inner_specs = {x.provider_id: x for x in provider_spec.inner_specs}
        inner_impls = []
-        for routing_entry in routing_table.entries:
+        for routing_entry in routing_table:
            impl = await instantiate_provider(
                inner_specs[routing_entry.provider_id],
                deps,
--- a/tests/examples/local-run.yaml
+++ b/tests/examples/local-run.yaml
@ -0,0 +1,87 @@
 # Example run configuration. Providers for inference, safety and memory are
 # named under `api_providers` and configured per routing key in `routing_table`;
 # agents and telemetry are configured inline under `api_providers`.
 built_at: '2024-09-23T00:54:40.551416'
 image_name: test-2
 docker_image: null
 conda_env: test-2
 # NOTE(review): `telemetry` has a provider entry under `api_providers` below
 # but is not listed here; confirm whether that is intentional.
 apis_to_serve:
 - shields
 - agents
 - models
 - memory
 - memory_banks
 - inference
 - safety
 # Two entry shapes appear below:
 #   * `providers: [...]`        - only names provider ids; a config for the same
 #                                 API key appears under `routing_table`.
 #   * `provider_id` + `config`  - a single provider configured inline.
 api_providers:
  inference:
    providers:
    - meta-reference
  safety:
    providers:
    - meta-reference
  agents:
    provider_id: meta-reference
    config:
      persistence_store:
        namespace: null
        type: sqlite
        # NOTE(review): absolute path inside one user's home directory; adjust
        # it for the machine that runs this example.
        db_path: /home/xiyan/.llama/runtime/kvstore.db
  memory:
    providers:
    - meta-reference
  telemetry:
    provider_id: meta-reference
    config: {}
 # Each API key maps to a list of entries; every entry carries a `provider_id`,
 # that provider's `config`, and the `routing_key` it is registered under.
 routing_table:
  inference:
  # Single entry: the routing key equals the configured model name.
  - provider_id: meta-reference
    config:
      model: Meta-Llama3.1-8B-Instruct
      quantization: null
      torch_seed: null
      max_seq_len: 4096
      max_batch_size: 1
    routing_key: Meta-Llama3.1-8B-Instruct
  safety:
  # The four entries below have identical `config` blocks and differ only in
  # `routing_key` (llama_guard, code_scanner_guard, injection_shield,
  # jailbreak_shield).
  - provider_id: meta-reference
    config:
      llama_guard_shield:
        model: Llama-Guard-3-8B
        excluded_categories: []
        disable_input_check: false
        disable_output_check: false
      prompt_guard_shield:
        model: Prompt-Guard-86M
    routing_key: llama_guard
  - provider_id: meta-reference
    config:
      llama_guard_shield:
        model: Llama-Guard-3-8B
        excluded_categories: []
        disable_input_check: false
        disable_output_check: false
      prompt_guard_shield:
        model: Prompt-Guard-86M
    routing_key: code_scanner_guard
  - provider_id: meta-reference
    config:
      llama_guard_shield:
        model: Llama-Guard-3-8B
        excluded_categories: []
        disable_input_check: false
        disable_output_check: false
      prompt_guard_shield:
        model: Prompt-Guard-86M
    routing_key: injection_shield
  - provider_id: meta-reference
    config:
      llama_guard_shield:
        model: Llama-Guard-3-8B
        excluded_categories: []
        disable_input_check: false
        disable_output_check: false
      prompt_guard_shield:
        model: Prompt-Guard-86M
    routing_key: jailbreak_shield
  memory:
  # Single entry with an empty provider config, registered under `vector`.
  - provider_id: meta-reference
    config: {}
    routing_key: vector
--- a/tests/examples/router-local-run.yaml
+++ b/tests/examples/router-local-run.yaml
@ -1,50 +0,0 @@
 # Example run configuration in the `provider_map` / `provider_routing_table`
 # layout: telemetry, safety and agents each get one inline provider, while
 # inference and memory are given as lists of routed entries.
 built_at: '2024-09-18T13:41:17.656743'
 image_name: local
 docker_image: null
 conda_env: local
 apis_to_serve:
 - inference
 - memory
 - telemetry
 - agents
 - safety
 - models
 # One provider (`provider_id` + `config`) per API key.
 provider_map:
  telemetry:
    provider_id: meta-reference
    config: {}
  safety:
    provider_id: meta-reference
    config:
      llama_guard_shield:
        model: Llama-Guard-3-8B
        excluded_categories: []
        disable_input_check: false
        disable_output_check: false
      prompt_guard_shield:
        model: Prompt-Guard-86M
  agents:
    provider_id: meta-reference
    config: {}
 # Each API key maps to a list of entries keyed by `routing_key`.
 provider_routing_table:
  inference:
    # Two entries using the same provider; each routing key equals the model
    # name in that entry's config.
    - routing_key: Meta-Llama3.1-8B-Instruct
      provider_id: meta-reference
      config:
        model: Meta-Llama3.1-8B-Instruct
        quantization: null
        torch_seed: null
        max_seq_len: 4096
        max_batch_size: 1
    - routing_key: Meta-Llama3.1-8B
      provider_id: meta-reference
      config:
        model: Meta-Llama3.1-8B
        quantization: null
        torch_seed: null
        max_seq_len: 4096
        max_batch_size: 1
  memory:
    - routing_key: vector
      provider_id: meta-reference
      config: {}
--- a/tests/examples/simple-local-run.yaml
+++ b/tests/examples/simple-local-run.yaml
@ -1,40 +0,0 @@
 # Example run configuration with exactly one inline provider per API in
 # `provider_map` and an empty `provider_routing_table`.
 built_at: '2024-09-19T22:50:36.239761'
 image_name: simple-local
 docker_image: null
 conda_env: simple-local
 # NOTE(review): `models` is listed here but has no entry in `provider_map`
 # below; confirm how it is resolved.
 apis_to_serve:
 - inference
 - safety
 - agents
 - memory
 - models
 - telemetry
 # One provider (`provider_id` + `config`) per API key.
 provider_map:
  inference:
    provider_id: meta-reference
    config:
      model: Meta-Llama3.1-8B-Instruct
      quantization: null
      torch_seed: null
      max_seq_len: 4096
      max_batch_size: 1
  safety:
    provider_id: meta-reference
    config:
      llama_guard_shield:
        model: Llama-Guard-3-8B
        excluded_categories: []
        disable_input_check: false
        disable_output_check: false
      prompt_guard_shield:
        model: Prompt-Guard-86M
  agents:
    provider_id: meta-reference
    config: {}
  memory:
    provider_id: meta-reference
    config: {}
  telemetry:
    provider_id: meta-reference
    config: {}
 # Explicitly empty mapping: no routed entries in this configuration.
 provider_routing_table: {}