diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 8b17510b7..8fff470f6 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -2,4 +2,4 @@
 
 # These owners will be the default owners for everything in
 # the repo. Unless a later match takes precedence,
-* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @terrytangyuan @leseb @bbrowning @reluctantfuturist @mattf @slekkala1 @franciscojavierarceo
+* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @leseb @bbrowning @reluctantfuturist @mattf @slekkala1 @franciscojavierarceo
diff --git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml
index b58f4eb69..a498ef0a0 100644
--- a/.github/workflows/python-build-test.yml
+++ b/.github/workflows/python-build-test.yml
@@ -48,3 +48,4 @@ jobs:
         command -v llama
         llama stack list-apis
         llama stack list-providers inference
+        llama stack list-deps starter
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c31a39406..f94356fe5 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -199,6 +199,27 @@ repos:
               echo;
               exit 1;
             } || true
+      - id: check-api-independence
+        name: Ensure llama_stack_api does not import llama_stack
+        entry: bash
+        language: system
+        pass_filenames: false
+        require_serial: true
+        always_run: true
+        files: ^src/llama_stack_api/.*$
+        args:
+          - -c
+          - |
+            API_DIR="src/llama_stack_api"
+            grep -rn --include="*.py" -E '^[^#]*(import llama_stack\b|from llama_stack\b)' "$API_DIR" 2>/dev/null && {
+              echo "llama_stack_api must not import llama_stack";
+              exit 1;
+            }
+            [ -f "$API_DIR/pyproject.toml" ] && grep -nE 'llama_stack([^_]|$)' "$API_DIR/pyproject.toml" && {
+              echo "llama_stack_api must not depend on llama_stack in pyproject.toml";
+              exit 1;
+            }
+            exit 0  # neither check found a forbidden reference
 
 ci:
     autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
diff --git a/README.md b/README.md
index 639e7280d..5360f4ff0 100644
--- a/README.md
+++ b/README.md
@@ -10,83 +10,6 @@
 [**Quick Start**](https://llamastack.github.io/docs/getting_started/quickstart) | [**Documentation**](https://llamastack.github.io/docs) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)
 
 
-### ✨🎉 Llama 4 Support  🎉✨
-We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta.
-
-<details>
-
-<summary>👋 Click here to see how to run Llama 4 models on Llama Stack </summary>
-
-\
-*Note you need 8xH100 GPU-host to run these models*
-
-```bash
-pip install -U llama_stack
-
-MODEL="Llama-4-Scout-17B-16E-Instruct"
-# get meta url from llama.com
-huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL
-
-# install dependencies for the distribution
-llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
-
-# start a llama stack server
-INFERENCE_MODEL=meta-llama/$MODEL llama stack run meta-reference-gpu
-
-# install client to interact with the server
-pip install llama-stack-client
-```
-### CLI
-```bash
-# Run a chat completion
-MODEL="Llama-4-Scout-17B-16E-Instruct"
-
-llama-stack-client --endpoint http://localhost:8321 \
-inference chat-completion \
---model-id meta-llama/$MODEL \
---message "write a haiku for meta's llama 4 models"
-
-OpenAIChatCompletion(
-    ...
-    choices=[
-        OpenAIChatCompletionChoice(
-            finish_reason='stop',
-            index=0,
-            message=OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam(
-                role='assistant',
-                content='...**Silent minds awaken,**  \n**Whispers of billions of words,**  \n**Reasoning breaks the night.**  \n\n—  \n*This haiku blends the essence of LLaMA 4\'s capabilities with nature-inspired metaphor, evoking its vast training data and transformative potential.*',
-                ...
-            ),
-            ...
-        )
-    ],
-    ...
-)
-```
-### Python SDK
-```python
-from llama_stack_client import LlamaStackClient
-
-client = LlamaStackClient(base_url=f"http://localhost:8321")
-
-model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
-prompt = "Write a haiku about coding"
-
-print(f"User> {prompt}")
-response = client.chat.completions.create(
-    model=model_id,
-    messages=[
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": prompt},
-    ],
-)
-print(f"Assistant> {response.choices[0].message.content}")
-```
-As more providers start supporting Llama 4, you can use them in Llama Stack as well. We are adding to the list. Stay tuned!
-
-
-</details>
-
 ### 🚀 One-Line Installer 🚀
 
 To try Llama Stack locally, run:
diff --git a/client-sdks/stainless/README.md b/client-sdks/stainless/README.md
index 73e7082d4..54ff3d3d1 100644
--- a/client-sdks/stainless/README.md
+++ b/client-sdks/stainless/README.md
@@ -5,4 +5,7 @@ These are the source-of-truth configuration files used to generate the Stainless
 
 A small side note: notice the `.yml` suffixes since Stainless uses that suffix typically for its configuration files.
 
-These files go hand-in-hand. As of now, only the `openapi.yml` file is automatically generated using the `scripts/run_openapi_generator.sh` script.
+These files go hand-in-hand. Both `openapi.yml` and `config.yml` are generated by `scripts/run_openapi_generator.sh`:
+
+- `openapi.yml` comes from the FastAPI-based generator.
+- `config.yml` is rendered from `scripts/openapi_generator/stainless_config/config_data.py` so the Stainless config stays in lock-step with the spec.
diff --git a/client-sdks/stainless/config.yml b/client-sdks/stainless/config.yml
index 9b26114fe..212b2b54a 100644
--- a/client-sdks/stainless/config.yml
+++ b/client-sdks/stainless/config.yml
@@ -1,20 +1,16 @@
 # yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json
 
 organization:
-  # Name of your organization or company, used to determine the name of the client
-  # and headings.
   name: llama-stack-client
   docs: https://llama-stack.readthedocs.io/en/latest/
   contact: llamastack@meta.com
 security:
-  - {}
-  - BearerAuth: []
+- {}
+- BearerAuth: []
 security_schemes:
   BearerAuth:
     type: http
     scheme: bearer
-# `targets` define the output targets and their customization options, such as
-# whether to emit the Node SDK and what it's package name should be.
 targets:
   node:
     package_name: llama-stack-client
@@ -40,71 +36,123 @@ targets:
     options:
       enable_v2: true
       back_compat_use_shared_package: false
-
-# `client_settings` define settings for the API client, such as extra constructor
-# arguments (used for authentication), retry behavior, idempotency, etc.
 client_settings:
   default_env_prefix: LLAMA_STACK_CLIENT
   opts:
     api_key:
       type: string
       read_env: LLAMA_STACK_CLIENT_API_KEY
-      auth: { security_scheme: BearerAuth }
+      auth:
+        security_scheme: BearerAuth
       nullable: true
-
-# `environments` are a map of the name of the environment (e.g. "sandbox",
-# "production") to the corresponding url to use.
 environments:
   production: http://any-hosted-llama-stack.com
-
-# `pagination` defines [pagination schemes] which provides a template to match
-# endpoints and generate next-page and auto-pagination helpers in the SDKs.
 pagination:
-  - name: datasets_iterrows
-    type: offset
-    request:
-      dataset_id:
-        type: string
-      start_index:
-        type: integer
-        x-stainless-pagination-property:
-          purpose: offset_count_param
-      limit:
-        type: integer
-    response:
-      data:
-        type: array
-        items:
+- name: datasets_iterrows
+  type: offset
+  request:
+    dataset_id:
+      type: string
+    start_index:
+      type: integer
+      x-stainless-pagination-property:
+        purpose: offset_count_param
+    limit:
+      type: integer
+  response:
+    data:
+      type: array
+      items:
+        type: object
+    next_index:
+      type: integer
+      x-stainless-pagination-property:
+        purpose: offset_count_start_field
+- name: openai_cursor_page
+  type: cursor
+  request:
+    limit:
+      type: integer
+    after:
+      type: string
+      x-stainless-pagination-property:
+        purpose: next_cursor_param
+  response:
+    data:
+      type: array
+      items: {}
+    has_more:
+      type: boolean
+    last_id:
+      type: string
+      x-stainless-pagination-property:
+        purpose: next_cursor_field
+settings:
+  license: MIT
+  unwrap_response_fields:
+  - data
+  file_header: 'Copyright (c) Meta Platforms, Inc. and affiliates.
+
+    All rights reserved.
+
+
+    This source code is licensed under the terms described in the LICENSE file in
+
+    the root directory of this source tree.
+
+    '
+openapi:
+  transformations:
+  - command: mergeObject
+    reason: Better return_type using enum
+    args:
+      target:
+      - $.components.schemas
+      object:
+        ReturnType:
+          additionalProperties: false
+          properties:
+            type:
+              enum:
+              - string
+              - number
+              - boolean
+              - array
+              - object
+              - json
+              - union
+              - chat_completion_input
+              - completion_input
+              - agent_turn_input
+          required:
+          - type
           type: object
-      next_index:
-        type: integer
-        x-stainless-pagination-property:
-          purpose: offset_count_start_field
-  - name: openai_cursor_page
-    type: cursor
-    request:
-      limit:
-        type: integer
-      after:
-        type: string
-        x-stainless-pagination-property:
-          purpose: next_cursor_param
-    response:
-      data:
-        type: array
-        items: {}
-      has_more:
-        type: boolean
-      last_id:
-        type: string
-        x-stainless-pagination-property:
-          purpose: next_cursor_field
-# `resources` define the structure and organziation for your API, such as how
-# methods and models are grouped together and accessed. See the [configuration
-# guide] for more information.
-#
-# [configuration guide]:
-#   https://app.stainlessapi.com/docs/guides/configure#resources
+  - command: replaceProperties
+    reason: Replace return type properties with better model (see above)
+    args:
+      filter:
+        only:
+        - $.components.schemas.ScoringFn.properties.return_type
+        - $.components.schemas.RegisterScoringFunctionRequest.properties.return_type
+      value:
+        $ref: '#/components/schemas/ReturnType'
+  - command: oneOfToAnyOf
+    reason: Prism (mock server) doesn't like one of our requests as it technically
+      matches multiple variants
+readme:
+  example_requests:
+    default:
+      type: request
+      endpoint: post /v1/chat/completions
+      params: {}
+    headline:
+      type: request
+      endpoint: get /v1/models
+      params: {}
+    pagination:
+      type: request
+      endpoint: post /v1/chat/completions
+      params: {}
 resources:
   $shared:
     models:
@@ -128,19 +176,17 @@ resources:
     methods:
       get: get /v1/tools/{tool_name}
       list:
-        endpoint: get /v1/tools
         paginated: false
-
+        endpoint: get /v1/tools
   tool_runtime:
     models:
       tool_def: ToolDef
       tool_invocation_result: ToolInvocationResult
     methods:
       list_tools:
-        endpoint: get /v1/tool-runtime/list-tools
         paginated: false
+        endpoint: get /v1/tool-runtime/list-tools
       invoke_tool: post /v1/tool-runtime/invoke
-
   responses:
     models:
       response_object_stream: OpenAIResponseObjectStream
@@ -148,10 +194,10 @@ resources:
     methods:
       create:
         type: http
-        endpoint: post /v1/responses
         streaming:
           stream_event_model: responses.response_object_stream
           param_discriminator: stream
+        endpoint: post /v1/responses
       retrieve: get /v1/responses/{response_id}
       list:
         type: http
@@ -164,9 +210,8 @@ resources:
         methods:
           list:
             type: http
-            endpoint: get /v1/responses/{response_id}/input_items
             paginated: false
-
+            endpoint: get /v1/responses/{response_id}/input_items
   prompts:
     models:
       prompt: Prompt
@@ -174,8 +219,8 @@ resources:
     methods:
       create: post /v1/prompts
       list:
-        endpoint: get /v1/prompts
         paginated: false
+        endpoint: get /v1/prompts
       retrieve: get /v1/prompts/{prompt_id}
       update: post /v1/prompts/{prompt_id}
       delete: delete /v1/prompts/{prompt_id}
@@ -184,9 +229,8 @@ resources:
       versions:
         methods:
           list:
-            endpoint: get /v1/prompts/{prompt_id}/versions
             paginated: false
-
+            endpoint: get /v1/prompts/{prompt_id}/versions
   conversations:
     models:
       conversation_object: Conversation
@@ -216,7 +260,6 @@ resources:
           delete:
             type: http
             endpoint: delete /v1/conversations/{conversation_id}/items/{item_id}
-
   inspect:
     models:
       healthInfo: HealthInfo
@@ -226,13 +269,11 @@ resources:
     methods:
       health: get /v1/health
       version: get /v1/version
-
   embeddings:
     models:
       create_embeddings_response: OpenAIEmbeddingsResponse
     methods:
       create: post /v1/embeddings
-
   chat:
     models:
       chat_completion_chunk: OpenAIChatCompletionChunk
@@ -241,14 +282,14 @@ resources:
         methods:
           create:
             type: http
-            endpoint: post /v1/chat/completions
             streaming:
               stream_event_model: chat.chat_completion_chunk
               param_discriminator: stream
+            endpoint: post /v1/chat/completions
           list:
             type: http
-            endpoint: get /v1/chat/completions
             paginated: false
+            endpoint: get /v1/chat/completions
           retrieve:
             type: http
             endpoint: get /v1/chat/completions/{completion_id}
@@ -256,17 +297,15 @@ resources:
     methods:
       create:
         type: http
-        endpoint: post /v1/completions
         streaming:
           param_discriminator: stream
-
+        endpoint: post /v1/completions
   vector_io:
     models:
       queryChunksResponse: QueryChunksResponse
     methods:
       insert: post /v1/vector-io/insert
       query: post /v1/vector-io/query
-
   vector_stores:
     models:
       vector_store: VectorStoreObject
@@ -275,8 +314,7 @@ resources:
       vector_store_search_response: VectorStoreSearchResponsePage
     methods:
       create: post /v1/vector_stores
-      list:
-        endpoint: get /v1/vector_stores
+      list: get /v1/vector_stores
       retrieve: get /v1/vector_stores/{vector_store_id}
       update: post /v1/vector_stores/{vector_store_id}
       delete: delete /v1/vector_stores/{vector_store_id}
@@ -301,15 +339,14 @@ resources:
           retrieve: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}
           list_files: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files
           cancel: post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel
-
   models:
     models:
       model: OpenAIModel
       list_models_response: OpenAIListModelsResponse
     methods:
       list:
-        endpoint: get /v1/models
         paginated: false
+        endpoint: get /v1/models
       retrieve: get /v1/models/{model_id}
       register: post /v1/models
       unregister: delete /v1/models/{model_id}
@@ -317,38 +354,33 @@ resources:
       openai:
         methods:
           list:
-            endpoint: get /v1/models
             paginated: false
-
+            endpoint: get /v1/models
   providers:
     models:
       list_providers_response: ListProvidersResponse
     methods:
       list:
-        endpoint: get /v1/providers
         paginated: false
+        endpoint: get /v1/providers
       retrieve: get /v1/providers/{provider_id}
-
   routes:
     models:
       list_routes_response: ListRoutesResponse
     methods:
       list:
-        endpoint: get /v1/inspect/routes
         paginated: false
-
+        endpoint: get /v1/inspect/routes
   moderations:
     models:
       create_response: ModerationObject
     methods:
       create: post /v1/moderations
-
   safety:
     models:
       run_shield_response: RunShieldResponse
     methods:
       run_shield: post /v1/safety/run-shield
-
   shields:
     models:
       shield: Shield
@@ -356,53 +388,48 @@ resources:
     methods:
       retrieve: get /v1/shields/{identifier}
       list:
-        endpoint: get /v1/shields
         paginated: false
+        endpoint: get /v1/shields
       register: post /v1/shields
       delete: delete /v1/shields/{identifier}
-
   scoring:
     methods:
       score: post /v1/scoring/score
       score_batch: post /v1/scoring/score-batch
   scoring_functions:
-    methods:
-      retrieve: get /v1/scoring-functions/{scoring_fn_id}
-      list:
-        endpoint: get /v1/scoring-functions
-        paginated: false
-      register: post /v1/scoring-functions
-      unregister: delete /v1/scoring-functions/{scoring_fn_id}
     models:
       scoring_fn: ScoringFn
       scoring_fn_params: ScoringFnParams
       list_scoring_functions_response: ListScoringFunctionsResponse
-
+    methods:
+      retrieve: get /v1/scoring-functions/{scoring_fn_id}
+      list:
+        paginated: false
+        endpoint: get /v1/scoring-functions
+      register: post /v1/scoring-functions
+      unregister: delete /v1/scoring-functions/{scoring_fn_id}
   files:
+    models:
+      file: OpenAIFileObject
+      list_files_response: ListOpenAIFileResponse
+      delete_file_response: OpenAIFileDeleteResponse
     methods:
       create: post /v1/files
       list: get /v1/files
       retrieve: get /v1/files/{file_id}
       delete: delete /v1/files/{file_id}
       content: get /v1/files/{file_id}/content
-    models:
-      file: OpenAIFileObject
-      list_files_response: ListOpenAIFileResponse
-      delete_file_response: OpenAIFileDeleteResponse
-
   batches:
     methods:
       create: post /v1/batches
       list: get /v1/batches
       retrieve: get /v1/batches/{batch_id}
       cancel: post /v1/batches/{batch_id}/cancel
-
   alpha:
     subresources:
       inference:
         methods:
           rerank: post /v1alpha/inference/rerank
-
       post_training:
         models:
           algorithm_config: AlgorithmConfig
@@ -418,39 +445,35 @@ resources:
               cancel: post /v1alpha/post-training/job/cancel
               status: get /v1alpha/post-training/job/status
               list:
-                endpoint: get /v1alpha/post-training/jobs
                 paginated: false
-
+                endpoint: get /v1alpha/post-training/jobs
       benchmarks:
-        methods:
-          retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}
-          list:
-            endpoint: get /v1alpha/eval/benchmarks
-            paginated: false
-          register: post /v1alpha/eval/benchmarks
-          unregister: delete /v1alpha/eval/benchmarks/{benchmark_id}
         models:
           benchmark: Benchmark
           list_benchmarks_response: ListBenchmarksResponse
-
+        methods:
+          retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}
+          list:
+            paginated: false
+            endpoint: get /v1alpha/eval/benchmarks
+          register: post /v1alpha/eval/benchmarks
+          unregister: delete /v1alpha/eval/benchmarks/{benchmark_id}
       eval:
+        models:
+          evaluate_response: EvaluateResponse
+          benchmark_config: BenchmarkConfig
+          job: Job
         methods:
           evaluate_rows: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
           run_eval: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
           evaluate_rows_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
           run_eval_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
-
         subresources:
           jobs:
             methods:
               cancel: delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
               status: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
               retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result
-        models:
-          evaluate_response: EvaluateResponse
-          benchmark_config: BenchmarkConfig
-          job: Job
-
   beta:
     subresources:
       datasets:
@@ -460,74 +483,8 @@ resources:
           register: post /v1beta/datasets
           retrieve: get /v1beta/datasets/{dataset_id}
           list:
-            endpoint: get /v1beta/datasets
             paginated: false
+            endpoint: get /v1beta/datasets
           unregister: delete /v1beta/datasets/{dataset_id}
           iterrows: get /v1beta/datasetio/iterrows/{dataset_id}
           appendrows: post /v1beta/datasetio/append-rows/{dataset_id}
-
-settings:
-  license: MIT
-  unwrap_response_fields: [data]
-  file_header: |
-    Copyright (c) Meta Platforms, Inc. and affiliates.
-    All rights reserved.
-
-    This source code is licensed under the terms described in the LICENSE file in
-    the root directory of this source tree.
-
-openapi:
-  transformations:
-    - command: mergeObject
-      reason: Better return_type using enum
-      args:
-        target:
-          - "$.components.schemas"
-        object:
-          ReturnType:
-            additionalProperties: false
-            properties:
-              type:
-                enum:
-                  - string
-                  - number
-                  - boolean
-                  - array
-                  - object
-                  - json
-                  - union
-                  - chat_completion_input
-                  - completion_input
-                  - agent_turn_input
-            required:
-              - type
-            type: object
-    - command: replaceProperties
-      reason: Replace return type properties with better model (see above)
-      args:
-        filter:
-          only:
-            - "$.components.schemas.ScoringFn.properties.return_type"
-            - "$.components.schemas.RegisterScoringFunctionRequest.properties.return_type"
-        value:
-          $ref: "#/components/schemas/ReturnType"
-    - command: oneOfToAnyOf
-      reason: Prism (mock server) doesn't like one of our requests as it technically matches multiple variants
-
-# `readme` is used to configure the code snippets that will be rendered in the
-# README.md of various SDKs. In particular, you can change the `headline`
-# snippet's endpoint and the arguments to call it with.
-readme:
-  example_requests:
-    default:
-      type: request
-      endpoint: post /v1/chat/completions
-      params: &ref_0 {}
-    headline:
-      type: request
-      endpoint: get /v1/models
-      params: *ref_0
-    pagination:
-      type: request
-      endpoint: post /v1/chat/completions
-      params: {}
diff --git a/client-sdks/stainless/openapi.yml b/client-sdks/stainless/openapi.yml
index 5f14bc9c8..0623ea561 100644
--- a/client-sdks/stainless/openapi.yml
+++ b/client-sdks/stainless/openapi.yml
@@ -1810,7 +1810,7 @@ paths:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/RegisterScoringFunctionRequestLoose'
+              $ref: '#/components/schemas/RegisterScoringFunctionRequest'
         required: true
       deprecated: true
   /v1/scoring-functions/{scoring_fn_id}:
@@ -3300,7 +3300,7 @@ paths:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/RegisterDatasetRequestLoose'
+              $ref: '#/components/schemas/RegisterDatasetRequest'
         required: true
       deprecated: true
   /v1beta/datasets/{dataset_id}:
@@ -3557,7 +3557,7 @@ paths:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/BenchmarkConfig'
+              $ref: '#/components/schemas/RunEvalRequest'
         required: true
   /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
     get:
@@ -10598,6 +10598,14 @@ components:
       - scores
       title: EvaluateResponse
       description: The response from an evaluation.
+    RunEvalRequest:
+      properties:
+        benchmark_config:
+          $ref: '#/components/schemas/BenchmarkConfig'
+      type: object
+      required:
+      - benchmark_config
+      title: RunEvalRequest
     Job:
       properties:
         job_id:
@@ -11181,6 +11189,67 @@ components:
       - $ref: '#/components/schemas/CompletionInputType'
         title: CompletionInputType
       title: StringType | ... (9 variants)
+    RegisterScoringFunctionRequest:
+      properties:
+        scoring_fn_id:
+          type: string
+          title: Scoring Fn Id
+        description:
+          type: string
+          title: Description
+        return_type:
+          anyOf:
+          - $ref: '#/components/schemas/StringType'
+            title: StringType
+          - $ref: '#/components/schemas/NumberType'
+            title: NumberType
+          - $ref: '#/components/schemas/BooleanType'
+            title: BooleanType
+          - $ref: '#/components/schemas/ArrayType'
+            title: ArrayType
+          - $ref: '#/components/schemas/ObjectType'
+            title: ObjectType
+          - $ref: '#/components/schemas/JsonType'
+            title: JsonType
+          - $ref: '#/components/schemas/UnionType'
+            title: UnionType
+          - $ref: '#/components/schemas/ChatCompletionInputType'
+            title: ChatCompletionInputType
+          - $ref: '#/components/schemas/CompletionInputType'
+            title: CompletionInputType
+          title: StringType | ... (9 variants)
+        provider_scoring_fn_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        provider_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        params:
+          anyOf:
+          - oneOf:
+            - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+              title: LLMAsJudgeScoringFnParams
+            - $ref: '#/components/schemas/RegexParserScoringFnParams'
+              title: RegexParserScoringFnParams
+            - $ref: '#/components/schemas/BasicScoringFnParams'
+              title: BasicScoringFnParams
+            discriminator:
+              propertyName: type
+              mapping:
+                basic: '#/components/schemas/BasicScoringFnParams'
+                llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+                regex_parser: '#/components/schemas/RegexParserScoringFnParams'
+            title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
+          - type: 'null'
+          title: Params
+      type: object
+      required:
+      - scoring_fn_id
+      - description
+      - return_type
+      title: RegisterScoringFunctionRequest
     RegisterShieldRequest:
       properties:
         shield_id:
@@ -11239,6 +11308,31 @@ components:
       - $ref: '#/components/schemas/RowsDataSource'
         title: RowsDataSource
       title: URIDataSource | RowsDataSource
+    RegisterDatasetRequest:
+      properties:
+        purpose:
+          $ref: '#/components/schemas/DatasetPurpose'
+        source:
+          anyOf:
+          - $ref: '#/components/schemas/URIDataSource'
+            title: URIDataSource
+          - $ref: '#/components/schemas/RowsDataSource'
+            title: RowsDataSource
+          title: URIDataSource | RowsDataSource
+        metadata:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+        dataset_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+      type: object
+      required:
+      - purpose
+      - source
+      title: RegisterDatasetRequest
     RegisterBenchmarkRequest:
       properties:
         benchmark_id:
@@ -11975,41 +12069,6 @@ components:
       required:
       - reasoning_tokens
       title: OutputTokensDetails
-    RegisterDatasetRequestLoose:
-      properties:
-        purpose:
-          title: Purpose
-        source:
-          title: Source
-        metadata:
-          title: Metadata
-        dataset_id:
-          title: Dataset Id
-      type: object
-      required:
-      - purpose
-      - source
-      title: RegisterDatasetRequestLoose
-    RegisterScoringFunctionRequestLoose:
-      properties:
-        scoring_fn_id:
-          title: Scoring Fn Id
-        description:
-          title: Description
-        return_type:
-          title: Return Type
-        provider_scoring_fn_id:
-          title: Provider Scoring Fn Id
-        provider_id:
-          title: Provider Id
-        params:
-          title: Params
-      type: object
-      required:
-      - scoring_fn_id
-      - description
-      - return_type
-      title: RegisterScoringFunctionRequestLoose
     SearchRankingOptions:
       properties:
         ranker:
diff --git a/docs/docs/building_applications/tools.mdx b/docs/docs/building_applications/tools.mdx
index 3b78ec57b..f7b913fef 100644
--- a/docs/docs/building_applications/tools.mdx
+++ b/docs/docs/building_applications/tools.mdx
@@ -104,23 +104,19 @@ client.toolgroups.register(
 )
 ```
 
-Note that most of the more useful MCP servers need you to authenticate with them. Many of them use OAuth2.0 for authentication. You can provide authorization headers to send to the MCP server using the "Provider Data" abstraction provided by Llama Stack. When making an agent call,
+Note that most of the more useful MCP servers need you to authenticate with them. Many of them use OAuth 2.0 for authentication. You can provide the authorization token when creating the Agent:
 
 ```python
 agent = Agent(
     ...,
-    tools=["mcp::deepwiki"],
-    extra_headers={
-        "X-LlamaStack-Provider-Data": json.dumps(
-            {
-                "mcp_headers": {
-                    "http://mcp.deepwiki.com/sse": {
-                        "Authorization": "Bearer <your_access_token>",
-                    },
-                },
-            }
-        ),
-    },
+    tools=[
+        {
+            "type": "mcp",
+            "server_url": "https://mcp.deepwiki.com/sse",
+            "server_label": "mcp::deepwiki",
+            "authorization": "<your_access_token>",  # OAuth token (without "Bearer " prefix)
+        }
+    ],
 )
 agent.create_turn(...)
 ```
diff --git a/docs/docs/providers/agents/index.mdx b/docs/docs/providers/agents/index.mdx
index 06eb104af..200a3b9ca 100644
--- a/docs/docs/providers/agents/index.mdx
+++ b/docs/docs/providers/agents/index.mdx
@@ -1,7 +1,8 @@
 ---
-description: "Agents
+description: |
+  Agents
 
-    APIs for creating and interacting with agentic systems."
+      APIs for creating and interacting with agentic systems.
 sidebar_label: Agents
 title: Agents
 ---
diff --git a/docs/docs/providers/agents/inline_meta-reference.mdx b/docs/docs/providers/agents/inline_meta-reference.mdx
index fac9b8406..99a67feb4 100644
--- a/docs/docs/providers/agents/inline_meta-reference.mdx
+++ b/docs/docs/providers/agents/inline_meta-reference.mdx
@@ -14,7 +14,7 @@ Meta's reference implementation of an agent system that can use tools, access ve
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `persistence` | `<class 'inline.agents.meta_reference.config.AgentPersistenceConfig'>` | No |  |  |
+| `persistence` | `AgentPersistenceConfig` | No |  |  |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/batches/index.mdx b/docs/docs/providers/batches/index.mdx
index 2c64b277f..18fd49945 100644
--- a/docs/docs/providers/batches/index.mdx
+++ b/docs/docs/providers/batches/index.mdx
@@ -1,14 +1,15 @@
 ---
-description: "The Batches API enables efficient processing of multiple requests in a single operation,
-    particularly useful for processing large datasets, batch evaluation workflows, and
-    cost-effective inference at scale.
+description: |
+  The Batches API enables efficient processing of multiple requests in a single operation,
+      particularly useful for processing large datasets, batch evaluation workflows, and
+      cost-effective inference at scale.
 
-    The API is designed to allow use of openai client libraries for seamless integration.
+      The API is designed to allow use of openai client libraries for seamless integration.
 
-    This API provides the following extensions:
-     - idempotent batch creation
+      This API provides the following extensions:
+       - idempotent batch creation
 
-    Note: This API is currently under active development and may undergo changes."
+      Note: This API is currently under active development and may undergo changes.
 sidebar_label: Batches
 title: Batches
 ---
diff --git a/docs/docs/providers/batches/inline_reference.mdx b/docs/docs/providers/batches/inline_reference.mdx
index 45304fbb1..0a062c245 100644
--- a/docs/docs/providers/batches/inline_reference.mdx
+++ b/docs/docs/providers/batches/inline_reference.mdx
@@ -14,9 +14,9 @@ Reference implementation of batches API with KVStore persistence.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No |  | Configuration for the key-value store backend. |
-| `max_concurrent_batches` | `<class 'int'>` | No | 1 | Maximum number of concurrent batches to process simultaneously. |
-| `max_concurrent_requests_per_batch` | `<class 'int'>` | No | 10 | Maximum number of concurrent requests to process per batch. |
+| `kvstore` | `KVStoreReference` | No |  | Configuration for the key-value store backend. |
+| `max_concurrent_batches` | `int` | No | 1 | Maximum number of concurrent batches to process simultaneously. |
+| `max_concurrent_requests_per_batch` | `int` | No | 10 | Maximum number of concurrent requests to process per batch. |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/datasetio/inline_localfs.mdx b/docs/docs/providers/datasetio/inline_localfs.mdx
index a9363376c..4314696c5 100644
--- a/docs/docs/providers/datasetio/inline_localfs.mdx
+++ b/docs/docs/providers/datasetio/inline_localfs.mdx
@@ -14,7 +14,7 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No |  |  |
+| `kvstore` | `KVStoreReference` | No |  |  |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/datasetio/remote_huggingface.mdx b/docs/docs/providers/datasetio/remote_huggingface.mdx
index de3ffaaa6..ede8ed631 100644
--- a/docs/docs/providers/datasetio/remote_huggingface.mdx
+++ b/docs/docs/providers/datasetio/remote_huggingface.mdx
@@ -14,7 +14,7 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No |  |  |
+| `kvstore` | `KVStoreReference` | No |  |  |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/datasetio/remote_nvidia.mdx b/docs/docs/providers/datasetio/remote_nvidia.mdx
index 35a7dacee..97c48d810 100644
--- a/docs/docs/providers/datasetio/remote_nvidia.mdx
+++ b/docs/docs/providers/datasetio/remote_nvidia.mdx
@@ -17,7 +17,7 @@ NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform
 | `api_key` | `str \| None` | No |  | The NVIDIA API key. |
 | `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. |
 | `project_id` | `str \| None` | No | test-project | The NVIDIA project ID. |
-| `datasets_url` | `<class 'str'>` | No | http://nemo.test | Base URL for the NeMo Dataset API |
+| `datasets_url` | `str` | No | http://nemo.test | Base URL for the NeMo Dataset API |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/eval/index.mdx b/docs/docs/providers/eval/index.mdx
index 94bafe15e..3543db246 100644
--- a/docs/docs/providers/eval/index.mdx
+++ b/docs/docs/providers/eval/index.mdx
@@ -1,7 +1,8 @@
 ---
-description: "Evaluations
+description: |
+  Evaluations
 
-    Llama Stack Evaluation API for running evaluations on model and agent candidates."
+      Llama Stack Evaluation API for running evaluations on model and agent candidates.
 sidebar_label: Eval
 title: Eval
 ---
diff --git a/docs/docs/providers/eval/inline_meta-reference.mdx b/docs/docs/providers/eval/inline_meta-reference.mdx
index 2c86c18c9..f1e923ee8 100644
--- a/docs/docs/providers/eval/inline_meta-reference.mdx
+++ b/docs/docs/providers/eval/inline_meta-reference.mdx
@@ -14,7 +14,7 @@ Meta's reference implementation of evaluation tasks with support for multiple la
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `kvstore` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No |  |  |
+| `kvstore` | `KVStoreReference` | No |  |  |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/eval/remote_nvidia.mdx b/docs/docs/providers/eval/remote_nvidia.mdx
index 36bb4726b..311496791 100644
--- a/docs/docs/providers/eval/remote_nvidia.mdx
+++ b/docs/docs/providers/eval/remote_nvidia.mdx
@@ -14,7 +14,7 @@ NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `evaluator_url` | `<class 'str'>` | No | http://0.0.0.0:7331 | The url for accessing the evaluator service |
+| `evaluator_url` | `str` | No | http://0.0.0.0:7331 | The URL for accessing the evaluator service |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/files/index.mdx b/docs/docs/providers/files/index.mdx
index 19e338035..0b28e9aee 100644
--- a/docs/docs/providers/files/index.mdx
+++ b/docs/docs/providers/files/index.mdx
@@ -1,7 +1,8 @@
 ---
-description: "Files
+description: |
+  Files
 
-    This API is used to upload documents that can be used with other Llama Stack APIs."
+      This API is used to upload documents that can be used with other Llama Stack APIs.
 sidebar_label: Files
 title: Files
 ---
diff --git a/docs/docs/providers/files/inline_localfs.mdx b/docs/docs/providers/files/inline_localfs.mdx
index bff0c4eb9..aa3a9232b 100644
--- a/docs/docs/providers/files/inline_localfs.mdx
+++ b/docs/docs/providers/files/inline_localfs.mdx
@@ -14,9 +14,9 @@ Local filesystem-based file storage provider for managing files and documents lo
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `storage_dir` | `<class 'str'>` | No |  | Directory to store uploaded files |
-| `metadata_store` | `<class 'llama_stack.core.storage.datatypes.SqlStoreReference'>` | No |  | SQL store configuration for file metadata |
-| `ttl_secs` | `<class 'int'>` | No | 31536000 |  |
+| `storage_dir` | `str` | No |  | Directory to store uploaded files |
+| `metadata_store` | `SqlStoreReference` | No |  | SQL store configuration for file metadata |
+| `ttl_secs` | `int` | No | 31536000 |  |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/files/remote_openai.mdx b/docs/docs/providers/files/remote_openai.mdx
index 3b5c40aad..48fe2fd57 100644
--- a/docs/docs/providers/files/remote_openai.mdx
+++ b/docs/docs/providers/files/remote_openai.mdx
@@ -14,8 +14,8 @@ OpenAI Files API provider for managing files through OpenAI's native file storag
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `api_key` | `<class 'str'>` | No |  | OpenAI API key for authentication |
-| `metadata_store` | `<class 'llama_stack.core.storage.datatypes.SqlStoreReference'>` | No |  | SQL store configuration for file metadata |
+| `api_key` | `str` | No |  | OpenAI API key for authentication |
+| `metadata_store` | `SqlStoreReference` | No |  | SQL store configuration for file metadata |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/files/remote_s3.mdx b/docs/docs/providers/files/remote_s3.mdx
index 65cd545c5..857ba1819 100644
--- a/docs/docs/providers/files/remote_s3.mdx
+++ b/docs/docs/providers/files/remote_s3.mdx
@@ -14,13 +14,13 @@ AWS S3-based file storage provider for scalable cloud file management with metad
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `bucket_name` | `<class 'str'>` | No |  | S3 bucket name to store files |
-| `region` | `<class 'str'>` | No | us-east-1 | AWS region where the bucket is located |
+| `bucket_name` | `str` | No |  | S3 bucket name to store files |
+| `region` | `str` | No | us-east-1 | AWS region where the bucket is located |
 | `aws_access_key_id` | `str \| None` | No |  | AWS access key ID (optional if using IAM roles) |
 | `aws_secret_access_key` | `str \| None` | No |  | AWS secret access key (optional if using IAM roles) |
 | `endpoint_url` | `str \| None` | No |  | Custom S3 endpoint URL (for MinIO, LocalStack, etc.) |
-| `auto_create_bucket` | `<class 'bool'>` | No | False | Automatically create the S3 bucket if it doesn't exist |
-| `metadata_store` | `<class 'llama_stack.core.storage.datatypes.SqlStoreReference'>` | No |  | SQL store configuration for file metadata |
+| `auto_create_bucket` | `bool` | No | False | Automatically create the S3 bucket if it doesn't exist |
+| `metadata_store` | `SqlStoreReference` | No |  | SQL store configuration for file metadata |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx
index 478611420..e2d94bfaf 100644
--- a/docs/docs/providers/inference/index.mdx
+++ b/docs/docs/providers/inference/index.mdx
@@ -1,12 +1,13 @@
 ---
-description: "Inference
+description: |
+  Inference
 
-    Llama Stack Inference API for generating completions, chat completions, and embeddings.
+      Llama Stack Inference API for generating completions, chat completions, and embeddings.
 
-    This API provides the raw interface to the underlying models. Three kinds of models are supported:
-    - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.
-    - Embedding models: these models generate embeddings to be used for semantic search.
-    - Rerank models: these models reorder the documents based on their relevance to a query."
+      This API provides the raw interface to the underlying models. Three kinds of models are supported:
+      - LLM models: these models generate "raw" and "chat" (conversational) completions.
+      - Embedding models: these models generate embeddings to be used for semantic search.
+      - Rerank models: these models reorder the documents based on their relevance to a query.
 sidebar_label: Inference
 title: Inference
 ---
diff --git a/docs/docs/providers/inference/inline_meta-reference.mdx b/docs/docs/providers/inference/inline_meta-reference.mdx
index 328586f9a..55b1606b0 100644
--- a/docs/docs/providers/inference/inline_meta-reference.mdx
+++ b/docs/docs/providers/inference/inline_meta-reference.mdx
@@ -16,12 +16,12 @@ Meta's reference implementation of inference with support for various model form
 |-------|------|----------|---------|-------------|
 | `model` | `str \| None` | No |  |  |
 | `torch_seed` | `int \| None` | No |  |  |
-| `max_seq_len` | `<class 'int'>` | No | 4096 |  |
-| `max_batch_size` | `<class 'int'>` | No | 1 |  |
+| `max_seq_len` | `int` | No | 4096 |  |
+| `max_batch_size` | `int` | No | 1 |  |
 | `model_parallel_size` | `int \| None` | No |  |  |
-| `create_distributed_process_group` | `<class 'bool'>` | No | True |  |
+| `create_distributed_process_group` | `bool` | No | True |  |
 | `checkpoint_dir` | `str \| None` | No |  |  |
-| `quantization` | `Bf16QuantizationConfig \| Fp8QuantizationConfig \| Int4QuantizationConfig, annotation=NoneType, required=True, discriminator='type'` | No |  |  |
+| `quantization` | `Bf16QuantizationConfig \| Fp8QuantizationConfig \| Int4QuantizationConfig \| None` | No |  |  |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/remote_anthropic.mdx b/docs/docs/providers/inference/remote_anthropic.mdx
index 4acbbac50..14b431894 100644
--- a/docs/docs/providers/inference/remote_anthropic.mdx
+++ b/docs/docs/providers/inference/remote_anthropic.mdx
@@ -14,9 +14,9 @@ Anthropic inference provider for accessing Claude models and Anthropic's AI serv
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No |  | Authentication credential for the provider |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `SecretStr \| None` | No |  | Authentication credential for the provider |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/remote_azure.mdx b/docs/docs/providers/inference/remote_azure.mdx
index b3041259e..fd22b157e 100644
--- a/docs/docs/providers/inference/remote_azure.mdx
+++ b/docs/docs/providers/inference/remote_azure.mdx
@@ -21,10 +21,10 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No |  | Authentication credential for the provider |
-| `api_base` | `<class 'pydantic.networks.HttpUrl'>` | No |  | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `SecretStr \| None` | No |  | Authentication credential for the provider |
+| `api_base` | `HttpUrl` | No |  | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) |
 | `api_version` | `str \| None` | No |  | Azure API version for Azure (e.g., 2024-12-01-preview) |
 | `api_type` | `str \| None` | No | azure | Azure API type for Azure (e.g., azure) |
 
diff --git a/docs/docs/providers/inference/remote_bedrock.mdx b/docs/docs/providers/inference/remote_bedrock.mdx
index 61931643e..86bef3000 100644
--- a/docs/docs/providers/inference/remote_bedrock.mdx
+++ b/docs/docs/providers/inference/remote_bedrock.mdx
@@ -14,10 +14,10 @@ AWS Bedrock inference provider using OpenAI compatible endpoint.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No |  | Authentication credential for the provider |
-| `region_name` | `<class 'str'>` | No | us-east-2 | AWS Region for the Bedrock Runtime endpoint |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `SecretStr \| None` | No |  | Authentication credential for the provider |
+| `region_name` | `str` | No | us-east-2 | AWS Region for the Bedrock Runtime endpoint |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/remote_cerebras.mdx b/docs/docs/providers/inference/remote_cerebras.mdx
index cda0be224..1fb9530bb 100644
--- a/docs/docs/providers/inference/remote_cerebras.mdx
+++ b/docs/docs/providers/inference/remote_cerebras.mdx
@@ -14,10 +14,10 @@ Cerebras inference provider for running models on Cerebras Cloud platform.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No |  | Authentication credential for the provider |
-| `base_url` | `<class 'str'>` | No | https://api.cerebras.ai | Base URL for the Cerebras API |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `SecretStr \| None` | No |  | Authentication credential for the provider |
+| `base_url` | `str` | No | https://api.cerebras.ai | Base URL for the Cerebras API |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/remote_databricks.mdx b/docs/docs/providers/inference/remote_databricks.mdx
index f14fd0175..7a926baf4 100644
--- a/docs/docs/providers/inference/remote_databricks.mdx
+++ b/docs/docs/providers/inference/remote_databricks.mdx
@@ -14,9 +14,9 @@ Databricks inference provider for running models on Databricks' unified analytic
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_token` | `pydantic.types.SecretStr \| None` | No |  | The Databricks API token |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_token` | `SecretStr \| None` | No |  | The Databricks API token |
 | `url` | `str \| None` | No |  | The URL for the Databricks model serving endpoint |
 
 ## Sample Configuration
diff --git a/docs/docs/providers/inference/remote_fireworks.mdx b/docs/docs/providers/inference/remote_fireworks.mdx
index 71f16ccec..7db74efc4 100644
--- a/docs/docs/providers/inference/remote_fireworks.mdx
+++ b/docs/docs/providers/inference/remote_fireworks.mdx
@@ -14,10 +14,10 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No |  | Authentication credential for the provider |
-| `url` | `<class 'str'>` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `SecretStr \| None` | No |  | Authentication credential for the provider |
+| `url` | `str` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/remote_gemini.mdx b/docs/docs/providers/inference/remote_gemini.mdx
index 22b3c8cb7..75e6b9692 100644
--- a/docs/docs/providers/inference/remote_gemini.mdx
+++ b/docs/docs/providers/inference/remote_gemini.mdx
@@ -14,9 +14,9 @@ Google Gemini inference provider for accessing Gemini models and Google's AI ser
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No |  | Authentication credential for the provider |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `SecretStr \| None` | No |  | Authentication credential for the provider |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/remote_groq.mdx b/docs/docs/providers/inference/remote_groq.mdx
index aaf1516ca..3ebd6f907 100644
--- a/docs/docs/providers/inference/remote_groq.mdx
+++ b/docs/docs/providers/inference/remote_groq.mdx
@@ -14,10 +14,10 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No |  | Authentication credential for the provider |
-| `url` | `<class 'str'>` | No | https://api.groq.com | The URL for the Groq AI server |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `SecretStr \| None` | No |  | Authentication credential for the provider |
+| `url` | `str` | No | https://api.groq.com | The URL for the Groq AI server |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/remote_hf_endpoint.mdx b/docs/docs/providers/inference/remote_hf_endpoint.mdx
index 771b24f8d..52b40c1f2 100644
--- a/docs/docs/providers/inference/remote_hf_endpoint.mdx
+++ b/docs/docs/providers/inference/remote_hf_endpoint.mdx
@@ -14,8 +14,8 @@ HuggingFace Inference Endpoints provider for dedicated model serving.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `endpoint_name` | `<class 'str'>` | No |  | The name of the Hugging Face Inference Endpoint in the format of '&#123;namespace&#125;/&#123;endpoint_name&#125;' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided. |
-| `api_token` | `pydantic.types.SecretStr \| None` | No |  | Your Hugging Face user access token (will default to locally saved token if not provided) |
+| `endpoint_name` | `str` | No |  | The name of the Hugging Face Inference Endpoint in the format of '&#123;namespace&#125;/&#123;endpoint_name&#125;' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided. |
+| `api_token` | `SecretStr \| None` | No |  | Your Hugging Face user access token (will default to locally saved token if not provided) |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/remote_hf_serverless.mdx b/docs/docs/providers/inference/remote_hf_serverless.mdx
index 1a89b8e3e..52280df82 100644
--- a/docs/docs/providers/inference/remote_hf_serverless.mdx
+++ b/docs/docs/providers/inference/remote_hf_serverless.mdx
@@ -14,8 +14,8 @@ HuggingFace Inference API serverless provider for on-demand model inference.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `huggingface_repo` | `<class 'str'>` | No |  | The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct') |
-| `api_token` | `pydantic.types.SecretStr \| None` | No |  | Your Hugging Face user access token (will default to locally saved token if not provided) |
+| `huggingface_repo` | `str` | No |  | The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct') |
+| `api_token` | `SecretStr \| None` | No |  | Your Hugging Face user access token (will default to locally saved token if not provided) |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/remote_llama-openai-compat.mdx b/docs/docs/providers/inference/remote_llama-openai-compat.mdx
index 9769c0793..f67f40909 100644
--- a/docs/docs/providers/inference/remote_llama-openai-compat.mdx
+++ b/docs/docs/providers/inference/remote_llama-openai-compat.mdx
@@ -14,10 +14,10 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No |  | Authentication credential for the provider |
-| `openai_compat_api_base` | `<class 'str'>` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `SecretStr \| None` | No |  | Authentication credential for the provider |
+| `openai_compat_api_base` | `str` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/remote_nvidia.mdx b/docs/docs/providers/inference/remote_nvidia.mdx
index 57c64ab46..6646d8b00 100644
--- a/docs/docs/providers/inference/remote_nvidia.mdx
+++ b/docs/docs/providers/inference/remote_nvidia.mdx
@@ -14,13 +14,13 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No |  | Authentication credential for the provider |
-| `url` | `<class 'str'>` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
-| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
-| `append_api_version` | `<class 'bool'>` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. |
-| `rerank_model_to_url` | `dict[str, str` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints.  |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `SecretStr \| None` | No |  | Authentication credential for the provider |
+| `url` | `str` | No | https://integrate.api.nvidia.com | Base URL for accessing the NVIDIA NIM |
+| `timeout` | `int` | No | 60 | Timeout for the HTTP requests |
+| `append_api_version` | `bool` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. |
+| `rerank_model_to_url` | `dict[str, str]` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints.  |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/remote_oci.mdx b/docs/docs/providers/inference/remote_oci.mdx
index 33a201a55..d448755bf 100644
--- a/docs/docs/providers/inference/remote_oci.mdx
+++ b/docs/docs/providers/inference/remote_oci.mdx
@@ -21,14 +21,14 @@ https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No |  | Authentication credential for the provider |
-| `oci_auth_type` | `<class 'str'>` | No | instance_principal | OCI authentication type (must be one of: instance_principal, config_file) |
-| `oci_region` | `<class 'str'>` | No | us-ashburn-1 | OCI region (e.g., us-ashburn-1) |
-| `oci_compartment_id` | `<class 'str'>` | No |  | OCI compartment ID for the Generative AI service |
-| `oci_config_file_path` | `<class 'str'>` | No | ~/.oci/config | OCI config file path (required if oci_auth_type is config_file) |
-| `oci_config_profile` | `<class 'str'>` | No | DEFAULT | OCI config profile (required if oci_auth_type is config_file) |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `SecretStr \| None` | No |  | Authentication credential for the provider |
+| `oci_auth_type` | `str` | No | instance_principal | OCI authentication type (must be one of: instance_principal, config_file) |
+| `oci_region` | `str` | No | us-ashburn-1 | OCI region (e.g., us-ashburn-1) |
+| `oci_compartment_id` | `str` | No |  | OCI compartment ID for the Generative AI service |
+| `oci_config_file_path` | `str` | No | ~/.oci/config | OCI config file path (required if oci_auth_type is config_file) |
+| `oci_config_profile` | `str` | No | DEFAULT | OCI config profile (required if oci_auth_type is config_file) |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/remote_ollama.mdx b/docs/docs/providers/inference/remote_ollama.mdx
index e00e34e4a..497bfed52 100644
--- a/docs/docs/providers/inference/remote_ollama.mdx
+++ b/docs/docs/providers/inference/remote_ollama.mdx
@@ -14,9 +14,9 @@ Ollama inference provider for running local models through the Ollama runtime.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `url` | `<class 'str'>` | No | http://localhost:11434 |  |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `url` | `str` | No | http://localhost:11434 |  |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/remote_openai.mdx b/docs/docs/providers/inference/remote_openai.mdx
index 28c8ab7bf..4931118fd 100644
--- a/docs/docs/providers/inference/remote_openai.mdx
+++ b/docs/docs/providers/inference/remote_openai.mdx
@@ -14,10 +14,10 @@ OpenAI inference provider for accessing GPT models and other OpenAI services.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No |  | Authentication credential for the provider |
-| `base_url` | `<class 'str'>` | No | https://api.openai.com/v1 | Base URL for OpenAI API |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `SecretStr \| None` | No |  | Authentication credential for the provider |
+| `base_url` | `str` | No | https://api.openai.com/v1 | Base URL for OpenAI API |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/remote_passthrough.mdx b/docs/docs/providers/inference/remote_passthrough.mdx
index 957cd04da..009961d49 100644
--- a/docs/docs/providers/inference/remote_passthrough.mdx
+++ b/docs/docs/providers/inference/remote_passthrough.mdx
@@ -14,10 +14,10 @@ Passthrough inference provider for connecting to any external inference service
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No |  | Authentication credential for the provider |
-| `url` | `<class 'str'>` | No |  | The URL for the passthrough endpoint |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `SecretStr \| None` | No |  | Authentication credential for the provider |
+| `url` | `str` | No |  | The URL for the passthrough endpoint |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/remote_runpod.mdx b/docs/docs/providers/inference/remote_runpod.mdx
index 3cbbd0322..3b67e157d 100644
--- a/docs/docs/providers/inference/remote_runpod.mdx
+++ b/docs/docs/providers/inference/remote_runpod.mdx
@@ -14,9 +14,9 @@ RunPod inference provider for running models on RunPod's cloud GPU platform.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_token` | `pydantic.types.SecretStr \| None` | No |  | The API token |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_token` | `SecretStr \| None` | No |  | The API token |
 | `url` | `str \| None` | No |  | The URL for the Runpod model serving endpoint |
 
 ## Sample Configuration
diff --git a/docs/docs/providers/inference/remote_sambanova.mdx b/docs/docs/providers/inference/remote_sambanova.mdx
index 0ac4600b7..6f4c5d7f6 100644
--- a/docs/docs/providers/inference/remote_sambanova.mdx
+++ b/docs/docs/providers/inference/remote_sambanova.mdx
@@ -14,10 +14,10 @@ SambaNova inference provider for running models on SambaNova's dataflow architec
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No |  | Authentication credential for the provider |
-| `url` | `<class 'str'>` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `SecretStr \| None` | No |  | Authentication credential for the provider |
+| `url` | `str` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/remote_tgi.mdx b/docs/docs/providers/inference/remote_tgi.mdx
index 67fe6d237..cd5ea7661 100644
--- a/docs/docs/providers/inference/remote_tgi.mdx
+++ b/docs/docs/providers/inference/remote_tgi.mdx
@@ -14,9 +14,9 @@ Text Generation Inference (TGI) provider for HuggingFace model serving.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `url` | `<class 'str'>` | No |  | The URL for the TGI serving endpoint |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `url` | `str` | No |  | The URL for the TGI serving endpoint |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/remote_together.mdx b/docs/docs/providers/inference/remote_together.mdx
index c8e3bcdcf..43192cc9e 100644
--- a/docs/docs/providers/inference/remote_together.mdx
+++ b/docs/docs/providers/inference/remote_together.mdx
@@ -14,10 +14,10 @@ Together AI inference provider for open-source models and collaborative AI devel
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No |  | Authentication credential for the provider |
-| `url` | `<class 'str'>` | No | https://api.together.xyz/v1 | The URL for the Together AI server |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `SecretStr \| None` | No |  | Authentication credential for the provider |
+| `url` | `str` | No | https://api.together.xyz/v1 | The URL for the Together AI server |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/remote_vertexai.mdx b/docs/docs/providers/inference/remote_vertexai.mdx
index c182ed485..59b574561 100644
--- a/docs/docs/providers/inference/remote_vertexai.mdx
+++ b/docs/docs/providers/inference/remote_vertexai.mdx
@@ -53,10 +53,10 @@ Available Models:
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `project` | `<class 'str'>` | No |  | Google Cloud project ID for Vertex AI |
-| `location` | `<class 'str'>` | No | us-central1 | Google Cloud location for Vertex AI |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `project` | `str` | No |  | Google Cloud project ID for Vertex AI |
+| `location` | `str` | No | us-central1 | Google Cloud location for Vertex AI |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/inference/remote_vllm.mdx b/docs/docs/providers/inference/remote_vllm.mdx
index f844bcee0..81620dbca 100644
--- a/docs/docs/providers/inference/remote_vllm.mdx
+++ b/docs/docs/providers/inference/remote_vllm.mdx
@@ -14,11 +14,11 @@ Remote vLLM inference provider for connecting to vLLM servers.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_token` | `pydantic.types.SecretStr \| None` | No |  | The API token |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_token` | `SecretStr \| None` | No |  | The API token |
 | `url` | `str \| None` | No |  | The URL for the vLLM model serving endpoint |
-| `max_tokens` | `<class 'int'>` | No | 4096 | Maximum number of tokens to generate. |
+| `max_tokens` | `int` | No | 4096 | Maximum number of tokens to generate. |
 | `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. |
 
 ## Sample Configuration
diff --git a/docs/docs/providers/inference/remote_watsonx.mdx b/docs/docs/providers/inference/remote_watsonx.mdx
index 2227aa1cc..3a1dba3b4 100644
--- a/docs/docs/providers/inference/remote_watsonx.mdx
+++ b/docs/docs/providers/inference/remote_watsonx.mdx
@@ -14,12 +14,12 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
-| `api_key` | `pydantic.types.SecretStr \| None` | No |  | Authentication credential for the provider |
-| `url` | `<class 'str'>` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
+| `api_key` | `SecretStr \| None` | No |  | Authentication credential for the provider |
+| `url` | `str` | No | https://us-south.ml.cloud.ibm.com | A base URL for accessing watsonx.ai |
 | `project_id` | `str \| None` | No |  | The watsonx.ai project ID |
-| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
+| `timeout` | `int` | No | 60 | Timeout for the HTTP requests |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/post_training/inline_huggingface-gpu.mdx b/docs/docs/providers/post_training/inline_huggingface-gpu.mdx
index ac7644de7..0d4241b27 100644
--- a/docs/docs/providers/post_training/inline_huggingface-gpu.mdx
+++ b/docs/docs/providers/post_training/inline_huggingface-gpu.mdx
@@ -14,23 +14,23 @@ HuggingFace-based post-training provider for fine-tuning models using the Huggin
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `device` | `<class 'str'>` | No | cuda |  |
-| `distributed_backend` | `Literal['fsdp', 'deepspeed'` | No |  |  |
-| `checkpoint_format` | `Literal['full_state', 'huggingface'` | No | huggingface |  |
-| `chat_template` | `<class 'str'>` | No | `&lt;|user|&gt;`<br/>`{input}`<br/>`&lt;|assistant|&gt;`<br/>`{output}` |  |
-| `model_specific_config` | `<class 'dict'>` | No | `{'trust_remote_code': True, 'attn_implementation': 'sdpa'}` |  |
-| `max_seq_length` | `<class 'int'>` | No | 2048 |  |
-| `gradient_checkpointing` | `<class 'bool'>` | No | False |  |
-| `save_total_limit` | `<class 'int'>` | No | 3 |  |
-| `logging_steps` | `<class 'int'>` | No | 10 |  |
-| `warmup_ratio` | `<class 'float'>` | No | 0.1 |  |
-| `weight_decay` | `<class 'float'>` | No | 0.01 |  |
-| `dataloader_num_workers` | `<class 'int'>` | No | 4 |  |
-| `dataloader_pin_memory` | `<class 'bool'>` | No | True |  |
-| `dpo_beta` | `<class 'float'>` | No | 0.1 |  |
-| `use_reference_model` | `<class 'bool'>` | No | True |  |
-| `dpo_loss_type` | `Literal['sigmoid', 'hinge', 'ipo', 'kto_pair'` | No | sigmoid |  |
-| `dpo_output_dir` | `<class 'str'>` | No |  |  |
+| `device` | `str` | No | cuda |  |
+| `distributed_backend` | `Literal[fsdp, deepspeed] \| None` | No |  |  |
+| `checkpoint_format` | `Literal[full_state, huggingface] \| None` | No | huggingface |  |
+| `chat_template` | `str` | No | `&lt;\|user\|&gt;`<br/>`{input}`<br/>`&lt;\|assistant\|&gt;`<br/>`{output}` |  |
+| `model_specific_config` | `dict` | No | `{'trust_remote_code': True, 'attn_implementation': 'sdpa'}` |  |
+| `max_seq_length` | `int` | No | 2048 |  |
+| `gradient_checkpointing` | `bool` | No | False |  |
+| `save_total_limit` | `int` | No | 3 |  |
+| `logging_steps` | `int` | No | 10 |  |
+| `warmup_ratio` | `float` | No | 0.1 |  |
+| `weight_decay` | `float` | No | 0.01 |  |
+| `dataloader_num_workers` | `int` | No | 4 |  |
+| `dataloader_pin_memory` | `bool` | No | True |  |
+| `dpo_beta` | `float` | No | 0.1 |  |
+| `use_reference_model` | `bool` | No | True |  |
+| `dpo_loss_type` | `Literal[sigmoid, hinge, ipo, kto_pair]` | No | sigmoid |  |
+| `dpo_output_dir` | `str` | No |  |  |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/post_training/inline_torchtune-cpu.mdx b/docs/docs/providers/post_training/inline_torchtune-cpu.mdx
index f789392fc..3e2c15d3e 100644
--- a/docs/docs/providers/post_training/inline_torchtune-cpu.mdx
+++ b/docs/docs/providers/post_training/inline_torchtune-cpu.mdx
@@ -15,7 +15,7 @@ TorchTune-based post-training provider for fine-tuning and optimizing models usi
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `torch_seed` | `int \| None` | No |  |  |
-| `checkpoint_format` | `Literal['meta', 'huggingface'` | No | meta |  |
+| `checkpoint_format` | `Literal[meta, huggingface] \| None` | No | meta |  |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/post_training/inline_torchtune-gpu.mdx b/docs/docs/providers/post_training/inline_torchtune-gpu.mdx
index bd87797af..ac222d8a5 100644
--- a/docs/docs/providers/post_training/inline_torchtune-gpu.mdx
+++ b/docs/docs/providers/post_training/inline_torchtune-gpu.mdx
@@ -15,7 +15,7 @@ TorchTune-based post-training provider for fine-tuning and optimizing models usi
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `torch_seed` | `int \| None` | No |  |  |
-| `checkpoint_format` | `Literal['meta', 'huggingface'` | No | meta |  |
+| `checkpoint_format` | `Literal[meta, huggingface] \| None` | No | meta |  |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/post_training/remote_nvidia.mdx b/docs/docs/providers/post_training/remote_nvidia.mdx
index 448ac4c75..d0208f82f 100644
--- a/docs/docs/providers/post_training/remote_nvidia.mdx
+++ b/docs/docs/providers/post_training/remote_nvidia.mdx
@@ -18,9 +18,9 @@ NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform.
 | `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. |
 | `project_id` | `str \| None` | No | test-example-model@v1 | The NVIDIA project ID. |
 | `customizer_url` | `str \| None` | No |  | Base URL for the NeMo Customizer API |
-| `timeout` | `<class 'int'>` | No | 300 | Timeout for the NVIDIA Post Training API |
-| `max_retries` | `<class 'int'>` | No | 3 | Maximum number of retries for the NVIDIA Post Training API |
-| `output_model_dir` | `<class 'str'>` | No | test-example-model@v1 | Directory to save the output model |
+| `timeout` | `int` | No | 300 | Timeout for the NVIDIA Post Training API |
+| `max_retries` | `int` | No | 3 | Maximum number of retries for the NVIDIA Post Training API |
+| `output_model_dir` | `str` | No | test-example-model@v1 | Directory to save the output model |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/safety/index.mdx b/docs/docs/providers/safety/index.mdx
index 4e2de4f33..0c13de28c 100644
--- a/docs/docs/providers/safety/index.mdx
+++ b/docs/docs/providers/safety/index.mdx
@@ -1,7 +1,8 @@
 ---
-description: "Safety
+description: |
+  Safety
 
-    OpenAI-compatible Moderations API."
+      OpenAI-compatible Moderations API.
 sidebar_label: Safety
 title: Safety
 ---
diff --git a/docs/docs/providers/safety/inline_llama-guard.mdx b/docs/docs/providers/safety/inline_llama-guard.mdx
index 65866c9b2..d52e7289a 100644
--- a/docs/docs/providers/safety/inline_llama-guard.mdx
+++ b/docs/docs/providers/safety/inline_llama-guard.mdx
@@ -14,7 +14,7 @@ Llama Guard safety provider for content moderation and safety filtering using Me
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `excluded_categories` | `list[str` | No | [] |  |
+| `excluded_categories` | `list[str]` | No | [] |  |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/safety/inline_prompt-guard.mdx b/docs/docs/providers/safety/inline_prompt-guard.mdx
index c52e03e4b..dc57f8555 100644
--- a/docs/docs/providers/safety/inline_prompt-guard.mdx
+++ b/docs/docs/providers/safety/inline_prompt-guard.mdx
@@ -14,7 +14,7 @@ Prompt Guard safety provider for detecting and filtering unsafe prompts and cont
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `guard_type` | `<class 'str'>` | No | injection |  |
+| `guard_type` | `str` | No | injection |  |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/safety/remote_bedrock.mdx b/docs/docs/providers/safety/remote_bedrock.mdx
index 663a761f0..990bd7246 100644
--- a/docs/docs/providers/safety/remote_bedrock.mdx
+++ b/docs/docs/providers/safety/remote_bedrock.mdx
@@ -14,8 +14,8 @@ AWS Bedrock safety provider for content moderation using AWS's safety services.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `allowed_models` | `list[str \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
-| `refresh_models` | `<class 'bool'>` | No | False | Whether to refresh models periodically from the provider |
+| `allowed_models` | `list[str] \| None` | No |  | List of models that should be registered with the model registry. If None, all models are allowed. |
+| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `aws_access_key_id` | `str \| None` | No |  | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID |
 | `aws_secret_access_key` | `str \| None` | No |  | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY |
 | `aws_session_token` | `str \| None` | No |  | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN |
diff --git a/docs/docs/providers/safety/remote_nvidia.mdx b/docs/docs/providers/safety/remote_nvidia.mdx
index 0f665e60a..ac1fd0b03 100644
--- a/docs/docs/providers/safety/remote_nvidia.mdx
+++ b/docs/docs/providers/safety/remote_nvidia.mdx
@@ -14,7 +14,7 @@ NVIDIA's safety provider for content moderation and safety filtering.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `guardrails_service_url` | `<class 'str'>` | No | http://0.0.0.0:7331 | The url for accessing the Guardrails service |
+| `guardrails_service_url` | `str` | No | http://0.0.0.0:7331 | The url for accessing the Guardrails service |
 | `config_id` | `str \| None` | No | self-check | Guardrails configuration ID to use from the Guardrails configuration store |
 
 ## Sample Configuration
diff --git a/docs/docs/providers/safety/remote_sambanova.mdx b/docs/docs/providers/safety/remote_sambanova.mdx
index da70fce6c..69712879c 100644
--- a/docs/docs/providers/safety/remote_sambanova.mdx
+++ b/docs/docs/providers/safety/remote_sambanova.mdx
@@ -14,8 +14,8 @@ SambaNova's safety provider for content moderation and safety filtering.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `url` | `<class 'str'>` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
-| `api_key` | `pydantic.types.SecretStr \| None` | No |  | The SambaNova cloud API Key |
+| `url` | `str` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
+| `api_key` | `SecretStr \| None` | No |  | The SambaNova cloud API Key |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/tool_runtime/remote_bing-search.mdx b/docs/docs/providers/tool_runtime/remote_bing-search.mdx
index ec06bc20f..f97087d9e 100644
--- a/docs/docs/providers/tool_runtime/remote_bing-search.mdx
+++ b/docs/docs/providers/tool_runtime/remote_bing-search.mdx
@@ -15,7 +15,7 @@ Bing Search tool for web search capabilities using Microsoft's search engine.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `api_key` | `str \| None` | No |  |  |
-| `top_k` | `<class 'int'>` | No | 3 |  |
+| `top_k` | `int` | No | 3 |  |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/tool_runtime/remote_brave-search.mdx b/docs/docs/providers/tool_runtime/remote_brave-search.mdx
index 3aeed67d5..987ce0e41 100644
--- a/docs/docs/providers/tool_runtime/remote_brave-search.mdx
+++ b/docs/docs/providers/tool_runtime/remote_brave-search.mdx
@@ -15,7 +15,7 @@ Brave Search tool for web search capabilities with privacy-focused results.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `api_key` | `str \| None` | No |  | The Brave Search API Key |
-| `max_results` | `<class 'int'>` | No | 3 | The maximum number of results to return |
+| `max_results` | `int` | No | 3 | The maximum number of results to return |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/tool_runtime/remote_tavily-search.mdx b/docs/docs/providers/tool_runtime/remote_tavily-search.mdx
index fdca31bbe..36ad63646 100644
--- a/docs/docs/providers/tool_runtime/remote_tavily-search.mdx
+++ b/docs/docs/providers/tool_runtime/remote_tavily-search.mdx
@@ -15,7 +15,7 @@ Tavily Search tool for AI-optimized web search with structured results.
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `api_key` | `str \| None` | No |  | The Tavily Search API Key |
-| `max_results` | `<class 'int'>` | No | 3 | The maximum number of results to return |
+| `max_results` | `int` | No | 3 | The maximum number of results to return |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/vector_io/inline_chromadb.mdx b/docs/docs/providers/vector_io/inline_chromadb.mdx
index 0be5cd5b3..d78a67b01 100644
--- a/docs/docs/providers/vector_io/inline_chromadb.mdx
+++ b/docs/docs/providers/vector_io/inline_chromadb.mdx
@@ -78,8 +78,8 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `db_path` | `<class 'str'>` | No |  |  |
-| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No |  | Config for KV store backend |
+| `db_path` | `str` | No |  |  |
+| `persistence` | `KVStoreReference` | No |  | Config for KV store backend |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/vector_io/inline_faiss.mdx b/docs/docs/providers/vector_io/inline_faiss.mdx
index 3a1fba055..c1eedf9db 100644
--- a/docs/docs/providers/vector_io/inline_faiss.mdx
+++ b/docs/docs/providers/vector_io/inline_faiss.mdx
@@ -95,7 +95,7 @@ more details about Faiss in general.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No |  |  |
+| `persistence` | `KVStoreReference` | No |  |  |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/vector_io/inline_meta-reference.mdx b/docs/docs/providers/vector_io/inline_meta-reference.mdx
index 17fd40cf5..9266b65b5 100644
--- a/docs/docs/providers/vector_io/inline_meta-reference.mdx
+++ b/docs/docs/providers/vector_io/inline_meta-reference.mdx
@@ -14,7 +14,7 @@ Meta's reference implementation of a vector database.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No |  |  |
+| `persistence` | `KVStoreReference` | No |  |  |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/vector_io/inline_milvus.mdx b/docs/docs/providers/vector_io/inline_milvus.mdx
index 6063edab1..e8408a74f 100644
--- a/docs/docs/providers/vector_io/inline_milvus.mdx
+++ b/docs/docs/providers/vector_io/inline_milvus.mdx
@@ -16,9 +16,9 @@ Please refer to the remote provider documentation.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `db_path` | `<class 'str'>` | No |  |  |
-| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No |  | Config for KV store backend (SQLite only for now) |
-| `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server |
+| `db_path` | `str` | No |  |  |
+| `persistence` | `KVStoreReference` | No |  | Config for KV store backend (SQLite only for now) |
+| `consistency_level` | `str` | No | Strong | The consistency level of the Milvus server |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/vector_io/inline_qdrant.mdx b/docs/docs/providers/vector_io/inline_qdrant.mdx
index 057d96761..8f6155732 100644
--- a/docs/docs/providers/vector_io/inline_qdrant.mdx
+++ b/docs/docs/providers/vector_io/inline_qdrant.mdx
@@ -97,8 +97,8 @@ See the [Qdrant documentation](https://qdrant.tech/documentation/) for more deta
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `path` | `<class 'str'>` | No |  |  |
-| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No |  |  |
+| `path` | `str` | No |  |  |
+| `persistence` | `KVStoreReference` | No |  |  |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
index 45631dff3..b63d9db72 100644
--- a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
+++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx
@@ -407,8 +407,8 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `db_path` | `<class 'str'>` | No |  | Path to the SQLite database file |
-| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No |  | Config for KV store backend (SQLite only for now) |
+| `db_path` | `str` | No |  | Path to the SQLite database file |
+| `persistence` | `KVStoreReference` | No |  | Config for KV store backend (SQLite only for now) |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/vector_io/inline_sqlite_vec.mdx b/docs/docs/providers/vector_io/inline_sqlite_vec.mdx
index 67cbd0021..a25ff1b28 100644
--- a/docs/docs/providers/vector_io/inline_sqlite_vec.mdx
+++ b/docs/docs/providers/vector_io/inline_sqlite_vec.mdx
@@ -16,8 +16,8 @@ Please refer to the sqlite-vec provider documentation.
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `db_path` | `<class 'str'>` | No |  | Path to the SQLite database file |
-| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No |  | Config for KV store backend (SQLite only for now) |
+| `db_path` | `str` | No |  | Path to the SQLite database file |
+| `persistence` | `KVStoreReference` | No |  | Config for KV store backend (SQLite only for now) |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/vector_io/remote_chromadb.mdx b/docs/docs/providers/vector_io/remote_chromadb.mdx
index 2aee3eeca..970f4420f 100644
--- a/docs/docs/providers/vector_io/remote_chromadb.mdx
+++ b/docs/docs/providers/vector_io/remote_chromadb.mdx
@@ -78,7 +78,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
 | `url` | `str \| None` | No |  |  |
-| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No |  | Config for KV store backend |
+| `persistence` | `KVStoreReference` | No |  | Config for KV store backend |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/vector_io/remote_milvus.mdx b/docs/docs/providers/vector_io/remote_milvus.mdx
index bf9935d61..3e8ae71cf 100644
--- a/docs/docs/providers/vector_io/remote_milvus.mdx
+++ b/docs/docs/providers/vector_io/remote_milvus.mdx
@@ -405,10 +405,10 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi
 
 | Field | Type | Required | Default | Description |
 |-------|------|----------|---------|-------------|
-| `uri` | `<class 'str'>` | No |  | The URI of the Milvus server |
+| `uri` | `str` | No |  | The URI of the Milvus server |
 | `token` | `str \| None` | No |  | The token of the Milvus server |
-| `consistency_level` | `<class 'str'>` | No | Strong | The consistency level of the Milvus server |
-| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No |  | Config for KV store backend |
+| `consistency_level` | `str` | No | Strong | The consistency level of the Milvus server |
+| `persistence` | `KVStoreReference` | No |  | Config for KV store backend |
 | `config` | `dict` | No | `{}` | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. |
 
 :::note
diff --git a/docs/docs/providers/vector_io/remote_pgvector.mdx b/docs/docs/providers/vector_io/remote_pgvector.mdx
index cb70f35d1..cd69e2b2f 100644
--- a/docs/docs/providers/vector_io/remote_pgvector.mdx
+++ b/docs/docs/providers/vector_io/remote_pgvector.mdx
@@ -218,7 +218,7 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de
 | `db` | `str \| None` | No | postgres |  |
 | `user` | `str \| None` | No | postgres |  |
 | `password` | `str \| None` | No | mysecretpassword |  |
-| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No |  | Config for KV store backend (SQLite only for now) |
+| `persistence` | `KVStoreReference \| None` | No |  | Config for KV store backend (SQLite only for now) |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/vector_io/remote_qdrant.mdx b/docs/docs/providers/vector_io/remote_qdrant.mdx
index dff9642b5..9b5117bcb 100644
--- a/docs/docs/providers/vector_io/remote_qdrant.mdx
+++ b/docs/docs/providers/vector_io/remote_qdrant.mdx
@@ -19,14 +19,14 @@ Please refer to the inline provider documentation.
 | `location` | `str \| None` | No |  |  |
 | `url` | `str \| None` | No |  |  |
 | `port` | `int \| None` | No | 6333 |  |
-| `grpc_port` | `<class 'int'>` | No | 6334 |  |
-| `prefer_grpc` | `<class 'bool'>` | No | False |  |
+| `grpc_port` | `int` | No | 6334 |  |
+| `prefer_grpc` | `bool` | No | False |  |
 | `https` | `bool \| None` | No |  |  |
 | `api_key` | `str \| None` | No |  |  |
 | `prefix` | `str \| None` | No |  |  |
 | `timeout` | `int \| None` | No |  |  |
 | `host` | `str \| None` | No |  |  |
-| `persistence` | `<class 'llama_stack.core.storage.datatypes.KVStoreReference'>` | No |  |  |
+| `persistence` | `KVStoreReference` | No |  |  |
 
 ## Sample Configuration
 
diff --git a/docs/docs/providers/vector_io/remote_weaviate.mdx b/docs/docs/providers/vector_io/remote_weaviate.mdx
index b809bed2e..7a29d0d48 100644
--- a/docs/docs/providers/vector_io/remote_weaviate.mdx
+++ b/docs/docs/providers/vector_io/remote_weaviate.mdx
@@ -75,7 +75,7 @@ See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more
 |-------|------|----------|---------|-------------|
 | `weaviate_api_key` | `str \| None` | No |  | The API key for the Weaviate instance |
 | `weaviate_cluster_url` | `str \| None` | No | localhost:8080 | The URL of the Weaviate cluster |
-| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No |  | Config for KV store backend (SQLite only for now) |
+| `persistence` | `KVStoreReference \| None` | No |  | Config for KV store backend (SQLite only for now) |
 
 ## Sample Configuration
 
diff --git a/docs/static/deprecated-llama-stack-spec.yaml b/docs/static/deprecated-llama-stack-spec.yaml
index fa9c7aa2b..af4d13701 100644
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
@@ -193,7 +193,7 @@ paths:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/RegisterScoringFunctionRequestLoose'
+              $ref: '#/components/schemas/RegisterScoringFunctionRequest'
         required: true
       deprecated: true
   /v1/scoring-functions/{scoring_fn_id}:
@@ -549,7 +549,7 @@ paths:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/RegisterDatasetRequestLoose'
+              $ref: '#/components/schemas/RegisterDatasetRequest'
         required: true
       deprecated: true
   /v1beta/datasets/{dataset_id}:
@@ -7441,6 +7441,14 @@ components:
       - scores
       title: EvaluateResponse
       description: The response from an evaluation.
+    RunEvalRequest:
+      properties:
+        benchmark_config:
+          $ref: '#/components/schemas/BenchmarkConfig'
+      type: object
+      required:
+      - benchmark_config
+      title: RunEvalRequest
     Job:
       properties:
         job_id:
@@ -8024,6 +8032,67 @@ components:
       - $ref: '#/components/schemas/CompletionInputType'
         title: CompletionInputType
       title: StringType | ... (9 variants)
+    RegisterScoringFunctionRequest:
+      properties:
+        scoring_fn_id:
+          type: string
+          title: Scoring Fn Id
+        description:
+          type: string
+          title: Description
+        return_type:
+          anyOf:
+          - $ref: '#/components/schemas/StringType'
+            title: StringType
+          - $ref: '#/components/schemas/NumberType'
+            title: NumberType
+          - $ref: '#/components/schemas/BooleanType'
+            title: BooleanType
+          - $ref: '#/components/schemas/ArrayType'
+            title: ArrayType
+          - $ref: '#/components/schemas/ObjectType'
+            title: ObjectType
+          - $ref: '#/components/schemas/JsonType'
+            title: JsonType
+          - $ref: '#/components/schemas/UnionType'
+            title: UnionType
+          - $ref: '#/components/schemas/ChatCompletionInputType'
+            title: ChatCompletionInputType
+          - $ref: '#/components/schemas/CompletionInputType'
+            title: CompletionInputType
+          title: StringType | ... (9 variants)
+        provider_scoring_fn_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        provider_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        params:
+          anyOf:
+          - oneOf:
+            - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+              title: LLMAsJudgeScoringFnParams
+            - $ref: '#/components/schemas/RegexParserScoringFnParams'
+              title: RegexParserScoringFnParams
+            - $ref: '#/components/schemas/BasicScoringFnParams'
+              title: BasicScoringFnParams
+            discriminator:
+              propertyName: type
+              mapping:
+                basic: '#/components/schemas/BasicScoringFnParams'
+                llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+                regex_parser: '#/components/schemas/RegexParserScoringFnParams'
+            title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
+          - type: 'null'
+          title: Params
+      type: object
+      required:
+      - scoring_fn_id
+      - description
+      - return_type
+      title: RegisterScoringFunctionRequest
     RegisterShieldRequest:
       properties:
         shield_id:
@@ -8082,6 +8151,31 @@ components:
       - $ref: '#/components/schemas/RowsDataSource'
         title: RowsDataSource
       title: URIDataSource | RowsDataSource
+    RegisterDatasetRequest:
+      properties:
+        purpose:
+          $ref: '#/components/schemas/DatasetPurpose'
+        source:
+          anyOf:
+          - $ref: '#/components/schemas/URIDataSource'
+            title: URIDataSource
+          - $ref: '#/components/schemas/RowsDataSource'
+            title: RowsDataSource
+          title: URIDataSource | RowsDataSource
+        metadata:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+        dataset_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+      type: object
+      required:
+      - purpose
+      - source
+      title: RegisterDatasetRequest
     RegisterBenchmarkRequest:
       properties:
         benchmark_id:
@@ -8818,41 +8912,6 @@ components:
       required:
       - reasoning_tokens
       title: OutputTokensDetails
-    RegisterDatasetRequestLoose:
-      properties:
-        purpose:
-          title: Purpose
-        source:
-          title: Source
-        metadata:
-          title: Metadata
-        dataset_id:
-          title: Dataset Id
-      type: object
-      required:
-      - purpose
-      - source
-      title: RegisterDatasetRequestLoose
-    RegisterScoringFunctionRequestLoose:
-      properties:
-        scoring_fn_id:
-          title: Scoring Fn Id
-        description:
-          title: Description
-        return_type:
-          title: Return Type
-        provider_scoring_fn_id:
-          title: Provider Scoring Fn Id
-        provider_id:
-          title: Provider Id
-        params:
-          title: Params
-      type: object
-      required:
-      - scoring_fn_id
-      - description
-      - return_type
-      title: RegisterScoringFunctionRequestLoose
     SearchRankingOptions:
       properties:
         ranker:
diff --git a/docs/static/experimental-llama-stack-spec.yaml b/docs/static/experimental-llama-stack-spec.yaml
index e90750e1b..362361c72 100644
--- a/docs/static/experimental-llama-stack-spec.yaml
+++ b/docs/static/experimental-llama-stack-spec.yaml
@@ -300,7 +300,7 @@ paths:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/BenchmarkConfig'
+              $ref: '#/components/schemas/RunEvalRequest'
         required: true
   /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
     get:
@@ -6723,6 +6723,14 @@ components:
       - scores
       title: EvaluateResponse
       description: The response from an evaluation.
+    RunEvalRequest:
+      properties:
+        benchmark_config:
+          $ref: '#/components/schemas/BenchmarkConfig'
+      type: object
+      required:
+      - benchmark_config
+      title: RunEvalRequest
     Job:
       properties:
         job_id:
diff --git a/docs/static/stainless-llama-stack-spec.yaml b/docs/static/stainless-llama-stack-spec.yaml
index 5f14bc9c8..0623ea561 100644
--- a/docs/static/stainless-llama-stack-spec.yaml
+++ b/docs/static/stainless-llama-stack-spec.yaml
@@ -1810,7 +1810,7 @@ paths:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/RegisterScoringFunctionRequestLoose'
+              $ref: '#/components/schemas/RegisterScoringFunctionRequest'
         required: true
       deprecated: true
   /v1/scoring-functions/{scoring_fn_id}:
@@ -3300,7 +3300,7 @@ paths:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/RegisterDatasetRequestLoose'
+              $ref: '#/components/schemas/RegisterDatasetRequest'
         required: true
       deprecated: true
   /v1beta/datasets/{dataset_id}:
@@ -3557,7 +3557,7 @@ paths:
         content:
           application/json:
             schema:
-              $ref: '#/components/schemas/BenchmarkConfig'
+              $ref: '#/components/schemas/RunEvalRequest'
         required: true
   /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}:
     get:
@@ -10598,6 +10598,14 @@ components:
       - scores
       title: EvaluateResponse
       description: The response from an evaluation.
+    RunEvalRequest:
+      properties:
+        benchmark_config:
+          $ref: '#/components/schemas/BenchmarkConfig'
+      type: object
+      required:
+      - benchmark_config
+      title: RunEvalRequest
     Job:
       properties:
         job_id:
@@ -11181,6 +11189,67 @@ components:
       - $ref: '#/components/schemas/CompletionInputType'
         title: CompletionInputType
       title: StringType | ... (9 variants)
+    RegisterScoringFunctionRequest:
+      properties:
+        scoring_fn_id:
+          type: string
+          title: Scoring Fn Id
+        description:
+          type: string
+          title: Description
+        return_type:
+          anyOf:
+          - $ref: '#/components/schemas/StringType'
+            title: StringType
+          - $ref: '#/components/schemas/NumberType'
+            title: NumberType
+          - $ref: '#/components/schemas/BooleanType'
+            title: BooleanType
+          - $ref: '#/components/schemas/ArrayType'
+            title: ArrayType
+          - $ref: '#/components/schemas/ObjectType'
+            title: ObjectType
+          - $ref: '#/components/schemas/JsonType'
+            title: JsonType
+          - $ref: '#/components/schemas/UnionType'
+            title: UnionType
+          - $ref: '#/components/schemas/ChatCompletionInputType'
+            title: ChatCompletionInputType
+          - $ref: '#/components/schemas/CompletionInputType'
+            title: CompletionInputType
+          title: StringType | ... (9 variants)
+        provider_scoring_fn_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        provider_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+        params:
+          anyOf:
+          - oneOf:
+            - $ref: '#/components/schemas/LLMAsJudgeScoringFnParams'
+              title: LLMAsJudgeScoringFnParams
+            - $ref: '#/components/schemas/RegexParserScoringFnParams'
+              title: RegexParserScoringFnParams
+            - $ref: '#/components/schemas/BasicScoringFnParams'
+              title: BasicScoringFnParams
+            discriminator:
+              propertyName: type
+              mapping:
+                basic: '#/components/schemas/BasicScoringFnParams'
+                llm_as_judge: '#/components/schemas/LLMAsJudgeScoringFnParams'
+                regex_parser: '#/components/schemas/RegexParserScoringFnParams'
+            title: LLMAsJudgeScoringFnParams | RegexParserScoringFnParams | BasicScoringFnParams
+          - type: 'null'
+          title: Params
+      type: object
+      required:
+      - scoring_fn_id
+      - description
+      - return_type
+      title: RegisterScoringFunctionRequest
     RegisterShieldRequest:
       properties:
         shield_id:
@@ -11239,6 +11308,31 @@ components:
       - $ref: '#/components/schemas/RowsDataSource'
         title: RowsDataSource
       title: URIDataSource | RowsDataSource
+    RegisterDatasetRequest:
+      properties:
+        purpose:
+          $ref: '#/components/schemas/DatasetPurpose'
+        source:
+          anyOf:
+          - $ref: '#/components/schemas/URIDataSource'
+            title: URIDataSource
+          - $ref: '#/components/schemas/RowsDataSource'
+            title: RowsDataSource
+          title: URIDataSource | RowsDataSource
+        metadata:
+          anyOf:
+          - additionalProperties: true
+            type: object
+          - type: 'null'
+        dataset_id:
+          anyOf:
+          - type: string
+          - type: 'null'
+      type: object
+      required:
+      - purpose
+      - source
+      title: RegisterDatasetRequest
     RegisterBenchmarkRequest:
       properties:
         benchmark_id:
@@ -11975,41 +12069,6 @@ components:
       required:
       - reasoning_tokens
       title: OutputTokensDetails
-    RegisterDatasetRequestLoose:
-      properties:
-        purpose:
-          title: Purpose
-        source:
-          title: Source
-        metadata:
-          title: Metadata
-        dataset_id:
-          title: Dataset Id
-      type: object
-      required:
-      - purpose
-      - source
-      title: RegisterDatasetRequestLoose
-    RegisterScoringFunctionRequestLoose:
-      properties:
-        scoring_fn_id:
-          title: Scoring Fn Id
-        description:
-          title: Description
-        return_type:
-          title: Return Type
-        provider_scoring_fn_id:
-          title: Provider Scoring Fn Id
-        provider_id:
-          title: Provider Id
-        params:
-          title: Params
-      type: object
-      required:
-      - scoring_fn_id
-      - description
-      - return_type
-      title: RegisterScoringFunctionRequestLoose
     SearchRankingOptions:
       properties:
         ranker:
diff --git a/pyproject.toml b/pyproject.toml
index bdf8309ad..eea515b09 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,7 +38,6 @@ dependencies = [
     "pyjwt[crypto]>=2.10.0",                          # Pull crypto to support RS256 for jwt. Requires 2.10.0+ for ssl_context support.
     "pydantic>=2.11.9",
     "rich",
-    "starlette",
     "termcolor",
     "tiktoken",
     "pillow",
@@ -50,7 +49,6 @@ dependencies = [
     "aiosqlite>=0.21.0",                              # server - for metadata store
     "asyncpg",                                        # for metadata store
     "sqlalchemy[asyncio]>=2.0.41",                    # server - for conversations
-    "pyyaml>=6.0.2",
     "starlette>=0.49.1",
 ]
 
diff --git a/scripts/openapi_generator/__init__.py b/scripts/openapi_generator/__init__.py
index 7f6aaa1d1..834836f76 100644
--- a/scripts/openapi_generator/__init__.py
+++ b/scripts/openapi_generator/__init__.py
@@ -11,6 +11,25 @@ This module provides functionality to generate OpenAPI specifications
 from FastAPI applications.
 """
 
-from .main import generate_openapi_spec, main
-
 __all__ = ["generate_openapi_spec", "main"]
+
+
+def __getattr__(name: str):
+    """Lazily resolve the package's public entry points (PEP 562 module ``__getattr__``).
+
+    ``.main`` is imported only when ``generate_openapi_spec`` or ``main`` is first requested.
+
+    Raises:
+        AttributeError: if ``name`` is not one of the lazily exported names.
+    """
+    if name in {"generate_openapi_spec", "main"}:
+        from .main import generate_openapi_spec as _gos
+        from .main import main as _main
+
+        # Importing the ``.main`` submodule binds it as the package attribute ``main``, which
+        # would shadow the ``main`` function on every later lookup; cache the real objects in
+        # the module namespace so repeated access keeps returning the functions.
+        exports = {"generate_openapi_spec": _gos, "main": _main}
+        globals().update(exports)
+        return exports[name]
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
diff --git a/scripts/openapi_generator/endpoints.py b/scripts/openapi_generator/endpoints.py
index 39086f47f..85203cb71 100644
--- a/scripts/openapi_generator/endpoints.py
+++ b/scripts/openapi_generator/endpoints.py
@@ -15,6 +15,7 @@ import typing
 from typing import Annotated, Any, get_args, get_origin
 
 from fastapi import FastAPI
+from fastapi.params import Body as FastAPIBody
 from pydantic import Field, create_model
 
 from llama_stack.log import get_logger
@@ -26,6 +27,8 @@ from .state import _extra_body_fields, register_dynamic_model
 
 logger = get_logger(name=__name__, category="core")
 
+type QueryParameter = tuple[str, type, Any, bool]
+
 
 def _to_pascal_case(segment: str) -> str:
     tokens = re.findall(r"[A-Za-z]+|\d+", segment)
@@ -75,12 +78,12 @@ def _create_endpoint_with_request_model(
     return endpoint
 
 
-def _build_field_definitions(query_parameters: list[tuple[str, type, Any]], use_any: bool = False) -> dict[str, tuple]:
+def _build_field_definitions(query_parameters: list[QueryParameter], use_any: bool = False) -> dict[str, tuple]:
     """Build field definitions for a Pydantic model from query parameters."""
     from typing import Any
 
     field_definitions = {}
-    for param_name, param_type, default_value in query_parameters:
+    for param_name, param_type, default_value, _ in query_parameters:
         if use_any:
             field_definitions[param_name] = (Any, ... if default_value is inspect.Parameter.empty else default_value)
             continue
@@ -108,10 +111,10 @@ def _build_field_definitions(query_parameters: list[tuple[str, type, Any]], use_
             field_definitions[param_name] = (Any, ... if default_value is inspect.Parameter.empty else default_value)
 
     # Ensure all parameters are included
-    expected_params = {name for name, _, _ in query_parameters}
+    expected_params = {name for name, _, _, _ in query_parameters}
     missing = expected_params - set(field_definitions.keys())
     if missing:
-        for param_name, _, default_value in query_parameters:
+        for param_name, _, default_value, _ in query_parameters:
             if param_name in missing:
                 field_definitions[param_name] = (
                     Any,
@@ -126,7 +129,7 @@ def _create_dynamic_request_model(
     webmethod,
     method_name: str,
     http_method: str,
-    query_parameters: list[tuple[str, type, Any]],
+    query_parameters: list[QueryParameter],
     use_any: bool = False,
     variant_suffix: str | None = None,
 ) -> type | None:
@@ -143,12 +146,12 @@ def _create_dynamic_request_model(
 
 
 def _build_signature_params(
-    query_parameters: list[tuple[str, type, Any]],
+    query_parameters: list[QueryParameter],
 ) -> tuple[list[inspect.Parameter], dict[str, type]]:
     """Build signature parameters and annotations from query parameters."""
     signature_params = []
     param_annotations = {}
-    for param_name, param_type, default_value in query_parameters:
+    for param_name, param_type, default_value, _ in query_parameters:
         param_annotations[param_name] = param_type
         signature_params.append(
             inspect.Parameter(
@@ -219,6 +222,19 @@ def _is_extra_body_field(metadata_item: Any) -> bool:
     return isinstance(metadata_item, ExtraBodyField)
 
 
+def _should_embed_parameter(param_type: Any) -> bool:
+    """Decide whether a parameter should be embedded (wrapped) in the request body.
+
+    A FastAPI ``Body`` marker found in ``Annotated[...]`` metadata decides via its ``embed`` flag
+    (FastAPI treats ``embed=None`` as False). Without such a marker the answer is True, matching
+    the embed=True default that create_dynamic_typed_route applies to unannotated parameters.
+    """
+    extras = get_args(param_type)[1:] if get_origin(param_type) is Annotated else ()
+    body_markers = [extra for extra in extras if isinstance(extra, FastAPIBody)]
+    # Only the first Body marker is consulted.
+    return bool(body_markers[0].embed) if body_markers else True
+
+
 def _is_async_iterator_type(type_obj: Any) -> bool:
     """Check if a type is AsyncIterator or AsyncIterable."""
     from collections.abc import AsyncIterable, AsyncIterator
@@ -282,7 +298,7 @@ def _find_models_for_endpoint(
 
     Returns:
         tuple: (request_model, response_model, query_parameters, file_form_params, streaming_response_model, response_schema_name)
-        where query_parameters is a list of (name, type, default_value) tuples
+        where query_parameters is a list of (name, type, default_value, should_embed) tuples
         and file_form_params is a list of inspect.Parameter objects for File()/Form() params
         and streaming_response_model is the model for streaming responses (AsyncIterator content)
     """
@@ -299,7 +315,7 @@ def _find_models_for_endpoint(
 
         # Find request model and collect all body parameters
         request_model = None
-        query_parameters = []
+        query_parameters: list[QueryParameter] = []
         file_form_params = []
         path_params = set()
         extra_body_params = []
@@ -325,6 +341,7 @@ def _find_models_for_endpoint(
 
             # Check if it's a File() or Form() parameter - these need special handling
             param_type = param.annotation
+            param_should_embed = _should_embed_parameter(param_type)
             if _is_file_or_form_param(param_type):
                 # File() and Form() parameters must be in the function signature directly
                 # They cannot be part of a Pydantic model
@@ -350,30 +367,14 @@ def _find_models_for_endpoint(
                     # Store as extra body parameter - exclude from request model
                     extra_body_params.append((param_name, base_type, extra_body_description))
                     continue
+                param_type = base_type
 
             # Check if it's a Pydantic model (for POST/PUT requests)
             if hasattr(param_type, "model_json_schema"):
-                # Collect all body parameters including Pydantic models
-                # We'll decide later whether to use a single model or create a combined one
-                query_parameters.append((param_name, param_type, param.default))
-            elif get_origin(param_type) is Annotated:
-                # Handle Annotated types - get the base type
-                args = get_args(param_type)
-                if args and hasattr(args[0], "model_json_schema"):
-                    # Collect Pydantic models from Annotated types
-                    query_parameters.append((param_name, args[0], param.default))
-                else:
-                    # Regular annotated parameter (but not File/Form, already handled above)
-                    query_parameters.append((param_name, param_type, param.default))
+                query_parameters.append((param_name, param_type, param.default, param_should_embed))
             else:
-                # This is likely a body parameter for POST/PUT or query parameter for GET
-                # Store the parameter info for later use
-                # Preserve inspect.Parameter.empty to distinguish "no default" from "default=None"
-                default_value = param.default
-
-                # Extract the base type from union types (e.g., str | None -> str)
-                # Also make it safe for FastAPI to avoid forward reference issues
-                query_parameters.append((param_name, param_type, default_value))
+                # Regular annotated parameter (but not File/Form, already handled above)
+                query_parameters.append((param_name, param_type, param.default, param_should_embed))
 
         # Store extra body fields for later use in post-processing
         # We'll store them when the endpoint is created, as we need the full path
@@ -385,8 +386,8 @@ def _find_models_for_endpoint(
         # Otherwise, we'll create a combined request model from all parameters
         # BUT: For GET requests, never create a request body - all parameters should be query parameters
         if is_post_put and len(query_parameters) == 1:
-            param_name, param_type, default_value = query_parameters[0]
-            if hasattr(param_type, "model_json_schema"):
+            param_name, param_type, default_value, should_embed = query_parameters[0]
+            if hasattr(param_type, "model_json_schema") and not should_embed:
                 request_model = param_type
                 query_parameters = []  # Clear query_parameters so we use the single model
 
@@ -495,7 +496,7 @@ def _create_fastapi_endpoint(app: FastAPI, route, webmethod, api: Api):
     if file_form_params and is_post_put:
         signature_params = list(file_form_params)
         param_annotations = {param.name: param.annotation for param in file_form_params}
-        for param_name, param_type, default_value in query_parameters:
+        for param_name, param_type, default_value, _ in query_parameters:
             signature_params.append(
                 inspect.Parameter(
                     param_name,
diff --git a/scripts/openapi_generator/stainless_config/__init__.py b/scripts/openapi_generator/stainless_config/__init__.py
new file mode 100644
index 000000000..bf44f82ba
--- /dev/null
+++ b/scripts/openapi_generator/stainless_config/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+# Package marker for Stainless config generation.
diff --git a/scripts/openapi_generator/stainless_config/generate_config.py b/scripts/openapi_generator/stainless_config/generate_config.py
new file mode 100644
index 000000000..dabc2119f
--- /dev/null
+++ b/scripts/openapi_generator/stainless_config/generate_config.py
@@ -0,0 +1,821 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from __future__ import annotations
+
+from collections.abc import Iterator
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+HEADER = "# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json\n\n"
+
+SECTION_ORDER = [  # key order of the dict built by StainlessConfig.to_dict (dumped with sort_keys=False)
+    "organization",
+    "security",
+    "security_schemes",
+    "targets",
+    "client_settings",
+    "environments",
+    "pagination",
+    "settings",
+    "openapi",
+    "readme",
+    "resources",
+]
+
+ORGANIZATION = {  # "organization" section of the generated config
+    "name": "llama-stack-client",
+    "docs": "https://llama-stack.readthedocs.io/en/latest/",
+    "contact": "llamastack@meta.com",
+}
+
+SECURITY = [{}, {"BearerAuth": []}]  # NOTE(review): empty entry presumably allows unauthenticated calls - confirm
+
+SECURITY_SCHEMES = {"BearerAuth": {"type": "http", "scheme": "bearer"}}  # name used by SECURITY / CLIENT_SETTINGS
+
+TARGETS = {  # per-language SDK targets (node, python, kotlin, go)
+    "node": {
+        "package_name": "llama-stack-client",
+        "production_repo": "llamastack/llama-stack-client-typescript",
+        "publish": {"npm": False},
+    },
+    "python": {
+        "package_name": "llama_stack_client",
+        "production_repo": "llamastack/llama-stack-client-python",
+        "options": {"use_uv": True},
+        "publish": {"pypi": True},
+        "project_name": "llama_stack_client",
+    },
+    "kotlin": {
+        "reverse_domain": "com.llama_stack_client.api",
+        "production_repo": None,  # dumped as YAML null
+        "publish": {"maven": False},
+    },
+    "go": {
+        "package_name": "llama-stack-client",
+        "production_repo": "llamastack/llama-stack-client-go",
+        "options": {"enable_v2": True, "back_compat_use_shared_package": False},
+    },
+}
+
+CLIENT_SETTINGS = {  # the api_key option is tied to the "BearerAuth" security scheme
+    "default_env_prefix": "LLAMA_STACK_CLIENT",
+    "opts": {
+        "api_key": {
+            "type": "string",
+            "read_env": "LLAMA_STACK_CLIENT_API_KEY",
+            "auth": {"security_scheme": "BearerAuth"},
+            "nullable": True,
+        }
+    },
+}
+
+ENVIRONMENTS = {"production": "http://any-hosted-llama-stack.com"}  # NOTE(review): placeholder host? confirm
+
+PAGINATION = [  # two named schemes: "datasets_iterrows" (offset) and "openai_cursor_page" (cursor)
+    {
+        "name": "datasets_iterrows",
+        "type": "offset",
+        "request": {
+            "dataset_id": {"type": "string"},
+            "start_index": {
+                "type": "integer",
+                "x-stainless-pagination-property": {"purpose": "offset_count_param"},
+            },
+            "limit": {"type": "integer"},
+        },
+        "response": {
+            "data": {"type": "array", "items": {"type": "object"}},
+            "next_index": {
+                "type": "integer",
+                "x-stainless-pagination-property": {"purpose": "offset_count_start_field"},
+            },
+        },
+    },
+    {
+        "name": "openai_cursor_page",
+        "type": "cursor",
+        "request": {
+            "limit": {"type": "integer"},
+            "after": {
+                "type": "string",
+                "x-stainless-pagination-property": {"purpose": "next_cursor_param"},
+            },
+        },
+        "response": {
+            "data": {"type": "array", "items": {}},
+            "has_more": {"type": "boolean"},
+            "last_id": {
+                "type": "string",
+                "x-stainless-pagination-property": {"purpose": "next_cursor_field"},
+            },
+        },
+    },
+]
+
+SETTINGS = {  # "settings" section of the generated config
+    "license": "MIT",
+    "unwrap_response_fields": ["data"],
+    "file_header": "Copyright (c) Meta Platforms, Inc. and affiliates.\n"  # adjacent literals form one string
+    "All rights reserved.\n"
+    "\n"
+    "This source code is licensed under the terms described in the "
+    "LICENSE file in\n"
+    "the root directory of this source tree.\n",
+}
+
+OPENAPI = {  # "openapi" section: a list of spec transformations, kept in this order
+    "transformations": [
+        {
+            "command": "mergeObject",
+            "reason": "Better return_type using enum",
+            "args": {
+                "target": ["$.components.schemas"],
+                "object": {
+                    "ReturnType": {
+                        "additionalProperties": False,
+                        "properties": {
+                            "type": {
+                                "enum": [
+                                    "string",
+                                    "number",
+                                    "boolean",
+                                    "array",
+                                    "object",
+                                    "json",
+                                    "union",
+                                    "chat_completion_input",
+                                    "completion_input",
+                                    "agent_turn_input",
+                                ]
+                            }
+                        },
+                        "required": ["type"],
+                        "type": "object",
+                    }
+                },
+            },
+        },
+        {
+            "command": "replaceProperties",
+            "reason": "Replace return type properties with better model (see above)",
+            "args": {
+                "filter": {
+                    "only": [
+                        "$.components.schemas.ScoringFn.properties.return_type",
+                        "$.components.schemas.RegisterScoringFunctionRequest.properties.return_type",
+                    ]
+                },
+                "value": {"$ref": "#/components/schemas/ReturnType"},  # the schema merged in by the first entry
+            },
+        },
+        {
+            "command": "oneOfToAnyOf",
+            "reason": "Prism (mock server) doesn't like one of our "
+            "requests as it technically matches multiple "
+            "variants",
+        },
+    ]
+}
+
+README = {  # each "endpoint" below must also be a method route in ALL_RESOURCES (validate_readme_endpoints)
+    "example_requests": {
+        "default": {
+            "type": "request",
+            "endpoint": "post /v1/chat/completions",
+            "params": {},
+        },
+        "headline": {"type": "request", "endpoint": "get /v1/models", "params": {}},
+        "pagination": {  # NOTE(review): same route as "default" - confirm this is the intended example
+            "type": "request",
+            "endpoint": "post /v1/chat/completions",
+            "params": {},
+        },
+    }
+}
+
+ALL_RESOURCES = {  # resource name -> optional "models", "methods", "subresources"; parsed by Resource.from_dict
+    "$shared": {
+        "models": {
+            "interleaved_content_item": "InterleavedContentItem",
+            "interleaved_content": "InterleavedContent",
+            "param_type": "ParamType",
+            "safety_violation": "SafetyViolation",
+            "sampling_params": "SamplingParams",
+            "scoring_result": "ScoringResult",
+            "system_message": "SystemMessage",
+        }
+    },
+    "toolgroups": {
+        "models": {
+            "tool_group": "ToolGroup",
+            "list_tool_groups_response": "ListToolGroupsResponse",
+        },
+        "methods": {
+            "register": "post /v1/toolgroups",
+            "get": "get /v1/toolgroups/{toolgroup_id}",
+            "list": "get /v1/toolgroups",
+            "unregister": "delete /v1/toolgroups/{toolgroup_id}",
+        },
+    },
+    "tools": {
+        "methods": {
+            "get": "get /v1/tools/{tool_name}",
+            "list": {"paginated": False, "endpoint": "get /v1/tools"},
+        }
+    },
+    "tool_runtime": {
+        "models": {
+            "tool_def": "ToolDef",
+            "tool_invocation_result": "ToolInvocationResult",
+        },
+        "methods": {
+            "list_tools": {
+                "paginated": False,
+                "endpoint": "get /v1/tool-runtime/list-tools",
+            },
+            "invoke_tool": "post /v1/tool-runtime/invoke",
+        },
+    },
+    "responses": {
+        "models": {
+            "response_object_stream": "OpenAIResponseObjectStream",
+            "response_object": "OpenAIResponseObject",
+        },
+        "methods": {
+            "create": {
+                "type": "http",
+                "streaming": {
+                    "stream_event_model": "responses.response_object_stream",
+                    "param_discriminator": "stream",
+                },
+                "endpoint": "post /v1/responses",
+            },
+            "retrieve": "get /v1/responses/{response_id}",
+            "list": {"type": "http", "endpoint": "get /v1/responses"},
+            "delete": {
+                "type": "http",
+                "endpoint": "delete /v1/responses/{response_id}",
+            },
+        },
+        "subresources": {
+            "input_items": {
+                "methods": {
+                    "list": {
+                        "type": "http",
+                        "paginated": False,
+                        "endpoint": "get /v1/responses/{response_id}/input_items",
+                    }
+                }
+            }
+        },
+    },
+    "prompts": {
+        "models": {"prompt": "Prompt", "list_prompts_response": "ListPromptsResponse"},
+        "methods": {
+            "create": "post /v1/prompts",
+            "list": {"paginated": False, "endpoint": "get /v1/prompts"},
+            "retrieve": "get /v1/prompts/{prompt_id}",
+            "update": "post /v1/prompts/{prompt_id}",
+            "delete": "delete /v1/prompts/{prompt_id}",
+            "set_default_version": "post /v1/prompts/{prompt_id}/set-default-version",
+        },
+        "subresources": {
+            "versions": {
+                "methods": {
+                    "list": {
+                        "paginated": False,
+                        "endpoint": "get /v1/prompts/{prompt_id}/versions",
+                    }
+                }
+            }
+        },
+    },
+    "conversations": {
+        "models": {"conversation_object": "Conversation"},
+        "methods": {
+            "create": {"type": "http", "endpoint": "post /v1/conversations"},
+            "retrieve": "get /v1/conversations/{conversation_id}",
+            "update": {
+                "type": "http",
+                "endpoint": "post /v1/conversations/{conversation_id}",
+            },
+            "delete": {
+                "type": "http",
+                "endpoint": "delete /v1/conversations/{conversation_id}",
+            },
+        },
+        "subresources": {
+            "items": {
+                "methods": {
+                    "get": {
+                        "type": "http",
+                        "endpoint": "get /v1/conversations/{conversation_id}/items/{item_id}",
+                    },
+                    "list": {
+                        "type": "http",
+                        "endpoint": "get /v1/conversations/{conversation_id}/items",
+                    },
+                    "create": {
+                        "type": "http",
+                        "endpoint": "post /v1/conversations/{conversation_id}/items",
+                    },
+                    "delete": {
+                        "type": "http",
+                        "endpoint": "delete /v1/conversations/{conversation_id}/items/{item_id}",
+                    },
+                }
+            }
+        },
+    },
+    "inspect": {
+        "models": {
+            "healthInfo": "HealthInfo",
+            "providerInfo": "ProviderInfo",
+            "routeInfo": "RouteInfo",
+            "versionInfo": "VersionInfo",
+        },
+        "methods": {"health": "get /v1/health", "version": "get /v1/version"},
+    },
+    "embeddings": {
+        "models": {"create_embeddings_response": "OpenAIEmbeddingsResponse"},
+        "methods": {"create": "post /v1/embeddings"},
+    },
+    "chat": {
+        "models": {"chat_completion_chunk": "OpenAIChatCompletionChunk"},
+        "subresources": {
+            "completions": {
+                "methods": {
+                    "create": {
+                        "type": "http",
+                        "streaming": {
+                            "stream_event_model": "chat.chat_completion_chunk",
+                            "param_discriminator": "stream",
+                        },
+                        "endpoint": "post /v1/chat/completions",
+                    },
+                    "list": {
+                        "type": "http",
+                        "paginated": False,
+                        "endpoint": "get /v1/chat/completions",
+                    },
+                    "retrieve": {
+                        "type": "http",
+                        "endpoint": "get /v1/chat/completions/{completion_id}",
+                    },
+                }
+            }
+        },
+    },
+    "completions": {
+        "methods": {
+            "create": {
+                "type": "http",
+                "streaming": {"param_discriminator": "stream"},
+                "endpoint": "post /v1/completions",
+            }
+        }
+    },
+    "vector_io": {
+        "models": {"queryChunksResponse": "QueryChunksResponse"},
+        "methods": {
+            "insert": "post /v1/vector-io/insert",
+            "query": "post /v1/vector-io/query",
+        },
+    },
+    "vector_stores": {
+        "models": {
+            "vector_store": "VectorStoreObject",
+            "list_vector_stores_response": "VectorStoreListResponse",
+            "vector_store_delete_response": "VectorStoreDeleteResponse",
+            "vector_store_search_response": "VectorStoreSearchResponsePage",
+        },
+        "methods": {
+            "create": "post /v1/vector_stores",
+            "list": "get /v1/vector_stores",
+            "retrieve": "get /v1/vector_stores/{vector_store_id}",
+            "update": "post /v1/vector_stores/{vector_store_id}",
+            "delete": "delete /v1/vector_stores/{vector_store_id}",
+            "search": "post /v1/vector_stores/{vector_store_id}/search",
+        },
+        "subresources": {
+            "files": {
+                "models": {"vector_store_file": "VectorStoreFileObject"},
+                "methods": {
+                    "list": "get /v1/vector_stores/{vector_store_id}/files",
+                    "retrieve": "get /v1/vector_stores/{vector_store_id}/files/{file_id}",
+                    "update": "post /v1/vector_stores/{vector_store_id}/files/{file_id}",
+                    "delete": "delete /v1/vector_stores/{vector_store_id}/files/{file_id}",
+                    "create": "post /v1/vector_stores/{vector_store_id}/files",
+                    "content": "get /v1/vector_stores/{vector_store_id}/files/{file_id}/content",
+                },
+            },
+            "file_batches": {
+                "models": {
+                    "vector_store_file_batches": "VectorStoreFileBatchObject",
+                    "list_vector_store_files_in_batch_response": "VectorStoreFilesListInBatchResponse",
+                },
+                "methods": {
+                    "create": "post /v1/vector_stores/{vector_store_id}/file_batches",
+                    "retrieve": "get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}",
+                    "list_files": "get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
+                    "cancel": "post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
+                },
+            },
+        },
+    },
+    "models": {
+        "models": {
+            "model": "OpenAIModel",
+            "list_models_response": "OpenAIListModelsResponse",
+        },
+        "methods": {
+            "list": {"paginated": False, "endpoint": "get /v1/models"},  # also listed as models.openai.list below
+            "retrieve": "get /v1/models/{model_id}",
+            "register": "post /v1/models",
+            "unregister": "delete /v1/models/{model_id}",
+        },
+        "subresources": {"openai": {"methods": {"list": {"paginated": False, "endpoint": "get /v1/models"}}}},
+    },
+    "providers": {
+        "models": {"list_providers_response": "ListProvidersResponse"},
+        "methods": {
+            "list": {"paginated": False, "endpoint": "get /v1/providers"},
+            "retrieve": "get /v1/providers/{provider_id}",
+        },
+    },
+    "routes": {
+        "models": {"list_routes_response": "ListRoutesResponse"},
+        "methods": {"list": {"paginated": False, "endpoint": "get /v1/inspect/routes"}},
+    },
+    "moderations": {
+        "models": {"create_response": "ModerationObject"},
+        "methods": {"create": "post /v1/moderations"},
+    },
+    "safety": {
+        "models": {"run_shield_response": "RunShieldResponse"},
+        "methods": {"run_shield": "post /v1/safety/run-shield"},
+    },
+    "shields": {
+        "models": {"shield": "Shield", "list_shields_response": "ListShieldsResponse"},
+        "methods": {
+            "retrieve": "get /v1/shields/{identifier}",
+            "list": {"paginated": False, "endpoint": "get /v1/shields"},
+            "register": "post /v1/shields",
+            "delete": "delete /v1/shields/{identifier}",
+        },
+    },
+    "scoring": {
+        "methods": {
+            "score": "post /v1/scoring/score",
+            "score_batch": "post /v1/scoring/score-batch",
+        }
+    },
+    "scoring_functions": {
+        "models": {
+            "scoring_fn": "ScoringFn",
+            "scoring_fn_params": "ScoringFnParams",
+            "list_scoring_functions_response": "ListScoringFunctionsResponse",
+        },
+        "methods": {
+            "retrieve": "get /v1/scoring-functions/{scoring_fn_id}",
+            "list": {"paginated": False, "endpoint": "get /v1/scoring-functions"},
+            "register": "post /v1/scoring-functions",
+            "unregister": "delete /v1/scoring-functions/{scoring_fn_id}",
+        },
+    },
+    "files": {
+        "models": {
+            "file": "OpenAIFileObject",
+            "list_files_response": "ListOpenAIFileResponse",
+            "delete_file_response": "OpenAIFileDeleteResponse",
+        },
+        "methods": {
+            "create": "post /v1/files",
+            "list": "get /v1/files",
+            "retrieve": "get /v1/files/{file_id}",
+            "delete": "delete /v1/files/{file_id}",
+            "content": "get /v1/files/{file_id}/content",
+        },
+    },
+    "batches": {
+        "methods": {
+            "create": "post /v1/batches",
+            "list": "get /v1/batches",
+            "retrieve": "get /v1/batches/{batch_id}",
+            "cancel": "post /v1/batches/{batch_id}/cancel",
+        }
+    },
+    "alpha": {
+        "subresources": {
+            "inference": {"methods": {"rerank": "post /v1alpha/inference/rerank"}},
+            "post_training": {
+                "models": {
+                    "algorithm_config": "AlgorithmConfig",
+                    "post_training_job": "PostTrainingJob",
+                    "list_post_training_jobs_response": "ListPostTrainingJobsResponse",
+                },
+                "methods": {
+                    "preference_optimize": "post /v1alpha/post-training/preference-optimize",
+                    "supervised_fine_tune": "post /v1alpha/post-training/supervised-fine-tune",
+                },
+                "subresources": {
+                    "job": {
+                        "methods": {
+                            "artifacts": "get /v1alpha/post-training/job/artifacts",
+                            "cancel": "post /v1alpha/post-training/job/cancel",
+                            "status": "get /v1alpha/post-training/job/status",
+                            "list": {
+                                "paginated": False,
+                                "endpoint": "get /v1alpha/post-training/jobs",
+                            },
+                        }
+                    }
+                },
+            },
+            "benchmarks": {
+                "models": {
+                    "benchmark": "Benchmark",
+                    "list_benchmarks_response": "ListBenchmarksResponse",
+                },
+                "methods": {
+                    "retrieve": "get /v1alpha/eval/benchmarks/{benchmark_id}",
+                    "list": {
+                        "paginated": False,
+                        "endpoint": "get /v1alpha/eval/benchmarks",
+                    },
+                    "register": "post /v1alpha/eval/benchmarks",
+                    "unregister": "delete /v1alpha/eval/benchmarks/{benchmark_id}",
+                },
+            },
+            "eval": {
+                "models": {
+                    "evaluate_response": "EvaluateResponse",
+                    "benchmark_config": "BenchmarkConfig",
+                    "job": "Job",
+                },
+                "methods": {  # the *_alpha names reuse the routes of evaluate_rows / run_eval
+                    "evaluate_rows": "post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations",
+                    "run_eval": "post /v1alpha/eval/benchmarks/{benchmark_id}/jobs",
+                    "evaluate_rows_alpha": "post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations",
+                    "run_eval_alpha": "post /v1alpha/eval/benchmarks/{benchmark_id}/jobs",
+                },
+                "subresources": {
+                    "jobs": {
+                        "methods": {
+                            "cancel": "delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
+                            "status": "get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
+                            "retrieve": "get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
+                        }
+                    }
+                },
+            },
+        }
+    },
+    "beta": {
+        "subresources": {
+            "datasets": {
+                "models": {"list_datasets_response": "ListDatasetsResponse"},
+                "methods": {
+                    "register": "post /v1beta/datasets",
+                    "retrieve": "get /v1beta/datasets/{dataset_id}",
+                    "list": {"paginated": False, "endpoint": "get /v1beta/datasets"},
+                    "unregister": "delete /v1beta/datasets/{dataset_id}",
+                    "iterrows": "get /v1beta/datasetio/iterrows/{dataset_id}",
+                    "appendrows": "post /v1beta/datasetio/append-rows/{dataset_id}",
+                },
+            }
+        }
+    },
+}
+
+
+HTTP_METHODS = {"get", "post", "put", "patch", "delete", "options", "head"}  # verbs Endpoint accepts
+
+
+@dataclass
+class Endpoint:
+    """One HTTP route: method, path and any extra per-method options taken from the config."""
+
+    method: str
+    path: str
+    extra: dict[str, Any] = field(default_factory=dict)
+
+    @classmethod
+    def from_config(cls, value: Any) -> Endpoint:
+        """Parse ``"<method> <path>"`` or a dict with an ``endpoint`` key; raise ValueError otherwise."""
+        # Every key of the dict form other than "endpoint" is kept in ``extra``.
+        extra = {k: v for k, v in value.items() if k != "endpoint"} if isinstance(value, dict) else {}
+        route = value.get("endpoint") if isinstance(value, dict) else value
+        if not isinstance(route, str):  # also rejects dicts whose "endpoint" is missing or not text
+            raise ValueError(f"Unsupported endpoint value: {value!r}")
+        method, _, path = route.partition(" ")
+        endpoint = cls._from_parts(method, path)
+        endpoint.extra.update(extra)
+        return endpoint
+
+    @classmethod
+    def _from_parts(cls, method: str, path: str) -> Endpoint:
+        """Strip both halves, lower-case the method and raise ValueError when either is invalid."""
+        method, path = method.strip().lower(), path.strip()
+        if method not in HTTP_METHODS:
+            raise ValueError(f"Unsupported HTTP method for Stainless config: {method!r}")
+        if not path.startswith("/"):
+            raise ValueError(f"Endpoint path must start with '/': {path!r}")
+        return cls(method=method, path=path)
+
+    def to_config(self) -> Any:
+        """Return the bare route string, or ``extra`` plus an ``endpoint`` key when options exist."""
+        return {**self.extra, "endpoint": self.route_key()} if self.extra else self.route_key()
+
+    def route_key(self) -> str:
+        """Return ``"<method> <path>"``, the string used to compare routes."""
+        return f"{self.method} {self.path}"
+
+
+@dataclass
+class Resource:
+    """A Stainless resource: optional model aliases, named methods and nested subresources."""
+
+    models: dict[str, str] | None = None
+    methods: dict[str, Endpoint] = field(default_factory=dict)
+    subresources: dict[str, Resource] = field(default_factory=dict)
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> Resource:
+        """Build a resource tree from a mapping with optional models/methods/subresources keys."""
+        methods = {name: Endpoint.from_config(raw) for name, raw in data.get("methods", {}).items()}
+        children = {}
+        for child_name, raw in data.get("subresources", {}).items():
+            children[child_name] = cls.from_dict(raw)
+        return cls(models=data.get("models"), methods=methods, subresources=children)
+
+    def to_config(self) -> dict[str, Any]:
+        """Serialise back to a plain mapping, leaving out every section that is empty."""
+        sections = {
+            "models": self.models,
+            "methods": {name: endpoint.to_config() for name, endpoint in self.methods.items()},
+            "subresources": {name: child.to_config() for name, child in self.subresources.items()},
+        }
+        return {key: value for key, value in sections.items() if value}
+
+    def collect_endpoint_paths(self) -> set[str]:
+        """Return the route keys of this resource and of every nested subresource."""
+        return {route for route, _ in self.iter_endpoints("")}
+
+    def iter_endpoints(self, prefix: str) -> Iterator[tuple[str, str]]:
+        """Yield ``(route_key, dotted_label)`` pairs: own methods first, then each subresource."""
+        for method_name, endpoint in self.methods.items():
+            yield endpoint.route_key(), (f"{prefix}.{method_name}" if prefix else method_name)
+        for sub_name, subresource in self.subresources.items():
+            yield from subresource.iter_endpoints(f"{prefix}.{sub_name}" if prefix else sub_name)
+
+
+_RESOURCES = {name: Resource.from_dict(data) for name, data in ALL_RESOURCES.items()}  # built once at import time
+
+
+def _load_openapi_paths(openapi_path: Path) -> set[str]:
+    """Return the ``"<method> <path>"`` key of every dict-valued entry under ``paths`` in the spec."""
+    spec = yaml.safe_load(openapi_path.read_text(encoding="utf-8")) or {}  # explicit UTF-8, not the locale default
+    paths: set[str] = set()
+    for path, path_item in (spec.get("paths") or {}).items():
+        if not isinstance(path_item, dict):
+            continue
+        for method, operation in path_item.items():
+            if isinstance(operation, dict):  # skips non-dict members such as a `parameters` list
+                paths.add(f"{str(method).lower()} {path}")
+    return paths
+
+
+@dataclass(frozen=True)
+class StainlessConfig:
+    """All sections of the Stainless config, plus the consistency checks run before writing it."""
+
+    organization: dict[str, Any]
+    security: list[Any]
+    security_schemes: dict[str, Any]
+    targets: dict[str, Any]
+    client_settings: dict[str, Any]
+    environments: dict[str, Any]
+    pagination: list[dict[str, Any]]
+    settings: dict[str, Any]
+    openapi: dict[str, Any]
+    readme: dict[str, Any]
+    resources: dict[str, Resource]
+
+    @classmethod
+    def make(cls) -> StainlessConfig:
+        """Assemble the config from this module's constants; only ``resources`` is shallow-copied."""
+        return cls(
+            organization=ORGANIZATION,
+            security=SECURITY,
+            security_schemes=SECURITY_SCHEMES,
+            targets=TARGETS,
+            client_settings=CLIENT_SETTINGS,
+            environments=ENVIRONMENTS,
+            pagination=PAGINATION,
+            settings=SETTINGS,
+            openapi=OPENAPI,
+            readme=README,
+            resources=dict(_RESOURCES),
+        )
+
+    def referenced_paths(self) -> set[str]:
+        """Return every route key named by a resource method or by a README example request."""
+        referenced = set(self.readme_endpoint_paths())
+        for resource in self.resources.values():
+            referenced.update(resource.collect_endpoint_paths())
+        return referenced
+
+    def readme_endpoint_paths(self) -> set[str]:
+        """Return the normalised ``"<method> <path>"`` keys of the README example requests."""
+        found: set[str] = set()
+        for entry in (self.readme or {}).get("example_requests", {}).values():
+            raw = entry.get("endpoint") if isinstance(entry, dict) else None
+            if not isinstance(raw, str):
+                continue
+            verb, _, route = (part.strip() for part in raw.partition(" "))
+            if verb and route:  # entries missing either half are skipped, not reported
+                found.add(f"{verb.lower()} {route}")
+        return found
+
+    def endpoint_map(self) -> dict[str, list[str]]:
+        """Map each route key to the dotted labels of all resource methods that use it."""
+        by_route: dict[str, list[str]] = {}
+        for top_name, resource in self.resources.items():
+            for route, label in resource.iter_endpoints(top_name):
+                by_route.setdefault(route, []).append(label)
+        return by_route
+
+    def validate_unique_endpoints(self) -> None:
+        """Raise ValueError when one route is claimed by more than one top-level resource."""
+        clashes = {
+            route: labels
+            for route, labels in self.endpoint_map().items()
+            if len({label.split(".", 1)[0] for label in labels}) > 1
+        }
+        if not clashes:
+            return
+        rows = [f"  - {route} defined in: {', '.join(sorted(tags))}" for route, tags in sorted(clashes.items())]
+        raise ValueError("Duplicate endpoints found across resources:\n" + "\n".join(rows))
+
+    def validate_readme_endpoints(self) -> None:
+        """Raise ValueError when a README example uses a route that no resource method exposes."""
+        known = set().union(*(resource.collect_endpoint_paths() for resource in self.resources.values()))
+        missing = sorted(self.readme_endpoint_paths() - known)
+        if missing:
+            formatted = "\n".join(f"  - {path}" for path in missing)
+            raise ValueError("README example endpoints are not present in Stainless resources:\n" + formatted)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return the plain mapping to dump, with its keys in ``SECTION_ORDER``."""
+        rendered = {name: resource.to_config() for name, resource in self.resources.items()}
+        # Every section but "resources" is passed through exactly as stored on the instance.
+        return {key: rendered if key == "resources" else getattr(self, key) for key in SECTION_ORDER}
+
+    def validate_against_openapi(self, openapi_path: Path) -> None:
+        """Raise FileNotFoundError if the spec is absent, ValueError if it lacks a referenced route."""
+        if not openapi_path.exists():
+            raise FileNotFoundError(f"OpenAPI spec not found at {openapi_path}")
+        spec_paths = _load_openapi_paths(openapi_path)
+        missing = sorted(self.referenced_paths() - spec_paths)
+        if missing:
+            formatted = "\n".join(f"  - {path}" for path in missing)
+            raise ValueError("Stainless config references missing endpoints:\n" + formatted)
+
+    def validate(self, openapi_path: Path | None = None) -> None:
+        """Run the internal checks, then the OpenAPI cross-check when a spec path is given."""
+        self.validate_unique_endpoints()
+        self.validate_readme_endpoints()
+        if openapi_path is not None:
+            self.validate_against_openapi(openapi_path)
+
+
+def build_config() -> dict[str, Any]:  # dict form of the default config; no validation is run here
+    return StainlessConfig.make().to_dict()
+
+
+def write_config(repo_root: Path, openapi_path: Path | None = None) -> Path:
+    """Validate the default config against the OpenAPI spec, write config.yml and return its path."""
+    stainless_dir = repo_root / "client-sdks" / "stainless"
+    config = StainlessConfig.make()
+    config.validate((openapi_path or stainless_dir / "openapi.yml").resolve())
+    target = stainless_dir / "config.yml"
+    target.write_text(HEADER + yaml.safe_dump(config.to_dict(), sort_keys=False))
+    return target
+
+
+def main() -> None:
+    """Regenerate config.yml for the repository root three directories above this script."""
+    written = write_config(Path(__file__).resolve().parents[3])
+    print(f"Wrote Stainless config: {written}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/provider_codegen.py b/scripts/provider_codegen.py
index d62d626ad..0eec46bc2 100755
--- a/scripts/provider_codegen.py
+++ b/scripts/provider_codegen.py
@@ -8,7 +8,8 @@
 import subprocess
 import sys
 from pathlib import Path
-from typing import Any
+from types import UnionType
+from typing import Annotated, Any, Literal, Union, get_args, get_origin
 
 from pydantic_core import PydanticUndefined
 from rich.progress import Progress, SpinnerColumn, TextColumn
@@ -51,6 +52,41 @@ class ChangedPathTracker:
         return self._changed_paths
 
 
+def extract_type_annotation(annotation: Any) -> str:
+    """Render a type annotation as a compact, human-readable string.
+
+    ``Annotated`` metadata is dropped, unions are joined with ``|`` (with
+    ``None`` always listed last), and parameterized generics are rendered as
+    ``origin[arg, ...]`` with each argument formatted recursively.
+    """
+    none_type = type(None)
+    if annotation is None:
+        return "Any"
+    if annotation is none_type:
+        return "None"
+
+    type_args = get_args(annotation)
+    base = get_origin(annotation)
+
+    if base is Annotated and type_args:
+        # Only the first argument is the real type; the rest is metadata (e.g. FieldInfo).
+        return extract_type_annotation(type_args[0])
+
+    if base in (Union, UnionType):
+        members = [extract_type_annotation(a) for a in type_args if a is not none_type]
+        joined = " | ".join(members)
+        # Fewer members than args means a NoneType was filtered out: re-append it once, last.
+        return f"{joined} | None" if len(members) < len(type_args) else joined
+
+    if base is not None and type_args:
+        rendered = ", ".join(extract_type_annotation(a) for a in type_args)
+        return f"{getattr(base, '__name__', str(base))}[{rendered}]"
+
+    if hasattr(annotation, "__name__"):
+        return annotation.__name__
+    return str(annotation)
+
+
 def get_config_class_info(config_class_path: str) -> dict[str, Any]:
     """Extract configuration information from a config class."""
     try:
@@ -78,14 +114,8 @@ def get_config_class_info(config_class_path: str) -> dict[str, Any]:
             for field_name, field in config_class.model_fields.items():
                 if getattr(field, "exclude", False):
                     continue
-                field_type = str(field.annotation) if field.annotation else "Any"
 
-                # this string replace is ridiculous
-                field_type = field_type.replace("typing.", "").replace("Optional[", "").replace("]", "")
-                field_type = field_type.replace("Annotated[", "").replace("FieldInfo(", "").replace(")", "")
-                field_type = field_type.replace("llama_stack_api.inference.", "")
-                field_type = field_type.replace("llama_stack.providers.", "")
-                field_type = field_type.replace("llama_stack_api.datatypes.", "")
+                field_type = extract_type_annotation(field.annotation)
 
                 default_value = field.default
                 if field.default_factory is not None:
@@ -345,8 +375,16 @@ def generate_index_docs(api_name: str, api_docstring: str | None, provider_entri
     # Add YAML frontmatter for index
     md_lines.append("---")
     if api_docstring:
-        clean_desc = api_docstring.strip().replace('"', '\\"')
-        md_lines.append(f'description: "{clean_desc}"')
+        # Handle multi-line descriptions in YAML
+        if "\n" in api_docstring.strip():
+            md_lines.append("description: |")
+            for line in api_docstring.strip().split("\n"):
+                # Avoid trailing whitespace by only adding spaces to non-empty lines
+                md_lines.append(f"  {line}" if line.strip() else "")
+        else:
+            # For single line descriptions, format properly for YAML
+            clean_desc = api_docstring.strip().replace('"', '\\"')
+            md_lines.append(f'description: "{clean_desc}"')
     md_lines.append(f"sidebar_label: {sidebar_label}")
     md_lines.append(f"title: {api_name.title()}")
     md_lines.append("---")
diff --git a/scripts/run_openapi_generator.sh b/scripts/run_openapi_generator.sh
index 946b2886f..d4e3b2ec7 100755
--- a/scripts/run_openapi_generator.sh
+++ b/scripts/run_openapi_generator.sh
@@ -17,3 +17,5 @@ PYTHONPATH=$PYTHONPATH:$stack_dir \
   python3 -m scripts.openapi_generator "$stack_dir"/docs/static
 
 cp "$stack_dir"/docs/static/stainless-llama-stack-spec.yaml "$stack_dir"/client-sdks/stainless/openapi.yml
+PYTHONPATH=$PYTHONPATH:$stack_dir \
+  python3 -m scripts.openapi_generator.stainless_config.generate_config
diff --git a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py
index 649bddecb..97b044dbf 100644
--- a/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py
+++ b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py
@@ -48,16 +48,10 @@ class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime
         if mcp_endpoint is None:
             raise ValueError("mcp_endpoint is required")
 
-        # Phase 1: Support both old header-based auth AND new authorization parameter
-        # Get headers and auth from provider data (old approach)
-        provider_headers, provider_auth = await self.get_headers_from_request(mcp_endpoint.uri)
+        # Get other headers from provider data (but NOT authorization)
+        provider_headers = await self.get_headers_from_request(mcp_endpoint.uri)
 
-        # New authorization parameter takes precedence over provider data
-        final_authorization = authorization or provider_auth
-
-        return await list_mcp_tools(
-            endpoint=mcp_endpoint.uri, headers=provider_headers, authorization=final_authorization
-        )
+        return await list_mcp_tools(endpoint=mcp_endpoint.uri, headers=provider_headers, authorization=authorization)
 
     async def invoke_tool(
         self, tool_name: str, kwargs: dict[str, Any], authorization: str | None = None
@@ -69,39 +63,38 @@ class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime
         if urlparse(endpoint).scheme not in ("http", "https"):
             raise ValueError(f"Endpoint {endpoint} is not a valid HTTP(S) URL")
 
-        # Phase 1: Support both old header-based auth AND new authorization parameter
-        # Get headers and auth from provider data (old approach)
-        provider_headers, provider_auth = await self.get_headers_from_request(endpoint)
-
-        # New authorization parameter takes precedence over provider data
-        final_authorization = authorization or provider_auth
+        # Get other headers from provider data (but NOT authorization)
+        provider_headers = await self.get_headers_from_request(endpoint)
 
         return await invoke_mcp_tool(
             endpoint=endpoint,
             tool_name=tool_name,
             kwargs=kwargs,
             headers=provider_headers,
-            authorization=final_authorization,
+            authorization=authorization,
         )
 
-    async def get_headers_from_request(self, mcp_endpoint_uri: str) -> tuple[dict[str, str], str | None]:
+    async def get_headers_from_request(self, mcp_endpoint_uri: str) -> dict[str, str]:
         """
-        Extract headers and authorization from request provider data (Phase 1 backward compatibility).
+        Extract headers from request provider data, excluding authorization.
 
-        Phase 1: Temporarily allows Authorization to be passed via mcp_headers for backward compatibility.
-        Phase 2: Will enforce that Authorization should use the dedicated authorization parameter instead.
+        Authorization must be provided via the dedicated authorization parameter.
+        If Authorization is found in mcp_headers, raise an error to guide users to the correct approach.
+
+        Args:
+            mcp_endpoint_uri: The MCP endpoint URI to match against provider data
 
         Returns:
-            Tuple of (headers_dict, authorization_token)
-            - headers_dict: All headers except Authorization
-            - authorization_token: Token from Authorization header (with "Bearer " prefix removed), or None
+            dict[str, str]: Headers dictionary (without Authorization)
+
+        Raises:
+            ValueError: If Authorization header is found in mcp_headers
         """
 
         def canonicalize_uri(uri: str) -> str:
             return f"{urlparse(uri).netloc or ''}/{urlparse(uri).path or ''}"
 
         headers = {}
-        authorization = None
 
         provider_data = self.get_request_provider_data()
         if provider_data and hasattr(provider_data, "mcp_headers") and provider_data.mcp_headers:
@@ -109,17 +102,14 @@ class ModelContextProtocolToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime
                 if canonicalize_uri(uri) != canonicalize_uri(mcp_endpoint_uri):
                     continue
 
-                # Phase 1: Extract Authorization from mcp_headers for backward compatibility
-                # (Phase 2 will reject this and require the dedicated authorization parameter)
+                # Reject Authorization in mcp_headers - must use authorization parameter
                 for key in values.keys():
                     if key.lower() == "authorization":
-                        # Extract authorization token and strip "Bearer " prefix if present
-                        auth_value = values[key]
-                        if auth_value.startswith("Bearer "):
-                            authorization = auth_value[7:]  # Remove "Bearer " prefix
-                        else:
-                            authorization = auth_value
-                    else:
-                        headers[key] = values[key]
+                        raise ValueError(
+                            "Authorization cannot be provided via mcp_headers in provider_data. "
+                            "Please use the dedicated 'authorization' parameter instead. "
+                            "Example: tool_runtime.invoke_tool(..., authorization='your-token')"
+                        )
+                    headers[key] = values[key]
 
-        return headers, authorization
+        return headers
diff --git a/src/llama_stack_api/__init__.py b/src/llama_stack_api/__init__.py
index b7efcc543..b6fe2fd23 100644
--- a/src/llama_stack_api/__init__.py
+++ b/src/llama_stack_api/__init__.py
@@ -22,7 +22,7 @@ and considered a code smell. All exported symbols are explicitly listed in __all
 __version__ = "0.4.0.dev0"
 
 # Import submodules for those who need them
-from . import common, strong_typing  # noqa: F401
+from . import common  # noqa: F401
 
 # Import all public API symbols
 from .agents import Agents, ResponseGuardrail, ResponseGuardrailSpec
@@ -393,8 +393,6 @@ from .shields import (
     ShieldInput,
     Shields,
 )
-
-# Import from strong_typing
 from .tools import (
     ListToolDefsResponse,
     ListToolGroupsResponse,
@@ -449,7 +447,6 @@ from .version import (
 __all__ = [
     # Submodules
     "common",
-    "strong_typing",
     # Version constants
     "LLAMA_STACK_API_V1",
     "LLAMA_STACK_API_V1ALPHA",
diff --git a/src/llama_stack_api/strong_typing/py.typed b/src/llama_stack_api/strong_typing/py.typed
deleted file mode 100644
index e69de29bb..000000000
diff --git a/tests/integration/inference/test_tools_with_schemas.py b/tests/integration/inference/test_tools_with_schemas.py
index 5b6e69ae3..ab033c381 100644
--- a/tests/integration/inference/test_tools_with_schemas.py
+++ b/tests/integration/inference/test_tools_with_schemas.py
@@ -9,8 +9,6 @@ Integration tests for inference/chat completion with JSON Schema-based tools.
 Tests that tools pass through correctly to various LLM providers.
 """
 
-import json
-
 import pytest
 
 from llama_stack.core.library_client import LlamaStackAsLibraryClient
@@ -193,22 +191,11 @@ class TestMCPToolsInChatCompletion:
             mcp_endpoint=dict(uri=uri),
         )
 
-        # Use old header-based approach for Phase 1 (backward compatibility)
-        provider_data = {
-            "mcp_headers": {
-                uri: {
-                    "Authorization": f"Bearer {AUTH_TOKEN}",
-                },
-            },
-        }
-        auth_headers = {
-            "X-LlamaStack-Provider-Data": json.dumps(provider_data),
-        }
-
+        # Use the dedicated authorization parameter
         # Get the tools from MCP
         tools_response = llama_stack_client.tool_runtime.list_tools(
             tool_group_id=test_toolgroup_id,
-            extra_headers=auth_headers,
+            authorization=AUTH_TOKEN,
         )
 
         # Convert to OpenAI format for inference
diff --git a/tests/integration/tool_runtime/test_mcp.py b/tests/integration/tool_runtime/test_mcp.py
index 1b7f509d2..074a92afb 100644
--- a/tests/integration/tool_runtime/test_mcp.py
+++ b/tests/integration/tool_runtime/test_mcp.py
@@ -4,8 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-import json
-
 import pytest
 from llama_stack_client.lib.agents.agent import Agent
 from llama_stack_client.lib.agents.turn_events import StepCompleted, StepProgress, ToolCallIssuedDelta
@@ -37,32 +35,20 @@ def test_mcp_invocation(llama_stack_client, text_model_id, mcp_server):
         mcp_endpoint=dict(uri=uri),
     )
 
-    # Use old header-based approach for Phase 1 (backward compatibility)
-    provider_data = {
-        "mcp_headers": {
-            uri: {
-                "Authorization": f"Bearer {AUTH_TOKEN}",
-            },
-        },
-    }
-    auth_headers = {
-        "X-LlamaStack-Provider-Data": json.dumps(provider_data),
-    }
-
-    with pytest.raises(Exception, match="Unauthorized"):
-        llama_stack_client.tools.list(toolgroup_id=test_toolgroup_id)
-
-    tools_list = llama_stack_client.tools.list(
-        toolgroup_id=test_toolgroup_id,
-        extra_headers=auth_headers,  # Use old header-based approach
+    # Use the dedicated authorization parameter (no more provider_data headers)
+    # This tests direct tool_runtime.list_tools and tool_runtime.invoke_tool API calls
+    tools_list = llama_stack_client.tool_runtime.list_tools(
+        tool_group_id=test_toolgroup_id,
+        authorization=AUTH_TOKEN,  # Use dedicated authorization parameter
     )
     assert len(tools_list) == 2
     assert {t.name for t in tools_list} == {"greet_everyone", "get_boiling_point"}
 
+    # Invoke tool with authorization parameter
     response = llama_stack_client.tool_runtime.invoke_tool(
         tool_name="greet_everyone",
         kwargs=dict(url="https://www.google.com"),
-        extra_headers=auth_headers,  # Use old header-based approach
+        authorization=AUTH_TOKEN,  # Use dedicated authorization parameter
     )
     content = response.content
     assert len(content) == 1
diff --git a/tests/integration/tool_runtime/test_mcp_json_schema.py b/tests/integration/tool_runtime/test_mcp_json_schema.py
index 719588c7f..6be71caaf 100644
--- a/tests/integration/tool_runtime/test_mcp_json_schema.py
+++ b/tests/integration/tool_runtime/test_mcp_json_schema.py
@@ -8,8 +8,6 @@
 Tests $ref, $defs, and other JSON Schema features through MCP integration.
 """
 
-import json
-
 import pytest
 
 from llama_stack.core.library_client import LlamaStackAsLibraryClient
@@ -122,22 +120,11 @@ class TestMCPSchemaPreservation:
             mcp_endpoint=dict(uri=uri),
         )
 
-        # Use old header-based approach for Phase 1 (backward compatibility)
-        provider_data = {
-            "mcp_headers": {
-                uri: {
-                    "Authorization": f"Bearer {AUTH_TOKEN}",
-                },
-            },
-        }
-        auth_headers = {
-            "X-LlamaStack-Provider-Data": json.dumps(provider_data),
-        }
-
+        # Use the dedicated authorization parameter
         # List runtime tools
         response = llama_stack_client.tool_runtime.list_tools(
             tool_group_id=test_toolgroup_id,
-            extra_headers=auth_headers,
+            authorization=AUTH_TOKEN,
         )
 
         tools = response
@@ -173,22 +160,11 @@ class TestMCPSchemaPreservation:
             mcp_endpoint=dict(uri=uri),
         )
 
-        # Use old header-based approach for Phase 1 (backward compatibility)
-        provider_data = {
-            "mcp_headers": {
-                uri: {
-                    "Authorization": f"Bearer {AUTH_TOKEN}",
-                },
-            },
-        }
-        auth_headers = {
-            "X-LlamaStack-Provider-Data": json.dumps(provider_data),
-        }
-
+        # Use the dedicated authorization parameter
         # List tools
         response = llama_stack_client.tool_runtime.list_tools(
             tool_group_id=test_toolgroup_id,
-            extra_headers=auth_headers,
+            authorization=AUTH_TOKEN,
         )
 
         # Find book_flight tool (which should have $ref/$defs)
@@ -230,21 +206,10 @@ class TestMCPSchemaPreservation:
             mcp_endpoint=dict(uri=uri),
         )
 
-        # Use old header-based approach for Phase 1 (backward compatibility)
-        provider_data = {
-            "mcp_headers": {
-                uri: {
-                    "Authorization": f"Bearer {AUTH_TOKEN}",
-                },
-            },
-        }
-        auth_headers = {
-            "X-LlamaStack-Provider-Data": json.dumps(provider_data),
-        }
-
+        # Use the dedicated authorization parameter
         response = llama_stack_client.tool_runtime.list_tools(
             tool_group_id=test_toolgroup_id,
-            extra_headers=auth_headers,
+            authorization=AUTH_TOKEN,
         )
 
         # Find get_weather tool
@@ -284,22 +249,10 @@ class TestMCPToolInvocation:
             mcp_endpoint=dict(uri=uri),
         )
 
-        # Use old header-based approach for Phase 1 (backward compatibility)
-        provider_data = {
-            "mcp_headers": {
-                uri: {
-                    "Authorization": f"Bearer {AUTH_TOKEN}",
-                },
-            },
-        }
-        auth_headers = {
-            "X-LlamaStack-Provider-Data": json.dumps(provider_data),
-        }
-
-        # List tools to populate the tool index
+        # List tools to populate the tool index, using the dedicated authorization parameter
         llama_stack_client.tool_runtime.list_tools(
             tool_group_id=test_toolgroup_id,
-            extra_headers=auth_headers,
+            authorization=AUTH_TOKEN,
         )
 
         # Invoke tool with complex nested data
@@ -311,7 +264,7 @@ class TestMCPToolInvocation:
                     "shipping": {"address": {"street": "123 Main St", "city": "San Francisco", "zipcode": "94102"}},
                 }
             },
-            extra_headers=auth_headers,
+            authorization=AUTH_TOKEN,
         )
 
         # Should succeed without schema validation errors
@@ -337,29 +290,17 @@ class TestMCPToolInvocation:
             mcp_endpoint=dict(uri=uri),
         )
 
-        # Use old header-based approach for Phase 1 (backward compatibility)
-        provider_data = {
-            "mcp_headers": {
-                uri: {
-                    "Authorization": f"Bearer {AUTH_TOKEN}",
-                },
-            },
-        }
-        auth_headers = {
-            "X-LlamaStack-Provider-Data": json.dumps(provider_data),
-        }
-
-        # List tools to populate the tool index
+        # List tools to populate the tool index, using the dedicated authorization parameter
         llama_stack_client.tool_runtime.list_tools(
             tool_group_id=test_toolgroup_id,
-            extra_headers=auth_headers,
+            authorization=AUTH_TOKEN,
         )
 
         # Test with email format
         result_email = llama_stack_client.tool_runtime.invoke_tool(
             tool_name="flexible_contact",
             kwargs={"contact_info": "user@example.com"},
-            extra_headers=auth_headers,
+            authorization=AUTH_TOKEN,
         )
 
         assert result_email.error_message is None
@@ -368,7 +309,7 @@ class TestMCPToolInvocation:
         result_phone = llama_stack_client.tool_runtime.invoke_tool(
             tool_name="flexible_contact",
             kwargs={"contact_info": "+15551234567"},
-            extra_headers=auth_headers,
+            authorization=AUTH_TOKEN,
         )
 
         assert result_phone.error_message is None
@@ -400,21 +341,10 @@ class TestAgentWithMCPTools:
             mcp_endpoint=dict(uri=uri),
         )
 
-        # Use old header-based approach for Phase 1 (backward compatibility)
-        provider_data = {
-            "mcp_headers": {
-                uri: {
-                    "Authorization": f"Bearer {AUTH_TOKEN}",
-                },
-            },
-        }
-        auth_headers = {
-            "X-LlamaStack-Provider-Data": json.dumps(provider_data),
-        }
-
-        tools_list = llama_stack_client.tools.list(
-            toolgroup_id=test_toolgroup_id,
-            extra_headers=auth_headers,
+        # Use the dedicated authorization parameter
+        tools_list = llama_stack_client.tool_runtime.list_tools(
+            tool_group_id=test_toolgroup_id,
+            authorization=AUTH_TOKEN,
         )
         tool_defs = [
             {
diff --git a/uv.lock b/uv.lock
index a343eb5d8..8c648c362 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2165,10 +2165,8 @@ requires-dist = [
     { name = "python-dotenv" },
     { name = "python-multipart", specifier = ">=0.0.20" },
     { name = "pyyaml", specifier = ">=6.0" },
-    { name = "pyyaml", specifier = ">=6.0.2" },
     { name = "rich" },
     { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" },
-    { name = "starlette" },
     { name = "starlette", specifier = ">=0.49.1" },
     { name = "termcolor" },
     { name = "tiktoken" },
@@ -4656,6 +4654,8 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/6b/fa/3234f913fe9a6525a7b97c6dad1f51e72b917e6872e051a5e2ffd8b16fbb/ruamel.yaml.clib-0.2.14-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:70eda7703b8126f5e52fcf276e6c0f40b0d314674f896fc58c47b0aef2b9ae83", size = 137970, upload-time = "2025-09-22T19:51:09.472Z" },
     { url = "https://files.pythonhosted.org/packages/ef/ec/4edbf17ac2c87fa0845dd366ef8d5852b96eb58fcd65fc1ecf5fe27b4641/ruamel.yaml.clib-0.2.14-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a0cb71ccc6ef9ce36eecb6272c81afdc2f565950cdcec33ae8e6cd8f7fc86f27", size = 739639, upload-time = "2025-09-22T19:51:10.566Z" },
     { url = "https://files.pythonhosted.org/packages/15/18/b0e1fafe59051de9e79cdd431863b03593ecfa8341c110affad7c8121efc/ruamel.yaml.clib-0.2.14-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e7cb9ad1d525d40f7d87b6df7c0ff916a66bc52cb61b66ac1b2a16d0c1b07640", size = 764456, upload-time = "2025-09-22T19:51:11.736Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/cd/150fdb96b8fab27fe08d8a59fe67554568727981806e6bc2677a16081ec7/ruamel_yaml_clib-0.2.14-cp314-cp314-win32.whl", hash = "sha256:9b4104bf43ca0cd4e6f738cb86326a3b2f6eef00f417bd1e7efb7bdffe74c539", size = 102394, upload-time = "2025-11-14T21:57:36.703Z" },
+    { url = "https://files.pythonhosted.org/packages/bd/e6/a3fa40084558c7e1dc9546385f22a93949c890a8b2e445b2ba43935f51da/ruamel_yaml_clib-0.2.14-cp314-cp314-win_amd64.whl", hash = "sha256:13997d7d354a9890ea1ec5937a219817464e5cc344805b37671562a401ca3008", size = 122673, upload-time = "2025-11-14T21:57:38.177Z" },
 ]
 
 [[package]]