From 8e5ed739ec11984e769be113a2a2971f1611dee1 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe <ashwin.bharambe@gmail.com>
Date: Mon, 27 Oct 2025 11:27:58 -0700
Subject: [PATCH] chore(package): migrate to src/ layout

Moved package code from llama_stack/ to src/llama_stack/ following Python
packaging best practices. Updated pyproject.toml, MANIFEST.in, and tool
configurations accordingly.

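Public API and import paths remain unchanged. Because an existing
editable install still points at the old top-level llama_stack/
directory, developers will need to reinstall in editable mode (e.g.
`pip install -e .`) after pulling this change.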

Also updated paths in pre-commit config, scripts, and GitHub workflows.
---
 .github/workflows/integration-auth-tests.yml  |   2 +-
 .../workflows/integration-sql-store-tests.yml |   2 +-
 .github/workflows/integration-tests.yml       |   2 +-
 .../workflows/integration-vector-io-tests.yml |   2 +-
 .github/workflows/pre-commit.yml              |   4 +-
 .github/workflows/precommit-trigger.yml       |   4 +-
 .github/workflows/providers-build.yml         |  28 +-
 .github/workflows/providers-list-deps.yml     |  24 +-
 .github/workflows/python-build-test.yml       |   2 +-
 .../test-external-provider-module.yml         |   2 +-
 .github/workflows/test-external.yml           |   2 +-
 .github/workflows/ui-unit-tests.yml           |  12 +-
 .github/workflows/unit-tests.yml              |   2 +-
 .pre-commit-config.yaml                       |  12 +-
 MANIFEST.in                                   |  18 +-
 pyproject.toml                                | 152 +++++-----
 scripts/check-init-py.sh                      |   2 +-
 scripts/distro_codegen.py                     |   2 +-
 scripts/run-ui-linter.sh                      |   2 +-
 {llama_stack => src/llama_stack}/__init__.py  |   0
 .../llama_stack}/apis/__init__.py             |   0
 .../llama_stack}/apis/agents/__init__.py      |   0
 .../llama_stack}/apis/agents/agents.py        |   0
 .../apis/agents/openai_responses.py           |   0
 .../llama_stack}/apis/batches/__init__.py     |   0
 .../llama_stack}/apis/batches/batches.py      |   0
 .../llama_stack}/apis/benchmarks/__init__.py  |   0
 .../apis/benchmarks/benchmarks.py             |   0
 .../llama_stack}/apis/common/__init__.py      |   0
 .../llama_stack}/apis/common/content_types.py |   0
 .../llama_stack}/apis/common/errors.py        |   0
 .../llama_stack}/apis/common/job_types.py     |   0
 .../llama_stack}/apis/common/responses.py     |   0
 .../apis/common/training_types.py             |   0
 .../llama_stack}/apis/common/type_system.py   |   0
 .../apis/conversations/__init__.py            |   0
 .../apis/conversations/conversations.py       |   0
 .../llama_stack}/apis/datasetio/__init__.py   |   0
 .../llama_stack}/apis/datasetio/datasetio.py  |   0
 .../llama_stack}/apis/datasets/__init__.py    |   0
 .../llama_stack}/apis/datasets/datasets.py    |   0
 .../llama_stack}/apis/datatypes.py            |   0
 .../llama_stack}/apis/eval/__init__.py        |   0
 .../llama_stack}/apis/eval/eval.py            |   0
 .../llama_stack}/apis/files/__init__.py       |   0
 .../llama_stack}/apis/files/files.py          |   0
 .../llama_stack}/apis/inference/__init__.py   |   0
 .../apis/inference/event_logger.py            |   0
 .../llama_stack}/apis/inference/inference.py  |   0
 .../llama_stack}/apis/inspect/__init__.py     |   0
 .../llama_stack}/apis/inspect/inspect.py      |   0
 .../llama_stack}/apis/models/__init__.py      |   0
 .../llama_stack}/apis/models/models.py        |   0
 .../apis/post_training/__init__.py            |   0
 .../apis/post_training/post_training.py       |   0
 .../llama_stack}/apis/prompts/__init__.py     |   0
 .../llama_stack}/apis/prompts/prompts.py      |   0
 .../llama_stack}/apis/providers/__init__.py   |   0
 .../llama_stack}/apis/providers/providers.py  |   0
 .../llama_stack}/apis/resource.py             |   0
 .../llama_stack}/apis/safety/__init__.py      |   0
 .../llama_stack}/apis/safety/safety.py        |   0
 .../llama_stack}/apis/scoring/__init__.py     |   0
 .../llama_stack}/apis/scoring/scoring.py      |   0
 .../apis/scoring_functions/__init__.py        |   0
 .../scoring_functions/scoring_functions.py    |   0
 .../llama_stack}/apis/shields/__init__.py     |   0
 .../llama_stack}/apis/shields/shields.py      |   0
 .../synthetic_data_generation/__init__.py     |   0
 .../synthetic_data_generation.py              |   0
 .../llama_stack}/apis/telemetry/__init__.py   |   0
 .../llama_stack}/apis/telemetry/telemetry.py  |   0
 .../llama_stack}/apis/tools/__init__.py       |   0
 .../llama_stack}/apis/tools/rag_tool.py       |   0
 .../llama_stack}/apis/tools/tools.py          |   0
 .../llama_stack}/apis/vector_io/__init__.py   |   0
 .../llama_stack}/apis/vector_io/vector_io.py  |   0
 .../apis/vector_stores/__init__.py            |   0
 .../apis/vector_stores/vector_stores.py       |   0
 .../llama_stack}/apis/version.py              |   0
 .../llama_stack}/cli/__init__.py              |   0
 {llama_stack => src/llama_stack}/cli/llama.py |   0
 .../llama_stack}/cli/scripts/__init__.py      |   0
 .../scripts/install-wheel-from-presigned.sh   |   0
 .../llama_stack}/cli/scripts/run.py           |   0
 .../llama_stack}/cli/stack/__init__.py        |   0
 .../llama_stack}/cli/stack/_list_deps.py      |   0
 .../llama_stack}/cli/stack/list_apis.py       |   0
 .../llama_stack}/cli/stack/list_deps.py       |   0
 .../llama_stack}/cli/stack/list_providers.py  |   0
 .../llama_stack}/cli/stack/list_stacks.py     |   0
 .../llama_stack}/cli/stack/remove.py          |   0
 .../llama_stack}/cli/stack/run.py             |   0
 .../llama_stack}/cli/stack/stack.py           |   0
 .../llama_stack}/cli/stack/utils.py           |   0
 .../llama_stack}/cli/subcommand.py            |   0
 {llama_stack => src/llama_stack}/cli/table.py |   0
 {llama_stack => src/llama_stack}/cli/utils.py |   0
 .../llama_stack}/core/__init__.py             |   0
 .../core/access_control/__init__.py           |   0
 .../core/access_control/access_control.py     |   0
 .../core/access_control/conditions.py         |   0
 .../core/access_control/datatypes.py          |   0
 .../llama_stack}/core/build.py                |   0
 .../llama_stack}/core/client.py               |   0
 .../llama_stack}/core/common.sh               |   0
 .../llama_stack}/core/configure.py            |   0
 .../core/conversations/__init__.py            |   0
 .../core/conversations/conversations.py       |   0
 .../llama_stack}/core/datatypes.py            |   0
 .../llama_stack}/core/distribution.py         |   0
 .../llama_stack}/core/external.py             |   0
 .../llama_stack}/core/id_generation.py        |   0
 .../llama_stack}/core/inspect.py              |   0
 .../llama_stack}/core/library_client.py       |   0
 .../llama_stack}/core/prompts/__init__.py     |   0
 .../llama_stack}/core/prompts/prompts.py      |   0
 .../llama_stack}/core/providers.py            |   0
 .../llama_stack}/core/request_headers.py      |   0
 .../llama_stack}/core/resolver.py             |   0
 .../llama_stack}/core/routers/__init__.py     |   0
 .../llama_stack}/core/routers/datasets.py     |   0
 .../llama_stack}/core/routers/eval_scoring.py |   0
 .../llama_stack}/core/routers/inference.py    |   0
 .../llama_stack}/core/routers/safety.py       |   0
 .../llama_stack}/core/routers/tool_runtime.py |   0
 .../llama_stack}/core/routers/vector_io.py    |   0
 .../core/routing_tables/__init__.py           |   0
 .../core/routing_tables/benchmarks.py         |   0
 .../core/routing_tables/common.py             |   0
 .../core/routing_tables/datasets.py           |   0
 .../core/routing_tables/models.py             |   0
 .../core/routing_tables/scoring_functions.py  |   0
 .../core/routing_tables/shields.py            |   0
 .../core/routing_tables/toolgroups.py         |   0
 .../core/routing_tables/vector_stores.py      |   0
 .../llama_stack}/core/server/__init__.py      |   0
 .../llama_stack}/core/server/auth.py          |   0
 .../core/server/auth_providers.py             |   0
 .../llama_stack}/core/server/quota.py         |   0
 .../llama_stack}/core/server/routes.py        |   0
 .../llama_stack}/core/server/server.py        |   0
 .../llama_stack}/core/server/tracing.py       |   0
 .../llama_stack}/core/stack.py                |   0
 .../llama_stack}/core/start_stack.sh          |   0
 .../llama_stack}/core/storage/__init__.py     |   0
 .../llama_stack}/core/storage/datatypes.py    |   0
 .../llama_stack}/core/store/__init__.py       |   0
 .../llama_stack}/core/store/registry.py       |   0
 .../llama_stack}/core/telemetry/__init__.py   |   0
 .../llama_stack}/core/telemetry/telemetry.py  |   0
 .../core/telemetry/trace_protocol.py          |   0
 .../llama_stack}/core/telemetry/tracing.py    |   0
 .../llama_stack}/core/testing_context.py      |   0
 .../llama_stack}/core/ui/Containerfile        |   0
 .../llama_stack}/core/ui/README.md            |   0
 .../llama_stack}/core/ui/__init__.py          |   0
 .../llama_stack}/core/ui/app.py               |   0
 .../llama_stack}/core/ui/modules/__init__.py  |   0
 .../llama_stack}/core/ui/modules/api.py       |   0
 .../llama_stack}/core/ui/modules/utils.py     |   0
 .../llama_stack}/core/ui/page/__init__.py     |   0
 .../core/ui/page/distribution/__init__.py     |   0
 .../core/ui/page/distribution/datasets.py     |   0
 .../core/ui/page/distribution/eval_tasks.py   |   0
 .../core/ui/page/distribution/models.py       |   0
 .../core/ui/page/distribution/providers.py    |   0
 .../core/ui/page/distribution/resources.py    |   0
 .../ui/page/distribution/scoring_functions.py |   0
 .../core/ui/page/distribution/shields.py      |   0
 .../core/ui/page/evaluations/__init__.py      |   0
 .../core/ui/page/evaluations/app_eval.py      |   0
 .../core/ui/page/evaluations/native_eval.py   |   0
 .../core/ui/page/playground/__init__.py       |   0
 .../core/ui/page/playground/chat.py           |   0
 .../core/ui/page/playground/tools.py          |   0
 .../llama_stack}/core/ui/requirements.txt     |   0
 .../llama_stack}/core/utils/__init__.py       |   0
 .../llama_stack}/core/utils/config.py         |   0
 .../llama_stack}/core/utils/config_dirs.py    |   0
 .../core/utils/config_resolution.py           |   0
 .../llama_stack}/core/utils/context.py        |   0
 .../llama_stack}/core/utils/dynamic.py        |   0
 .../llama_stack}/core/utils/exec.py           |   0
 .../llama_stack}/core/utils/image_types.py    |   0
 .../llama_stack}/core/utils/model_utils.py    |   0
 .../core/utils/prompt_for_config.py           |   0
 .../llama_stack}/core/utils/serialize.py      |   0
 .../llama_stack}/distributions/__init__.py    |   0
 .../distributions/ci-tests/__init__.py        |   0
 .../distributions/ci-tests/build.yaml         |  59 ++++
 .../distributions/ci-tests/ci_tests.py        |   0
 .../distributions/ci-tests/run.yaml           | 278 +++++++++++++++++
 .../distributions/dell/__init__.py            |   0
 src/llama_stack/distributions/dell/build.yaml |  33 ++
 .../llama_stack}/distributions/dell/dell.py   |   0
 .../distributions/dell/doc_template.md        |   0
 .../distributions/dell/run-with-safety.yaml   | 141 +++++++++
 src/llama_stack/distributions/dell/run.yaml   | 132 ++++++++
 .../meta-reference-gpu/__init__.py            |   0
 .../meta-reference-gpu/build.yaml             |  32 ++
 .../meta-reference-gpu/doc_template.md        |   0
 .../meta-reference-gpu/meta_reference.py      |   0
 .../meta-reference-gpu/run-with-safety.yaml   | 154 ++++++++++
 .../distributions/meta-reference-gpu/run.yaml | 139 +++++++++
 .../distributions/nvidia/__init__.py          |   0
 .../distributions/nvidia/build.yaml           |  29 ++
 .../distributions/nvidia/doc_template.md      |   0
 .../distributions/nvidia/nvidia.py            |   0
 .../distributions/nvidia/run-with-safety.yaml | 137 +++++++++
 src/llama_stack/distributions/nvidia/run.yaml | 116 ++++++++
 .../distributions/open-benchmark/__init__.py  |   0
 .../distributions/open-benchmark/build.yaml   |  36 +++
 .../open-benchmark/open_benchmark.py          |   0
 .../distributions/open-benchmark/run.yaml     | 252 ++++++++++++++++
 .../distributions/postgres-demo/__init__.py   |   0
 .../distributions/postgres-demo/build.yaml    |  23 ++
 .../postgres-demo/postgres_demo.py            |   0
 .../distributions/postgres-demo/run.yaml      | 115 +++++++
 .../distributions/starter-gpu/__init__.py     |   0
 .../distributions/starter-gpu/build.yaml      |  60 ++++
 .../distributions/starter-gpu/run.yaml        | 281 ++++++++++++++++++
 .../distributions/starter-gpu/starter_gpu.py  |   0
 .../distributions/starter/__init__.py         |   0
 .../distributions/starter/build.yaml          |  60 ++++
 .../distributions/starter/run.yaml            | 278 +++++++++++++++++
 .../distributions/starter/starter.py          |   0
 .../llama_stack}/distributions/template.py    |   0
 .../distributions/watsonx/__init__.py         |   0
 .../distributions/watsonx/build.yaml          |  33 ++
 .../distributions/watsonx/run.yaml            | 133 +++++++++
 .../distributions/watsonx/watsonx.py          |   0
 {llama_stack => src/llama_stack}/env.py       |   0
 {llama_stack => src/llama_stack}/log.py       |   0
 .../llama_stack}/models/__init__.py           |   0
 .../llama_stack}/models/llama/__init__.py     |   0
 .../llama_stack}/models/llama/checkpoint.py   |   0
 .../llama_stack}/models/llama/datatypes.py    |   0
 .../models/llama/hadamard_utils.py            |   0
 .../models/llama/llama3/__init__.py           |   0
 .../llama_stack}/models/llama/llama3/args.py  |   0
 .../models/llama/llama3/chat_format.py        |   0
 .../llama_stack}/models/llama/llama3/dog.jpg  | Bin
 .../models/llama/llama3/generation.py         |   0
 .../models/llama/llama3/interface.py          |   0
 .../llama_stack}/models/llama/llama3/model.py |   0
 .../llama/llama3/multimodal/__init__.py       |   0
 .../llama/llama3/multimodal/encoder_utils.py  |   0
 .../llama3/multimodal/image_transform.py      |   0
 .../models/llama/llama3/multimodal/model.py   |   0
 .../models/llama/llama3/multimodal/utils.py   |   0
 .../models/llama/llama3/pasta.jpeg            | Bin
 .../llama/llama3/prompt_templates/__init__.py |   0
 .../llama/llama3/prompt_templates/base.py     |   0
 .../llama3/prompt_templates/system_prompts.py |   0
 .../llama3/prompt_templates/tool_response.py  |   0
 .../llama/llama3/quantization/__init__.py     |   0
 .../llama/llama3/quantization/loader.py       |   0
 .../models/llama/llama3/template_data.py      |   0
 .../models/llama/llama3/tokenizer.model       |   0
 .../models/llama/llama3/tokenizer.py          |   0
 .../models/llama/llama3/tool_utils.py         |   0
 .../models/llama/llama3_1/__init__.py         |   0
 .../models/llama/llama3_1/prompt_format.md    |   0
 .../models/llama/llama3_1/prompts.py          |   0
 .../models/llama/llama3_2/__init__.py         |   0
 .../models/llama/llama3_2/prompts_text.py     |   0
 .../models/llama/llama3_2/prompts_vision.py   |   0
 .../llama/llama3_2/text_prompt_format.md      |   0
 .../llama/llama3_2/vision_prompt_format.md    |   0
 .../models/llama/llama3_3/__init__.py         |   0
 .../models/llama/llama3_3/prompts.py          |   0
 .../models/llama/llama4/__init__.py           |   0
 .../llama_stack}/models/llama/llama4/args.py  |   0
 .../models/llama/llama4/chat_format.py        |   0
 .../models/llama/llama4/datatypes.py          |   0
 .../llama_stack}/models/llama/llama4/ffn.py   |   0
 .../models/llama/llama4/generation.py         |   0
 .../llama_stack}/models/llama/llama4/model.py |   0
 .../llama_stack}/models/llama/llama4/moe.py   |   0
 .../models/llama/llama4/preprocess.py         |   0
 .../models/llama/llama4/prompt_format.md      |   0
 .../llama/llama4/prompt_templates/__init__.py |   0
 .../llama4/prompt_templates/system_prompts.py |   0
 .../models/llama/llama4/prompts.py            |   0
 .../llama/llama4/quantization/__init__.py     |   0
 .../llama/llama4/quantization/loader.py       |   0
 .../models/llama/llama4/tokenizer.model       |   0
 .../models/llama/llama4/tokenizer.py          |   0
 .../models/llama/llama4/vision/__init__.py    |   0
 .../models/llama/llama4/vision/embedding.py   |   0
 .../models/llama/llama4/vision/encoder.py     |   0
 .../models/llama/prompt_format.py             |   0
 .../models/llama/quantize_impls.py            |   0
 .../models/llama/resources/dog.jpg            | Bin
 .../models/llama/resources/pasta.jpeg         | Bin
 .../models/llama/resources/small_dog.jpg      | Bin
 .../llama_stack}/models/llama/sku_list.py     |   0
 .../llama_stack}/models/llama/sku_types.py    |   0
 .../models/llama/tokenizer_utils.py           |   0
 .../llama_stack}/providers/__init__.py        |   0
 .../llama_stack}/providers/datatypes.py       |   0
 .../llama_stack}/providers/inline/__init__.py |   0
 .../providers/inline/agents/__init__.py       |   0
 .../inline/agents/meta_reference/__init__.py  |   0
 .../agents/meta_reference/agent_instance.py   |   0
 .../inline/agents/meta_reference/agents.py    |   0
 .../inline/agents/meta_reference/config.py    |   0
 .../agents/meta_reference/persistence.py      |   0
 .../meta_reference/responses/__init__.py      |   0
 .../responses/openai_responses.py             |   0
 .../meta_reference/responses/streaming.py     |   0
 .../meta_reference/responses/tool_executor.py |   0
 .../agents/meta_reference/responses/types.py  |   0
 .../agents/meta_reference/responses/utils.py  |   0
 .../inline/agents/meta_reference/safety.py    |   0
 .../providers/inline/batches/__init__.py      |   0
 .../inline/batches/reference/__init__.py      |   0
 .../inline/batches/reference/batches.py       |   0
 .../inline/batches/reference/config.py        |   0
 .../providers/inline/datasetio/__init__.py    |   0
 .../inline/datasetio/localfs/__init__.py      |   0
 .../inline/datasetio/localfs/config.py        |   0
 .../inline/datasetio/localfs/datasetio.py     |   0
 .../providers/inline/eval/__init__.py         |   0
 .../inline/eval/meta_reference/__init__.py    |   0
 .../inline/eval/meta_reference/config.py      |   0
 .../inline/eval/meta_reference/eval.py        |   0
 .../inline/files/localfs/__init__.py          |   0
 .../providers/inline/files/localfs/config.py  |   0
 .../providers/inline/files/localfs/files.py   |   0
 .../providers/inline/inference/__init__.py    |   0
 .../inference/meta_reference/__init__.py      |   0
 .../inline/inference/meta_reference/common.py |   0
 .../inline/inference/meta_reference/config.py |   0
 .../inference/meta_reference/generators.py    |   0
 .../inference/meta_reference/inference.py     |   0
 .../meta_reference/model_parallel.py          |   0
 .../meta_reference/parallel_utils.py          |   0
 .../sentence_transformers/__init__.py         |   0
 .../inference/sentence_transformers/config.py |   0
 .../sentence_transformers.py                  |   0
 .../project.pbxproj                           |   0
 .../contents.xcworkspacedata                  |   0
 .../xcshareddata/IDEWorkspaceChecks.plist     |   0
 .../LocalInferenceImpl/LocalInference.h       |   0
 .../LocalInferenceImpl/LocalInference.swift   |   0
 .../LocalInferenceImpl/Parsing.swift          |   0
 .../LocalInferenceImpl/PromptTemplate.swift   |   0
 .../LocalInferenceImpl/SystemPrompts.swift    |   0
 .../inline/post_training/__init__.py          |   0
 .../inline/post_training/common/__init__.py   |   0
 .../inline/post_training/common/utils.py      |   0
 .../inline/post_training/common/validator.py  |   0
 .../post_training/huggingface/__init__.py     |   0
 .../post_training/huggingface/config.py       |   0
 .../huggingface/post_training.py              |   0
 .../huggingface/recipes/__init__.py           |   0
 .../recipes/finetune_single_device.py         |   0
 .../recipes/finetune_single_device_dpo.py     |   0
 .../inline/post_training/huggingface/utils.py |   0
 .../post_training/torchtune/__init__.py       |   0
 .../torchtune/common/__init__.py              |   0
 .../torchtune/common/checkpointer.py          |   0
 .../post_training/torchtune/common/utils.py   |   0
 .../inline/post_training/torchtune/config.py  |   0
 .../torchtune/datasets/__init__.py            |   0
 .../torchtune/datasets/format_adapter.py      |   0
 .../post_training/torchtune/datasets/sft.py   |   0
 .../post_training/torchtune/post_training.py  |   0
 .../torchtune/recipes/__init__.py             |   0
 .../recipes/lora_finetuning_single_device.py  |   0
 .../providers/inline/safety/__init__.py       |   0
 .../inline/safety/code_scanner/__init__.py    |   0
 .../safety/code_scanner/code_scanner.py       |   0
 .../inline/safety/code_scanner/config.py      |   0
 .../inline/safety/llama_guard/__init__.py     |   0
 .../inline/safety/llama_guard/config.py       |   0
 .../inline/safety/llama_guard/llama_guard.py  |   0
 .../inline/safety/prompt_guard/__init__.py    |   0
 .../inline/safety/prompt_guard/config.py      |   0
 .../safety/prompt_guard/prompt_guard.py       |   0
 .../providers/inline/scoring/__init__.py      |   0
 .../inline/scoring/basic/__init__.py          |   0
 .../providers/inline/scoring/basic/config.py  |   0
 .../providers/inline/scoring/basic/scoring.py |   0
 .../scoring/basic/scoring_fn/__init__.py      |   0
 .../basic/scoring_fn/docvqa_scoring_fn.py     |   0
 .../basic/scoring_fn/equality_scoring_fn.py   |   0
 .../basic/scoring_fn/fn_defs/__init__.py      |   0
 .../basic/scoring_fn/fn_defs/docvqa.py        |   0
 .../basic/scoring_fn/fn_defs/equality.py      |   0
 .../basic/scoring_fn/fn_defs/ifeval.py        |   0
 .../fn_defs/regex_parser_math_response.py     |   0
 .../regex_parser_multiple_choice_answer.py    |   0
 .../basic/scoring_fn/fn_defs/subset_of.py     |   0
 .../basic/scoring_fn/ifeval_scoring_fn.py     |   0
 .../regex_parser_math_response_scoring_fn.py  |   0
 .../scoring_fn/regex_parser_scoring_fn.py     |   0
 .../basic/scoring_fn/subset_of_scoring_fn.py  |   0
 .../inline/scoring/basic/utils/__init__.py    |   0
 .../scoring/basic/utils/ifeval_utils.py       |   0
 .../inline/scoring/basic/utils/math_utils.py  |   0
 .../inline/scoring/braintrust/__init__.py     |   0
 .../inline/scoring/braintrust/braintrust.py   |   0
 .../inline/scoring/braintrust/config.py       |   0
 .../scoring/braintrust/scoring_fn/__init__.py |   0
 .../braintrust/scoring_fn/fn_defs/__init__.py |   0
 .../scoring_fn/fn_defs/answer_correctness.py  |   0
 .../scoring_fn/fn_defs/answer_relevancy.py    |   0
 .../scoring_fn/fn_defs/answer_similarity.py   |   0
 .../fn_defs/context_entity_recall.py          |   0
 .../scoring_fn/fn_defs/context_precision.py   |   0
 .../scoring_fn/fn_defs/context_recall.py      |   0
 .../scoring_fn/fn_defs/context_relevancy.py   |   0
 .../scoring_fn/fn_defs/factuality.py          |   0
 .../scoring_fn/fn_defs/faithfulness.py        |   0
 .../inline/scoring/llm_as_judge/__init__.py   |   0
 .../inline/scoring/llm_as_judge/config.py     |   0
 .../inline/scoring/llm_as_judge/scoring.py    |   0
 .../llm_as_judge/scoring_fn/__init__.py       |   0
 .../scoring_fn/fn_defs/__init__.py            |   0
 .../fn_defs/llm_as_judge_405b_simpleqa.py     |   0
 .../scoring_fn/fn_defs/llm_as_judge_base.py   |   0
 .../scoring_fn/llm_as_judge_scoring_fn.py     |   0
 .../providers/inline/tool_runtime/__init__.py |   0
 .../inline/tool_runtime/rag/__init__.py       |   0
 .../inline/tool_runtime/rag/config.py         |   0
 .../tool_runtime/rag/context_retriever.py     |   0
 .../inline/tool_runtime/rag/memory.py         |   0
 .../providers/inline/vector_io/__init__.py    |   0
 .../inline/vector_io/chroma/__init__.py       |   0
 .../inline/vector_io/chroma/config.py         |   0
 .../inline/vector_io/faiss/__init__.py        |   0
 .../inline/vector_io/faiss/config.py          |   0
 .../providers/inline/vector_io/faiss/faiss.py |   0
 .../inline/vector_io/milvus/__init__.py       |   0
 .../inline/vector_io/milvus/config.py         |   0
 .../inline/vector_io/qdrant/__init__.py       |   0
 .../inline/vector_io/qdrant/config.py         |   0
 .../inline/vector_io/sqlite_vec/__init__.py   |   0
 .../inline/vector_io/sqlite_vec/config.py     |   0
 .../inline/vector_io/sqlite_vec/sqlite_vec.py |   0
 .../providers/registry/__init__.py            |   0
 .../llama_stack}/providers/registry/agents.py |   0
 .../providers/registry/batches.py             |   0
 .../providers/registry/datasetio.py           |   0
 .../llama_stack}/providers/registry/eval.py   |   0
 .../llama_stack}/providers/registry/files.py  |   0
 .../providers/registry/inference.py           |   0
 .../providers/registry/post_training.py       |   0
 .../llama_stack}/providers/registry/safety.py |   0
 .../providers/registry/scoring.py             |   0
 .../providers/registry/tool_runtime.py        |   0
 .../providers/registry/vector_io.py           |   0
 .../llama_stack}/providers/remote/__init__.py |   0
 .../providers/remote/agents/__init__.py       |   0
 .../providers/remote/datasetio/__init__.py    |   0
 .../remote/datasetio/huggingface/__init__.py  |   0
 .../remote/datasetio/huggingface/config.py    |   0
 .../datasetio/huggingface/huggingface.py      |   0
 .../remote/datasetio/nvidia/README.md         |   0
 .../remote/datasetio/nvidia/__init__.py       |   0
 .../remote/datasetio/nvidia/config.py         |   0
 .../remote/datasetio/nvidia/datasetio.py      |   0
 .../providers/remote/eval/__init__.py         |   0
 .../providers/remote/eval/nvidia/README.md    |   0
 .../providers/remote/eval/nvidia/__init__.py  |   0
 .../providers/remote/eval/nvidia/config.py    |   0
 .../providers/remote/eval/nvidia/eval.py      |   0
 .../providers/remote/files/s3/README.md       |   0
 .../providers/remote/files/s3/__init__.py     |   0
 .../providers/remote/files/s3/config.py       |   0
 .../providers/remote/files/s3/files.py        |   0
 .../providers/remote/inference/__init__.py    |   0
 .../remote/inference/anthropic/__init__.py    |   0
 .../remote/inference/anthropic/anthropic.py   |   0
 .../remote/inference/anthropic/config.py      |   0
 .../remote/inference/azure/__init__.py        |   0
 .../providers/remote/inference/azure/azure.py |   0
 .../remote/inference/azure/config.py          |   0
 .../remote/inference/bedrock/__init__.py      |   0
 .../remote/inference/bedrock/bedrock.py       |   0
 .../remote/inference/bedrock/config.py        |   0
 .../remote/inference/bedrock/models.py        |   0
 .../remote/inference/cerebras/__init__.py     |   0
 .../remote/inference/cerebras/cerebras.py     |   0
 .../remote/inference/cerebras/config.py       |   0
 .../remote/inference/databricks/__init__.py   |   0
 .../remote/inference/databricks/config.py     |   0
 .../remote/inference/databricks/databricks.py |   0
 .../remote/inference/fireworks/__init__.py    |   0
 .../remote/inference/fireworks/config.py      |   0
 .../remote/inference/fireworks/fireworks.py   |   0
 .../remote/inference/gemini/__init__.py       |   0
 .../remote/inference/gemini/config.py         |   0
 .../remote/inference/gemini/gemini.py         |   0
 .../remote/inference/groq/__init__.py         |   0
 .../providers/remote/inference/groq/config.py |   0
 .../providers/remote/inference/groq/groq.py   |   0
 .../inference/llama_openai_compat/__init__.py |   0
 .../inference/llama_openai_compat/config.py   |   0
 .../inference/llama_openai_compat/llama.py    |   0
 .../remote/inference/nvidia/NVIDIA.md         |   0
 .../remote/inference/nvidia/__init__.py       |   0
 .../remote/inference/nvidia/config.py         |   0
 .../remote/inference/nvidia/nvidia.py         |   0
 .../remote/inference/nvidia/utils.py          |   0
 .../remote/inference/ollama/__init__.py       |   0
 .../remote/inference/ollama/config.py         |   0
 .../remote/inference/ollama/ollama.py         |   0
 .../remote/inference/openai/__init__.py       |   0
 .../remote/inference/openai/config.py         |   0
 .../remote/inference/openai/openai.py         |   0
 .../remote/inference/passthrough/__init__.py  |   0
 .../remote/inference/passthrough/config.py    |   0
 .../inference/passthrough/passthrough.py      |   0
 .../remote/inference/runpod/__init__.py       |   0
 .../remote/inference/runpod/config.py         |   0
 .../remote/inference/runpod/runpod.py         |   0
 .../remote/inference/sambanova/__init__.py    |   0
 .../remote/inference/sambanova/config.py      |   0
 .../remote/inference/sambanova/sambanova.py   |   0
 .../remote/inference/tgi/__init__.py          |   0
 .../providers/remote/inference/tgi/config.py  |   0
 .../providers/remote/inference/tgi/tgi.py     |   0
 .../remote/inference/together/__init__.py     |   0
 .../remote/inference/together/config.py       |   0
 .../remote/inference/together/together.py     |   0
 .../remote/inference/vertexai/__init__.py     |   0
 .../remote/inference/vertexai/config.py       |   0
 .../remote/inference/vertexai/vertexai.py     |   0
 .../remote/inference/vllm/__init__.py         |   0
 .../providers/remote/inference/vllm/config.py |   0
 .../providers/remote/inference/vllm/vllm.py   |   0
 .../remote/inference/watsonx/__init__.py      |   0
 .../remote/inference/watsonx/config.py        |   0
 .../remote/inference/watsonx/watsonx.py       |   0
 .../remote/post_training/__init__.py          |   0
 .../remote/post_training/nvidia/README.md     |   0
 .../remote/post_training/nvidia/__init__.py   |   0
 .../remote/post_training/nvidia/config.py     |   0
 .../remote/post_training/nvidia/models.py     |   0
 .../post_training/nvidia/post_training.py     |   0
 .../remote/post_training/nvidia/utils.py      |   0
 .../providers/remote/safety/__init__.py       |   0
 .../remote/safety/bedrock/__init__.py         |   0
 .../remote/safety/bedrock/bedrock.py          |   0
 .../providers/remote/safety/bedrock/config.py |   0
 .../providers/remote/safety/nvidia/README.md  |   0
 .../remote/safety/nvidia/__init__.py          |   0
 .../providers/remote/safety/nvidia/config.py  |   0
 .../providers/remote/safety/nvidia/nvidia.py  |   0
 .../remote/safety/sambanova/__init__.py       |   0
 .../remote/safety/sambanova/config.py         |   0
 .../remote/safety/sambanova/sambanova.py      |   0
 .../providers/remote/tool_runtime/__init__.py |   0
 .../tool_runtime/bing_search/__init__.py      |   0
 .../tool_runtime/bing_search/bing_search.py   |   0
 .../remote/tool_runtime/bing_search/config.py |   0
 .../tool_runtime/brave_search/__init__.py     |   0
 .../tool_runtime/brave_search/brave_search.py |   0
 .../tool_runtime/brave_search/config.py       |   0
 .../model_context_protocol/__init__.py        |   0
 .../model_context_protocol/config.py          |   0
 .../model_context_protocol.py                 |   0
 .../tool_runtime/tavily_search/__init__.py    |   0
 .../tool_runtime/tavily_search/config.py      |   0
 .../tavily_search/tavily_search.py            |   0
 .../tool_runtime/wolfram_alpha/__init__.py    |   0
 .../tool_runtime/wolfram_alpha/config.py      |   0
 .../wolfram_alpha/wolfram_alpha.py            |   0
 .../providers/remote/vector_io/__init__.py    |   0
 .../remote/vector_io/chroma/__init__.py       |   0
 .../remote/vector_io/chroma/chroma.py         |   0
 .../remote/vector_io/chroma/config.py         |   0
 .../remote/vector_io/milvus/__init__.py       |   0
 .../remote/vector_io/milvus/config.py         |   0
 .../remote/vector_io/milvus/milvus.py         |   0
 .../remote/vector_io/pgvector/__init__.py     |   0
 .../remote/vector_io/pgvector/config.py       |   0
 .../remote/vector_io/pgvector/pgvector.py     |   0
 .../remote/vector_io/qdrant/__init__.py       |   0
 .../remote/vector_io/qdrant/config.py         |   0
 .../remote/vector_io/qdrant/qdrant.py         |   0
 .../remote/vector_io/weaviate/__init__.py     |   0
 .../remote/vector_io/weaviate/config.py       |   0
 .../remote/vector_io/weaviate/weaviate.py     |   0
 .../llama_stack}/providers/utils/__init__.py  |   0
 .../providers/utils/bedrock/__init__.py       |   0
 .../providers/utils/bedrock/client.py         |   0
 .../providers/utils/bedrock/config.py         |   0
 .../utils/bedrock/refreshable_boto_session.py |   0
 .../providers/utils/common/__init__.py        |   0
 .../utils/common/data_schema_validator.py     |   0
 .../providers/utils/datasetio/__init__.py     |   0
 .../providers/utils/datasetio/url_utils.py    |   0
 .../providers/utils/files/__init__.py         |   0
 .../providers/utils/files/form_data.py        |   0
 .../providers/utils/inference/__init__.py     |   0
 .../utils/inference/embedding_mixin.py        |   0
 .../utils/inference/inference_store.py        |   0
 .../utils/inference/litellm_openai_mixin.py   |   0
 .../utils/inference/model_registry.py         |   0
 .../utils/inference/openai_compat.py          |   0
 .../providers/utils/inference/openai_mixin.py |   0
 .../utils/inference/prompt_adapter.py         |   0
 .../providers/utils/kvstore/__init__.py       |   0
 .../providers/utils/kvstore/api.py            |   0
 .../providers/utils/kvstore/config.py         |   0
 .../providers/utils/kvstore/kvstore.py        |   0
 .../utils/kvstore/mongodb/__init__.py         |   0
 .../utils/kvstore/mongodb/mongodb.py          |   0
 .../utils/kvstore/postgres/__init__.py        |   0
 .../utils/kvstore/postgres/postgres.py        |   0
 .../providers/utils/kvstore/redis/__init__.py |   0
 .../providers/utils/kvstore/redis/redis.py    |   0
 .../utils/kvstore/sqlite/__init__.py          |   0
 .../providers/utils/kvstore/sqlite/config.py  |   0
 .../providers/utils/kvstore/sqlite/sqlite.py  |   0
 .../providers/utils/memory/__init__.py        |   0
 .../providers/utils/memory/file_utils.py      |   0
 .../utils/memory/openai_vector_store_mixin.py |   0
 .../providers/utils/memory/vector_store.py    |   0
 .../providers/utils/pagination.py             |   0
 .../providers/utils/responses/__init__.py     |   0
 .../utils/responses/responses_store.py        |   0
 .../llama_stack}/providers/utils/scheduler.py |   0
 .../providers/utils/scoring/__init__.py       |   0
 .../utils/scoring/aggregation_utils.py        |   0
 .../utils/scoring/base_scoring_fn.py          |   0
 .../utils/scoring/basic_scoring_utils.py      |   0
 .../providers/utils/sqlstore/__init__.py      |   0
 .../providers/utils/sqlstore/api.py           |   0
 .../utils/sqlstore/authorized_sqlstore.py     |   0
 .../utils/sqlstore/sqlalchemy_sqlstore.py     |   0
 .../providers/utils/sqlstore/sqlstore.py      |   0
 .../providers/utils/tools/__init__.py         |   0
 .../llama_stack}/providers/utils/tools/mcp.py |   0
 .../providers/utils/tools/ttl_dict.py         |   0
 .../providers/utils/vector_io/__init__.py     |   0
 .../providers/utils/vector_io/vector_utils.py |   0
 .../llama_stack}/schema_utils.py              |   0
 .../llama_stack}/strong_typing/__init__.py    |   0
 .../llama_stack}/strong_typing/auxiliary.py   |  23 +-
 .../llama_stack}/strong_typing/classdef.py    |  96 +++---
 .../llama_stack}/strong_typing/core.py        |  12 +-
 .../strong_typing/deserializer.py             | 129 ++++----
 .../llama_stack}/strong_typing/docstring.py   |  33 +-
 .../llama_stack}/strong_typing/exception.py   |   0
 .../llama_stack}/strong_typing/inspection.py  | 111 ++++---
 .../llama_stack}/strong_typing/mapping.py     |   3 +-
 .../llama_stack}/strong_typing/name.py        |   8 +-
 .../llama_stack}/strong_typing/py.typed       |   0
 .../llama_stack}/strong_typing/schema.py      |  83 +++---
 .../strong_typing/serialization.py            |   4 +-
 .../llama_stack}/strong_typing/serializer.py  |  72 ++---
 .../llama_stack}/strong_typing/slots.py       |   8 +-
 .../llama_stack}/strong_typing/topological.py |  15 +-
 .../llama_stack}/testing/__init__.py          |   0
 .../llama_stack}/testing/api_recorder.py      |   0
 .../llama_stack}/ui/.gitignore                |   0
 {llama_stack => src/llama_stack}/ui/.nvmrc    |   0
 .../llama_stack}/ui/.prettierignore           |   0
 .../llama_stack}/ui/.prettierrc               |   0
 {llama_stack => src/llama_stack}/ui/README.md |   0
 .../ui/app/api/auth/[...nextauth]/route.ts    |   0
 .../ui/app/api/v1/[...path]/route.ts          |   0
 .../llama_stack}/ui/app/auth/signin/page.tsx  |   0
 .../chat-playground/chunk-processor.test.tsx  |   0
 .../ui/app/chat-playground/page.test.tsx      |   0
 .../ui/app/chat-playground/page.tsx           |   0
 .../llama_stack}/ui/app/globals.css           |   0
 .../llama_stack}/ui/app/layout.tsx            |   0
 .../app/logs/chat-completions/[id]/page.tsx   |   0
 .../ui/app/logs/chat-completions/layout.tsx   |   0
 .../ui/app/logs/chat-completions/page.tsx     |   0
 .../ui/app/logs/responses/[id]/page.tsx       |   0
 .../ui/app/logs/responses/layout.tsx          |   0
 .../ui/app/logs/responses/page.tsx            |   0
 .../contents/[contentId]/page.test.tsx        |   0
 .../[fileId]/contents/[contentId]/page.tsx    |   0
 .../files/[fileId]/contents/page.test.tsx     |   0
 .../[id]/files/[fileId]/contents/page.tsx     |   0
 .../[id]/files/[fileId]/page.test.tsx         |   0
 .../[id]/files/[fileId]/page.tsx              |   0
 .../ui/app/logs/vector-stores/[id]/page.tsx   |   0
 .../ui/app/logs/vector-stores/layout.tsx      |   0
 .../ui/app/logs/vector-stores/page.tsx        |   0
 .../llama_stack}/ui/app/page.tsx              |   0
 .../llama_stack}/ui/components.json           |   0
 .../chat-completion-detail.test.tsx           |   0
 .../chat-completion-detail.tsx                |   0
 .../chat-completion-table.test.tsx            |   0
 .../chat-completions-table.tsx                |   0
 .../chat-completions/chat-messasge-item.tsx   |   0
 .../chat-playground/chat-message.tsx          |   0
 .../ui/components/chat-playground/chat.tsx    |   0
 .../chat-playground/conversations.test.tsx    |   0
 .../chat-playground/conversations.tsx         |   0
 .../chat-playground/interrupt-prompt.tsx      |   0
 .../chat-playground/markdown-renderer.tsx     |   0
 .../chat-playground/message-components.tsx    |   0
 .../chat-playground/message-input.tsx         |   0
 .../chat-playground/message-list.tsx          |   0
 .../chat-playground/prompt-suggestions.tsx    |   0
 .../chat-playground/typing-indicator.tsx      |   0
 .../chat-playground/vector-db-creator.tsx     |   0
 .../ui/components/layout/app-sidebar.tsx      |   0
 .../ui/components/layout/detail-layout.tsx    |   0
 .../ui/components/layout/logs-layout.tsx      |   0
 .../ui/components/layout/page-breadcrumb.tsx  |   0
 .../logs/logs-table-scroll.test.tsx           |   0
 .../ui/components/logs/logs-table.test.tsx    |   0
 .../ui/components/logs/logs-table.tsx         |   0
 .../components/providers/session-provider.tsx |   0
 .../grouping/grouped-items-display.tsx        |   0
 .../responses/hooks/function-call-grouping.ts |   0
 .../responses/items/function-call-item.tsx    |   0
 .../responses/items/generic-item.tsx          |   0
 .../items/grouped-function-call-item.tsx      |   0
 .../ui/components/responses/items/index.ts    |   0
 .../responses/items/item-renderer.tsx         |   0
 .../responses/items/message-item.tsx          |   0
 .../responses/items/web-search-item.tsx       |   0
 .../responses/responses-detail.test.tsx       |   0
 .../components/responses/responses-detail.tsx |   0
 .../responses/responses-table.test.tsx        |   0
 .../components/responses/responses-table.tsx  |   0
 .../components/responses/utils/item-types.ts  |   0
 .../ui/components/ui/audio-visualizer.tsx     |   0
 .../ui/components/ui/breadcrumb.tsx           |   0
 .../llama_stack}/ui/components/ui/button.tsx  |   0
 .../llama_stack}/ui/components/ui/card.tsx    |   0
 .../ui/components/ui/collapsible.tsx          |   0
 .../ui/components/ui/copy-button.tsx          |   0
 .../ui/components/ui/dropdown-menu.tsx        |   0
 .../ui/components/ui/file-preview.tsx         |   0
 .../llama_stack}/ui/components/ui/input.tsx   |   0
 .../ui/components/ui/mode-toggle.tsx          |   0
 .../llama_stack}/ui/components/ui/select.tsx  |   0
 .../ui/components/ui/separator.tsx            |   0
 .../llama_stack}/ui/components/ui/sheet.tsx   |   0
 .../llama_stack}/ui/components/ui/sidebar.tsx |   0
 .../ui/components/ui/sign-in-button.tsx       |   0
 .../ui/components/ui/skeleton.tsx             |   0
 .../llama_stack}/ui/components/ui/sonner.tsx  |   0
 .../llama_stack}/ui/components/ui/table.tsx   |   0
 .../ui/components/ui/theme-provider.tsx       |   0
 .../llama_stack}/ui/components/ui/tooltip.tsx |   0
 .../vector-store-detail.test.tsx              |   0
 .../vector-stores/vector-store-detail.tsx     |   0
 .../ui/e2e/logs-table-scroll.spec.ts          |   0
 .../llama_stack}/ui/eslint.config.mjs         |   0
 .../ui/hooks/use-audio-recording.ts           |   0
 .../llama_stack}/ui/hooks/use-auth-client.ts  |   0
 .../llama_stack}/ui/hooks/use-auto-scroll.ts  |   0
 .../ui/hooks/use-autosize-textarea.ts         |   0
 .../ui/hooks/use-copy-to-clipboard.ts         |   0
 .../ui/hooks/use-infinite-scroll.ts           |   0
 .../llama_stack}/ui/hooks/use-mobile.ts       |   0
 .../llama_stack}/ui/hooks/use-pagination.ts   |   0
 .../llama_stack}/ui/instrumentation.ts        |   0
 .../llama_stack}/ui/jest.config.ts            |   0
 .../llama_stack}/ui/jest.setup.ts             |   0
 .../llama_stack}/ui/lib/audio-utils.ts        |   0
 .../llama_stack}/ui/lib/auth.ts               |   0
 .../llama_stack}/ui/lib/config-validator.ts   |   0
 .../llama_stack}/ui/lib/contents-api.ts       |   0
 .../ui/lib/format-message-content.test.ts     |   0
 .../ui/lib/format-message-content.ts          |   0
 .../llama_stack}/ui/lib/format-tool-call.tsx  |   0
 .../ui/lib/message-content-utils.ts           |   0
 .../llama_stack}/ui/lib/truncate-text.ts      |   0
 .../llama_stack}/ui/lib/types.ts              |   0
 .../llama_stack}/ui/lib/utils.tsx             |   0
 .../llama_stack}/ui/next.config.ts            |   0
 .../llama_stack}/ui/package-lock.json         |   0
 .../llama_stack}/ui/package.json              |   0
 .../llama_stack}/ui/playwright.config.ts      |   0
 .../llama_stack}/ui/postcss.config.mjs        |   0
 .../llama_stack}/ui/public/favicon.ico        | Bin
 .../llama_stack}/ui/public/file.svg           |   0
 .../llama_stack}/ui/public/globe.svg          |   0
 .../llama_stack}/ui/public/logo.webp          | Bin
 .../llama_stack}/ui/public/next.svg           |   0
 .../llama_stack}/ui/public/vercel.svg         |   0
 .../llama_stack}/ui/public/window.svg         |   0
 .../llama_stack}/ui/tsconfig.json             |   0
 .../llama_stack}/ui/types/next-auth.d.ts      |   0
 790 files changed, 2947 insertions(+), 447 deletions(-)
 rename {llama_stack => src/llama_stack}/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/agents/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/agents/agents.py (100%)
 rename {llama_stack => src/llama_stack}/apis/agents/openai_responses.py (100%)
 rename {llama_stack => src/llama_stack}/apis/batches/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/batches/batches.py (100%)
 rename {llama_stack => src/llama_stack}/apis/benchmarks/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/benchmarks/benchmarks.py (100%)
 rename {llama_stack => src/llama_stack}/apis/common/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/common/content_types.py (100%)
 rename {llama_stack => src/llama_stack}/apis/common/errors.py (100%)
 rename {llama_stack => src/llama_stack}/apis/common/job_types.py (100%)
 rename {llama_stack => src/llama_stack}/apis/common/responses.py (100%)
 rename {llama_stack => src/llama_stack}/apis/common/training_types.py (100%)
 rename {llama_stack => src/llama_stack}/apis/common/type_system.py (100%)
 rename {llama_stack => src/llama_stack}/apis/conversations/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/conversations/conversations.py (100%)
 rename {llama_stack => src/llama_stack}/apis/datasetio/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/datasetio/datasetio.py (100%)
 rename {llama_stack => src/llama_stack}/apis/datasets/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/datasets/datasets.py (100%)
 rename {llama_stack => src/llama_stack}/apis/datatypes.py (100%)
 rename {llama_stack => src/llama_stack}/apis/eval/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/eval/eval.py (100%)
 rename {llama_stack => src/llama_stack}/apis/files/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/files/files.py (100%)
 rename {llama_stack => src/llama_stack}/apis/inference/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/inference/event_logger.py (100%)
 rename {llama_stack => src/llama_stack}/apis/inference/inference.py (100%)
 rename {llama_stack => src/llama_stack}/apis/inspect/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/inspect/inspect.py (100%)
 rename {llama_stack => src/llama_stack}/apis/models/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/models/models.py (100%)
 rename {llama_stack => src/llama_stack}/apis/post_training/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/post_training/post_training.py (100%)
 rename {llama_stack => src/llama_stack}/apis/prompts/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/prompts/prompts.py (100%)
 rename {llama_stack => src/llama_stack}/apis/providers/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/providers/providers.py (100%)
 rename {llama_stack => src/llama_stack}/apis/resource.py (100%)
 rename {llama_stack => src/llama_stack}/apis/safety/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/safety/safety.py (100%)
 rename {llama_stack => src/llama_stack}/apis/scoring/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/scoring/scoring.py (100%)
 rename {llama_stack => src/llama_stack}/apis/scoring_functions/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/scoring_functions/scoring_functions.py (100%)
 rename {llama_stack => src/llama_stack}/apis/shields/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/shields/shields.py (100%)
 rename {llama_stack => src/llama_stack}/apis/synthetic_data_generation/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/synthetic_data_generation/synthetic_data_generation.py (100%)
 rename {llama_stack => src/llama_stack}/apis/telemetry/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/telemetry/telemetry.py (100%)
 rename {llama_stack => src/llama_stack}/apis/tools/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/tools/rag_tool.py (100%)
 rename {llama_stack => src/llama_stack}/apis/tools/tools.py (100%)
 rename {llama_stack => src/llama_stack}/apis/vector_io/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/vector_io/vector_io.py (100%)
 rename {llama_stack => src/llama_stack}/apis/vector_stores/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/vector_stores/vector_stores.py (100%)
 rename {llama_stack => src/llama_stack}/apis/version.py (100%)
 rename {llama_stack => src/llama_stack}/cli/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/cli/llama.py (100%)
 rename {llama_stack => src/llama_stack}/cli/scripts/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/cli/scripts/install-wheel-from-presigned.sh (100%)
 rename {llama_stack => src/llama_stack}/cli/scripts/run.py (100%)
 rename {llama_stack => src/llama_stack}/cli/stack/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/cli/stack/_list_deps.py (100%)
 rename {llama_stack => src/llama_stack}/cli/stack/list_apis.py (100%)
 rename {llama_stack => src/llama_stack}/cli/stack/list_deps.py (100%)
 rename {llama_stack => src/llama_stack}/cli/stack/list_providers.py (100%)
 rename {llama_stack => src/llama_stack}/cli/stack/list_stacks.py (100%)
 rename {llama_stack => src/llama_stack}/cli/stack/remove.py (100%)
 rename {llama_stack => src/llama_stack}/cli/stack/run.py (100%)
 rename {llama_stack => src/llama_stack}/cli/stack/stack.py (100%)
 rename {llama_stack => src/llama_stack}/cli/stack/utils.py (100%)
 rename {llama_stack => src/llama_stack}/cli/subcommand.py (100%)
 rename {llama_stack => src/llama_stack}/cli/table.py (100%)
 rename {llama_stack => src/llama_stack}/cli/utils.py (100%)
 rename {llama_stack => src/llama_stack}/core/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/access_control/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/access_control/access_control.py (100%)
 rename {llama_stack => src/llama_stack}/core/access_control/conditions.py (100%)
 rename {llama_stack => src/llama_stack}/core/access_control/datatypes.py (100%)
 rename {llama_stack => src/llama_stack}/core/build.py (100%)
 rename {llama_stack => src/llama_stack}/core/client.py (100%)
 rename {llama_stack => src/llama_stack}/core/common.sh (100%)
 rename {llama_stack => src/llama_stack}/core/configure.py (100%)
 rename {llama_stack => src/llama_stack}/core/conversations/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/conversations/conversations.py (100%)
 rename {llama_stack => src/llama_stack}/core/datatypes.py (100%)
 rename {llama_stack => src/llama_stack}/core/distribution.py (100%)
 rename {llama_stack => src/llama_stack}/core/external.py (100%)
 rename {llama_stack => src/llama_stack}/core/id_generation.py (100%)
 rename {llama_stack => src/llama_stack}/core/inspect.py (100%)
 rename {llama_stack => src/llama_stack}/core/library_client.py (100%)
 rename {llama_stack => src/llama_stack}/core/prompts/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/prompts/prompts.py (100%)
 rename {llama_stack => src/llama_stack}/core/providers.py (100%)
 rename {llama_stack => src/llama_stack}/core/request_headers.py (100%)
 rename {llama_stack => src/llama_stack}/core/resolver.py (100%)
 rename {llama_stack => src/llama_stack}/core/routers/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/routers/datasets.py (100%)
 rename {llama_stack => src/llama_stack}/core/routers/eval_scoring.py (100%)
 rename {llama_stack => src/llama_stack}/core/routers/inference.py (100%)
 rename {llama_stack => src/llama_stack}/core/routers/safety.py (100%)
 rename {llama_stack => src/llama_stack}/core/routers/tool_runtime.py (100%)
 rename {llama_stack => src/llama_stack}/core/routers/vector_io.py (100%)
 rename {llama_stack => src/llama_stack}/core/routing_tables/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/routing_tables/benchmarks.py (100%)
 rename {llama_stack => src/llama_stack}/core/routing_tables/common.py (100%)
 rename {llama_stack => src/llama_stack}/core/routing_tables/datasets.py (100%)
 rename {llama_stack => src/llama_stack}/core/routing_tables/models.py (100%)
 rename {llama_stack => src/llama_stack}/core/routing_tables/scoring_functions.py (100%)
 rename {llama_stack => src/llama_stack}/core/routing_tables/shields.py (100%)
 rename {llama_stack => src/llama_stack}/core/routing_tables/toolgroups.py (100%)
 rename {llama_stack => src/llama_stack}/core/routing_tables/vector_stores.py (100%)
 rename {llama_stack => src/llama_stack}/core/server/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/server/auth.py (100%)
 rename {llama_stack => src/llama_stack}/core/server/auth_providers.py (100%)
 rename {llama_stack => src/llama_stack}/core/server/quota.py (100%)
 rename {llama_stack => src/llama_stack}/core/server/routes.py (100%)
 rename {llama_stack => src/llama_stack}/core/server/server.py (100%)
 rename {llama_stack => src/llama_stack}/core/server/tracing.py (100%)
 rename {llama_stack => src/llama_stack}/core/stack.py (100%)
 rename {llama_stack => src/llama_stack}/core/start_stack.sh (100%)
 rename {llama_stack => src/llama_stack}/core/storage/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/storage/datatypes.py (100%)
 rename {llama_stack => src/llama_stack}/core/store/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/store/registry.py (100%)
 rename {llama_stack => src/llama_stack}/core/telemetry/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/telemetry/telemetry.py (100%)
 rename {llama_stack => src/llama_stack}/core/telemetry/trace_protocol.py (100%)
 rename {llama_stack => src/llama_stack}/core/telemetry/tracing.py (100%)
 rename {llama_stack => src/llama_stack}/core/testing_context.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/Containerfile (100%)
 rename {llama_stack => src/llama_stack}/core/ui/README.md (100%)
 rename {llama_stack => src/llama_stack}/core/ui/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/app.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/modules/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/modules/api.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/modules/utils.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/distribution/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/distribution/datasets.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/distribution/eval_tasks.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/distribution/models.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/distribution/providers.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/distribution/resources.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/distribution/scoring_functions.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/distribution/shields.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/evaluations/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/evaluations/app_eval.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/evaluations/native_eval.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/playground/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/playground/chat.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/playground/tools.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/requirements.txt (100%)
 rename {llama_stack => src/llama_stack}/core/utils/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/utils/config.py (100%)
 rename {llama_stack => src/llama_stack}/core/utils/config_dirs.py (100%)
 rename {llama_stack => src/llama_stack}/core/utils/config_resolution.py (100%)
 rename {llama_stack => src/llama_stack}/core/utils/context.py (100%)
 rename {llama_stack => src/llama_stack}/core/utils/dynamic.py (100%)
 rename {llama_stack => src/llama_stack}/core/utils/exec.py (100%)
 rename {llama_stack => src/llama_stack}/core/utils/image_types.py (100%)
 rename {llama_stack => src/llama_stack}/core/utils/model_utils.py (100%)
 rename {llama_stack => src/llama_stack}/core/utils/prompt_for_config.py (100%)
 rename {llama_stack => src/llama_stack}/core/utils/serialize.py (100%)
 rename {llama_stack => src/llama_stack}/distributions/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/distributions/ci-tests/__init__.py (100%)
 create mode 100644 src/llama_stack/distributions/ci-tests/build.yaml
 rename {llama_stack => src/llama_stack}/distributions/ci-tests/ci_tests.py (100%)
 create mode 100644 src/llama_stack/distributions/ci-tests/run.yaml
 rename {llama_stack => src/llama_stack}/distributions/dell/__init__.py (100%)
 create mode 100644 src/llama_stack/distributions/dell/build.yaml
 rename {llama_stack => src/llama_stack}/distributions/dell/dell.py (100%)
 rename {llama_stack => src/llama_stack}/distributions/dell/doc_template.md (100%)
 create mode 100644 src/llama_stack/distributions/dell/run-with-safety.yaml
 create mode 100644 src/llama_stack/distributions/dell/run.yaml
 rename {llama_stack => src/llama_stack}/distributions/meta-reference-gpu/__init__.py (100%)
 create mode 100644 src/llama_stack/distributions/meta-reference-gpu/build.yaml
 rename {llama_stack => src/llama_stack}/distributions/meta-reference-gpu/doc_template.md (100%)
 rename {llama_stack => src/llama_stack}/distributions/meta-reference-gpu/meta_reference.py (100%)
 create mode 100644 src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml
 create mode 100644 src/llama_stack/distributions/meta-reference-gpu/run.yaml
 rename {llama_stack => src/llama_stack}/distributions/nvidia/__init__.py (100%)
 create mode 100644 src/llama_stack/distributions/nvidia/build.yaml
 rename {llama_stack => src/llama_stack}/distributions/nvidia/doc_template.md (100%)
 rename {llama_stack => src/llama_stack}/distributions/nvidia/nvidia.py (100%)
 create mode 100644 src/llama_stack/distributions/nvidia/run-with-safety.yaml
 create mode 100644 src/llama_stack/distributions/nvidia/run.yaml
 rename {llama_stack => src/llama_stack}/distributions/open-benchmark/__init__.py (100%)
 create mode 100644 src/llama_stack/distributions/open-benchmark/build.yaml
 rename {llama_stack => src/llama_stack}/distributions/open-benchmark/open_benchmark.py (100%)
 create mode 100644 src/llama_stack/distributions/open-benchmark/run.yaml
 rename {llama_stack => src/llama_stack}/distributions/postgres-demo/__init__.py (100%)
 create mode 100644 src/llama_stack/distributions/postgres-demo/build.yaml
 rename {llama_stack => src/llama_stack}/distributions/postgres-demo/postgres_demo.py (100%)
 create mode 100644 src/llama_stack/distributions/postgres-demo/run.yaml
 rename {llama_stack => src/llama_stack}/distributions/starter-gpu/__init__.py (100%)
 create mode 100644 src/llama_stack/distributions/starter-gpu/build.yaml
 create mode 100644 src/llama_stack/distributions/starter-gpu/run.yaml
 rename {llama_stack => src/llama_stack}/distributions/starter-gpu/starter_gpu.py (100%)
 rename {llama_stack => src/llama_stack}/distributions/starter/__init__.py (100%)
 create mode 100644 src/llama_stack/distributions/starter/build.yaml
 create mode 100644 src/llama_stack/distributions/starter/run.yaml
 rename {llama_stack => src/llama_stack}/distributions/starter/starter.py (100%)
 rename {llama_stack => src/llama_stack}/distributions/template.py (100%)
 rename {llama_stack => src/llama_stack}/distributions/watsonx/__init__.py (100%)
 create mode 100644 src/llama_stack/distributions/watsonx/build.yaml
 create mode 100644 src/llama_stack/distributions/watsonx/run.yaml
 rename {llama_stack => src/llama_stack}/distributions/watsonx/watsonx.py (100%)
 rename {llama_stack => src/llama_stack}/env.py (100%)
 rename {llama_stack => src/llama_stack}/log.py (100%)
 rename {llama_stack => src/llama_stack}/models/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/checkpoint.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/datatypes.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/hadamard_utils.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/args.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/chat_format.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/dog.jpg (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/generation.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/interface.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/model.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/multimodal/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/multimodal/encoder_utils.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/multimodal/image_transform.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/multimodal/model.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/multimodal/utils.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/pasta.jpeg (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/prompt_templates/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/prompt_templates/base.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/prompt_templates/system_prompts.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/prompt_templates/tool_response.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/quantization/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/quantization/loader.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/template_data.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/tokenizer.model (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/tokenizer.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/tool_utils.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3_1/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3_1/prompt_format.md (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3_1/prompts.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3_2/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3_2/prompts_text.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3_2/prompts_vision.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3_2/text_prompt_format.md (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3_2/vision_prompt_format.md (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3_3/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3_3/prompts.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/args.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/chat_format.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/datatypes.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/ffn.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/generation.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/model.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/moe.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/preprocess.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/prompt_format.md (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/prompt_templates/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/prompt_templates/system_prompts.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/prompts.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/quantization/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/quantization/loader.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/tokenizer.model (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/tokenizer.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/vision/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/vision/embedding.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/vision/encoder.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/prompt_format.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/quantize_impls.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/resources/dog.jpg (100%)
 rename {llama_stack => src/llama_stack}/models/llama/resources/pasta.jpeg (100%)
 rename {llama_stack => src/llama_stack}/models/llama/resources/small_dog.jpg (100%)
 rename {llama_stack => src/llama_stack}/models/llama/sku_list.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/sku_types.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/tokenizer_utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/datatypes.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/agent_instance.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/agents.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/persistence.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/responses/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/responses/openai_responses.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/responses/streaming.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/responses/tool_executor.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/responses/types.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/responses/utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/safety.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/batches/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/batches/reference/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/batches/reference/batches.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/batches/reference/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/datasetio/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/datasetio/localfs/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/datasetio/localfs/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/datasetio/localfs/datasetio.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/eval/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/eval/meta_reference/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/eval/meta_reference/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/eval/meta_reference/eval.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/files/localfs/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/files/localfs/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/files/localfs/files.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/meta_reference/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/meta_reference/common.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/meta_reference/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/meta_reference/generators.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/meta_reference/inference.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/meta_reference/model_parallel.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/meta_reference/parallel_utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/sentence_transformers/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/sentence_transformers/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/sentence_transformers/sentence_transformers.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/common/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/common/utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/common/validator.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/huggingface/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/huggingface/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/huggingface/post_training.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/huggingface/recipes/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/huggingface/recipes/finetune_single_device.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/huggingface/utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/common/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/common/checkpointer.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/common/utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/datasets/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/datasets/format_adapter.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/datasets/sft.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/post_training.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/recipes/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/safety/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/safety/code_scanner/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/safety/code_scanner/code_scanner.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/safety/code_scanner/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/safety/llama_guard/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/safety/llama_guard/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/safety/llama_guard/llama_guard.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/safety/prompt_guard/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/safety/prompt_guard/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/safety/prompt_guard/prompt_guard.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/utils/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/utils/ifeval_utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/utils/math_utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/braintrust.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/llm_as_judge/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/llm_as_judge/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/llm_as_judge/scoring.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/tool_runtime/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/tool_runtime/rag/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/tool_runtime/rag/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/tool_runtime/rag/context_retriever.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/tool_runtime/rag/memory.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/chroma/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/chroma/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/faiss/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/faiss/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/faiss/faiss.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/milvus/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/milvus/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/qdrant/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/qdrant/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/sqlite_vec/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/sqlite_vec/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/sqlite_vec/sqlite_vec.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/agents.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/batches.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/datasetio.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/eval.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/files.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/inference.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/post_training.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/safety.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/scoring.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/tool_runtime.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/vector_io.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/agents/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/datasetio/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/datasetio/huggingface/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/datasetio/huggingface/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/datasetio/huggingface/huggingface.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/datasetio/nvidia/README.md (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/datasetio/nvidia/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/datasetio/nvidia/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/datasetio/nvidia/datasetio.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/eval/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/eval/nvidia/README.md (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/eval/nvidia/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/eval/nvidia/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/eval/nvidia/eval.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/files/s3/README.md (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/files/s3/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/files/s3/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/files/s3/files.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/anthropic/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/anthropic/anthropic.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/anthropic/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/azure/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/azure/azure.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/azure/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/bedrock/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/bedrock/bedrock.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/bedrock/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/bedrock/models.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/cerebras/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/cerebras/cerebras.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/cerebras/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/databricks/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/databricks/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/databricks/databricks.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/fireworks/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/fireworks/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/fireworks/fireworks.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/gemini/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/gemini/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/gemini/gemini.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/groq/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/groq/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/groq/groq.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/llama_openai_compat/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/llama_openai_compat/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/llama_openai_compat/llama.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/nvidia/NVIDIA.md (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/nvidia/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/nvidia/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/nvidia/nvidia.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/nvidia/utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/ollama/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/ollama/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/ollama/ollama.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/openai/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/openai/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/openai/openai.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/passthrough/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/passthrough/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/passthrough/passthrough.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/runpod/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/runpod/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/runpod/runpod.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/sambanova/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/sambanova/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/sambanova/sambanova.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/tgi/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/tgi/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/tgi/tgi.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/together/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/together/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/together/together.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/vertexai/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/vertexai/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/vertexai/vertexai.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/vllm/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/vllm/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/vllm/vllm.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/watsonx/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/watsonx/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/watsonx/watsonx.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/post_training/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/post_training/nvidia/README.md (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/post_training/nvidia/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/post_training/nvidia/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/post_training/nvidia/models.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/post_training/nvidia/post_training.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/post_training/nvidia/utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/bedrock/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/bedrock/bedrock.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/bedrock/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/nvidia/README.md (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/nvidia/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/nvidia/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/nvidia/nvidia.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/sambanova/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/sambanova/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/sambanova/sambanova.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/bing_search/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/bing_search/bing_search.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/bing_search/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/brave_search/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/brave_search/brave_search.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/brave_search/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/model_context_protocol/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/model_context_protocol/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/tavily_search/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/tavily_search/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/tavily_search/tavily_search.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/wolfram_alpha/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/wolfram_alpha/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/chroma/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/chroma/chroma.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/chroma/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/milvus/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/milvus/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/milvus/milvus.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/pgvector/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/pgvector/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/pgvector/pgvector.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/qdrant/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/qdrant/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/qdrant/qdrant.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/weaviate/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/weaviate/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/weaviate/weaviate.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/bedrock/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/bedrock/client.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/bedrock/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/bedrock/refreshable_boto_session.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/common/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/common/data_schema_validator.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/datasetio/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/datasetio/url_utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/files/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/files/form_data.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/inference/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/inference/embedding_mixin.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/inference/inference_store.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/inference/litellm_openai_mixin.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/inference/model_registry.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/inference/openai_compat.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/inference/openai_mixin.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/inference/prompt_adapter.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/api.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/kvstore.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/mongodb/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/mongodb/mongodb.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/postgres/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/postgres/postgres.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/redis/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/redis/redis.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/sqlite/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/sqlite/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/sqlite/sqlite.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/memory/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/memory/file_utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/memory/openai_vector_store_mixin.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/memory/vector_store.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/pagination.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/responses/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/responses/responses_store.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/scheduler.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/scoring/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/scoring/aggregation_utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/scoring/base_scoring_fn.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/scoring/basic_scoring_utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/sqlstore/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/sqlstore/api.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/sqlstore/authorized_sqlstore.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/sqlstore/sqlalchemy_sqlstore.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/sqlstore/sqlstore.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/tools/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/tools/mcp.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/tools/ttl_dict.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/vector_io/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/vector_io/vector_utils.py (100%)
 rename {llama_stack => src/llama_stack}/schema_utils.py (100%)
 rename {llama_stack => src/llama_stack}/strong_typing/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/strong_typing/auxiliary.py (89%)
 rename {llama_stack => src/llama_stack}/strong_typing/classdef.py (86%)
 rename {llama_stack => src/llama_stack}/strong_typing/core.py (80%)
 rename {llama_stack => src/llama_stack}/strong_typing/deserializer.py (89%)
 rename {llama_stack => src/llama_stack}/strong_typing/docstring.py (93%)
 rename {llama_stack => src/llama_stack}/strong_typing/exception.py (100%)
 rename {llama_stack => src/llama_stack}/strong_typing/inspection.py (91%)
 rename {llama_stack => src/llama_stack}/strong_typing/mapping.py (89%)
 rename {llama_stack => src/llama_stack}/strong_typing/name.py (95%)
 rename {llama_stack => src/llama_stack}/strong_typing/py.typed (100%)
 rename {llama_stack => src/llama_stack}/strong_typing/schema.py (92%)
 rename {llama_stack => src/llama_stack}/strong_typing/serialization.py (95%)
 rename {llama_stack => src/llama_stack}/strong_typing/serializer.py (87%)
 rename {llama_stack => src/llama_stack}/strong_typing/slots.py (71%)
 rename {llama_stack => src/llama_stack}/strong_typing/topological.py (88%)
 rename {llama_stack => src/llama_stack}/testing/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/testing/api_recorder.py (100%)
 rename {llama_stack => src/llama_stack}/ui/.gitignore (100%)
 rename {llama_stack => src/llama_stack}/ui/.nvmrc (100%)
 rename {llama_stack => src/llama_stack}/ui/.prettierignore (100%)
 rename {llama_stack => src/llama_stack}/ui/.prettierrc (100%)
 rename {llama_stack => src/llama_stack}/ui/README.md (100%)
 rename {llama_stack => src/llama_stack}/ui/app/api/auth/[...nextauth]/route.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/app/api/v1/[...path]/route.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/app/auth/signin/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/chat-playground/chunk-processor.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/chat-playground/page.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/chat-playground/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/globals.css (100%)
 rename {llama_stack => src/llama_stack}/ui/app/layout.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/chat-completions/[id]/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/chat-completions/layout.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/chat-completions/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/responses/[id]/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/responses/layout.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/responses/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/vector-stores/[id]/files/[fileId]/page.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/vector-stores/[id]/files/[fileId]/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/vector-stores/[id]/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/vector-stores/layout.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/vector-stores/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components.json (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-completions/chat-completion-detail.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-completions/chat-completion-detail.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-completions/chat-completion-table.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-completions/chat-completions-table.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-completions/chat-messasge-item.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/chat-message.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/chat.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/conversations.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/conversations.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/interrupt-prompt.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/markdown-renderer.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/message-components.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/message-input.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/message-list.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/prompt-suggestions.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/typing-indicator.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/vector-db-creator.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/layout/app-sidebar.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/layout/detail-layout.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/layout/logs-layout.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/layout/page-breadcrumb.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/logs/logs-table-scroll.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/logs/logs-table.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/logs/logs-table.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/providers/session-provider.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/grouping/grouped-items-display.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/hooks/function-call-grouping.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/items/function-call-item.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/items/generic-item.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/items/grouped-function-call-item.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/items/index.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/items/item-renderer.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/items/message-item.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/items/web-search-item.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/responses-detail.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/responses-detail.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/responses-table.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/responses-table.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/utils/item-types.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/audio-visualizer.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/breadcrumb.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/button.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/card.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/collapsible.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/copy-button.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/dropdown-menu.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/file-preview.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/input.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/mode-toggle.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/select.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/separator.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/sheet.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/sidebar.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/sign-in-button.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/skeleton.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/sonner.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/table.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/theme-provider.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/tooltip.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/vector-stores/vector-store-detail.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/vector-stores/vector-store-detail.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/e2e/logs-table-scroll.spec.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/eslint.config.mjs (100%)
 rename {llama_stack => src/llama_stack}/ui/hooks/use-audio-recording.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/hooks/use-auth-client.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/hooks/use-auto-scroll.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/hooks/use-autosize-textarea.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/hooks/use-copy-to-clipboard.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/hooks/use-infinite-scroll.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/hooks/use-mobile.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/hooks/use-pagination.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/instrumentation.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/jest.config.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/jest.setup.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/audio-utils.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/auth.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/config-validator.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/contents-api.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/format-message-content.test.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/format-message-content.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/format-tool-call.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/message-content-utils.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/truncate-text.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/types.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/utils.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/next.config.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/package-lock.json (100%)
 rename {llama_stack => src/llama_stack}/ui/package.json (100%)
 rename {llama_stack => src/llama_stack}/ui/playwright.config.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/postcss.config.mjs (100%)
 rename {llama_stack => src/llama_stack}/ui/public/favicon.ico (100%)
 rename {llama_stack => src/llama_stack}/ui/public/file.svg (100%)
 rename {llama_stack => src/llama_stack}/ui/public/globe.svg (100%)
 rename {llama_stack => src/llama_stack}/ui/public/logo.webp (100%)
 rename {llama_stack => src/llama_stack}/ui/public/next.svg (100%)
 rename {llama_stack => src/llama_stack}/ui/public/vercel.svg (100%)
 rename {llama_stack => src/llama_stack}/ui/public/window.svg (100%)
 rename {llama_stack => src/llama_stack}/ui/tsconfig.json (100%)
 rename {llama_stack => src/llama_stack}/ui/types/next-auth.d.ts (100%)

diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows/integration-auth-tests.yml
index c13ed6cbe..a7917e824 100644
--- a/.github/workflows/integration-auth-tests.yml
+++ b/.github/workflows/integration-auth-tests.yml
@@ -9,7 +9,7 @@ on:
     branches: [ main ]
     paths:
       - 'distributions/**'
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack/ui/**'
       - 'tests/integration/**'
       - 'uv.lock'
diff --git a/.github/workflows/integration-sql-store-tests.yml b/.github/workflows/integration-sql-store-tests.yml
index 47f6d546a..0653b3fa8 100644
--- a/.github/workflows/integration-sql-store-tests.yml
+++ b/.github/workflows/integration-sql-store-tests.yml
@@ -8,7 +8,7 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/providers/utils/sqlstore/**'
+      - 'src/llama_stack/providers/utils/sqlstore/**'
       - 'tests/integration/sqlstore/**'
       - 'uv.lock'
       - 'pyproject.toml'
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index d38e8337b..36a75cbe4 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -9,7 +9,7 @@ on:
     branches: [ main ]
     types: [opened, synchronize, reopened]
     paths:
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack/ui/**'
       - 'tests/**'
       - 'uv.lock'
diff --git a/.github/workflows/integration-vector-io-tests.yml b/.github/workflows/integration-vector-io-tests.yml
index ee837a159..9a0f82058 100644
--- a/.github/workflows/integration-vector-io-tests.yml
+++ b/.github/workflows/integration-vector-io-tests.yml
@@ -8,7 +8,7 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack/ui/**'
       - 'tests/integration/vector_io/**'
       - 'uv.lock'
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index 0fdd50acc..99ef87196 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -41,11 +41,11 @@ jobs:
         with:
           node-version: '20'
           cache: 'npm'
-          cache-dependency-path: 'llama_stack/ui/'
+          cache-dependency-path: 'src/llama_stack/ui/'
 
       - name: Install npm dependencies
         run: npm ci
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack/ui
 
       - name: Run pre-commit
         id: precommit
diff --git a/.github/workflows/precommit-trigger.yml b/.github/workflows/precommit-trigger.yml
index b05898d29..502230448 100644
--- a/.github/workflows/precommit-trigger.yml
+++ b/.github/workflows/precommit-trigger.yml
@@ -145,12 +145,12 @@ jobs:
         with:
           node-version: '20'
           cache: 'npm'
-          cache-dependency-path: 'llama_stack/ui/'
+          cache-dependency-path: 'src/llama_stack/ui/'
 
       - name: Install npm dependencies
         if: steps.check_author.outputs.authorized == 'true'
         run: npm ci
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack/ui
 
       - name: Run pre-commit
         if: steps.check_author.outputs.authorized == 'true'
diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml
index ffc44f9c1..f116a0a52 100644
--- a/.github/workflows/providers-build.yml
+++ b/.github/workflows/providers-build.yml
@@ -7,24 +7,24 @@ on:
     branches:
       - main
     paths:
-      - 'llama_stack/cli/stack/build.py'
-      - 'llama_stack/cli/stack/_build.py'
-      - 'llama_stack/core/build.*'
-      - 'llama_stack/core/*.sh'
+      - 'src/llama_stack/cli/stack/build.py'
+      - 'src/llama_stack/cli/stack/_build.py'
+      - 'src/llama_stack/core/build.*'
+      - 'src/llama_stack/core/*.sh'
       - '.github/workflows/providers-build.yml'
-      - 'llama_stack/distributions/**'
+      - 'src/llama_stack/distributions/**'
       - 'pyproject.toml'
       - 'containers/Containerfile'
       - '.dockerignore'
 
   pull_request:
     paths:
-      - 'llama_stack/cli/stack/build.py'
-      - 'llama_stack/cli/stack/_build.py'
-      - 'llama_stack/core/build.*'
-      - 'llama_stack/core/*.sh'
+      - 'src/llama_stack/cli/stack/build.py'
+      - 'src/llama_stack/cli/stack/_build.py'
+      - 'src/llama_stack/core/build.*'
+      - 'src/llama_stack/core/*.sh'
       - '.github/workflows/providers-build.yml'
-      - 'llama_stack/distributions/**'
+      - 'src/llama_stack/distributions/**'
       - 'pyproject.toml'
       - 'containers/Containerfile'
       - '.dockerignore'
@@ -45,7 +45,7 @@ jobs:
       - name: Generate Distribution List
         id: set-matrix
         run: |
-          distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
+          distros=$(ls src/llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
           echo "distros=$distros" >> "$GITHUB_OUTPUT"
 
   build:
@@ -107,7 +107,7 @@ jobs:
 
       - name: Build container image
         run: |
-          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' llama_stack/distributions/ci-tests/build.yaml)
+          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' src/llama_stack/distributions/ci-tests/build.yaml)
           docker build . \
             -f containers/Containerfile \
             --build-arg INSTALL_MODE=editable \
@@ -143,11 +143,11 @@ jobs:
         run: |
           yq -i '
             .distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest"
-          ' llama_stack/distributions/ci-tests/build.yaml
+          ' src/llama_stack/distributions/ci-tests/build.yaml
 
       - name: Build UBI9 container image
         run: |
-          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' llama_stack/distributions/ci-tests/build.yaml)
+          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' src/llama_stack/distributions/ci-tests/build.yaml)
           docker build . \
             -f containers/Containerfile \
             --build-arg INSTALL_MODE=editable \
diff --git a/.github/workflows/providers-list-deps.yml b/.github/workflows/providers-list-deps.yml
index e30e1e5fb..88659dbe3 100644
--- a/.github/workflows/providers-list-deps.yml
+++ b/.github/workflows/providers-list-deps.yml
@@ -7,22 +7,22 @@ on:
     branches:
       - main
     paths:
-      - 'llama_stack/cli/stack/list_deps.py'
-      - 'llama_stack/cli/stack/_list_deps.py'
-      - 'llama_stack/core/build.*'
-      - 'llama_stack/core/*.sh'
+      - 'src/llama_stack/cli/stack/list_deps.py'
+      - 'src/llama_stack/cli/stack/_list_deps.py'
+      - 'src/llama_stack/core/build.*'
+      - 'src/llama_stack/core/*.sh'
       - '.github/workflows/providers-list-deps.yml'
-      - 'llama_stack/templates/**'
+      - 'src/llama_stack/templates/**'
       - 'pyproject.toml'
 
   pull_request:
     paths:
-      - 'llama_stack/cli/stack/list_deps.py'
-      - 'llama_stack/cli/stack/_list_deps.py'
-      - 'llama_stack/core/build.*'
-      - 'llama_stack/core/*.sh'
+      - 'src/llama_stack/cli/stack/list_deps.py'
+      - 'src/llama_stack/cli/stack/_list_deps.py'
+      - 'src/llama_stack/core/build.*'
+      - 'src/llama_stack/core/*.sh'
       - '.github/workflows/providers-list-deps.yml'
-      - 'llama_stack/templates/**'
+      - 'src/llama_stack/templates/**'
       - 'pyproject.toml'
 
 concurrency:
@@ -41,7 +41,7 @@ jobs:
       - name: Generate Distribution List
         id: set-matrix
         run: |
-          distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
+          distros=$(ls src/llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
           echo "distros=$distros" >> "$GITHUB_OUTPUT"
 
   list-deps:
@@ -102,4 +102,4 @@ jobs:
           USE_COPY_NOT_MOUNT: "true"
           LLAMA_STACK_DIR: "."
         run: |
-          uv run llama stack list-deps llama_stack/distributions/ci-tests/build.yaml
+          uv run llama stack list-deps src/llama_stack/distributions/ci-tests/build.yaml
diff --git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml
index e36ea8780..49caea6b3 100644
--- a/.github/workflows/python-build-test.yml
+++ b/.github/workflows/python-build-test.yml
@@ -10,7 +10,7 @@ on:
     branches:
       - main
     paths-ignore:
-        - 'llama_stack/ui/**'
+        - 'src/llama_stack/ui/**'
 
 jobs:
   build:
diff --git a/.github/workflows/test-external-provider-module.yml b/.github/workflows/test-external-provider-module.yml
index ded29dc8f..39f2356aa 100644
--- a/.github/workflows/test-external-provider-module.yml
+++ b/.github/workflows/test-external-provider-module.yml
@@ -8,7 +8,7 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/**'
+      - 'src/llama_stack/**'
       - 'tests/integration/**'
       - 'uv.lock'
       - 'pyproject.toml'
diff --git a/.github/workflows/test-external.yml b/.github/workflows/test-external.yml
index 19cc2057d..b31e6ac55 100644
--- a/.github/workflows/test-external.yml
+++ b/.github/workflows/test-external.yml
@@ -8,7 +8,7 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/**'
+      - 'src/llama_stack/**'
       - '!llama_stack/ui/**'
       - 'tests/integration/**'
       - 'uv.lock'
diff --git a/.github/workflows/ui-unit-tests.yml b/.github/workflows/ui-unit-tests.yml
index e8f318b8e..a2ae1c2c3 100644
--- a/.github/workflows/ui-unit-tests.yml
+++ b/.github/workflows/ui-unit-tests.yml
@@ -8,7 +8,7 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/ui/**'
+      - 'src/llama_stack/ui/**'
       - '.github/workflows/ui-unit-tests.yml' # This workflow
   workflow_dispatch:
 
@@ -33,22 +33,22 @@ jobs:
         with:
           node-version: ${{ matrix.node-version }}
           cache: 'npm'
-          cache-dependency-path: 'llama_stack/ui/package-lock.json'
+          cache-dependency-path: 'src/llama_stack/ui/package-lock.json'
 
       - name: Install dependencies
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack/ui
         run: npm ci
 
       - name: Run linting
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack/ui
         run: npm run lint
 
       - name: Run format check
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack/ui
         run: npm run format:check
 
       - name: Run unit tests
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack/ui
         env:
           CI: true
 
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index 4c3b68624..61c11b0be 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -8,7 +8,7 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack/ui/**'
       - 'tests/unit/**'
       - 'uv.lock'
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b7880a9fc..1aac22f6c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -42,7 +42,7 @@ repos:
     hooks:
     -   id: ruff
         args: [ --fix ]
-        exclude: ^llama_stack/strong_typing/.*$
+        exclude: ^src/llama_stack/strong_typing/.*$
     -   id: ruff-format
 
 -   repo: https://github.com/adamchainz/blacken-docs
@@ -86,7 +86,7 @@ repos:
         language: python
         pass_filenames: false
         require_serial: true
-        files: ^llama_stack/distributions/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
+        files: ^src/llama_stack/distributions/.*$|^src/llama_stack/providers/.*/inference/.*/models\.py$
       - id: provider-codegen
         name: Provider Codegen
         additional_dependencies:
@@ -95,7 +95,7 @@ repos:
         language: python
         pass_filenames: false
         require_serial: true
-        files: ^llama_stack/providers/.*$
+        files: ^src/llama_stack/providers/.*$
       - id: openapi-codegen
         name: API Spec Codegen
         additional_dependencies:
@@ -104,7 +104,7 @@ repos:
         language: python
         pass_filenames: false
         require_serial: true
-        files: ^llama_stack/apis/|^docs/openapi_generator/
+        files: ^src/llama_stack/apis/|^docs/openapi_generator/
       - id: check-workflows-use-hashes
         name: Check GitHub Actions use SHA-pinned actions
         entry: ./scripts/check-workflows-use-hashes.sh
@@ -120,7 +120,7 @@ repos:
         pass_filenames: false
         require_serial: true
         always_run: true
-        files: ^llama_stack/.*$
+        files: ^src/llama_stack/.*$
       - id: forbid-pytest-asyncio
         name: Block @pytest.mark.asyncio and @pytest_asyncio.fixture
         entry: bash
@@ -150,7 +150,7 @@ repos:
         name: Format & Lint UI
         entry: bash ./scripts/run-ui-linter.sh
         language: system
-        files: ^llama_stack/ui/.*\.(ts|tsx)$
+        files: ^src/llama_stack/ui/.*\.(ts|tsx)$
         pass_filenames: false
         require_serial: true
 
diff --git a/MANIFEST.in b/MANIFEST.in
index b10795c92..09206f2fb 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,11 +1,11 @@
 include pyproject.toml
-include llama_stack/models/llama/llama3/tokenizer.model
-include llama_stack/models/llama/llama4/tokenizer.model
-include llama_stack/core/*.sh
-include llama_stack/cli/scripts/*.sh
-include llama_stack/distributions/*/*.yaml
-exclude llama_stack/distributions/ci-tests
+include src/llama_stack/models/llama/llama3/tokenizer.model
+include src/llama_stack/models/llama/llama4/tokenizer.model
+include src/llama_stack/core/*.sh
+include src/llama_stack/cli/scripts/*.sh
+include src/llama_stack/distributions/*/*.yaml
+exclude src/llama_stack/distributions/ci-tests
 include tests/integration/test_cases/inference/*.json
-include llama_stack/models/llama/*/*.md
-include llama_stack/tests/integration/*.jpg
-prune llama_stack/distributions/ci-tests
+include src/llama_stack/models/llama/*/*.md
+include src/llama_stack/tests/integration/*.jpg
+prune src/llama_stack/distributions/ci-tests
diff --git a/pyproject.toml b/pyproject.toml
index 741dd17e5..9b26f7ae8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -150,7 +150,7 @@ llama = "llama_stack.cli.llama:main"
 install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_presigned"
 
 [tool.setuptools.packages.find]
-where = ["."]
+where = ["src"]
 include = ["llama_stack", "llama_stack.*"]
 
 [[tool.uv.index]]
@@ -217,17 +217,17 @@ unfixable = [
 # Ignore the following errors for the following files
 [tool.ruff.lint.per-file-ignores]
 "tests/**/*.py" = ["DTZ"] # Ignore datetime rules for tests
-"llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py" = ["RUF001"]
-"llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py" = [
+"src/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py" = ["RUF001"]
+"src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py" = [
     "RUF001",
     "PLE2515",
 ]
-"llama_stack/apis/**/__init__.py" = [
+"src/llama_stack/apis/**/__init__.py" = [
     "F403",
 ] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API
 
 [tool.mypy]
-mypy_path = ["llama_stack"]
+mypy_path = ["src"]
 packages = ["llama_stack"]
 plugins = ['pydantic.mypy']
 disable_error_code = []
@@ -239,77 +239,77 @@ follow_imports = "silent"
 # to exclude the entire directory.
 exclude = [
     # As we fix more and more of these, we should remove them from the list
-    "^llama_stack.core/build\\.py$",
-    "^llama_stack.core/client\\.py$",
-    "^llama_stack.core/request_headers\\.py$",
-    "^llama_stack.core/routers/",
-    "^llama_stack.core/routing_tables/",
-    "^llama_stack.core/server/endpoints\\.py$",
-    "^llama_stack.core/server/server\\.py$",
-    "^llama_stack.core/stack\\.py$",
-    "^llama_stack.core/store/registry\\.py$",
-    "^llama_stack.core/utils/exec\\.py$",
-    "^llama_stack.core/utils/prompt_for_config\\.py$",
-    "^llama_stack/models/llama/llama3/interface\\.py$",
-    "^llama_stack/models/llama/llama3/tokenizer\\.py$",
-    "^llama_stack/models/llama/llama3/tool_utils\\.py$",
-    "^llama_stack/providers/inline/agents/meta_reference/",
-    "^llama_stack/providers/inline/datasetio/localfs/",
-    "^llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
-    "^llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
-    "^llama_stack/models/llama/llama3/generation\\.py$",
-    "^llama_stack/models/llama/llama3/multimodal/model\\.py$",
-    "^llama_stack/models/llama/llama4/",
-    "^llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$",
-    "^llama_stack/providers/inline/post_training/common/validator\\.py$",
-    "^llama_stack/providers/inline/safety/code_scanner/",
-    "^llama_stack/providers/inline/safety/llama_guard/",
-    "^llama_stack/providers/inline/scoring/basic/",
-    "^llama_stack/providers/inline/scoring/braintrust/",
-    "^llama_stack/providers/inline/scoring/llm_as_judge/",
-    "^llama_stack/providers/remote/agents/sample/",
-    "^llama_stack/providers/remote/datasetio/huggingface/",
-    "^llama_stack/providers/remote/datasetio/nvidia/",
-    "^llama_stack/providers/remote/inference/bedrock/",
-    "^llama_stack/providers/remote/inference/nvidia/",
-    "^llama_stack/providers/remote/inference/passthrough/",
-    "^llama_stack/providers/remote/inference/runpod/",
-    "^llama_stack/providers/remote/inference/tgi/",
-    "^llama_stack/providers/remote/inference/watsonx/",
-    "^llama_stack/providers/remote/safety/bedrock/",
-    "^llama_stack/providers/remote/safety/nvidia/",
-    "^llama_stack/providers/remote/safety/sambanova/",
-    "^llama_stack/providers/remote/safety/sample/",
-    "^llama_stack/providers/remote/tool_runtime/bing_search/",
-    "^llama_stack/providers/remote/tool_runtime/brave_search/",
-    "^llama_stack/providers/remote/tool_runtime/model_context_protocol/",
-    "^llama_stack/providers/remote/tool_runtime/tavily_search/",
-    "^llama_stack/providers/remote/tool_runtime/wolfram_alpha/",
-    "^llama_stack/providers/remote/post_training/nvidia/",
-    "^llama_stack/providers/remote/vector_io/chroma/",
-    "^llama_stack/providers/remote/vector_io/milvus/",
-    "^llama_stack/providers/remote/vector_io/pgvector/",
-    "^llama_stack/providers/remote/vector_io/qdrant/",
-    "^llama_stack/providers/remote/vector_io/sample/",
-    "^llama_stack/providers/remote/vector_io/weaviate/",
-    "^llama_stack/providers/utils/bedrock/client\\.py$",
-    "^llama_stack/providers/utils/bedrock/refreshable_boto_session\\.py$",
-    "^llama_stack/providers/utils/inference/embedding_mixin\\.py$",
-    "^llama_stack/providers/utils/inference/litellm_openai_mixin\\.py$",
-    "^llama_stack/providers/utils/inference/model_registry\\.py$",
-    "^llama_stack/providers/utils/inference/openai_compat\\.py$",
-    "^llama_stack/providers/utils/inference/prompt_adapter\\.py$",
-    "^llama_stack/providers/utils/kvstore/kvstore\\.py$",
-    "^llama_stack/providers/utils/kvstore/postgres/postgres\\.py$",
-    "^llama_stack/providers/utils/kvstore/redis/redis\\.py$",
-    "^llama_stack/providers/utils/memory/vector_store\\.py$",
-    "^llama_stack/providers/utils/scoring/aggregation_utils\\.py$",
-    "^llama_stack/providers/utils/scoring/base_scoring_fn\\.py$",
-    "^llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
-    "^llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
-    "^llama_stack/providers/utils/telemetry/tracing\\.py$",
-    "^llama_stack/strong_typing/auxiliary\\.py$",
-    "^llama_stack/distributions/template\\.py$",
+    "^src/llama_stack/core/build\\.py$",
+    "^src/llama_stack/core/client\\.py$",
+    "^src/llama_stack/core/request_headers\\.py$",
+    "^src/llama_stack/core/routers/",
+    "^src/llama_stack/core/routing_tables/",
+    "^src/llama_stack/core/server/endpoints\\.py$",
+    "^src/llama_stack/core/server/server\\.py$",
+    "^src/llama_stack/core/stack\\.py$",
+    "^src/llama_stack/core/store/registry\\.py$",
+    "^src/llama_stack/core/utils/exec\\.py$",
+    "^src/llama_stack/core/utils/prompt_for_config\\.py$",
+    "^src/llama_stack/models/llama/llama3/interface\\.py$",
+    "^src/llama_stack/models/llama/llama3/tokenizer\\.py$",
+    "^src/llama_stack/models/llama/llama3/tool_utils\\.py$",
+    "^src/llama_stack/providers/inline/agents/meta_reference/",
+    "^src/llama_stack/providers/inline/datasetio/localfs/",
+    "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
+    "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
+    "^src/llama_stack/models/llama/llama3/generation\\.py$",
+    "^src/llama_stack/models/llama/llama3/multimodal/model\\.py$",
+    "^src/llama_stack/models/llama/llama4/",
+    "^src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$",
+    "^src/llama_stack/providers/inline/post_training/common/validator\\.py$",
+    "^src/llama_stack/providers/inline/safety/code_scanner/",
+    "^src/llama_stack/providers/inline/safety/llama_guard/",
+    "^src/llama_stack/providers/inline/scoring/basic/",
+    "^src/llama_stack/providers/inline/scoring/braintrust/",
+    "^src/llama_stack/providers/inline/scoring/llm_as_judge/",
+    "^src/llama_stack/providers/remote/agents/sample/",
+    "^src/llama_stack/providers/remote/datasetio/huggingface/",
+    "^src/llama_stack/providers/remote/datasetio/nvidia/",
+    "^src/llama_stack/providers/remote/inference/bedrock/",
+    "^src/llama_stack/providers/remote/inference/nvidia/",
+    "^src/llama_stack/providers/remote/inference/passthrough/",
+    "^src/llama_stack/providers/remote/inference/runpod/",
+    "^src/llama_stack/providers/remote/inference/tgi/",
+    "^src/llama_stack/providers/remote/inference/watsonx/",
+    "^src/llama_stack/providers/remote/safety/bedrock/",
+    "^src/llama_stack/providers/remote/safety/nvidia/",
+    "^src/llama_stack/providers/remote/safety/sambanova/",
+    "^src/llama_stack/providers/remote/safety/sample/",
+    "^src/llama_stack/providers/remote/tool_runtime/bing_search/",
+    "^src/llama_stack/providers/remote/tool_runtime/brave_search/",
+    "^src/llama_stack/providers/remote/tool_runtime/model_context_protocol/",
+    "^src/llama_stack/providers/remote/tool_runtime/tavily_search/",
+    "^src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/",
+    "^src/llama_stack/providers/remote/post_training/nvidia/",
+    "^src/llama_stack/providers/remote/vector_io/chroma/",
+    "^src/llama_stack/providers/remote/vector_io/milvus/",
+    "^src/llama_stack/providers/remote/vector_io/pgvector/",
+    "^src/llama_stack/providers/remote/vector_io/qdrant/",
+    "^src/llama_stack/providers/remote/vector_io/sample/",
+    "^src/llama_stack/providers/remote/vector_io/weaviate/",
+    "^src/llama_stack/providers/utils/bedrock/client\\.py$",
+    "^src/llama_stack/providers/utils/bedrock/refreshable_boto_session\\.py$",
+    "^src/llama_stack/providers/utils/inference/embedding_mixin\\.py$",
+    "^src/llama_stack/providers/utils/inference/litellm_openai_mixin\\.py$",
+    "^src/llama_stack/providers/utils/inference/model_registry\\.py$",
+    "^src/llama_stack/providers/utils/inference/openai_compat\\.py$",
+    "^src/llama_stack/providers/utils/inference/prompt_adapter\\.py$",
+    "^src/llama_stack/providers/utils/kvstore/kvstore\\.py$",
+    "^src/llama_stack/providers/utils/kvstore/postgres/postgres\\.py$",
+    "^src/llama_stack/providers/utils/kvstore/redis/redis\\.py$",
+    "^src/llama_stack/providers/utils/memory/vector_store\\.py$",
+    "^src/llama_stack/providers/utils/scoring/aggregation_utils\\.py$",
+    "^src/llama_stack/providers/utils/scoring/base_scoring_fn\\.py$",
+    "^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
+    "^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
+    "^src/llama_stack/providers/utils/telemetry/tracing\\.py$",
+    "^src/llama_stack/strong_typing/auxiliary\\.py$",
+    "^src/llama_stack/distributions/template\\.py$",
 ]
 
 [[tool.mypy.overrides]]
diff --git a/scripts/check-init-py.sh b/scripts/check-init-py.sh
index c6e8fd417..76b3ed8e2 100755
--- a/scripts/check-init-py.sh
+++ b/scripts/check-init-py.sh
@@ -16,7 +16,7 @@ if (( BASH_VERSINFO[0] < 4 )); then
     exit 1
 fi
 
-PACKAGE_DIR="${1:-llama_stack}"
+PACKAGE_DIR="${1:-src/llama_stack}"
 
 if [ ! -d "$PACKAGE_DIR" ]; then
     echo "ERROR: Package directory '$PACKAGE_DIR' does not exist"
diff --git a/scripts/distro_codegen.py b/scripts/distro_codegen.py
index ff5025b78..68190c7f5 100755
--- a/scripts/distro_codegen.py
+++ b/scripts/distro_codegen.py
@@ -93,7 +93,7 @@ def pre_import_distros(distro_dirs: list[Path]) -> None:
 
 
 def main():
-    distros_dir = REPO_ROOT / "llama_stack" / "distributions"
+    distros_dir = REPO_ROOT / "src" / "llama_stack" / "distributions"
     change_tracker = ChangedPathTracker()
 
     with Progress(
diff --git a/scripts/run-ui-linter.sh b/scripts/run-ui-linter.sh
index 3ced4483b..b63c44e7a 100755
--- a/scripts/run-ui-linter.sh
+++ b/scripts/run-ui-linter.sh
@@ -6,7 +6,7 @@
 # the root directory of this source tree.
 
 set -e
-cd llama_stack/ui
+cd src/llama_stack/ui
 
 if [ ! -d node_modules ] || [ ! -x node_modules/.bin/prettier ] || [ ! -x node_modules/.bin/eslint ]; then
   echo "UI dependencies not installed, skipping prettier/linter check"
diff --git a/llama_stack/__init__.py b/src/llama_stack/__init__.py
similarity index 100%
rename from llama_stack/__init__.py
rename to src/llama_stack/__init__.py
diff --git a/llama_stack/apis/__init__.py b/src/llama_stack/apis/__init__.py
similarity index 100%
rename from llama_stack/apis/__init__.py
rename to src/llama_stack/apis/__init__.py
diff --git a/llama_stack/apis/agents/__init__.py b/src/llama_stack/apis/agents/__init__.py
similarity index 100%
rename from llama_stack/apis/agents/__init__.py
rename to src/llama_stack/apis/agents/__init__.py
diff --git a/llama_stack/apis/agents/agents.py b/src/llama_stack/apis/agents/agents.py
similarity index 100%
rename from llama_stack/apis/agents/agents.py
rename to src/llama_stack/apis/agents/agents.py
diff --git a/llama_stack/apis/agents/openai_responses.py b/src/llama_stack/apis/agents/openai_responses.py
similarity index 100%
rename from llama_stack/apis/agents/openai_responses.py
rename to src/llama_stack/apis/agents/openai_responses.py
diff --git a/llama_stack/apis/batches/__init__.py b/src/llama_stack/apis/batches/__init__.py
similarity index 100%
rename from llama_stack/apis/batches/__init__.py
rename to src/llama_stack/apis/batches/__init__.py
diff --git a/llama_stack/apis/batches/batches.py b/src/llama_stack/apis/batches/batches.py
similarity index 100%
rename from llama_stack/apis/batches/batches.py
rename to src/llama_stack/apis/batches/batches.py
diff --git a/llama_stack/apis/benchmarks/__init__.py b/src/llama_stack/apis/benchmarks/__init__.py
similarity index 100%
rename from llama_stack/apis/benchmarks/__init__.py
rename to src/llama_stack/apis/benchmarks/__init__.py
diff --git a/llama_stack/apis/benchmarks/benchmarks.py b/src/llama_stack/apis/benchmarks/benchmarks.py
similarity index 100%
rename from llama_stack/apis/benchmarks/benchmarks.py
rename to src/llama_stack/apis/benchmarks/benchmarks.py
diff --git a/llama_stack/apis/common/__init__.py b/src/llama_stack/apis/common/__init__.py
similarity index 100%
rename from llama_stack/apis/common/__init__.py
rename to src/llama_stack/apis/common/__init__.py
diff --git a/llama_stack/apis/common/content_types.py b/src/llama_stack/apis/common/content_types.py
similarity index 100%
rename from llama_stack/apis/common/content_types.py
rename to src/llama_stack/apis/common/content_types.py
diff --git a/llama_stack/apis/common/errors.py b/src/llama_stack/apis/common/errors.py
similarity index 100%
rename from llama_stack/apis/common/errors.py
rename to src/llama_stack/apis/common/errors.py
diff --git a/llama_stack/apis/common/job_types.py b/src/llama_stack/apis/common/job_types.py
similarity index 100%
rename from llama_stack/apis/common/job_types.py
rename to src/llama_stack/apis/common/job_types.py
diff --git a/llama_stack/apis/common/responses.py b/src/llama_stack/apis/common/responses.py
similarity index 100%
rename from llama_stack/apis/common/responses.py
rename to src/llama_stack/apis/common/responses.py
diff --git a/llama_stack/apis/common/training_types.py b/src/llama_stack/apis/common/training_types.py
similarity index 100%
rename from llama_stack/apis/common/training_types.py
rename to src/llama_stack/apis/common/training_types.py
diff --git a/llama_stack/apis/common/type_system.py b/src/llama_stack/apis/common/type_system.py
similarity index 100%
rename from llama_stack/apis/common/type_system.py
rename to src/llama_stack/apis/common/type_system.py
diff --git a/llama_stack/apis/conversations/__init__.py b/src/llama_stack/apis/conversations/__init__.py
similarity index 100%
rename from llama_stack/apis/conversations/__init__.py
rename to src/llama_stack/apis/conversations/__init__.py
diff --git a/llama_stack/apis/conversations/conversations.py b/src/llama_stack/apis/conversations/conversations.py
similarity index 100%
rename from llama_stack/apis/conversations/conversations.py
rename to src/llama_stack/apis/conversations/conversations.py
diff --git a/llama_stack/apis/datasetio/__init__.py b/src/llama_stack/apis/datasetio/__init__.py
similarity index 100%
rename from llama_stack/apis/datasetio/__init__.py
rename to src/llama_stack/apis/datasetio/__init__.py
diff --git a/llama_stack/apis/datasetio/datasetio.py b/src/llama_stack/apis/datasetio/datasetio.py
similarity index 100%
rename from llama_stack/apis/datasetio/datasetio.py
rename to src/llama_stack/apis/datasetio/datasetio.py
diff --git a/llama_stack/apis/datasets/__init__.py b/src/llama_stack/apis/datasets/__init__.py
similarity index 100%
rename from llama_stack/apis/datasets/__init__.py
rename to src/llama_stack/apis/datasets/__init__.py
diff --git a/llama_stack/apis/datasets/datasets.py b/src/llama_stack/apis/datasets/datasets.py
similarity index 100%
rename from llama_stack/apis/datasets/datasets.py
rename to src/llama_stack/apis/datasets/datasets.py
diff --git a/llama_stack/apis/datatypes.py b/src/llama_stack/apis/datatypes.py
similarity index 100%
rename from llama_stack/apis/datatypes.py
rename to src/llama_stack/apis/datatypes.py
diff --git a/llama_stack/apis/eval/__init__.py b/src/llama_stack/apis/eval/__init__.py
similarity index 100%
rename from llama_stack/apis/eval/__init__.py
rename to src/llama_stack/apis/eval/__init__.py
diff --git a/llama_stack/apis/eval/eval.py b/src/llama_stack/apis/eval/eval.py
similarity index 100%
rename from llama_stack/apis/eval/eval.py
rename to src/llama_stack/apis/eval/eval.py
diff --git a/llama_stack/apis/files/__init__.py b/src/llama_stack/apis/files/__init__.py
similarity index 100%
rename from llama_stack/apis/files/__init__.py
rename to src/llama_stack/apis/files/__init__.py
diff --git a/llama_stack/apis/files/files.py b/src/llama_stack/apis/files/files.py
similarity index 100%
rename from llama_stack/apis/files/files.py
rename to src/llama_stack/apis/files/files.py
diff --git a/llama_stack/apis/inference/__init__.py b/src/llama_stack/apis/inference/__init__.py
similarity index 100%
rename from llama_stack/apis/inference/__init__.py
rename to src/llama_stack/apis/inference/__init__.py
diff --git a/llama_stack/apis/inference/event_logger.py b/src/llama_stack/apis/inference/event_logger.py
similarity index 100%
rename from llama_stack/apis/inference/event_logger.py
rename to src/llama_stack/apis/inference/event_logger.py
diff --git a/llama_stack/apis/inference/inference.py b/src/llama_stack/apis/inference/inference.py
similarity index 100%
rename from llama_stack/apis/inference/inference.py
rename to src/llama_stack/apis/inference/inference.py
diff --git a/llama_stack/apis/inspect/__init__.py b/src/llama_stack/apis/inspect/__init__.py
similarity index 100%
rename from llama_stack/apis/inspect/__init__.py
rename to src/llama_stack/apis/inspect/__init__.py
diff --git a/llama_stack/apis/inspect/inspect.py b/src/llama_stack/apis/inspect/inspect.py
similarity index 100%
rename from llama_stack/apis/inspect/inspect.py
rename to src/llama_stack/apis/inspect/inspect.py
diff --git a/llama_stack/apis/models/__init__.py b/src/llama_stack/apis/models/__init__.py
similarity index 100%
rename from llama_stack/apis/models/__init__.py
rename to src/llama_stack/apis/models/__init__.py
diff --git a/llama_stack/apis/models/models.py b/src/llama_stack/apis/models/models.py
similarity index 100%
rename from llama_stack/apis/models/models.py
rename to src/llama_stack/apis/models/models.py
diff --git a/llama_stack/apis/post_training/__init__.py b/src/llama_stack/apis/post_training/__init__.py
similarity index 100%
rename from llama_stack/apis/post_training/__init__.py
rename to src/llama_stack/apis/post_training/__init__.py
diff --git a/llama_stack/apis/post_training/post_training.py b/src/llama_stack/apis/post_training/post_training.py
similarity index 100%
rename from llama_stack/apis/post_training/post_training.py
rename to src/llama_stack/apis/post_training/post_training.py
diff --git a/llama_stack/apis/prompts/__init__.py b/src/llama_stack/apis/prompts/__init__.py
similarity index 100%
rename from llama_stack/apis/prompts/__init__.py
rename to src/llama_stack/apis/prompts/__init__.py
diff --git a/llama_stack/apis/prompts/prompts.py b/src/llama_stack/apis/prompts/prompts.py
similarity index 100%
rename from llama_stack/apis/prompts/prompts.py
rename to src/llama_stack/apis/prompts/prompts.py
diff --git a/llama_stack/apis/providers/__init__.py b/src/llama_stack/apis/providers/__init__.py
similarity index 100%
rename from llama_stack/apis/providers/__init__.py
rename to src/llama_stack/apis/providers/__init__.py
diff --git a/llama_stack/apis/providers/providers.py b/src/llama_stack/apis/providers/providers.py
similarity index 100%
rename from llama_stack/apis/providers/providers.py
rename to src/llama_stack/apis/providers/providers.py
diff --git a/llama_stack/apis/resource.py b/src/llama_stack/apis/resource.py
similarity index 100%
rename from llama_stack/apis/resource.py
rename to src/llama_stack/apis/resource.py
diff --git a/llama_stack/apis/safety/__init__.py b/src/llama_stack/apis/safety/__init__.py
similarity index 100%
rename from llama_stack/apis/safety/__init__.py
rename to src/llama_stack/apis/safety/__init__.py
diff --git a/llama_stack/apis/safety/safety.py b/src/llama_stack/apis/safety/safety.py
similarity index 100%
rename from llama_stack/apis/safety/safety.py
rename to src/llama_stack/apis/safety/safety.py
diff --git a/llama_stack/apis/scoring/__init__.py b/src/llama_stack/apis/scoring/__init__.py
similarity index 100%
rename from llama_stack/apis/scoring/__init__.py
rename to src/llama_stack/apis/scoring/__init__.py
diff --git a/llama_stack/apis/scoring/scoring.py b/src/llama_stack/apis/scoring/scoring.py
similarity index 100%
rename from llama_stack/apis/scoring/scoring.py
rename to src/llama_stack/apis/scoring/scoring.py
diff --git a/llama_stack/apis/scoring_functions/__init__.py b/src/llama_stack/apis/scoring_functions/__init__.py
similarity index 100%
rename from llama_stack/apis/scoring_functions/__init__.py
rename to src/llama_stack/apis/scoring_functions/__init__.py
diff --git a/llama_stack/apis/scoring_functions/scoring_functions.py b/src/llama_stack/apis/scoring_functions/scoring_functions.py
similarity index 100%
rename from llama_stack/apis/scoring_functions/scoring_functions.py
rename to src/llama_stack/apis/scoring_functions/scoring_functions.py
diff --git a/llama_stack/apis/shields/__init__.py b/src/llama_stack/apis/shields/__init__.py
similarity index 100%
rename from llama_stack/apis/shields/__init__.py
rename to src/llama_stack/apis/shields/__init__.py
diff --git a/llama_stack/apis/shields/shields.py b/src/llama_stack/apis/shields/shields.py
similarity index 100%
rename from llama_stack/apis/shields/shields.py
rename to src/llama_stack/apis/shields/shields.py
diff --git a/llama_stack/apis/synthetic_data_generation/__init__.py b/src/llama_stack/apis/synthetic_data_generation/__init__.py
similarity index 100%
rename from llama_stack/apis/synthetic_data_generation/__init__.py
rename to src/llama_stack/apis/synthetic_data_generation/__init__.py
diff --git a/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py b/src/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
similarity index 100%
rename from llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
rename to src/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
diff --git a/llama_stack/apis/telemetry/__init__.py b/src/llama_stack/apis/telemetry/__init__.py
similarity index 100%
rename from llama_stack/apis/telemetry/__init__.py
rename to src/llama_stack/apis/telemetry/__init__.py
diff --git a/llama_stack/apis/telemetry/telemetry.py b/src/llama_stack/apis/telemetry/telemetry.py
similarity index 100%
rename from llama_stack/apis/telemetry/telemetry.py
rename to src/llama_stack/apis/telemetry/telemetry.py
diff --git a/llama_stack/apis/tools/__init__.py b/src/llama_stack/apis/tools/__init__.py
similarity index 100%
rename from llama_stack/apis/tools/__init__.py
rename to src/llama_stack/apis/tools/__init__.py
diff --git a/llama_stack/apis/tools/rag_tool.py b/src/llama_stack/apis/tools/rag_tool.py
similarity index 100%
rename from llama_stack/apis/tools/rag_tool.py
rename to src/llama_stack/apis/tools/rag_tool.py
diff --git a/llama_stack/apis/tools/tools.py b/src/llama_stack/apis/tools/tools.py
similarity index 100%
rename from llama_stack/apis/tools/tools.py
rename to src/llama_stack/apis/tools/tools.py
diff --git a/llama_stack/apis/vector_io/__init__.py b/src/llama_stack/apis/vector_io/__init__.py
similarity index 100%
rename from llama_stack/apis/vector_io/__init__.py
rename to src/llama_stack/apis/vector_io/__init__.py
diff --git a/llama_stack/apis/vector_io/vector_io.py b/src/llama_stack/apis/vector_io/vector_io.py
similarity index 100%
rename from llama_stack/apis/vector_io/vector_io.py
rename to src/llama_stack/apis/vector_io/vector_io.py
diff --git a/llama_stack/apis/vector_stores/__init__.py b/src/llama_stack/apis/vector_stores/__init__.py
similarity index 100%
rename from llama_stack/apis/vector_stores/__init__.py
rename to src/llama_stack/apis/vector_stores/__init__.py
diff --git a/llama_stack/apis/vector_stores/vector_stores.py b/src/llama_stack/apis/vector_stores/vector_stores.py
similarity index 100%
rename from llama_stack/apis/vector_stores/vector_stores.py
rename to src/llama_stack/apis/vector_stores/vector_stores.py
diff --git a/llama_stack/apis/version.py b/src/llama_stack/apis/version.py
similarity index 100%
rename from llama_stack/apis/version.py
rename to src/llama_stack/apis/version.py
diff --git a/llama_stack/cli/__init__.py b/src/llama_stack/cli/__init__.py
similarity index 100%
rename from llama_stack/cli/__init__.py
rename to src/llama_stack/cli/__init__.py
diff --git a/llama_stack/cli/llama.py b/src/llama_stack/cli/llama.py
similarity index 100%
rename from llama_stack/cli/llama.py
rename to src/llama_stack/cli/llama.py
diff --git a/llama_stack/cli/scripts/__init__.py b/src/llama_stack/cli/scripts/__init__.py
similarity index 100%
rename from llama_stack/cli/scripts/__init__.py
rename to src/llama_stack/cli/scripts/__init__.py
diff --git a/llama_stack/cli/scripts/install-wheel-from-presigned.sh b/src/llama_stack/cli/scripts/install-wheel-from-presigned.sh
similarity index 100%
rename from llama_stack/cli/scripts/install-wheel-from-presigned.sh
rename to src/llama_stack/cli/scripts/install-wheel-from-presigned.sh
diff --git a/llama_stack/cli/scripts/run.py b/src/llama_stack/cli/scripts/run.py
similarity index 100%
rename from llama_stack/cli/scripts/run.py
rename to src/llama_stack/cli/scripts/run.py
diff --git a/llama_stack/cli/stack/__init__.py b/src/llama_stack/cli/stack/__init__.py
similarity index 100%
rename from llama_stack/cli/stack/__init__.py
rename to src/llama_stack/cli/stack/__init__.py
diff --git a/llama_stack/cli/stack/_list_deps.py b/src/llama_stack/cli/stack/_list_deps.py
similarity index 100%
rename from llama_stack/cli/stack/_list_deps.py
rename to src/llama_stack/cli/stack/_list_deps.py
diff --git a/llama_stack/cli/stack/list_apis.py b/src/llama_stack/cli/stack/list_apis.py
similarity index 100%
rename from llama_stack/cli/stack/list_apis.py
rename to src/llama_stack/cli/stack/list_apis.py
diff --git a/llama_stack/cli/stack/list_deps.py b/src/llama_stack/cli/stack/list_deps.py
similarity index 100%
rename from llama_stack/cli/stack/list_deps.py
rename to src/llama_stack/cli/stack/list_deps.py
diff --git a/llama_stack/cli/stack/list_providers.py b/src/llama_stack/cli/stack/list_providers.py
similarity index 100%
rename from llama_stack/cli/stack/list_providers.py
rename to src/llama_stack/cli/stack/list_providers.py
diff --git a/llama_stack/cli/stack/list_stacks.py b/src/llama_stack/cli/stack/list_stacks.py
similarity index 100%
rename from llama_stack/cli/stack/list_stacks.py
rename to src/llama_stack/cli/stack/list_stacks.py
diff --git a/llama_stack/cli/stack/remove.py b/src/llama_stack/cli/stack/remove.py
similarity index 100%
rename from llama_stack/cli/stack/remove.py
rename to src/llama_stack/cli/stack/remove.py
diff --git a/llama_stack/cli/stack/run.py b/src/llama_stack/cli/stack/run.py
similarity index 100%
rename from llama_stack/cli/stack/run.py
rename to src/llama_stack/cli/stack/run.py
diff --git a/llama_stack/cli/stack/stack.py b/src/llama_stack/cli/stack/stack.py
similarity index 100%
rename from llama_stack/cli/stack/stack.py
rename to src/llama_stack/cli/stack/stack.py
diff --git a/llama_stack/cli/stack/utils.py b/src/llama_stack/cli/stack/utils.py
similarity index 100%
rename from llama_stack/cli/stack/utils.py
rename to src/llama_stack/cli/stack/utils.py
diff --git a/llama_stack/cli/subcommand.py b/src/llama_stack/cli/subcommand.py
similarity index 100%
rename from llama_stack/cli/subcommand.py
rename to src/llama_stack/cli/subcommand.py
diff --git a/llama_stack/cli/table.py b/src/llama_stack/cli/table.py
similarity index 100%
rename from llama_stack/cli/table.py
rename to src/llama_stack/cli/table.py
diff --git a/llama_stack/cli/utils.py b/src/llama_stack/cli/utils.py
similarity index 100%
rename from llama_stack/cli/utils.py
rename to src/llama_stack/cli/utils.py
diff --git a/llama_stack/core/__init__.py b/src/llama_stack/core/__init__.py
similarity index 100%
rename from llama_stack/core/__init__.py
rename to src/llama_stack/core/__init__.py
diff --git a/llama_stack/core/access_control/__init__.py b/src/llama_stack/core/access_control/__init__.py
similarity index 100%
rename from llama_stack/core/access_control/__init__.py
rename to src/llama_stack/core/access_control/__init__.py
diff --git a/llama_stack/core/access_control/access_control.py b/src/llama_stack/core/access_control/access_control.py
similarity index 100%
rename from llama_stack/core/access_control/access_control.py
rename to src/llama_stack/core/access_control/access_control.py
diff --git a/llama_stack/core/access_control/conditions.py b/src/llama_stack/core/access_control/conditions.py
similarity index 100%
rename from llama_stack/core/access_control/conditions.py
rename to src/llama_stack/core/access_control/conditions.py
diff --git a/llama_stack/core/access_control/datatypes.py b/src/llama_stack/core/access_control/datatypes.py
similarity index 100%
rename from llama_stack/core/access_control/datatypes.py
rename to src/llama_stack/core/access_control/datatypes.py
diff --git a/llama_stack/core/build.py b/src/llama_stack/core/build.py
similarity index 100%
rename from llama_stack/core/build.py
rename to src/llama_stack/core/build.py
diff --git a/llama_stack/core/client.py b/src/llama_stack/core/client.py
similarity index 100%
rename from llama_stack/core/client.py
rename to src/llama_stack/core/client.py
diff --git a/llama_stack/core/common.sh b/src/llama_stack/core/common.sh
similarity index 100%
rename from llama_stack/core/common.sh
rename to src/llama_stack/core/common.sh
diff --git a/llama_stack/core/configure.py b/src/llama_stack/core/configure.py
similarity index 100%
rename from llama_stack/core/configure.py
rename to src/llama_stack/core/configure.py
diff --git a/llama_stack/core/conversations/__init__.py b/src/llama_stack/core/conversations/__init__.py
similarity index 100%
rename from llama_stack/core/conversations/__init__.py
rename to src/llama_stack/core/conversations/__init__.py
diff --git a/llama_stack/core/conversations/conversations.py b/src/llama_stack/core/conversations/conversations.py
similarity index 100%
rename from llama_stack/core/conversations/conversations.py
rename to src/llama_stack/core/conversations/conversations.py
diff --git a/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py
similarity index 100%
rename from llama_stack/core/datatypes.py
rename to src/llama_stack/core/datatypes.py
diff --git a/llama_stack/core/distribution.py b/src/llama_stack/core/distribution.py
similarity index 100%
rename from llama_stack/core/distribution.py
rename to src/llama_stack/core/distribution.py
diff --git a/llama_stack/core/external.py b/src/llama_stack/core/external.py
similarity index 100%
rename from llama_stack/core/external.py
rename to src/llama_stack/core/external.py
diff --git a/llama_stack/core/id_generation.py b/src/llama_stack/core/id_generation.py
similarity index 100%
rename from llama_stack/core/id_generation.py
rename to src/llama_stack/core/id_generation.py
diff --git a/llama_stack/core/inspect.py b/src/llama_stack/core/inspect.py
similarity index 100%
rename from llama_stack/core/inspect.py
rename to src/llama_stack/core/inspect.py
diff --git a/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py
similarity index 100%
rename from llama_stack/core/library_client.py
rename to src/llama_stack/core/library_client.py
diff --git a/llama_stack/core/prompts/__init__.py b/src/llama_stack/core/prompts/__init__.py
similarity index 100%
rename from llama_stack/core/prompts/__init__.py
rename to src/llama_stack/core/prompts/__init__.py
diff --git a/llama_stack/core/prompts/prompts.py b/src/llama_stack/core/prompts/prompts.py
similarity index 100%
rename from llama_stack/core/prompts/prompts.py
rename to src/llama_stack/core/prompts/prompts.py
diff --git a/llama_stack/core/providers.py b/src/llama_stack/core/providers.py
similarity index 100%
rename from llama_stack/core/providers.py
rename to src/llama_stack/core/providers.py
diff --git a/llama_stack/core/request_headers.py b/src/llama_stack/core/request_headers.py
similarity index 100%
rename from llama_stack/core/request_headers.py
rename to src/llama_stack/core/request_headers.py
diff --git a/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py
similarity index 100%
rename from llama_stack/core/resolver.py
rename to src/llama_stack/core/resolver.py
diff --git a/llama_stack/core/routers/__init__.py b/src/llama_stack/core/routers/__init__.py
similarity index 100%
rename from llama_stack/core/routers/__init__.py
rename to src/llama_stack/core/routers/__init__.py
diff --git a/llama_stack/core/routers/datasets.py b/src/llama_stack/core/routers/datasets.py
similarity index 100%
rename from llama_stack/core/routers/datasets.py
rename to src/llama_stack/core/routers/datasets.py
diff --git a/llama_stack/core/routers/eval_scoring.py b/src/llama_stack/core/routers/eval_scoring.py
similarity index 100%
rename from llama_stack/core/routers/eval_scoring.py
rename to src/llama_stack/core/routers/eval_scoring.py
diff --git a/llama_stack/core/routers/inference.py b/src/llama_stack/core/routers/inference.py
similarity index 100%
rename from llama_stack/core/routers/inference.py
rename to src/llama_stack/core/routers/inference.py
diff --git a/llama_stack/core/routers/safety.py b/src/llama_stack/core/routers/safety.py
similarity index 100%
rename from llama_stack/core/routers/safety.py
rename to src/llama_stack/core/routers/safety.py
diff --git a/llama_stack/core/routers/tool_runtime.py b/src/llama_stack/core/routers/tool_runtime.py
similarity index 100%
rename from llama_stack/core/routers/tool_runtime.py
rename to src/llama_stack/core/routers/tool_runtime.py
diff --git a/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py
similarity index 100%
rename from llama_stack/core/routers/vector_io.py
rename to src/llama_stack/core/routers/vector_io.py
diff --git a/llama_stack/core/routing_tables/__init__.py b/src/llama_stack/core/routing_tables/__init__.py
similarity index 100%
rename from llama_stack/core/routing_tables/__init__.py
rename to src/llama_stack/core/routing_tables/__init__.py
diff --git a/llama_stack/core/routing_tables/benchmarks.py b/src/llama_stack/core/routing_tables/benchmarks.py
similarity index 100%
rename from llama_stack/core/routing_tables/benchmarks.py
rename to src/llama_stack/core/routing_tables/benchmarks.py
diff --git a/llama_stack/core/routing_tables/common.py b/src/llama_stack/core/routing_tables/common.py
similarity index 100%
rename from llama_stack/core/routing_tables/common.py
rename to src/llama_stack/core/routing_tables/common.py
diff --git a/llama_stack/core/routing_tables/datasets.py b/src/llama_stack/core/routing_tables/datasets.py
similarity index 100%
rename from llama_stack/core/routing_tables/datasets.py
rename to src/llama_stack/core/routing_tables/datasets.py
diff --git a/llama_stack/core/routing_tables/models.py b/src/llama_stack/core/routing_tables/models.py
similarity index 100%
rename from llama_stack/core/routing_tables/models.py
rename to src/llama_stack/core/routing_tables/models.py
diff --git a/llama_stack/core/routing_tables/scoring_functions.py b/src/llama_stack/core/routing_tables/scoring_functions.py
similarity index 100%
rename from llama_stack/core/routing_tables/scoring_functions.py
rename to src/llama_stack/core/routing_tables/scoring_functions.py
diff --git a/llama_stack/core/routing_tables/shields.py b/src/llama_stack/core/routing_tables/shields.py
similarity index 100%
rename from llama_stack/core/routing_tables/shields.py
rename to src/llama_stack/core/routing_tables/shields.py
diff --git a/llama_stack/core/routing_tables/toolgroups.py b/src/llama_stack/core/routing_tables/toolgroups.py
similarity index 100%
rename from llama_stack/core/routing_tables/toolgroups.py
rename to src/llama_stack/core/routing_tables/toolgroups.py
diff --git a/llama_stack/core/routing_tables/vector_stores.py b/src/llama_stack/core/routing_tables/vector_stores.py
similarity index 100%
rename from llama_stack/core/routing_tables/vector_stores.py
rename to src/llama_stack/core/routing_tables/vector_stores.py
diff --git a/llama_stack/core/server/__init__.py b/src/llama_stack/core/server/__init__.py
similarity index 100%
rename from llama_stack/core/server/__init__.py
rename to src/llama_stack/core/server/__init__.py
diff --git a/llama_stack/core/server/auth.py b/src/llama_stack/core/server/auth.py
similarity index 100%
rename from llama_stack/core/server/auth.py
rename to src/llama_stack/core/server/auth.py
diff --git a/llama_stack/core/server/auth_providers.py b/src/llama_stack/core/server/auth_providers.py
similarity index 100%
rename from llama_stack/core/server/auth_providers.py
rename to src/llama_stack/core/server/auth_providers.py
diff --git a/llama_stack/core/server/quota.py b/src/llama_stack/core/server/quota.py
similarity index 100%
rename from llama_stack/core/server/quota.py
rename to src/llama_stack/core/server/quota.py
diff --git a/llama_stack/core/server/routes.py b/src/llama_stack/core/server/routes.py
similarity index 100%
rename from llama_stack/core/server/routes.py
rename to src/llama_stack/core/server/routes.py
diff --git a/llama_stack/core/server/server.py b/src/llama_stack/core/server/server.py
similarity index 100%
rename from llama_stack/core/server/server.py
rename to src/llama_stack/core/server/server.py
diff --git a/llama_stack/core/server/tracing.py b/src/llama_stack/core/server/tracing.py
similarity index 100%
rename from llama_stack/core/server/tracing.py
rename to src/llama_stack/core/server/tracing.py
diff --git a/llama_stack/core/stack.py b/src/llama_stack/core/stack.py
similarity index 100%
rename from llama_stack/core/stack.py
rename to src/llama_stack/core/stack.py
diff --git a/llama_stack/core/start_stack.sh b/src/llama_stack/core/start_stack.sh
similarity index 100%
rename from llama_stack/core/start_stack.sh
rename to src/llama_stack/core/start_stack.sh
diff --git a/llama_stack/core/storage/__init__.py b/src/llama_stack/core/storage/__init__.py
similarity index 100%
rename from llama_stack/core/storage/__init__.py
rename to src/llama_stack/core/storage/__init__.py
diff --git a/llama_stack/core/storage/datatypes.py b/src/llama_stack/core/storage/datatypes.py
similarity index 100%
rename from llama_stack/core/storage/datatypes.py
rename to src/llama_stack/core/storage/datatypes.py
diff --git a/llama_stack/core/store/__init__.py b/src/llama_stack/core/store/__init__.py
similarity index 100%
rename from llama_stack/core/store/__init__.py
rename to src/llama_stack/core/store/__init__.py
diff --git a/llama_stack/core/store/registry.py b/src/llama_stack/core/store/registry.py
similarity index 100%
rename from llama_stack/core/store/registry.py
rename to src/llama_stack/core/store/registry.py
diff --git a/llama_stack/core/telemetry/__init__.py b/src/llama_stack/core/telemetry/__init__.py
similarity index 100%
rename from llama_stack/core/telemetry/__init__.py
rename to src/llama_stack/core/telemetry/__init__.py
diff --git a/llama_stack/core/telemetry/telemetry.py b/src/llama_stack/core/telemetry/telemetry.py
similarity index 100%
rename from llama_stack/core/telemetry/telemetry.py
rename to src/llama_stack/core/telemetry/telemetry.py
diff --git a/llama_stack/core/telemetry/trace_protocol.py b/src/llama_stack/core/telemetry/trace_protocol.py
similarity index 100%
rename from llama_stack/core/telemetry/trace_protocol.py
rename to src/llama_stack/core/telemetry/trace_protocol.py
diff --git a/llama_stack/core/telemetry/tracing.py b/src/llama_stack/core/telemetry/tracing.py
similarity index 100%
rename from llama_stack/core/telemetry/tracing.py
rename to src/llama_stack/core/telemetry/tracing.py
diff --git a/llama_stack/core/testing_context.py b/src/llama_stack/core/testing_context.py
similarity index 100%
rename from llama_stack/core/testing_context.py
rename to src/llama_stack/core/testing_context.py
diff --git a/llama_stack/core/ui/Containerfile b/src/llama_stack/core/ui/Containerfile
similarity index 100%
rename from llama_stack/core/ui/Containerfile
rename to src/llama_stack/core/ui/Containerfile
diff --git a/llama_stack/core/ui/README.md b/src/llama_stack/core/ui/README.md
similarity index 100%
rename from llama_stack/core/ui/README.md
rename to src/llama_stack/core/ui/README.md
diff --git a/llama_stack/core/ui/__init__.py b/src/llama_stack/core/ui/__init__.py
similarity index 100%
rename from llama_stack/core/ui/__init__.py
rename to src/llama_stack/core/ui/__init__.py
diff --git a/llama_stack/core/ui/app.py b/src/llama_stack/core/ui/app.py
similarity index 100%
rename from llama_stack/core/ui/app.py
rename to src/llama_stack/core/ui/app.py
diff --git a/llama_stack/core/ui/modules/__init__.py b/src/llama_stack/core/ui/modules/__init__.py
similarity index 100%
rename from llama_stack/core/ui/modules/__init__.py
rename to src/llama_stack/core/ui/modules/__init__.py
diff --git a/llama_stack/core/ui/modules/api.py b/src/llama_stack/core/ui/modules/api.py
similarity index 100%
rename from llama_stack/core/ui/modules/api.py
rename to src/llama_stack/core/ui/modules/api.py
diff --git a/llama_stack/core/ui/modules/utils.py b/src/llama_stack/core/ui/modules/utils.py
similarity index 100%
rename from llama_stack/core/ui/modules/utils.py
rename to src/llama_stack/core/ui/modules/utils.py
diff --git a/llama_stack/core/ui/page/__init__.py b/src/llama_stack/core/ui/page/__init__.py
similarity index 100%
rename from llama_stack/core/ui/page/__init__.py
rename to src/llama_stack/core/ui/page/__init__.py
diff --git a/llama_stack/core/ui/page/distribution/__init__.py b/src/llama_stack/core/ui/page/distribution/__init__.py
similarity index 100%
rename from llama_stack/core/ui/page/distribution/__init__.py
rename to src/llama_stack/core/ui/page/distribution/__init__.py
diff --git a/llama_stack/core/ui/page/distribution/datasets.py b/src/llama_stack/core/ui/page/distribution/datasets.py
similarity index 100%
rename from llama_stack/core/ui/page/distribution/datasets.py
rename to src/llama_stack/core/ui/page/distribution/datasets.py
diff --git a/llama_stack/core/ui/page/distribution/eval_tasks.py b/src/llama_stack/core/ui/page/distribution/eval_tasks.py
similarity index 100%
rename from llama_stack/core/ui/page/distribution/eval_tasks.py
rename to src/llama_stack/core/ui/page/distribution/eval_tasks.py
diff --git a/llama_stack/core/ui/page/distribution/models.py b/src/llama_stack/core/ui/page/distribution/models.py
similarity index 100%
rename from llama_stack/core/ui/page/distribution/models.py
rename to src/llama_stack/core/ui/page/distribution/models.py
diff --git a/llama_stack/core/ui/page/distribution/providers.py b/src/llama_stack/core/ui/page/distribution/providers.py
similarity index 100%
rename from llama_stack/core/ui/page/distribution/providers.py
rename to src/llama_stack/core/ui/page/distribution/providers.py
diff --git a/llama_stack/core/ui/page/distribution/resources.py b/src/llama_stack/core/ui/page/distribution/resources.py
similarity index 100%
rename from llama_stack/core/ui/page/distribution/resources.py
rename to src/llama_stack/core/ui/page/distribution/resources.py
diff --git a/llama_stack/core/ui/page/distribution/scoring_functions.py b/src/llama_stack/core/ui/page/distribution/scoring_functions.py
similarity index 100%
rename from llama_stack/core/ui/page/distribution/scoring_functions.py
rename to src/llama_stack/core/ui/page/distribution/scoring_functions.py
diff --git a/llama_stack/core/ui/page/distribution/shields.py b/src/llama_stack/core/ui/page/distribution/shields.py
similarity index 100%
rename from llama_stack/core/ui/page/distribution/shields.py
rename to src/llama_stack/core/ui/page/distribution/shields.py
diff --git a/llama_stack/core/ui/page/evaluations/__init__.py b/src/llama_stack/core/ui/page/evaluations/__init__.py
similarity index 100%
rename from llama_stack/core/ui/page/evaluations/__init__.py
rename to src/llama_stack/core/ui/page/evaluations/__init__.py
diff --git a/llama_stack/core/ui/page/evaluations/app_eval.py b/src/llama_stack/core/ui/page/evaluations/app_eval.py
similarity index 100%
rename from llama_stack/core/ui/page/evaluations/app_eval.py
rename to src/llama_stack/core/ui/page/evaluations/app_eval.py
diff --git a/llama_stack/core/ui/page/evaluations/native_eval.py b/src/llama_stack/core/ui/page/evaluations/native_eval.py
similarity index 100%
rename from llama_stack/core/ui/page/evaluations/native_eval.py
rename to src/llama_stack/core/ui/page/evaluations/native_eval.py
diff --git a/llama_stack/core/ui/page/playground/__init__.py b/src/llama_stack/core/ui/page/playground/__init__.py
similarity index 100%
rename from llama_stack/core/ui/page/playground/__init__.py
rename to src/llama_stack/core/ui/page/playground/__init__.py
diff --git a/llama_stack/core/ui/page/playground/chat.py b/src/llama_stack/core/ui/page/playground/chat.py
similarity index 100%
rename from llama_stack/core/ui/page/playground/chat.py
rename to src/llama_stack/core/ui/page/playground/chat.py
diff --git a/llama_stack/core/ui/page/playground/tools.py b/src/llama_stack/core/ui/page/playground/tools.py
similarity index 100%
rename from llama_stack/core/ui/page/playground/tools.py
rename to src/llama_stack/core/ui/page/playground/tools.py
diff --git a/llama_stack/core/ui/requirements.txt b/src/llama_stack/core/ui/requirements.txt
similarity index 100%
rename from llama_stack/core/ui/requirements.txt
rename to src/llama_stack/core/ui/requirements.txt
diff --git a/llama_stack/core/utils/__init__.py b/src/llama_stack/core/utils/__init__.py
similarity index 100%
rename from llama_stack/core/utils/__init__.py
rename to src/llama_stack/core/utils/__init__.py
diff --git a/llama_stack/core/utils/config.py b/src/llama_stack/core/utils/config.py
similarity index 100%
rename from llama_stack/core/utils/config.py
rename to src/llama_stack/core/utils/config.py
diff --git a/llama_stack/core/utils/config_dirs.py b/src/llama_stack/core/utils/config_dirs.py
similarity index 100%
rename from llama_stack/core/utils/config_dirs.py
rename to src/llama_stack/core/utils/config_dirs.py
diff --git a/llama_stack/core/utils/config_resolution.py b/src/llama_stack/core/utils/config_resolution.py
similarity index 100%
rename from llama_stack/core/utils/config_resolution.py
rename to src/llama_stack/core/utils/config_resolution.py
diff --git a/llama_stack/core/utils/context.py b/src/llama_stack/core/utils/context.py
similarity index 100%
rename from llama_stack/core/utils/context.py
rename to src/llama_stack/core/utils/context.py
diff --git a/llama_stack/core/utils/dynamic.py b/src/llama_stack/core/utils/dynamic.py
similarity index 100%
rename from llama_stack/core/utils/dynamic.py
rename to src/llama_stack/core/utils/dynamic.py
diff --git a/llama_stack/core/utils/exec.py b/src/llama_stack/core/utils/exec.py
similarity index 100%
rename from llama_stack/core/utils/exec.py
rename to src/llama_stack/core/utils/exec.py
diff --git a/llama_stack/core/utils/image_types.py b/src/llama_stack/core/utils/image_types.py
similarity index 100%
rename from llama_stack/core/utils/image_types.py
rename to src/llama_stack/core/utils/image_types.py
diff --git a/llama_stack/core/utils/model_utils.py b/src/llama_stack/core/utils/model_utils.py
similarity index 100%
rename from llama_stack/core/utils/model_utils.py
rename to src/llama_stack/core/utils/model_utils.py
diff --git a/llama_stack/core/utils/prompt_for_config.py b/src/llama_stack/core/utils/prompt_for_config.py
similarity index 100%
rename from llama_stack/core/utils/prompt_for_config.py
rename to src/llama_stack/core/utils/prompt_for_config.py
diff --git a/llama_stack/core/utils/serialize.py b/src/llama_stack/core/utils/serialize.py
similarity index 100%
rename from llama_stack/core/utils/serialize.py
rename to src/llama_stack/core/utils/serialize.py
diff --git a/llama_stack/distributions/__init__.py b/src/llama_stack/distributions/__init__.py
similarity index 100%
rename from llama_stack/distributions/__init__.py
rename to src/llama_stack/distributions/__init__.py
diff --git a/llama_stack/distributions/ci-tests/__init__.py b/src/llama_stack/distributions/ci-tests/__init__.py
similarity index 100%
rename from llama_stack/distributions/ci-tests/__init__.py
rename to src/llama_stack/distributions/ci-tests/__init__.py
diff --git a/src/llama_stack/distributions/ci-tests/build.yaml b/src/llama_stack/distributions/ci-tests/build.yaml
new file mode 100644
index 000000000..c01e415a9
--- /dev/null
+++ b/src/llama_stack/distributions/ci-tests/build.yaml
@@ -0,0 +1,59 @@
+version: 2
+distribution_spec:
+  description: CI tests for Llama Stack
+  providers:
+    inference:
+    - provider_type: remote::cerebras
+    - provider_type: remote::ollama
+    - provider_type: remote::vllm
+    - provider_type: remote::tgi
+    - provider_type: remote::fireworks
+    - provider_type: remote::together
+    - provider_type: remote::bedrock
+    - provider_type: remote::nvidia
+    - provider_type: remote::openai
+    - provider_type: remote::anthropic
+    - provider_type: remote::gemini
+    - provider_type: remote::vertexai
+    - provider_type: remote::groq
+    - provider_type: remote::sambanova
+    - provider_type: remote::azure
+    - provider_type: inline::sentence-transformers
+    vector_io:
+    - provider_type: inline::faiss
+    - provider_type: inline::sqlite-vec
+    - provider_type: inline::milvus
+    - provider_type: remote::chromadb
+    - provider_type: remote::pgvector
+    - provider_type: remote::qdrant
+    - provider_type: remote::weaviate
+    files:
+    - provider_type: inline::localfs
+    safety:
+    - provider_type: inline::llama-guard
+    - provider_type: inline::code-scanner
+    agents:
+    - provider_type: inline::meta-reference
+    post_training:
+    - provider_type: inline::torchtune-cpu
+    eval:
+    - provider_type: inline::meta-reference
+    datasetio:
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
+    scoring:
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+    - provider_type: remote::model-context-protocol
+    batches:
+    - provider_type: inline::reference
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- asyncpg
+- sqlalchemy[asyncio]
diff --git a/llama_stack/distributions/ci-tests/ci_tests.py b/src/llama_stack/distributions/ci-tests/ci_tests.py
similarity index 100%
rename from llama_stack/distributions/ci-tests/ci_tests.py
rename to src/llama_stack/distributions/ci-tests/ci_tests.py
diff --git a/src/llama_stack/distributions/ci-tests/run.yaml b/src/llama_stack/distributions/ci-tests/run.yaml
new file mode 100644
index 000000000..ed880d4a0
--- /dev/null
+++ b/src/llama_stack/distributions/ci-tests/run.yaml
@@ -0,0 +1,278 @@
+version: 2
+image_name: ci-tests
+apis:
+- agents
+- batches
+- datasetio
+- eval
+- files
+- inference
+- post_training
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
+    provider_type: remote::cerebras
+    config:
+      base_url: https://api.cerebras.ai
+      api_key: ${env.CEREBRAS_API_KEY:=}
+  - provider_id: ${env.OLLAMA_URL:+ollama}
+    provider_type: remote::ollama
+    config:
+      url: ${env.OLLAMA_URL:=http://localhost:11434}
+  - provider_id: ${env.VLLM_URL:+vllm}
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL:=}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: ${env.TGI_URL:+tgi}
+    provider_type: remote::tgi
+    config:
+      url: ${env.TGI_URL:=}
+  - provider_id: fireworks
+    provider_type: remote::fireworks
+    config:
+      url: https://api.fireworks.ai/inference/v1
+      api_key: ${env.FIREWORKS_API_KEY:=}
+  - provider_id: together
+    provider_type: remote::together
+    config:
+      url: https://api.together.xyz/v1
+      api_key: ${env.TOGETHER_API_KEY:=}
+  - provider_id: bedrock
+    provider_type: remote::bedrock
+  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
+    provider_type: remote::nvidia
+    config:
+      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      api_key: ${env.NVIDIA_API_KEY:=}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY:=}
+      base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
+  - provider_id: anthropic
+    provider_type: remote::anthropic
+    config:
+      api_key: ${env.ANTHROPIC_API_KEY:=}
+  - provider_id: gemini
+    provider_type: remote::gemini
+    config:
+      api_key: ${env.GEMINI_API_KEY:=}
+  - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
+    provider_type: remote::vertexai
+    config:
+      project: ${env.VERTEX_AI_PROJECT:=}
+      location: ${env.VERTEX_AI_LOCATION:=us-central1}
+  - provider_id: groq
+    provider_type: remote::groq
+    config:
+      url: https://api.groq.com
+      api_key: ${env.GROQ_API_KEY:=}
+  - provider_id: sambanova
+    provider_type: remote::sambanova
+    config:
+      url: https://api.sambanova.ai/v1
+      api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
+    config:
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db
+      persistence:
+        namespace: vector_io::sqlite_vec
+        backend: kv_default
+  - provider_id: ${env.MILVUS_URL:+milvus}
+    provider_type: inline::milvus
+    config:
+      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db
+      persistence:
+        namespace: vector_io::milvus
+        backend: kv_default
+  - provider_id: ${env.CHROMADB_URL:+chromadb}
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  - provider_id: ${env.PGVECTOR_DB:+pgvector}
+    provider_type: remote::pgvector
+    config:
+      host: ${env.PGVECTOR_HOST:=localhost}
+      port: ${env.PGVECTOR_PORT:=5432}
+      db: ${env.PGVECTOR_DB:=}
+      user: ${env.PGVECTOR_USER:=}
+      password: ${env.PGVECTOR_PASSWORD:=}
+      persistence:
+        namespace: vector_io::pgvector
+        backend: kv_default
+  - provider_id: ${env.QDRANT_URL:+qdrant}
+    provider_type: remote::qdrant
+    config:
+      api_key: ${env.QDRANT_API_KEY:=}
+      persistence:
+        namespace: vector_io::qdrant_remote
+        backend: kv_default
+  - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
+    provider_type: remote::weaviate
+    config:
+      weaviate_api_key: null
+      weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
+      persistence:
+        namespace: vector_io::weaviate
+        backend: kv_default
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  - provider_id: code-scanner
+    provider_type: inline::code-scanner
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  post_training:
+  - provider_id: torchtune-cpu
+    provider_type: inline::torchtune-cpu
+    config:
+      checkpoint_format: meta
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+  batches:
+  - provider_id: reference
+    provider_type: inline::reference
+    config:
+      kvstore:
+        namespace: batches
+        backend: kv_default
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models: []
+  shields:
+  - shield_id: llama-guard
+    provider_id: ${env.SAFETY_MODEL:+llama-guard}
+    provider_shield_id: ${env.SAFETY_MODEL:=}
+  - shield_id: code-scanner
+    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
+    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
+safety:
+  default_shield_id: llama-guard
diff --git a/llama_stack/distributions/dell/__init__.py b/src/llama_stack/distributions/dell/__init__.py
similarity index 100%
rename from llama_stack/distributions/dell/__init__.py
rename to src/llama_stack/distributions/dell/__init__.py
diff --git a/src/llama_stack/distributions/dell/build.yaml b/src/llama_stack/distributions/dell/build.yaml
new file mode 100644
index 000000000..7bc26ca9e
--- /dev/null
+++ b/src/llama_stack/distributions/dell/build.yaml
@@ -0,0 +1,33 @@
+version: 2
+distribution_spec:
+  description: Dell's distribution of Llama Stack. TGI inference via Dell's custom
+    container
+  providers:
+    inference:
+    - provider_type: remote::tgi
+    - provider_type: inline::sentence-transformers
+    vector_io:
+    - provider_type: inline::faiss
+    - provider_type: remote::chromadb
+    - provider_type: remote::pgvector
+    safety:
+    - provider_type: inline::llama-guard
+    agents:
+    - provider_type: inline::meta-reference
+    eval:
+    - provider_type: inline::meta-reference
+    datasetio:
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
+    scoring:
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- sqlalchemy[asyncio]
diff --git a/llama_stack/distributions/dell/dell.py b/src/llama_stack/distributions/dell/dell.py
similarity index 100%
rename from llama_stack/distributions/dell/dell.py
rename to src/llama_stack/distributions/dell/dell.py
diff --git a/llama_stack/distributions/dell/doc_template.md b/src/llama_stack/distributions/dell/doc_template.md
similarity index 100%
rename from llama_stack/distributions/dell/doc_template.md
rename to src/llama_stack/distributions/dell/doc_template.md
diff --git a/src/llama_stack/distributions/dell/run-with-safety.yaml b/src/llama_stack/distributions/dell/run-with-safety.yaml
new file mode 100644
index 000000000..2563f2f4b
--- /dev/null
+++ b/src/llama_stack/distributions/dell/run-with-safety.yaml
@@ -0,0 +1,141 @@
+version: 2
+image_name: dell
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: tgi0
+    provider_type: remote::tgi
+    config:
+      url: ${env.DEH_URL}
+  - provider_id: tgi1
+    provider_type: remote::tgi
+    config:
+      url: ${env.DEH_SAFETY_URL}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: chromadb
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
+    provider_id: tgi0
+    model_type: llm
+  - metadata: {}
+    model_id: ${env.SAFETY_MODEL}
+    provider_id: tgi1
+    model_type: llm
+  - metadata:
+      embedding_dimension: 768
+    model_id: nomic-embed-text-v1.5
+    provider_id: sentence-transformers
+    model_type: embedding
+  shields:
+  - shield_id: ${env.SAFETY_MODEL}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: brave-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/src/llama_stack/distributions/dell/run.yaml b/src/llama_stack/distributions/dell/run.yaml
new file mode 100644
index 000000000..7bada394f
--- /dev/null
+++ b/src/llama_stack/distributions/dell/run.yaml
@@ -0,0 +1,132 @@
+version: 2
+image_name: dell
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: tgi0
+    provider_type: remote::tgi
+    config:
+      url: ${env.DEH_URL}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: chromadb
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
+    provider_id: tgi0
+    model_type: llm
+  - metadata:
+      embedding_dimension: 768
+    model_id: nomic-embed-text-v1.5
+    provider_id: sentence-transformers
+    model_type: embedding
+  shields: []
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: brave-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/llama_stack/distributions/meta-reference-gpu/__init__.py b/src/llama_stack/distributions/meta-reference-gpu/__init__.py
similarity index 100%
rename from llama_stack/distributions/meta-reference-gpu/__init__.py
rename to src/llama_stack/distributions/meta-reference-gpu/__init__.py
diff --git a/src/llama_stack/distributions/meta-reference-gpu/build.yaml b/src/llama_stack/distributions/meta-reference-gpu/build.yaml
new file mode 100644
index 000000000..1513742a7
--- /dev/null
+++ b/src/llama_stack/distributions/meta-reference-gpu/build.yaml
@@ -0,0 +1,32 @@
+version: 2
+distribution_spec:
+  description: Use Meta Reference for running LLM inference
+  providers:
+    inference:
+    - provider_type: inline::meta-reference
+    vector_io:
+    - provider_type: inline::faiss
+    - provider_type: remote::chromadb
+    - provider_type: remote::pgvector
+    safety:
+    - provider_type: inline::llama-guard
+    agents:
+    - provider_type: inline::meta-reference
+    eval:
+    - provider_type: inline::meta-reference
+    datasetio:
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
+    scoring:
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+    - provider_type: remote::model-context-protocol
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- sqlalchemy[asyncio]
diff --git a/llama_stack/distributions/meta-reference-gpu/doc_template.md b/src/llama_stack/distributions/meta-reference-gpu/doc_template.md
similarity index 100%
rename from llama_stack/distributions/meta-reference-gpu/doc_template.md
rename to src/llama_stack/distributions/meta-reference-gpu/doc_template.md
diff --git a/llama_stack/distributions/meta-reference-gpu/meta_reference.py b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
similarity index 100%
rename from llama_stack/distributions/meta-reference-gpu/meta_reference.py
rename to src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
diff --git a/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml b/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml
new file mode 100644
index 000000000..01b5db4f9
--- /dev/null
+++ b/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml
@@ -0,0 +1,154 @@
+version: 2
+image_name: meta-reference-gpu
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: meta-reference-inference
+    provider_type: inline::meta-reference
+    config:
+      model: ${env.INFERENCE_MODEL}
+      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:=null}
+      quantization:
+        type: ${env.QUANTIZATION_TYPE:=bf16}
+      model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}
+      max_batch_size: ${env.MAX_BATCH_SIZE:=1}
+      max_seq_len: ${env.MAX_SEQ_LEN:=4096}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  - provider_id: meta-reference-safety
+    provider_type: inline::meta-reference
+    config:
+      model: ${env.SAFETY_MODEL}
+      checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:=null}
+      quantization:
+        type: ${env.QUANTIZATION_TYPE:=bf16}
+      model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}
+      max_batch_size: ${env.MAX_BATCH_SIZE:=1}
+      max_seq_len: ${env.MAX_SEQ_LEN:=4096}
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
+    provider_id: meta-reference-inference
+    model_type: llm
+  - metadata: {}
+    model_id: ${env.SAFETY_MODEL}
+    provider_id: meta-reference-safety
+    model_type: llm
+  - metadata:
+      embedding_dimension: 768
+    model_id: nomic-embed-text-v1.5
+    provider_id: sentence-transformers
+    model_type: embedding
+  shields:
+  - shield_id: ${env.SAFETY_MODEL}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/src/llama_stack/distributions/meta-reference-gpu/run.yaml b/src/llama_stack/distributions/meta-reference-gpu/run.yaml
new file mode 100644
index 000000000..87c33dde0
--- /dev/null
+++ b/src/llama_stack/distributions/meta-reference-gpu/run.yaml
@@ -0,0 +1,139 @@
+version: 2
+image_name: meta-reference-gpu
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: meta-reference-inference
+    provider_type: inline::meta-reference
+    config:
+      model: ${env.INFERENCE_MODEL}
+      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:=null}
+      quantization:
+        type: ${env.QUANTIZATION_TYPE:=bf16}
+      model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}
+      max_batch_size: ${env.MAX_BATCH_SIZE:=1}
+      max_seq_len: ${env.MAX_SEQ_LEN:=4096}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
+    provider_id: meta-reference-inference
+    model_type: llm
+  - metadata:
+      embedding_dimension: 768
+    model_id: nomic-embed-text-v1.5
+    provider_id: sentence-transformers
+    model_type: embedding
+  shields: []
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/llama_stack/distributions/nvidia/__init__.py b/src/llama_stack/distributions/nvidia/__init__.py
similarity index 100%
rename from llama_stack/distributions/nvidia/__init__.py
rename to src/llama_stack/distributions/nvidia/__init__.py
diff --git a/src/llama_stack/distributions/nvidia/build.yaml b/src/llama_stack/distributions/nvidia/build.yaml
new file mode 100644
index 000000000..8ddd12439
--- /dev/null
+++ b/src/llama_stack/distributions/nvidia/build.yaml
@@ -0,0 +1,29 @@
+version: 2
+distribution_spec:
+  description: Use NVIDIA NIM for running LLM inference, evaluation and safety
+  providers:
+    inference:
+    - provider_type: remote::nvidia
+    vector_io:
+    - provider_type: inline::faiss
+    safety:
+    - provider_type: remote::nvidia
+    agents:
+    - provider_type: inline::meta-reference
+    eval:
+    - provider_type: remote::nvidia
+    post_training:
+    - provider_type: remote::nvidia
+    datasetio:
+    - provider_type: inline::localfs
+    - provider_type: remote::nvidia
+    scoring:
+    - provider_type: inline::basic
+    tool_runtime:
+    - provider_type: inline::rag-runtime
+    files:
+    - provider_type: inline::localfs
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- sqlalchemy[asyncio]
diff --git a/llama_stack/distributions/nvidia/doc_template.md b/src/llama_stack/distributions/nvidia/doc_template.md
similarity index 100%
rename from llama_stack/distributions/nvidia/doc_template.md
rename to src/llama_stack/distributions/nvidia/doc_template.md
diff --git a/llama_stack/distributions/nvidia/nvidia.py b/src/llama_stack/distributions/nvidia/nvidia.py
similarity index 100%
rename from llama_stack/distributions/nvidia/nvidia.py
rename to src/llama_stack/distributions/nvidia/nvidia.py
diff --git a/src/llama_stack/distributions/nvidia/run-with-safety.yaml b/src/llama_stack/distributions/nvidia/run-with-safety.yaml
new file mode 100644
index 000000000..c23d0f9cb
--- /dev/null
+++ b/src/llama_stack/distributions/nvidia/run-with-safety.yaml
@@ -0,0 +1,137 @@
+version: 2
+image_name: nvidia
+apis:
+- agents
+- datasetio
+- eval
+- files
+- inference
+- post_training
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      api_key: ${env.NVIDIA_API_KEY:=}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}
+      config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  safety:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}
+      config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  eval:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}
+  post_training:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      api_key: ${env.NVIDIA_API_KEY:=}
+      dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default}
+      project_id: ${env.NVIDIA_PROJECT_ID:=test-project}
+      customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test}
+  datasetio:
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      api_key: ${env.NVIDIA_API_KEY:=}
+      dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default}
+      project_id: ${env.NVIDIA_PROJECT_ID:=test-project}
+      datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test}
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  tool_runtime:
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
+    provider_id: nvidia
+    model_type: llm
+  - metadata: {}
+    model_id: ${env.SAFETY_MODEL}
+    provider_id: nvidia
+    model_type: llm
+  shields:
+  - shield_id: ${env.SAFETY_MODEL}
+    provider_id: nvidia
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/src/llama_stack/distributions/nvidia/run.yaml b/src/llama_stack/distributions/nvidia/run.yaml
new file mode 100644
index 000000000..81e744d53
--- /dev/null
+++ b/src/llama_stack/distributions/nvidia/run.yaml
@@ -0,0 +1,116 @@
+version: 2
+image_name: nvidia
+apis:
+- agents
+- datasetio
+- eval
+- files
+- inference
+- post_training
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      api_key: ${env.NVIDIA_API_KEY:=}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  safety:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}
+      config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  eval:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}
+  post_training:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      api_key: ${env.NVIDIA_API_KEY:=}
+      dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default}
+      project_id: ${env.NVIDIA_PROJECT_ID:=test-project}
+      customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test}
+  datasetio:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      api_key: ${env.NVIDIA_API_KEY:=}
+      dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default}
+      project_id: ${env.NVIDIA_PROJECT_ID:=test-project}
+      datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test}
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  tool_runtime:
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models: []
+  shields: []
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/llama_stack/distributions/open-benchmark/__init__.py b/src/llama_stack/distributions/open-benchmark/__init__.py
similarity index 100%
rename from llama_stack/distributions/open-benchmark/__init__.py
rename to src/llama_stack/distributions/open-benchmark/__init__.py
diff --git a/src/llama_stack/distributions/open-benchmark/build.yaml b/src/llama_stack/distributions/open-benchmark/build.yaml
new file mode 100644
index 000000000..05acd98e3
--- /dev/null
+++ b/src/llama_stack/distributions/open-benchmark/build.yaml
@@ -0,0 +1,36 @@
+version: 2
+distribution_spec:
+  description: Distribution for running open benchmarks
+  providers:
+    inference:
+    - provider_type: remote::openai
+    - provider_type: remote::anthropic
+    - provider_type: remote::gemini
+    - provider_type: remote::groq
+    - provider_type: remote::together
+    vector_io:
+    - provider_type: inline::sqlite-vec
+    - provider_type: remote::chromadb
+    - provider_type: remote::pgvector
+    safety:
+    - provider_type: inline::llama-guard
+    agents:
+    - provider_type: inline::meta-reference
+    eval:
+    - provider_type: inline::meta-reference
+    datasetio:
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
+    scoring:
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+    - provider_type: remote::model-context-protocol
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- sqlalchemy[asyncio]
diff --git a/llama_stack/distributions/open-benchmark/open_benchmark.py b/src/llama_stack/distributions/open-benchmark/open_benchmark.py
similarity index 100%
rename from llama_stack/distributions/open-benchmark/open_benchmark.py
rename to src/llama_stack/distributions/open-benchmark/open_benchmark.py
diff --git a/src/llama_stack/distributions/open-benchmark/run.yaml b/src/llama_stack/distributions/open-benchmark/run.yaml
new file mode 100644
index 000000000..4fd0e199b
--- /dev/null
+++ b/src/llama_stack/distributions/open-benchmark/run.yaml
@@ -0,0 +1,252 @@
+version: 2
+image_name: open-benchmark
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY:=}
+      base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
+  - provider_id: anthropic
+    provider_type: remote::anthropic
+    config:
+      api_key: ${env.ANTHROPIC_API_KEY:=}
+  - provider_id: gemini
+    provider_type: remote::gemini
+    config:
+      api_key: ${env.GEMINI_API_KEY:=}
+  - provider_id: groq
+    provider_type: remote::groq
+    config:
+      url: https://api.groq.com
+      api_key: ${env.GROQ_API_KEY:=}
+  - provider_id: together
+    provider_type: remote::together
+    config:
+      url: https://api.together.xyz/v1
+      api_key: ${env.TOGETHER_API_KEY:=}
+  vector_io:
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
+    config:
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec.db
+      persistence:
+        namespace: vector_io::sqlite_vec
+        backend: kv_default
+  - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  - provider_id: ${env.ENABLE_PGVECTOR:+pgvector}
+    provider_type: remote::pgvector
+    config:
+      host: ${env.PGVECTOR_HOST:=localhost}
+      port: ${env.PGVECTOR_PORT:=5432}
+      db: ${env.PGVECTOR_DB:=}
+      user: ${env.PGVECTOR_USER:=}
+      password: ${env.PGVECTOR_PASSWORD:=}
+      persistence:
+        namespace: vector_io::pgvector
+        backend: kv_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: gpt-4o
+    provider_id: openai
+    provider_model_id: gpt-4o
+    model_type: llm
+  - metadata: {}
+    model_id: claude-3-5-sonnet-latest
+    provider_id: anthropic
+    provider_model_id: claude-3-5-sonnet-latest
+    model_type: llm
+  - metadata: {}
+    model_id: gemini/gemini-1.5-flash
+    provider_id: gemini
+    provider_model_id: gemini/gemini-1.5-flash
+    model_type: llm
+  - metadata: {}
+    model_id: meta-llama/Llama-3.3-70B-Instruct
+    provider_id: groq
+    provider_model_id: groq/llama-3.3-70b-versatile
+    model_type: llm
+  - metadata: {}
+    model_id: meta-llama/Llama-3.1-405B-Instruct
+    provider_id: together
+    provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+    model_type: llm
+  shields:
+  - shield_id: meta-llama/Llama-Guard-3-8B
+  vector_dbs: []
+  datasets:
+  - purpose: eval/messages-answer
+    source:
+      type: uri
+      uri: huggingface://datasets/llamastack/simpleqa?split=train
+    metadata: {}
+    dataset_id: simpleqa
+  - purpose: eval/messages-answer
+    source:
+      type: uri
+      uri: huggingface://datasets/llamastack/mmlu_cot?split=test&name=all
+    metadata: {}
+    dataset_id: mmlu_cot
+  - purpose: eval/messages-answer
+    source:
+      type: uri
+      uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main
+    metadata: {}
+    dataset_id: gpqa_cot
+  - purpose: eval/messages-answer
+    source:
+      type: uri
+      uri: huggingface://datasets/llamastack/math_500?split=test
+    metadata: {}
+    dataset_id: math_500
+  - purpose: eval/messages-answer
+    source:
+      type: uri
+      uri: huggingface://datasets/llamastack/IfEval?split=train
+    metadata: {}
+    dataset_id: ifeval
+  - purpose: eval/messages-answer
+    source:
+      type: uri
+      uri: huggingface://datasets/llamastack/docvqa?split=val
+    metadata: {}
+    dataset_id: docvqa
+  scoring_fns: []
+  benchmarks:
+  - dataset_id: simpleqa
+    scoring_functions:
+    - llm-as-judge::405b-simpleqa
+    metadata: {}
+    benchmark_id: meta-reference-simpleqa
+  - dataset_id: mmlu_cot
+    scoring_functions:
+    - basic::regex_parser_multiple_choice_answer
+    metadata: {}
+    benchmark_id: meta-reference-mmlu-cot
+  - dataset_id: gpqa_cot
+    scoring_functions:
+    - basic::regex_parser_multiple_choice_answer
+    metadata: {}
+    benchmark_id: meta-reference-gpqa-cot
+  - dataset_id: math_500
+    scoring_functions:
+    - basic::regex_parser_math_response
+    metadata: {}
+    benchmark_id: meta-reference-math-500
+  - dataset_id: ifeval
+    scoring_functions:
+    - basic::ifeval
+    metadata: {}
+    benchmark_id: meta-reference-ifeval
+  - dataset_id: docvqa
+    scoring_functions:
+    - basic::docvqa
+    metadata: {}
+    benchmark_id: meta-reference-docvqa
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/llama_stack/distributions/postgres-demo/__init__.py b/src/llama_stack/distributions/postgres-demo/__init__.py
similarity index 100%
rename from llama_stack/distributions/postgres-demo/__init__.py
rename to src/llama_stack/distributions/postgres-demo/__init__.py
diff --git a/src/llama_stack/distributions/postgres-demo/build.yaml b/src/llama_stack/distributions/postgres-demo/build.yaml
new file mode 100644
index 000000000..063dc3999
--- /dev/null
+++ b/src/llama_stack/distributions/postgres-demo/build.yaml
@@ -0,0 +1,23 @@
+version: 2
+distribution_spec:
+  description: Quick start template for running Llama Stack with several popular providers
+  providers:
+    inference:
+    - provider_type: remote::vllm
+    - provider_type: inline::sentence-transformers
+    vector_io:
+    - provider_type: remote::chromadb
+    safety:
+    - provider_type: inline::llama-guard
+    agents:
+    - provider_type: inline::meta-reference
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+    - provider_type: remote::model-context-protocol
+image_type: venv
+additional_pip_packages:
+- asyncpg
+- psycopg2-binary
+- sqlalchemy[asyncio]
diff --git a/llama_stack/distributions/postgres-demo/postgres_demo.py b/src/llama_stack/distributions/postgres-demo/postgres_demo.py
similarity index 100%
rename from llama_stack/distributions/postgres-demo/postgres_demo.py
rename to src/llama_stack/distributions/postgres-demo/postgres_demo.py
diff --git a/src/llama_stack/distributions/postgres-demo/run.yaml b/src/llama_stack/distributions/postgres-demo/run.yaml
new file mode 100644
index 000000000..0d7ecff48
--- /dev/null
+++ b/src/llama_stack/distributions/postgres-demo/run.yaml
@@ -0,0 +1,115 @@
+version: 2
+image_name: postgres-demo
+apis:
+- agents
+- inference
+- safety
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: vllm-inference
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL:=http://localhost:8000/v1}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+storage:
+  backends:
+    kv_default:
+      type: kv_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+      table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+    sql_default:
+      type: sql_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
+    provider_id: vllm-inference
+    model_type: llm
+  - metadata:
+      embedding_dimension: 768
+    model_id: nomic-embed-text-v1.5
+    provider_id: sentence-transformers
+    model_type: embedding
+  shields:
+  - shield_id: meta-llama/Llama-Guard-3-8B
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/llama_stack/distributions/starter-gpu/__init__.py b/src/llama_stack/distributions/starter-gpu/__init__.py
similarity index 100%
rename from llama_stack/distributions/starter-gpu/__init__.py
rename to src/llama_stack/distributions/starter-gpu/__init__.py
diff --git a/src/llama_stack/distributions/starter-gpu/build.yaml b/src/llama_stack/distributions/starter-gpu/build.yaml
new file mode 100644
index 000000000..b2e2a0c85
--- /dev/null
+++ b/src/llama_stack/distributions/starter-gpu/build.yaml
@@ -0,0 +1,60 @@
+version: 2
+distribution_spec:
+  description: Quick start template for running Llama Stack with several popular providers.
+    This distribution is intended for GPU-enabled environments.
+  providers:
+    inference:
+    - provider_type: remote::cerebras
+    - provider_type: remote::ollama
+    - provider_type: remote::vllm
+    - provider_type: remote::tgi
+    - provider_type: remote::fireworks
+    - provider_type: remote::together
+    - provider_type: remote::bedrock
+    - provider_type: remote::nvidia
+    - provider_type: remote::openai
+    - provider_type: remote::anthropic
+    - provider_type: remote::gemini
+    - provider_type: remote::vertexai
+    - provider_type: remote::groq
+    - provider_type: remote::sambanova
+    - provider_type: remote::azure
+    - provider_type: inline::sentence-transformers
+    vector_io:
+    - provider_type: inline::faiss
+    - provider_type: inline::sqlite-vec
+    - provider_type: inline::milvus
+    - provider_type: remote::chromadb
+    - provider_type: remote::pgvector
+    - provider_type: remote::qdrant
+    - provider_type: remote::weaviate
+    files:
+    - provider_type: inline::localfs
+    safety:
+    - provider_type: inline::llama-guard
+    - provider_type: inline::code-scanner
+    agents:
+    - provider_type: inline::meta-reference
+    post_training:
+    - provider_type: inline::huggingface-gpu
+    eval:
+    - provider_type: inline::meta-reference
+    datasetio:
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
+    scoring:
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+    - provider_type: remote::model-context-protocol
+    batches:
+    - provider_type: inline::reference
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- asyncpg
+- sqlalchemy[asyncio]
diff --git a/src/llama_stack/distributions/starter-gpu/run.yaml b/src/llama_stack/distributions/starter-gpu/run.yaml
new file mode 100644
index 000000000..33e8c9b59
--- /dev/null
+++ b/src/llama_stack/distributions/starter-gpu/run.yaml
@@ -0,0 +1,281 @@
+version: 2
+image_name: starter-gpu
+apis:
+- agents
+- batches
+- datasetio
+- eval
+- files
+- inference
+- post_training
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
+    provider_type: remote::cerebras
+    config:
+      base_url: https://api.cerebras.ai
+      api_key: ${env.CEREBRAS_API_KEY:=}
+  - provider_id: ${env.OLLAMA_URL:+ollama}
+    provider_type: remote::ollama
+    config:
+      url: ${env.OLLAMA_URL:=http://localhost:11434}
+  - provider_id: ${env.VLLM_URL:+vllm}
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL:=}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: ${env.TGI_URL:+tgi}
+    provider_type: remote::tgi
+    config:
+      url: ${env.TGI_URL:=}
+  - provider_id: fireworks
+    provider_type: remote::fireworks
+    config:
+      url: https://api.fireworks.ai/inference/v1
+      api_key: ${env.FIREWORKS_API_KEY:=}
+  - provider_id: together
+    provider_type: remote::together
+    config:
+      url: https://api.together.xyz/v1
+      api_key: ${env.TOGETHER_API_KEY:=}
+  - provider_id: bedrock
+    provider_type: remote::bedrock
+  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
+    provider_type: remote::nvidia
+    config:
+      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      api_key: ${env.NVIDIA_API_KEY:=}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY:=}
+      base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
+  - provider_id: anthropic
+    provider_type: remote::anthropic
+    config:
+      api_key: ${env.ANTHROPIC_API_KEY:=}
+  - provider_id: gemini
+    provider_type: remote::gemini
+    config:
+      api_key: ${env.GEMINI_API_KEY:=}
+  - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
+    provider_type: remote::vertexai
+    config:
+      project: ${env.VERTEX_AI_PROJECT:=}
+      location: ${env.VERTEX_AI_LOCATION:=us-central1}
+  - provider_id: groq
+    provider_type: remote::groq
+    config:
+      url: https://api.groq.com
+      api_key: ${env.GROQ_API_KEY:=}
+  - provider_id: sambanova
+    provider_type: remote::sambanova
+    config:
+      url: https://api.sambanova.ai/v1
+      api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
+    config:
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db
+      persistence:
+        namespace: vector_io::sqlite_vec
+        backend: kv_default
+  - provider_id: ${env.MILVUS_URL:+milvus}
+    provider_type: inline::milvus
+    config:
+      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db
+      persistence:
+        namespace: vector_io::milvus
+        backend: kv_default
+  - provider_id: ${env.CHROMADB_URL:+chromadb}
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  - provider_id: ${env.PGVECTOR_DB:+pgvector}
+    provider_type: remote::pgvector
+    config:
+      host: ${env.PGVECTOR_HOST:=localhost}
+      port: ${env.PGVECTOR_PORT:=5432}
+      db: ${env.PGVECTOR_DB:=}
+      user: ${env.PGVECTOR_USER:=}
+      password: ${env.PGVECTOR_PASSWORD:=}
+      persistence:
+        namespace: vector_io::pgvector
+        backend: kv_default
+  - provider_id: ${env.QDRANT_URL:+qdrant}
+    provider_type: remote::qdrant
+    config:
+      api_key: ${env.QDRANT_API_KEY:=}
+      persistence:
+        namespace: vector_io::qdrant_remote
+        backend: kv_default
+  - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
+    provider_type: remote::weaviate
+    config:
+      weaviate_api_key: null
+      weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
+      persistence:
+        namespace: vector_io::weaviate
+        backend: kv_default
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  - provider_id: code-scanner
+    provider_type: inline::code-scanner
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  post_training:
+  - provider_id: huggingface-gpu
+    provider_type: inline::huggingface-gpu
+    config:
+      checkpoint_format: huggingface
+      distributed_backend: null
+      device: cpu
+      dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+  batches:
+  - provider_id: reference
+    provider_type: inline::reference
+    config:
+      kvstore:
+        namespace: batches
+        backend: kv_default
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models: []
+  shields:
+  - shield_id: llama-guard
+    provider_id: ${env.SAFETY_MODEL:+llama-guard}
+    provider_shield_id: ${env.SAFETY_MODEL:=}
+  - shield_id: code-scanner
+    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
+    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
+safety:
+  default_shield_id: llama-guard
diff --git a/llama_stack/distributions/starter-gpu/starter_gpu.py b/src/llama_stack/distributions/starter-gpu/starter_gpu.py
similarity index 100%
rename from llama_stack/distributions/starter-gpu/starter_gpu.py
rename to src/llama_stack/distributions/starter-gpu/starter_gpu.py
diff --git a/llama_stack/distributions/starter/__init__.py b/src/llama_stack/distributions/starter/__init__.py
similarity index 100%
rename from llama_stack/distributions/starter/__init__.py
rename to src/llama_stack/distributions/starter/__init__.py
diff --git a/src/llama_stack/distributions/starter/build.yaml b/src/llama_stack/distributions/starter/build.yaml
new file mode 100644
index 000000000..baa80ef3e
--- /dev/null
+++ b/src/llama_stack/distributions/starter/build.yaml
@@ -0,0 +1,60 @@
+version: 2
+distribution_spec:
+  description: Quick start template for running Llama Stack with several popular providers.
+    This distribution is intended for CPU-only environments.
+  providers:
+    inference:
+    - provider_type: remote::cerebras
+    - provider_type: remote::ollama
+    - provider_type: remote::vllm
+    - provider_type: remote::tgi
+    - provider_type: remote::fireworks
+    - provider_type: remote::together
+    - provider_type: remote::bedrock
+    - provider_type: remote::nvidia
+    - provider_type: remote::openai
+    - provider_type: remote::anthropic
+    - provider_type: remote::gemini
+    - provider_type: remote::vertexai
+    - provider_type: remote::groq
+    - provider_type: remote::sambanova
+    - provider_type: remote::azure
+    - provider_type: inline::sentence-transformers
+    vector_io:
+    - provider_type: inline::faiss
+    - provider_type: inline::sqlite-vec
+    - provider_type: inline::milvus
+    - provider_type: remote::chromadb
+    - provider_type: remote::pgvector
+    - provider_type: remote::qdrant
+    - provider_type: remote::weaviate
+    files:
+    - provider_type: inline::localfs
+    safety:
+    - provider_type: inline::llama-guard
+    - provider_type: inline::code-scanner
+    agents:
+    - provider_type: inline::meta-reference
+    post_training:
+    - provider_type: inline::torchtune-cpu
+    eval:
+    - provider_type: inline::meta-reference
+    datasetio:
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
+    scoring:
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+    - provider_type: remote::model-context-protocol
+    batches:
+    - provider_type: inline::reference
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- asyncpg
+- sqlalchemy[asyncio]
diff --git a/src/llama_stack/distributions/starter/run.yaml b/src/llama_stack/distributions/starter/run.yaml
new file mode 100644
index 000000000..4ca0914af
--- /dev/null
+++ b/src/llama_stack/distributions/starter/run.yaml
@@ -0,0 +1,278 @@
+version: 2
+image_name: starter
+apis:
+- agents
+- batches
+- datasetio
+- eval
+- files
+- inference
+- post_training
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
+    provider_type: remote::cerebras
+    config:
+      base_url: https://api.cerebras.ai
+      api_key: ${env.CEREBRAS_API_KEY:=}
+  - provider_id: ${env.OLLAMA_URL:+ollama}
+    provider_type: remote::ollama
+    config:
+      url: ${env.OLLAMA_URL:=http://localhost:11434}
+  - provider_id: ${env.VLLM_URL:+vllm}
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL:=}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: ${env.TGI_URL:+tgi}
+    provider_type: remote::tgi
+    config:
+      url: ${env.TGI_URL:=}
+  - provider_id: fireworks
+    provider_type: remote::fireworks
+    config:
+      url: https://api.fireworks.ai/inference/v1
+      api_key: ${env.FIREWORKS_API_KEY:=}
+  - provider_id: together
+    provider_type: remote::together
+    config:
+      url: https://api.together.xyz/v1
+      api_key: ${env.TOGETHER_API_KEY:=}
+  - provider_id: bedrock
+    provider_type: remote::bedrock
+  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
+    provider_type: remote::nvidia
+    config:
+      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      api_key: ${env.NVIDIA_API_KEY:=}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY:=}
+      base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
+  - provider_id: anthropic
+    provider_type: remote::anthropic
+    config:
+      api_key: ${env.ANTHROPIC_API_KEY:=}
+  - provider_id: gemini
+    provider_type: remote::gemini
+    config:
+      api_key: ${env.GEMINI_API_KEY:=}
+  - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
+    provider_type: remote::vertexai
+    config:
+      project: ${env.VERTEX_AI_PROJECT:=}
+      location: ${env.VERTEX_AI_LOCATION:=us-central1}
+  - provider_id: groq
+    provider_type: remote::groq
+    config:
+      url: https://api.groq.com
+      api_key: ${env.GROQ_API_KEY:=}
+  - provider_id: sambanova
+    provider_type: remote::sambanova
+    config:
+      url: https://api.sambanova.ai/v1
+      api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
+    config:
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
+      persistence:
+        namespace: vector_io::sqlite_vec
+        backend: kv_default
+  - provider_id: ${env.MILVUS_URL:+milvus}
+    provider_type: inline::milvus
+    config:
+      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
+      persistence:
+        namespace: vector_io::milvus
+        backend: kv_default
+  - provider_id: ${env.CHROMADB_URL:+chromadb}
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  - provider_id: ${env.PGVECTOR_DB:+pgvector}
+    provider_type: remote::pgvector
+    config:
+      host: ${env.PGVECTOR_HOST:=localhost}
+      port: ${env.PGVECTOR_PORT:=5432}
+      db: ${env.PGVECTOR_DB:=}
+      user: ${env.PGVECTOR_USER:=}
+      password: ${env.PGVECTOR_PASSWORD:=}
+      persistence:
+        namespace: vector_io::pgvector
+        backend: kv_default
+  - provider_id: ${env.QDRANT_URL:+qdrant}
+    provider_type: remote::qdrant
+    config:
+      api_key: ${env.QDRANT_API_KEY:=}
+      persistence:
+        namespace: vector_io::qdrant_remote
+        backend: kv_default
+  - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
+    provider_type: remote::weaviate
+    config:
+      weaviate_api_key: null
+      weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
+      persistence:
+        namespace: vector_io::weaviate
+        backend: kv_default
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  - provider_id: code-scanner
+    provider_type: inline::code-scanner
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  post_training:
+  - provider_id: torchtune-cpu
+    provider_type: inline::torchtune-cpu
+    config:
+      checkpoint_format: meta
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+  batches:
+  - provider_id: reference
+    provider_type: inline::reference
+    config:
+      kvstore:
+        namespace: batches
+        backend: kv_default
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models: []
+  shields:
+  - shield_id: llama-guard
+    provider_id: ${env.SAFETY_MODEL:+llama-guard}
+    provider_shield_id: ${env.SAFETY_MODEL:=}
+  - shield_id: code-scanner
+    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
+    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
+safety:
+  default_shield_id: llama-guard
diff --git a/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py
similarity index 100%
rename from llama_stack/distributions/starter/starter.py
rename to src/llama_stack/distributions/starter/starter.py
diff --git a/llama_stack/distributions/template.py b/src/llama_stack/distributions/template.py
similarity index 100%
rename from llama_stack/distributions/template.py
rename to src/llama_stack/distributions/template.py
diff --git a/llama_stack/distributions/watsonx/__init__.py b/src/llama_stack/distributions/watsonx/__init__.py
similarity index 100%
rename from llama_stack/distributions/watsonx/__init__.py
rename to src/llama_stack/distributions/watsonx/__init__.py
diff --git a/src/llama_stack/distributions/watsonx/build.yaml b/src/llama_stack/distributions/watsonx/build.yaml
new file mode 100644
index 000000000..dba1a94e2
--- /dev/null
+++ b/src/llama_stack/distributions/watsonx/build.yaml
@@ -0,0 +1,33 @@
+version: 2
+distribution_spec:
+  description: Use watsonx for running LLM inference
+  providers:
+    inference:
+    - provider_type: remote::watsonx
+    - provider_type: inline::sentence-transformers
+    vector_io:
+    - provider_type: inline::faiss
+    safety:
+    - provider_type: inline::llama-guard
+    agents:
+    - provider_type: inline::meta-reference
+    eval:
+    - provider_type: inline::meta-reference
+    datasetio:
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
+    scoring:
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+    - provider_type: remote::model-context-protocol
+    files:
+    - provider_type: inline::localfs
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- sqlalchemy[asyncio]
diff --git a/src/llama_stack/distributions/watsonx/run.yaml b/src/llama_stack/distributions/watsonx/run.yaml
new file mode 100644
index 000000000..ca3c8402d
--- /dev/null
+++ b/src/llama_stack/distributions/watsonx/run.yaml
@@ -0,0 +1,133 @@
+version: 2
+image_name: watsonx
+apis:
+- agents
+- datasetio
+- eval
+- files
+- inference
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: watsonx
+    provider_type: remote::watsonx
+    config:
+      url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
+      api_key: ${env.WATSONX_API_KEY:=}
+      project_id: ${env.WATSONX_PROJECT_ID:=}
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/watsonx/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models: []
+  shields: []
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/llama_stack/distributions/watsonx/watsonx.py b/src/llama_stack/distributions/watsonx/watsonx.py
similarity index 100%
rename from llama_stack/distributions/watsonx/watsonx.py
rename to src/llama_stack/distributions/watsonx/watsonx.py
diff --git a/llama_stack/env.py b/src/llama_stack/env.py
similarity index 100%
rename from llama_stack/env.py
rename to src/llama_stack/env.py
diff --git a/llama_stack/log.py b/src/llama_stack/log.py
similarity index 100%
rename from llama_stack/log.py
rename to src/llama_stack/log.py
diff --git a/llama_stack/models/__init__.py b/src/llama_stack/models/__init__.py
similarity index 100%
rename from llama_stack/models/__init__.py
rename to src/llama_stack/models/__init__.py
diff --git a/llama_stack/models/llama/__init__.py b/src/llama_stack/models/llama/__init__.py
similarity index 100%
rename from llama_stack/models/llama/__init__.py
rename to src/llama_stack/models/llama/__init__.py
diff --git a/llama_stack/models/llama/checkpoint.py b/src/llama_stack/models/llama/checkpoint.py
similarity index 100%
rename from llama_stack/models/llama/checkpoint.py
rename to src/llama_stack/models/llama/checkpoint.py
diff --git a/llama_stack/models/llama/datatypes.py b/src/llama_stack/models/llama/datatypes.py
similarity index 100%
rename from llama_stack/models/llama/datatypes.py
rename to src/llama_stack/models/llama/datatypes.py
diff --git a/llama_stack/models/llama/hadamard_utils.py b/src/llama_stack/models/llama/hadamard_utils.py
similarity index 100%
rename from llama_stack/models/llama/hadamard_utils.py
rename to src/llama_stack/models/llama/hadamard_utils.py
diff --git a/llama_stack/models/llama/llama3/__init__.py b/src/llama_stack/models/llama/llama3/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama3/__init__.py
rename to src/llama_stack/models/llama/llama3/__init__.py
diff --git a/llama_stack/models/llama/llama3/args.py b/src/llama_stack/models/llama/llama3/args.py
similarity index 100%
rename from llama_stack/models/llama/llama3/args.py
rename to src/llama_stack/models/llama/llama3/args.py
diff --git a/llama_stack/models/llama/llama3/chat_format.py b/src/llama_stack/models/llama/llama3/chat_format.py
similarity index 100%
rename from llama_stack/models/llama/llama3/chat_format.py
rename to src/llama_stack/models/llama/llama3/chat_format.py
diff --git a/llama_stack/models/llama/llama3/dog.jpg b/src/llama_stack/models/llama/llama3/dog.jpg
similarity index 100%
rename from llama_stack/models/llama/llama3/dog.jpg
rename to src/llama_stack/models/llama/llama3/dog.jpg
diff --git a/llama_stack/models/llama/llama3/generation.py b/src/llama_stack/models/llama/llama3/generation.py
similarity index 100%
rename from llama_stack/models/llama/llama3/generation.py
rename to src/llama_stack/models/llama/llama3/generation.py
diff --git a/llama_stack/models/llama/llama3/interface.py b/src/llama_stack/models/llama/llama3/interface.py
similarity index 100%
rename from llama_stack/models/llama/llama3/interface.py
rename to src/llama_stack/models/llama/llama3/interface.py
diff --git a/llama_stack/models/llama/llama3/model.py b/src/llama_stack/models/llama/llama3/model.py
similarity index 100%
rename from llama_stack/models/llama/llama3/model.py
rename to src/llama_stack/models/llama/llama3/model.py
diff --git a/llama_stack/models/llama/llama3/multimodal/__init__.py b/src/llama_stack/models/llama/llama3/multimodal/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama3/multimodal/__init__.py
rename to src/llama_stack/models/llama/llama3/multimodal/__init__.py
diff --git a/llama_stack/models/llama/llama3/multimodal/encoder_utils.py b/src/llama_stack/models/llama/llama3/multimodal/encoder_utils.py
similarity index 100%
rename from llama_stack/models/llama/llama3/multimodal/encoder_utils.py
rename to src/llama_stack/models/llama/llama3/multimodal/encoder_utils.py
diff --git a/llama_stack/models/llama/llama3/multimodal/image_transform.py b/src/llama_stack/models/llama/llama3/multimodal/image_transform.py
similarity index 100%
rename from llama_stack/models/llama/llama3/multimodal/image_transform.py
rename to src/llama_stack/models/llama/llama3/multimodal/image_transform.py
diff --git a/llama_stack/models/llama/llama3/multimodal/model.py b/src/llama_stack/models/llama/llama3/multimodal/model.py
similarity index 100%
rename from llama_stack/models/llama/llama3/multimodal/model.py
rename to src/llama_stack/models/llama/llama3/multimodal/model.py
diff --git a/llama_stack/models/llama/llama3/multimodal/utils.py b/src/llama_stack/models/llama/llama3/multimodal/utils.py
similarity index 100%
rename from llama_stack/models/llama/llama3/multimodal/utils.py
rename to src/llama_stack/models/llama/llama3/multimodal/utils.py
diff --git a/llama_stack/models/llama/llama3/pasta.jpeg b/src/llama_stack/models/llama/llama3/pasta.jpeg
similarity index 100%
rename from llama_stack/models/llama/llama3/pasta.jpeg
rename to src/llama_stack/models/llama/llama3/pasta.jpeg
diff --git a/llama_stack/models/llama/llama3/prompt_templates/__init__.py b/src/llama_stack/models/llama/llama3/prompt_templates/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama3/prompt_templates/__init__.py
rename to src/llama_stack/models/llama/llama3/prompt_templates/__init__.py
diff --git a/llama_stack/models/llama/llama3/prompt_templates/base.py b/src/llama_stack/models/llama/llama3/prompt_templates/base.py
similarity index 100%
rename from llama_stack/models/llama/llama3/prompt_templates/base.py
rename to src/llama_stack/models/llama/llama3/prompt_templates/base.py
diff --git a/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py b/src/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py
similarity index 100%
rename from llama_stack/models/llama/llama3/prompt_templates/system_prompts.py
rename to src/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py
diff --git a/llama_stack/models/llama/llama3/prompt_templates/tool_response.py b/src/llama_stack/models/llama/llama3/prompt_templates/tool_response.py
similarity index 100%
rename from llama_stack/models/llama/llama3/prompt_templates/tool_response.py
rename to src/llama_stack/models/llama/llama3/prompt_templates/tool_response.py
diff --git a/llama_stack/models/llama/llama3/quantization/__init__.py b/src/llama_stack/models/llama/llama3/quantization/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama3/quantization/__init__.py
rename to src/llama_stack/models/llama/llama3/quantization/__init__.py
diff --git a/llama_stack/models/llama/llama3/quantization/loader.py b/src/llama_stack/models/llama/llama3/quantization/loader.py
similarity index 100%
rename from llama_stack/models/llama/llama3/quantization/loader.py
rename to src/llama_stack/models/llama/llama3/quantization/loader.py
diff --git a/llama_stack/models/llama/llama3/template_data.py b/src/llama_stack/models/llama/llama3/template_data.py
similarity index 100%
rename from llama_stack/models/llama/llama3/template_data.py
rename to src/llama_stack/models/llama/llama3/template_data.py
diff --git a/llama_stack/models/llama/llama3/tokenizer.model b/src/llama_stack/models/llama/llama3/tokenizer.model
similarity index 100%
rename from llama_stack/models/llama/llama3/tokenizer.model
rename to src/llama_stack/models/llama/llama3/tokenizer.model
diff --git a/llama_stack/models/llama/llama3/tokenizer.py b/src/llama_stack/models/llama/llama3/tokenizer.py
similarity index 100%
rename from llama_stack/models/llama/llama3/tokenizer.py
rename to src/llama_stack/models/llama/llama3/tokenizer.py
diff --git a/llama_stack/models/llama/llama3/tool_utils.py b/src/llama_stack/models/llama/llama3/tool_utils.py
similarity index 100%
rename from llama_stack/models/llama/llama3/tool_utils.py
rename to src/llama_stack/models/llama/llama3/tool_utils.py
diff --git a/llama_stack/models/llama/llama3_1/__init__.py b/src/llama_stack/models/llama/llama3_1/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama3_1/__init__.py
rename to src/llama_stack/models/llama/llama3_1/__init__.py
diff --git a/llama_stack/models/llama/llama3_1/prompt_format.md b/src/llama_stack/models/llama/llama3_1/prompt_format.md
similarity index 100%
rename from llama_stack/models/llama/llama3_1/prompt_format.md
rename to src/llama_stack/models/llama/llama3_1/prompt_format.md
diff --git a/llama_stack/models/llama/llama3_1/prompts.py b/src/llama_stack/models/llama/llama3_1/prompts.py
similarity index 100%
rename from llama_stack/models/llama/llama3_1/prompts.py
rename to src/llama_stack/models/llama/llama3_1/prompts.py
diff --git a/llama_stack/models/llama/llama3_2/__init__.py b/src/llama_stack/models/llama/llama3_2/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama3_2/__init__.py
rename to src/llama_stack/models/llama/llama3_2/__init__.py
diff --git a/llama_stack/models/llama/llama3_2/prompts_text.py b/src/llama_stack/models/llama/llama3_2/prompts_text.py
similarity index 100%
rename from llama_stack/models/llama/llama3_2/prompts_text.py
rename to src/llama_stack/models/llama/llama3_2/prompts_text.py
diff --git a/llama_stack/models/llama/llama3_2/prompts_vision.py b/src/llama_stack/models/llama/llama3_2/prompts_vision.py
similarity index 100%
rename from llama_stack/models/llama/llama3_2/prompts_vision.py
rename to src/llama_stack/models/llama/llama3_2/prompts_vision.py
diff --git a/llama_stack/models/llama/llama3_2/text_prompt_format.md b/src/llama_stack/models/llama/llama3_2/text_prompt_format.md
similarity index 100%
rename from llama_stack/models/llama/llama3_2/text_prompt_format.md
rename to src/llama_stack/models/llama/llama3_2/text_prompt_format.md
diff --git a/llama_stack/models/llama/llama3_2/vision_prompt_format.md b/src/llama_stack/models/llama/llama3_2/vision_prompt_format.md
similarity index 100%
rename from llama_stack/models/llama/llama3_2/vision_prompt_format.md
rename to src/llama_stack/models/llama/llama3_2/vision_prompt_format.md
diff --git a/llama_stack/models/llama/llama3_3/__init__.py b/src/llama_stack/models/llama/llama3_3/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama3_3/__init__.py
rename to src/llama_stack/models/llama/llama3_3/__init__.py
diff --git a/llama_stack/models/llama/llama3_3/prompts.py b/src/llama_stack/models/llama/llama3_3/prompts.py
similarity index 100%
rename from llama_stack/models/llama/llama3_3/prompts.py
rename to src/llama_stack/models/llama/llama3_3/prompts.py
diff --git a/llama_stack/models/llama/llama4/__init__.py b/src/llama_stack/models/llama/llama4/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama4/__init__.py
rename to src/llama_stack/models/llama/llama4/__init__.py
diff --git a/llama_stack/models/llama/llama4/args.py b/src/llama_stack/models/llama/llama4/args.py
similarity index 100%
rename from llama_stack/models/llama/llama4/args.py
rename to src/llama_stack/models/llama/llama4/args.py
diff --git a/llama_stack/models/llama/llama4/chat_format.py b/src/llama_stack/models/llama/llama4/chat_format.py
similarity index 100%
rename from llama_stack/models/llama/llama4/chat_format.py
rename to src/llama_stack/models/llama/llama4/chat_format.py
diff --git a/llama_stack/models/llama/llama4/datatypes.py b/src/llama_stack/models/llama/llama4/datatypes.py
similarity index 100%
rename from llama_stack/models/llama/llama4/datatypes.py
rename to src/llama_stack/models/llama/llama4/datatypes.py
diff --git a/llama_stack/models/llama/llama4/ffn.py b/src/llama_stack/models/llama/llama4/ffn.py
similarity index 100%
rename from llama_stack/models/llama/llama4/ffn.py
rename to src/llama_stack/models/llama/llama4/ffn.py
diff --git a/llama_stack/models/llama/llama4/generation.py b/src/llama_stack/models/llama/llama4/generation.py
similarity index 100%
rename from llama_stack/models/llama/llama4/generation.py
rename to src/llama_stack/models/llama/llama4/generation.py
diff --git a/llama_stack/models/llama/llama4/model.py b/src/llama_stack/models/llama/llama4/model.py
similarity index 100%
rename from llama_stack/models/llama/llama4/model.py
rename to src/llama_stack/models/llama/llama4/model.py
diff --git a/llama_stack/models/llama/llama4/moe.py b/src/llama_stack/models/llama/llama4/moe.py
similarity index 100%
rename from llama_stack/models/llama/llama4/moe.py
rename to src/llama_stack/models/llama/llama4/moe.py
diff --git a/llama_stack/models/llama/llama4/preprocess.py b/src/llama_stack/models/llama/llama4/preprocess.py
similarity index 100%
rename from llama_stack/models/llama/llama4/preprocess.py
rename to src/llama_stack/models/llama/llama4/preprocess.py
diff --git a/llama_stack/models/llama/llama4/prompt_format.md b/src/llama_stack/models/llama/llama4/prompt_format.md
similarity index 100%
rename from llama_stack/models/llama/llama4/prompt_format.md
rename to src/llama_stack/models/llama/llama4/prompt_format.md
diff --git a/llama_stack/models/llama/llama4/prompt_templates/__init__.py b/src/llama_stack/models/llama/llama4/prompt_templates/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama4/prompt_templates/__init__.py
rename to src/llama_stack/models/llama/llama4/prompt_templates/__init__.py
diff --git a/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py b/src/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py
similarity index 100%
rename from llama_stack/models/llama/llama4/prompt_templates/system_prompts.py
rename to src/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py
diff --git a/llama_stack/models/llama/llama4/prompts.py b/src/llama_stack/models/llama/llama4/prompts.py
similarity index 100%
rename from llama_stack/models/llama/llama4/prompts.py
rename to src/llama_stack/models/llama/llama4/prompts.py
diff --git a/llama_stack/models/llama/llama4/quantization/__init__.py b/src/llama_stack/models/llama/llama4/quantization/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama4/quantization/__init__.py
rename to src/llama_stack/models/llama/llama4/quantization/__init__.py
diff --git a/llama_stack/models/llama/llama4/quantization/loader.py b/src/llama_stack/models/llama/llama4/quantization/loader.py
similarity index 100%
rename from llama_stack/models/llama/llama4/quantization/loader.py
rename to src/llama_stack/models/llama/llama4/quantization/loader.py
diff --git a/llama_stack/models/llama/llama4/tokenizer.model b/src/llama_stack/models/llama/llama4/tokenizer.model
similarity index 100%
rename from llama_stack/models/llama/llama4/tokenizer.model
rename to src/llama_stack/models/llama/llama4/tokenizer.model
diff --git a/llama_stack/models/llama/llama4/tokenizer.py b/src/llama_stack/models/llama/llama4/tokenizer.py
similarity index 100%
rename from llama_stack/models/llama/llama4/tokenizer.py
rename to src/llama_stack/models/llama/llama4/tokenizer.py
diff --git a/llama_stack/models/llama/llama4/vision/__init__.py b/src/llama_stack/models/llama/llama4/vision/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama4/vision/__init__.py
rename to src/llama_stack/models/llama/llama4/vision/__init__.py
diff --git a/llama_stack/models/llama/llama4/vision/embedding.py b/src/llama_stack/models/llama/llama4/vision/embedding.py
similarity index 100%
rename from llama_stack/models/llama/llama4/vision/embedding.py
rename to src/llama_stack/models/llama/llama4/vision/embedding.py
diff --git a/llama_stack/models/llama/llama4/vision/encoder.py b/src/llama_stack/models/llama/llama4/vision/encoder.py
similarity index 100%
rename from llama_stack/models/llama/llama4/vision/encoder.py
rename to src/llama_stack/models/llama/llama4/vision/encoder.py
diff --git a/llama_stack/models/llama/prompt_format.py b/src/llama_stack/models/llama/prompt_format.py
similarity index 100%
rename from llama_stack/models/llama/prompt_format.py
rename to src/llama_stack/models/llama/prompt_format.py
diff --git a/llama_stack/models/llama/quantize_impls.py b/src/llama_stack/models/llama/quantize_impls.py
similarity index 100%
rename from llama_stack/models/llama/quantize_impls.py
rename to src/llama_stack/models/llama/quantize_impls.py
diff --git a/llama_stack/models/llama/resources/dog.jpg b/src/llama_stack/models/llama/resources/dog.jpg
similarity index 100%
rename from llama_stack/models/llama/resources/dog.jpg
rename to src/llama_stack/models/llama/resources/dog.jpg
diff --git a/llama_stack/models/llama/resources/pasta.jpeg b/src/llama_stack/models/llama/resources/pasta.jpeg
similarity index 100%
rename from llama_stack/models/llama/resources/pasta.jpeg
rename to src/llama_stack/models/llama/resources/pasta.jpeg
diff --git a/llama_stack/models/llama/resources/small_dog.jpg b/src/llama_stack/models/llama/resources/small_dog.jpg
similarity index 100%
rename from llama_stack/models/llama/resources/small_dog.jpg
rename to src/llama_stack/models/llama/resources/small_dog.jpg
diff --git a/llama_stack/models/llama/sku_list.py b/src/llama_stack/models/llama/sku_list.py
similarity index 100%
rename from llama_stack/models/llama/sku_list.py
rename to src/llama_stack/models/llama/sku_list.py
diff --git a/llama_stack/models/llama/sku_types.py b/src/llama_stack/models/llama/sku_types.py
similarity index 100%
rename from llama_stack/models/llama/sku_types.py
rename to src/llama_stack/models/llama/sku_types.py
diff --git a/llama_stack/models/llama/tokenizer_utils.py b/src/llama_stack/models/llama/tokenizer_utils.py
similarity index 100%
rename from llama_stack/models/llama/tokenizer_utils.py
rename to src/llama_stack/models/llama/tokenizer_utils.py
diff --git a/llama_stack/providers/__init__.py b/src/llama_stack/providers/__init__.py
similarity index 100%
rename from llama_stack/providers/__init__.py
rename to src/llama_stack/providers/__init__.py
diff --git a/llama_stack/providers/datatypes.py b/src/llama_stack/providers/datatypes.py
similarity index 100%
rename from llama_stack/providers/datatypes.py
rename to src/llama_stack/providers/datatypes.py
diff --git a/llama_stack/providers/inline/__init__.py b/src/llama_stack/providers/inline/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/__init__.py
rename to src/llama_stack/providers/inline/__init__.py
diff --git a/llama_stack/providers/inline/agents/__init__.py b/src/llama_stack/providers/inline/agents/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/agents/__init__.py
rename to src/llama_stack/providers/inline/agents/__init__.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/__init__.py b/src/llama_stack/providers/inline/agents/meta_reference/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/__init__.py
rename to src/llama_stack/providers/inline/agents/meta_reference/__init__.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/src/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/agent_instance.py
rename to src/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/agents.py
rename to src/llama_stack/providers/inline/agents/meta_reference/agents.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/config.py b/src/llama_stack/providers/inline/agents/meta_reference/config.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/config.py
rename to src/llama_stack/providers/inline/agents/meta_reference/config.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/persistence.py b/src/llama_stack/providers/inline/agents/meta_reference/persistence.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/persistence.py
rename to src/llama_stack/providers/inline/agents/meta_reference/persistence.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/__init__.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/responses/__init__.py
rename to src/llama_stack/providers/inline/agents/meta_reference/responses/__init__.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
rename to src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
rename to src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
rename to src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/types.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/responses/types.py
rename to src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/utils.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/responses/utils.py
rename to src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/safety.py b/src/llama_stack/providers/inline/agents/meta_reference/safety.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/safety.py
rename to src/llama_stack/providers/inline/agents/meta_reference/safety.py
diff --git a/llama_stack/providers/inline/batches/__init__.py b/src/llama_stack/providers/inline/batches/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/batches/__init__.py
rename to src/llama_stack/providers/inline/batches/__init__.py
diff --git a/llama_stack/providers/inline/batches/reference/__init__.py b/src/llama_stack/providers/inline/batches/reference/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/batches/reference/__init__.py
rename to src/llama_stack/providers/inline/batches/reference/__init__.py
diff --git a/llama_stack/providers/inline/batches/reference/batches.py b/src/llama_stack/providers/inline/batches/reference/batches.py
similarity index 100%
rename from llama_stack/providers/inline/batches/reference/batches.py
rename to src/llama_stack/providers/inline/batches/reference/batches.py
diff --git a/llama_stack/providers/inline/batches/reference/config.py b/src/llama_stack/providers/inline/batches/reference/config.py
similarity index 100%
rename from llama_stack/providers/inline/batches/reference/config.py
rename to src/llama_stack/providers/inline/batches/reference/config.py
diff --git a/llama_stack/providers/inline/datasetio/__init__.py b/src/llama_stack/providers/inline/datasetio/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/datasetio/__init__.py
rename to src/llama_stack/providers/inline/datasetio/__init__.py
diff --git a/llama_stack/providers/inline/datasetio/localfs/__init__.py b/src/llama_stack/providers/inline/datasetio/localfs/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/datasetio/localfs/__init__.py
rename to src/llama_stack/providers/inline/datasetio/localfs/__init__.py
diff --git a/llama_stack/providers/inline/datasetio/localfs/config.py b/src/llama_stack/providers/inline/datasetio/localfs/config.py
similarity index 100%
rename from llama_stack/providers/inline/datasetio/localfs/config.py
rename to src/llama_stack/providers/inline/datasetio/localfs/config.py
diff --git a/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
similarity index 100%
rename from llama_stack/providers/inline/datasetio/localfs/datasetio.py
rename to src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
diff --git a/llama_stack/providers/inline/eval/__init__.py b/src/llama_stack/providers/inline/eval/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/eval/__init__.py
rename to src/llama_stack/providers/inline/eval/__init__.py
diff --git a/llama_stack/providers/inline/eval/meta_reference/__init__.py b/src/llama_stack/providers/inline/eval/meta_reference/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/eval/meta_reference/__init__.py
rename to src/llama_stack/providers/inline/eval/meta_reference/__init__.py
diff --git a/llama_stack/providers/inline/eval/meta_reference/config.py b/src/llama_stack/providers/inline/eval/meta_reference/config.py
similarity index 100%
rename from llama_stack/providers/inline/eval/meta_reference/config.py
rename to src/llama_stack/providers/inline/eval/meta_reference/config.py
diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/src/llama_stack/providers/inline/eval/meta_reference/eval.py
similarity index 100%
rename from llama_stack/providers/inline/eval/meta_reference/eval.py
rename to src/llama_stack/providers/inline/eval/meta_reference/eval.py
diff --git a/llama_stack/providers/inline/files/localfs/__init__.py b/src/llama_stack/providers/inline/files/localfs/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/files/localfs/__init__.py
rename to src/llama_stack/providers/inline/files/localfs/__init__.py
diff --git a/llama_stack/providers/inline/files/localfs/config.py b/src/llama_stack/providers/inline/files/localfs/config.py
similarity index 100%
rename from llama_stack/providers/inline/files/localfs/config.py
rename to src/llama_stack/providers/inline/files/localfs/config.py
diff --git a/llama_stack/providers/inline/files/localfs/files.py b/src/llama_stack/providers/inline/files/localfs/files.py
similarity index 100%
rename from llama_stack/providers/inline/files/localfs/files.py
rename to src/llama_stack/providers/inline/files/localfs/files.py
diff --git a/llama_stack/providers/inline/inference/__init__.py b/src/llama_stack/providers/inline/inference/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/inference/__init__.py
rename to src/llama_stack/providers/inline/inference/__init__.py
diff --git a/llama_stack/providers/inline/inference/meta_reference/__init__.py b/src/llama_stack/providers/inline/inference/meta_reference/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/inference/meta_reference/__init__.py
rename to src/llama_stack/providers/inline/inference/meta_reference/__init__.py
diff --git a/llama_stack/providers/inline/inference/meta_reference/common.py b/src/llama_stack/providers/inline/inference/meta_reference/common.py
similarity index 100%
rename from llama_stack/providers/inline/inference/meta_reference/common.py
rename to src/llama_stack/providers/inline/inference/meta_reference/common.py
diff --git a/llama_stack/providers/inline/inference/meta_reference/config.py b/src/llama_stack/providers/inline/inference/meta_reference/config.py
similarity index 100%
rename from llama_stack/providers/inline/inference/meta_reference/config.py
rename to src/llama_stack/providers/inline/inference/meta_reference/config.py
diff --git a/llama_stack/providers/inline/inference/meta_reference/generators.py b/src/llama_stack/providers/inline/inference/meta_reference/generators.py
similarity index 100%
rename from llama_stack/providers/inline/inference/meta_reference/generators.py
rename to src/llama_stack/providers/inline/inference/meta_reference/generators.py
diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/src/llama_stack/providers/inline/inference/meta_reference/inference.py
similarity index 100%
rename from llama_stack/providers/inline/inference/meta_reference/inference.py
rename to src/llama_stack/providers/inline/inference/meta_reference/inference.py
diff --git a/llama_stack/providers/inline/inference/meta_reference/model_parallel.py b/src/llama_stack/providers/inline/inference/meta_reference/model_parallel.py
similarity index 100%
rename from llama_stack/providers/inline/inference/meta_reference/model_parallel.py
rename to src/llama_stack/providers/inline/inference/meta_reference/model_parallel.py
diff --git a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py b/src/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py
similarity index 100%
rename from llama_stack/providers/inline/inference/meta_reference/parallel_utils.py
rename to src/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py
diff --git a/llama_stack/providers/inline/inference/sentence_transformers/__init__.py b/src/llama_stack/providers/inline/inference/sentence_transformers/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/inference/sentence_transformers/__init__.py
rename to src/llama_stack/providers/inline/inference/sentence_transformers/__init__.py
diff --git a/llama_stack/providers/inline/inference/sentence_transformers/config.py b/src/llama_stack/providers/inline/inference/sentence_transformers/config.py
similarity index 100%
rename from llama_stack/providers/inline/inference/sentence_transformers/config.py
rename to src/llama_stack/providers/inline/inference/sentence_transformers/config.py
diff --git a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
similarity index 100%
rename from llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
rename to src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj
similarity index 100%
rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj
rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj
diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata
similarity index 100%
rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata
rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata
diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
similarity index 100%
rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h
similarity index 100%
rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h
rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h
diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift
similarity index 100%
rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift
rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift
diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift
similarity index 100%
rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift
rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift
diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift
similarity index 100%
rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift
rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift
diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift
similarity index 100%
rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift
rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift
diff --git a/llama_stack/providers/inline/post_training/__init__.py b/src/llama_stack/providers/inline/post_training/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/__init__.py
rename to src/llama_stack/providers/inline/post_training/__init__.py
diff --git a/llama_stack/providers/inline/post_training/common/__init__.py b/src/llama_stack/providers/inline/post_training/common/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/common/__init__.py
rename to src/llama_stack/providers/inline/post_training/common/__init__.py
diff --git a/llama_stack/providers/inline/post_training/common/utils.py b/src/llama_stack/providers/inline/post_training/common/utils.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/common/utils.py
rename to src/llama_stack/providers/inline/post_training/common/utils.py
diff --git a/llama_stack/providers/inline/post_training/common/validator.py b/src/llama_stack/providers/inline/post_training/common/validator.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/common/validator.py
rename to src/llama_stack/providers/inline/post_training/common/validator.py
diff --git a/llama_stack/providers/inline/post_training/huggingface/__init__.py b/src/llama_stack/providers/inline/post_training/huggingface/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/huggingface/__init__.py
rename to src/llama_stack/providers/inline/post_training/huggingface/__init__.py
diff --git a/llama_stack/providers/inline/post_training/huggingface/config.py b/src/llama_stack/providers/inline/post_training/huggingface/config.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/huggingface/config.py
rename to src/llama_stack/providers/inline/post_training/huggingface/config.py
diff --git a/llama_stack/providers/inline/post_training/huggingface/post_training.py b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/huggingface/post_training.py
rename to src/llama_stack/providers/inline/post_training/huggingface/post_training.py
diff --git a/llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py
rename to src/llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py
diff --git a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
rename to src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
diff --git a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
rename to src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
diff --git a/llama_stack/providers/inline/post_training/huggingface/utils.py b/src/llama_stack/providers/inline/post_training/huggingface/utils.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/huggingface/utils.py
rename to src/llama_stack/providers/inline/post_training/huggingface/utils.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/__init__.py b/src/llama_stack/providers/inline/post_training/torchtune/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/__init__.py
rename to src/llama_stack/providers/inline/post_training/torchtune/__init__.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/common/__init__.py b/src/llama_stack/providers/inline/post_training/torchtune/common/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/common/__init__.py
rename to src/llama_stack/providers/inline/post_training/torchtune/common/__init__.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py b/src/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py
rename to src/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/common/utils.py b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/common/utils.py
rename to src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/config.py b/src/llama_stack/providers/inline/post_training/torchtune/config.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/config.py
rename to src/llama_stack/providers/inline/post_training/torchtune/config.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py b/src/llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py
rename to src/llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py b/src/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
rename to src/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/datasets/sft.py b/src/llama_stack/providers/inline/post_training/torchtune/datasets/sft.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/datasets/sft.py
rename to src/llama_stack/providers/inline/post_training/torchtune/datasets/sft.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/post_training.py b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/post_training.py
rename to src/llama_stack/providers/inline/post_training/torchtune/post_training.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py b/src/llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py
rename to src/llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
rename to src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
diff --git a/llama_stack/providers/inline/safety/__init__.py b/src/llama_stack/providers/inline/safety/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/safety/__init__.py
rename to src/llama_stack/providers/inline/safety/__init__.py
diff --git a/llama_stack/providers/inline/safety/code_scanner/__init__.py b/src/llama_stack/providers/inline/safety/code_scanner/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/safety/code_scanner/__init__.py
rename to src/llama_stack/providers/inline/safety/code_scanner/__init__.py
diff --git a/llama_stack/providers/inline/safety/code_scanner/code_scanner.py b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
similarity index 100%
rename from llama_stack/providers/inline/safety/code_scanner/code_scanner.py
rename to src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
diff --git a/llama_stack/providers/inline/safety/code_scanner/config.py b/src/llama_stack/providers/inline/safety/code_scanner/config.py
similarity index 100%
rename from llama_stack/providers/inline/safety/code_scanner/config.py
rename to src/llama_stack/providers/inline/safety/code_scanner/config.py
diff --git a/llama_stack/providers/inline/safety/llama_guard/__init__.py b/src/llama_stack/providers/inline/safety/llama_guard/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/safety/llama_guard/__init__.py
rename to src/llama_stack/providers/inline/safety/llama_guard/__init__.py
diff --git a/llama_stack/providers/inline/safety/llama_guard/config.py b/src/llama_stack/providers/inline/safety/llama_guard/config.py
similarity index 100%
rename from llama_stack/providers/inline/safety/llama_guard/config.py
rename to src/llama_stack/providers/inline/safety/llama_guard/config.py
diff --git a/llama_stack/providers/inline/safety/llama_guard/llama_guard.py b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
similarity index 100%
rename from llama_stack/providers/inline/safety/llama_guard/llama_guard.py
rename to src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
diff --git a/llama_stack/providers/inline/safety/prompt_guard/__init__.py b/src/llama_stack/providers/inline/safety/prompt_guard/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/safety/prompt_guard/__init__.py
rename to src/llama_stack/providers/inline/safety/prompt_guard/__init__.py
diff --git a/llama_stack/providers/inline/safety/prompt_guard/config.py b/src/llama_stack/providers/inline/safety/prompt_guard/config.py
similarity index 100%
rename from llama_stack/providers/inline/safety/prompt_guard/config.py
rename to src/llama_stack/providers/inline/safety/prompt_guard/config.py
diff --git a/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
similarity index 100%
rename from llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
rename to src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
diff --git a/llama_stack/providers/inline/scoring/__init__.py b/src/llama_stack/providers/inline/scoring/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/__init__.py
rename to src/llama_stack/providers/inline/scoring/__init__.py
diff --git a/llama_stack/providers/inline/scoring/basic/__init__.py b/src/llama_stack/providers/inline/scoring/basic/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/__init__.py
rename to src/llama_stack/providers/inline/scoring/basic/__init__.py
diff --git a/llama_stack/providers/inline/scoring/basic/config.py b/src/llama_stack/providers/inline/scoring/basic/config.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/config.py
rename to src/llama_stack/providers/inline/scoring/basic/config.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring.py b/src/llama_stack/providers/inline/scoring/basic/scoring.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
diff --git a/llama_stack/providers/inline/scoring/basic/utils/__init__.py b/src/llama_stack/providers/inline/scoring/basic/utils/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/utils/__init__.py
rename to src/llama_stack/providers/inline/scoring/basic/utils/__init__.py
diff --git a/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py b/src/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py
rename to src/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py
diff --git a/llama_stack/providers/inline/scoring/basic/utils/math_utils.py b/src/llama_stack/providers/inline/scoring/basic/utils/math_utils.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/utils/math_utils.py
rename to src/llama_stack/providers/inline/scoring/basic/utils/math_utils.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/__init__.py b/src/llama_stack/providers/inline/scoring/braintrust/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/__init__.py
rename to src/llama_stack/providers/inline/scoring/braintrust/__init__.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/braintrust.py b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/braintrust.py
rename to src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/config.py b/src/llama_stack/providers/inline/scoring/braintrust/config.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/config.py
rename to src/llama_stack/providers/inline/scoring/braintrust/config.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/llm_as_judge/__init__.py
rename to src/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py
diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/config.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/config.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/llm_as_judge/config.py
rename to src/llama_stack/providers/inline/scoring/llm_as_judge/config.py
diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
rename to src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py
rename to src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py
diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py
rename to src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py
diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
rename to src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
rename to src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
rename to src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
diff --git a/llama_stack/providers/inline/tool_runtime/__init__.py b/src/llama_stack/providers/inline/tool_runtime/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/tool_runtime/__init__.py
rename to src/llama_stack/providers/inline/tool_runtime/__init__.py
diff --git a/llama_stack/providers/inline/tool_runtime/rag/__init__.py b/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/tool_runtime/rag/__init__.py
rename to src/llama_stack/providers/inline/tool_runtime/rag/__init__.py
diff --git a/llama_stack/providers/inline/tool_runtime/rag/config.py b/src/llama_stack/providers/inline/tool_runtime/rag/config.py
similarity index 100%
rename from llama_stack/providers/inline/tool_runtime/rag/config.py
rename to src/llama_stack/providers/inline/tool_runtime/rag/config.py
diff --git a/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
similarity index 100%
rename from llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
rename to src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
similarity index 100%
rename from llama_stack/providers/inline/tool_runtime/rag/memory.py
rename to src/llama_stack/providers/inline/tool_runtime/rag/memory.py
diff --git a/llama_stack/providers/inline/vector_io/__init__.py b/src/llama_stack/providers/inline/vector_io/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/__init__.py
rename to src/llama_stack/providers/inline/vector_io/__init__.py
diff --git a/llama_stack/providers/inline/vector_io/chroma/__init__.py b/src/llama_stack/providers/inline/vector_io/chroma/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/chroma/__init__.py
rename to src/llama_stack/providers/inline/vector_io/chroma/__init__.py
diff --git a/llama_stack/providers/inline/vector_io/chroma/config.py b/src/llama_stack/providers/inline/vector_io/chroma/config.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/chroma/config.py
rename to src/llama_stack/providers/inline/vector_io/chroma/config.py
diff --git a/llama_stack/providers/inline/vector_io/faiss/__init__.py b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/faiss/__init__.py
rename to src/llama_stack/providers/inline/vector_io/faiss/__init__.py
diff --git a/llama_stack/providers/inline/vector_io/faiss/config.py b/src/llama_stack/providers/inline/vector_io/faiss/config.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/faiss/config.py
rename to src/llama_stack/providers/inline/vector_io/faiss/config.py
diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/faiss/faiss.py
rename to src/llama_stack/providers/inline/vector_io/faiss/faiss.py
diff --git a/llama_stack/providers/inline/vector_io/milvus/__init__.py b/src/llama_stack/providers/inline/vector_io/milvus/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/milvus/__init__.py
rename to src/llama_stack/providers/inline/vector_io/milvus/__init__.py
diff --git a/llama_stack/providers/inline/vector_io/milvus/config.py b/src/llama_stack/providers/inline/vector_io/milvus/config.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/milvus/config.py
rename to src/llama_stack/providers/inline/vector_io/milvus/config.py
diff --git a/llama_stack/providers/inline/vector_io/qdrant/__init__.py b/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/qdrant/__init__.py
rename to src/llama_stack/providers/inline/vector_io/qdrant/__init__.py
diff --git a/llama_stack/providers/inline/vector_io/qdrant/config.py b/src/llama_stack/providers/inline/vector_io/qdrant/config.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/qdrant/config.py
rename to src/llama_stack/providers/inline/vector_io/qdrant/config.py
diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
rename to src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/config.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/sqlite_vec/config.py
rename to src/llama_stack/providers/inline/vector_io/sqlite_vec/config.py
diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
rename to src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
diff --git a/llama_stack/providers/registry/__init__.py b/src/llama_stack/providers/registry/__init__.py
similarity index 100%
rename from llama_stack/providers/registry/__init__.py
rename to src/llama_stack/providers/registry/__init__.py
diff --git a/llama_stack/providers/registry/agents.py b/src/llama_stack/providers/registry/agents.py
similarity index 100%
rename from llama_stack/providers/registry/agents.py
rename to src/llama_stack/providers/registry/agents.py
diff --git a/llama_stack/providers/registry/batches.py b/src/llama_stack/providers/registry/batches.py
similarity index 100%
rename from llama_stack/providers/registry/batches.py
rename to src/llama_stack/providers/registry/batches.py
diff --git a/llama_stack/providers/registry/datasetio.py b/src/llama_stack/providers/registry/datasetio.py
similarity index 100%
rename from llama_stack/providers/registry/datasetio.py
rename to src/llama_stack/providers/registry/datasetio.py
diff --git a/llama_stack/providers/registry/eval.py b/src/llama_stack/providers/registry/eval.py
similarity index 100%
rename from llama_stack/providers/registry/eval.py
rename to src/llama_stack/providers/registry/eval.py
diff --git a/llama_stack/providers/registry/files.py b/src/llama_stack/providers/registry/files.py
similarity index 100%
rename from llama_stack/providers/registry/files.py
rename to src/llama_stack/providers/registry/files.py
diff --git a/llama_stack/providers/registry/inference.py b/src/llama_stack/providers/registry/inference.py
similarity index 100%
rename from llama_stack/providers/registry/inference.py
rename to src/llama_stack/providers/registry/inference.py
diff --git a/llama_stack/providers/registry/post_training.py b/src/llama_stack/providers/registry/post_training.py
similarity index 100%
rename from llama_stack/providers/registry/post_training.py
rename to src/llama_stack/providers/registry/post_training.py
diff --git a/llama_stack/providers/registry/safety.py b/src/llama_stack/providers/registry/safety.py
similarity index 100%
rename from llama_stack/providers/registry/safety.py
rename to src/llama_stack/providers/registry/safety.py
diff --git a/llama_stack/providers/registry/scoring.py b/src/llama_stack/providers/registry/scoring.py
similarity index 100%
rename from llama_stack/providers/registry/scoring.py
rename to src/llama_stack/providers/registry/scoring.py
diff --git a/llama_stack/providers/registry/tool_runtime.py b/src/llama_stack/providers/registry/tool_runtime.py
similarity index 100%
rename from llama_stack/providers/registry/tool_runtime.py
rename to src/llama_stack/providers/registry/tool_runtime.py
diff --git a/llama_stack/providers/registry/vector_io.py b/src/llama_stack/providers/registry/vector_io.py
similarity index 100%
rename from llama_stack/providers/registry/vector_io.py
rename to src/llama_stack/providers/registry/vector_io.py
diff --git a/llama_stack/providers/remote/__init__.py b/src/llama_stack/providers/remote/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/__init__.py
rename to src/llama_stack/providers/remote/__init__.py
diff --git a/llama_stack/providers/remote/agents/__init__.py b/src/llama_stack/providers/remote/agents/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/agents/__init__.py
rename to src/llama_stack/providers/remote/agents/__init__.py
diff --git a/llama_stack/providers/remote/datasetio/__init__.py b/src/llama_stack/providers/remote/datasetio/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/datasetio/__init__.py
rename to src/llama_stack/providers/remote/datasetio/__init__.py
diff --git a/llama_stack/providers/remote/datasetio/huggingface/__init__.py b/src/llama_stack/providers/remote/datasetio/huggingface/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/datasetio/huggingface/__init__.py
rename to src/llama_stack/providers/remote/datasetio/huggingface/__init__.py
diff --git a/llama_stack/providers/remote/datasetio/huggingface/config.py b/src/llama_stack/providers/remote/datasetio/huggingface/config.py
similarity index 100%
rename from llama_stack/providers/remote/datasetio/huggingface/config.py
rename to src/llama_stack/providers/remote/datasetio/huggingface/config.py
diff --git a/llama_stack/providers/remote/datasetio/huggingface/huggingface.py b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
similarity index 100%
rename from llama_stack/providers/remote/datasetio/huggingface/huggingface.py
rename to src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
diff --git a/llama_stack/providers/remote/datasetio/nvidia/README.md b/src/llama_stack/providers/remote/datasetio/nvidia/README.md
similarity index 100%
rename from llama_stack/providers/remote/datasetio/nvidia/README.md
rename to src/llama_stack/providers/remote/datasetio/nvidia/README.md
diff --git a/llama_stack/providers/remote/datasetio/nvidia/__init__.py b/src/llama_stack/providers/remote/datasetio/nvidia/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/datasetio/nvidia/__init__.py
rename to src/llama_stack/providers/remote/datasetio/nvidia/__init__.py
diff --git a/llama_stack/providers/remote/datasetio/nvidia/config.py b/src/llama_stack/providers/remote/datasetio/nvidia/config.py
similarity index 100%
rename from llama_stack/providers/remote/datasetio/nvidia/config.py
rename to src/llama_stack/providers/remote/datasetio/nvidia/config.py
diff --git a/llama_stack/providers/remote/datasetio/nvidia/datasetio.py b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
similarity index 100%
rename from llama_stack/providers/remote/datasetio/nvidia/datasetio.py
rename to src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
diff --git a/llama_stack/providers/remote/eval/__init__.py b/src/llama_stack/providers/remote/eval/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/eval/__init__.py
rename to src/llama_stack/providers/remote/eval/__init__.py
diff --git a/llama_stack/providers/remote/eval/nvidia/README.md b/src/llama_stack/providers/remote/eval/nvidia/README.md
similarity index 100%
rename from llama_stack/providers/remote/eval/nvidia/README.md
rename to src/llama_stack/providers/remote/eval/nvidia/README.md
diff --git a/llama_stack/providers/remote/eval/nvidia/__init__.py b/src/llama_stack/providers/remote/eval/nvidia/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/eval/nvidia/__init__.py
rename to src/llama_stack/providers/remote/eval/nvidia/__init__.py
diff --git a/llama_stack/providers/remote/eval/nvidia/config.py b/src/llama_stack/providers/remote/eval/nvidia/config.py
similarity index 100%
rename from llama_stack/providers/remote/eval/nvidia/config.py
rename to src/llama_stack/providers/remote/eval/nvidia/config.py
diff --git a/llama_stack/providers/remote/eval/nvidia/eval.py b/src/llama_stack/providers/remote/eval/nvidia/eval.py
similarity index 100%
rename from llama_stack/providers/remote/eval/nvidia/eval.py
rename to src/llama_stack/providers/remote/eval/nvidia/eval.py
diff --git a/llama_stack/providers/remote/files/s3/README.md b/src/llama_stack/providers/remote/files/s3/README.md
similarity index 100%
rename from llama_stack/providers/remote/files/s3/README.md
rename to src/llama_stack/providers/remote/files/s3/README.md
diff --git a/llama_stack/providers/remote/files/s3/__init__.py b/src/llama_stack/providers/remote/files/s3/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/files/s3/__init__.py
rename to src/llama_stack/providers/remote/files/s3/__init__.py
diff --git a/llama_stack/providers/remote/files/s3/config.py b/src/llama_stack/providers/remote/files/s3/config.py
similarity index 100%
rename from llama_stack/providers/remote/files/s3/config.py
rename to src/llama_stack/providers/remote/files/s3/config.py
diff --git a/llama_stack/providers/remote/files/s3/files.py b/src/llama_stack/providers/remote/files/s3/files.py
similarity index 100%
rename from llama_stack/providers/remote/files/s3/files.py
rename to src/llama_stack/providers/remote/files/s3/files.py
diff --git a/llama_stack/providers/remote/inference/__init__.py b/src/llama_stack/providers/remote/inference/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/__init__.py
rename to src/llama_stack/providers/remote/inference/__init__.py
diff --git a/llama_stack/providers/remote/inference/anthropic/__init__.py b/src/llama_stack/providers/remote/inference/anthropic/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/anthropic/__init__.py
rename to src/llama_stack/providers/remote/inference/anthropic/__init__.py
diff --git a/llama_stack/providers/remote/inference/anthropic/anthropic.py b/src/llama_stack/providers/remote/inference/anthropic/anthropic.py
similarity index 100%
rename from llama_stack/providers/remote/inference/anthropic/anthropic.py
rename to src/llama_stack/providers/remote/inference/anthropic/anthropic.py
diff --git a/llama_stack/providers/remote/inference/anthropic/config.py b/src/llama_stack/providers/remote/inference/anthropic/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/anthropic/config.py
rename to src/llama_stack/providers/remote/inference/anthropic/config.py
diff --git a/llama_stack/providers/remote/inference/azure/__init__.py b/src/llama_stack/providers/remote/inference/azure/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/azure/__init__.py
rename to src/llama_stack/providers/remote/inference/azure/__init__.py
diff --git a/llama_stack/providers/remote/inference/azure/azure.py b/src/llama_stack/providers/remote/inference/azure/azure.py
similarity index 100%
rename from llama_stack/providers/remote/inference/azure/azure.py
rename to src/llama_stack/providers/remote/inference/azure/azure.py
diff --git a/llama_stack/providers/remote/inference/azure/config.py b/src/llama_stack/providers/remote/inference/azure/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/azure/config.py
rename to src/llama_stack/providers/remote/inference/azure/config.py
diff --git a/llama_stack/providers/remote/inference/bedrock/__init__.py b/src/llama_stack/providers/remote/inference/bedrock/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/bedrock/__init__.py
rename to src/llama_stack/providers/remote/inference/bedrock/__init__.py
diff --git a/llama_stack/providers/remote/inference/bedrock/bedrock.py b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
similarity index 100%
rename from llama_stack/providers/remote/inference/bedrock/bedrock.py
rename to src/llama_stack/providers/remote/inference/bedrock/bedrock.py
diff --git a/llama_stack/providers/remote/inference/bedrock/config.py b/src/llama_stack/providers/remote/inference/bedrock/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/bedrock/config.py
rename to src/llama_stack/providers/remote/inference/bedrock/config.py
diff --git a/llama_stack/providers/remote/inference/bedrock/models.py b/src/llama_stack/providers/remote/inference/bedrock/models.py
similarity index 100%
rename from llama_stack/providers/remote/inference/bedrock/models.py
rename to src/llama_stack/providers/remote/inference/bedrock/models.py
diff --git a/llama_stack/providers/remote/inference/cerebras/__init__.py b/src/llama_stack/providers/remote/inference/cerebras/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/cerebras/__init__.py
rename to src/llama_stack/providers/remote/inference/cerebras/__init__.py
diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
similarity index 100%
rename from llama_stack/providers/remote/inference/cerebras/cerebras.py
rename to src/llama_stack/providers/remote/inference/cerebras/cerebras.py
diff --git a/llama_stack/providers/remote/inference/cerebras/config.py b/src/llama_stack/providers/remote/inference/cerebras/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/cerebras/config.py
rename to src/llama_stack/providers/remote/inference/cerebras/config.py
diff --git a/llama_stack/providers/remote/inference/databricks/__init__.py b/src/llama_stack/providers/remote/inference/databricks/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/databricks/__init__.py
rename to src/llama_stack/providers/remote/inference/databricks/__init__.py
diff --git a/llama_stack/providers/remote/inference/databricks/config.py b/src/llama_stack/providers/remote/inference/databricks/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/databricks/config.py
rename to src/llama_stack/providers/remote/inference/databricks/config.py
diff --git a/llama_stack/providers/remote/inference/databricks/databricks.py b/src/llama_stack/providers/remote/inference/databricks/databricks.py
similarity index 100%
rename from llama_stack/providers/remote/inference/databricks/databricks.py
rename to src/llama_stack/providers/remote/inference/databricks/databricks.py
diff --git a/llama_stack/providers/remote/inference/fireworks/__init__.py b/src/llama_stack/providers/remote/inference/fireworks/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/fireworks/__init__.py
rename to src/llama_stack/providers/remote/inference/fireworks/__init__.py
diff --git a/llama_stack/providers/remote/inference/fireworks/config.py b/src/llama_stack/providers/remote/inference/fireworks/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/fireworks/config.py
rename to src/llama_stack/providers/remote/inference/fireworks/config.py
diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/src/llama_stack/providers/remote/inference/fireworks/fireworks.py
similarity index 100%
rename from llama_stack/providers/remote/inference/fireworks/fireworks.py
rename to src/llama_stack/providers/remote/inference/fireworks/fireworks.py
diff --git a/llama_stack/providers/remote/inference/gemini/__init__.py b/src/llama_stack/providers/remote/inference/gemini/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/gemini/__init__.py
rename to src/llama_stack/providers/remote/inference/gemini/__init__.py
diff --git a/llama_stack/providers/remote/inference/gemini/config.py b/src/llama_stack/providers/remote/inference/gemini/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/gemini/config.py
rename to src/llama_stack/providers/remote/inference/gemini/config.py
diff --git a/llama_stack/providers/remote/inference/gemini/gemini.py b/src/llama_stack/providers/remote/inference/gemini/gemini.py
similarity index 100%
rename from llama_stack/providers/remote/inference/gemini/gemini.py
rename to src/llama_stack/providers/remote/inference/gemini/gemini.py
diff --git a/llama_stack/providers/remote/inference/groq/__init__.py b/src/llama_stack/providers/remote/inference/groq/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/groq/__init__.py
rename to src/llama_stack/providers/remote/inference/groq/__init__.py
diff --git a/llama_stack/providers/remote/inference/groq/config.py b/src/llama_stack/providers/remote/inference/groq/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/groq/config.py
rename to src/llama_stack/providers/remote/inference/groq/config.py
diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/src/llama_stack/providers/remote/inference/groq/groq.py
similarity index 100%
rename from llama_stack/providers/remote/inference/groq/groq.py
rename to src/llama_stack/providers/remote/inference/groq/groq.py
diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/__init__.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/llama_openai_compat/__init__.py
rename to src/llama_stack/providers/remote/inference/llama_openai_compat/__init__.py
diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/config.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/llama_openai_compat/config.py
rename to src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
similarity index 100%
rename from llama_stack/providers/remote/inference/llama_openai_compat/llama.py
rename to src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
similarity index 100%
rename from llama_stack/providers/remote/inference/nvidia/NVIDIA.md
rename to src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
diff --git a/llama_stack/providers/remote/inference/nvidia/__init__.py b/src/llama_stack/providers/remote/inference/nvidia/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/nvidia/__init__.py
rename to src/llama_stack/providers/remote/inference/nvidia/__init__.py
diff --git a/llama_stack/providers/remote/inference/nvidia/config.py b/src/llama_stack/providers/remote/inference/nvidia/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/nvidia/config.py
rename to src/llama_stack/providers/remote/inference/nvidia/config.py
diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
similarity index 100%
rename from llama_stack/providers/remote/inference/nvidia/nvidia.py
rename to src/llama_stack/providers/remote/inference/nvidia/nvidia.py
diff --git a/llama_stack/providers/remote/inference/nvidia/utils.py b/src/llama_stack/providers/remote/inference/nvidia/utils.py
similarity index 100%
rename from llama_stack/providers/remote/inference/nvidia/utils.py
rename to src/llama_stack/providers/remote/inference/nvidia/utils.py
diff --git a/llama_stack/providers/remote/inference/ollama/__init__.py b/src/llama_stack/providers/remote/inference/ollama/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/ollama/__init__.py
rename to src/llama_stack/providers/remote/inference/ollama/__init__.py
diff --git a/llama_stack/providers/remote/inference/ollama/config.py b/src/llama_stack/providers/remote/inference/ollama/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/ollama/config.py
rename to src/llama_stack/providers/remote/inference/ollama/config.py
diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/src/llama_stack/providers/remote/inference/ollama/ollama.py
similarity index 100%
rename from llama_stack/providers/remote/inference/ollama/ollama.py
rename to src/llama_stack/providers/remote/inference/ollama/ollama.py
diff --git a/llama_stack/providers/remote/inference/openai/__init__.py b/src/llama_stack/providers/remote/inference/openai/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/openai/__init__.py
rename to src/llama_stack/providers/remote/inference/openai/__init__.py
diff --git a/llama_stack/providers/remote/inference/openai/config.py b/src/llama_stack/providers/remote/inference/openai/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/openai/config.py
rename to src/llama_stack/providers/remote/inference/openai/config.py
diff --git a/llama_stack/providers/remote/inference/openai/openai.py b/src/llama_stack/providers/remote/inference/openai/openai.py
similarity index 100%
rename from llama_stack/providers/remote/inference/openai/openai.py
rename to src/llama_stack/providers/remote/inference/openai/openai.py
diff --git a/llama_stack/providers/remote/inference/passthrough/__init__.py b/src/llama_stack/providers/remote/inference/passthrough/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/passthrough/__init__.py
rename to src/llama_stack/providers/remote/inference/passthrough/__init__.py
diff --git a/llama_stack/providers/remote/inference/passthrough/config.py b/src/llama_stack/providers/remote/inference/passthrough/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/passthrough/config.py
rename to src/llama_stack/providers/remote/inference/passthrough/config.py
diff --git a/llama_stack/providers/remote/inference/passthrough/passthrough.py b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
similarity index 100%
rename from llama_stack/providers/remote/inference/passthrough/passthrough.py
rename to src/llama_stack/providers/remote/inference/passthrough/passthrough.py
diff --git a/llama_stack/providers/remote/inference/runpod/__init__.py b/src/llama_stack/providers/remote/inference/runpod/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/runpod/__init__.py
rename to src/llama_stack/providers/remote/inference/runpod/__init__.py
diff --git a/llama_stack/providers/remote/inference/runpod/config.py b/src/llama_stack/providers/remote/inference/runpod/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/runpod/config.py
rename to src/llama_stack/providers/remote/inference/runpod/config.py
diff --git a/llama_stack/providers/remote/inference/runpod/runpod.py b/src/llama_stack/providers/remote/inference/runpod/runpod.py
similarity index 100%
rename from llama_stack/providers/remote/inference/runpod/runpod.py
rename to src/llama_stack/providers/remote/inference/runpod/runpod.py
diff --git a/llama_stack/providers/remote/inference/sambanova/__init__.py b/src/llama_stack/providers/remote/inference/sambanova/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/sambanova/__init__.py
rename to src/llama_stack/providers/remote/inference/sambanova/__init__.py
diff --git a/llama_stack/providers/remote/inference/sambanova/config.py b/src/llama_stack/providers/remote/inference/sambanova/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/sambanova/config.py
rename to src/llama_stack/providers/remote/inference/sambanova/config.py
diff --git a/llama_stack/providers/remote/inference/sambanova/sambanova.py b/src/llama_stack/providers/remote/inference/sambanova/sambanova.py
similarity index 100%
rename from llama_stack/providers/remote/inference/sambanova/sambanova.py
rename to src/llama_stack/providers/remote/inference/sambanova/sambanova.py
diff --git a/llama_stack/providers/remote/inference/tgi/__init__.py b/src/llama_stack/providers/remote/inference/tgi/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/tgi/__init__.py
rename to src/llama_stack/providers/remote/inference/tgi/__init__.py
diff --git a/llama_stack/providers/remote/inference/tgi/config.py b/src/llama_stack/providers/remote/inference/tgi/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/tgi/config.py
rename to src/llama_stack/providers/remote/inference/tgi/config.py
diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/src/llama_stack/providers/remote/inference/tgi/tgi.py
similarity index 100%
rename from llama_stack/providers/remote/inference/tgi/tgi.py
rename to src/llama_stack/providers/remote/inference/tgi/tgi.py
diff --git a/llama_stack/providers/remote/inference/together/__init__.py b/src/llama_stack/providers/remote/inference/together/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/together/__init__.py
rename to src/llama_stack/providers/remote/inference/together/__init__.py
diff --git a/llama_stack/providers/remote/inference/together/config.py b/src/llama_stack/providers/remote/inference/together/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/together/config.py
rename to src/llama_stack/providers/remote/inference/together/config.py
diff --git a/llama_stack/providers/remote/inference/together/together.py b/src/llama_stack/providers/remote/inference/together/together.py
similarity index 100%
rename from llama_stack/providers/remote/inference/together/together.py
rename to src/llama_stack/providers/remote/inference/together/together.py
diff --git a/llama_stack/providers/remote/inference/vertexai/__init__.py b/src/llama_stack/providers/remote/inference/vertexai/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/vertexai/__init__.py
rename to src/llama_stack/providers/remote/inference/vertexai/__init__.py
diff --git a/llama_stack/providers/remote/inference/vertexai/config.py b/src/llama_stack/providers/remote/inference/vertexai/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/vertexai/config.py
rename to src/llama_stack/providers/remote/inference/vertexai/config.py
diff --git a/llama_stack/providers/remote/inference/vertexai/vertexai.py b/src/llama_stack/providers/remote/inference/vertexai/vertexai.py
similarity index 100%
rename from llama_stack/providers/remote/inference/vertexai/vertexai.py
rename to src/llama_stack/providers/remote/inference/vertexai/vertexai.py
diff --git a/llama_stack/providers/remote/inference/vllm/__init__.py b/src/llama_stack/providers/remote/inference/vllm/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/vllm/__init__.py
rename to src/llama_stack/providers/remote/inference/vllm/__init__.py
diff --git a/llama_stack/providers/remote/inference/vllm/config.py b/src/llama_stack/providers/remote/inference/vllm/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/vllm/config.py
rename to src/llama_stack/providers/remote/inference/vllm/config.py
diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/src/llama_stack/providers/remote/inference/vllm/vllm.py
similarity index 100%
rename from llama_stack/providers/remote/inference/vllm/vllm.py
rename to src/llama_stack/providers/remote/inference/vllm/vllm.py
diff --git a/llama_stack/providers/remote/inference/watsonx/__init__.py b/src/llama_stack/providers/remote/inference/watsonx/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/watsonx/__init__.py
rename to src/llama_stack/providers/remote/inference/watsonx/__init__.py
diff --git a/llama_stack/providers/remote/inference/watsonx/config.py b/src/llama_stack/providers/remote/inference/watsonx/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/watsonx/config.py
rename to src/llama_stack/providers/remote/inference/watsonx/config.py
diff --git a/llama_stack/providers/remote/inference/watsonx/watsonx.py b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
similarity index 100%
rename from llama_stack/providers/remote/inference/watsonx/watsonx.py
rename to src/llama_stack/providers/remote/inference/watsonx/watsonx.py
diff --git a/llama_stack/providers/remote/post_training/__init__.py b/src/llama_stack/providers/remote/post_training/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/post_training/__init__.py
rename to src/llama_stack/providers/remote/post_training/__init__.py
diff --git a/llama_stack/providers/remote/post_training/nvidia/README.md b/src/llama_stack/providers/remote/post_training/nvidia/README.md
similarity index 100%
rename from llama_stack/providers/remote/post_training/nvidia/README.md
rename to src/llama_stack/providers/remote/post_training/nvidia/README.md
diff --git a/llama_stack/providers/remote/post_training/nvidia/__init__.py b/src/llama_stack/providers/remote/post_training/nvidia/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/post_training/nvidia/__init__.py
rename to src/llama_stack/providers/remote/post_training/nvidia/__init__.py
diff --git a/llama_stack/providers/remote/post_training/nvidia/config.py b/src/llama_stack/providers/remote/post_training/nvidia/config.py
similarity index 100%
rename from llama_stack/providers/remote/post_training/nvidia/config.py
rename to src/llama_stack/providers/remote/post_training/nvidia/config.py
diff --git a/llama_stack/providers/remote/post_training/nvidia/models.py b/src/llama_stack/providers/remote/post_training/nvidia/models.py
similarity index 100%
rename from llama_stack/providers/remote/post_training/nvidia/models.py
rename to src/llama_stack/providers/remote/post_training/nvidia/models.py
diff --git a/llama_stack/providers/remote/post_training/nvidia/post_training.py b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py
similarity index 100%
rename from llama_stack/providers/remote/post_training/nvidia/post_training.py
rename to src/llama_stack/providers/remote/post_training/nvidia/post_training.py
diff --git a/llama_stack/providers/remote/post_training/nvidia/utils.py b/src/llama_stack/providers/remote/post_training/nvidia/utils.py
similarity index 100%
rename from llama_stack/providers/remote/post_training/nvidia/utils.py
rename to src/llama_stack/providers/remote/post_training/nvidia/utils.py
diff --git a/llama_stack/providers/remote/safety/__init__.py b/src/llama_stack/providers/remote/safety/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/safety/__init__.py
rename to src/llama_stack/providers/remote/safety/__init__.py
diff --git a/llama_stack/providers/remote/safety/bedrock/__init__.py b/src/llama_stack/providers/remote/safety/bedrock/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/safety/bedrock/__init__.py
rename to src/llama_stack/providers/remote/safety/bedrock/__init__.py
diff --git a/llama_stack/providers/remote/safety/bedrock/bedrock.py b/src/llama_stack/providers/remote/safety/bedrock/bedrock.py
similarity index 100%
rename from llama_stack/providers/remote/safety/bedrock/bedrock.py
rename to src/llama_stack/providers/remote/safety/bedrock/bedrock.py
diff --git a/llama_stack/providers/remote/safety/bedrock/config.py b/src/llama_stack/providers/remote/safety/bedrock/config.py
similarity index 100%
rename from llama_stack/providers/remote/safety/bedrock/config.py
rename to src/llama_stack/providers/remote/safety/bedrock/config.py
diff --git a/llama_stack/providers/remote/safety/nvidia/README.md b/src/llama_stack/providers/remote/safety/nvidia/README.md
similarity index 100%
rename from llama_stack/providers/remote/safety/nvidia/README.md
rename to src/llama_stack/providers/remote/safety/nvidia/README.md
diff --git a/llama_stack/providers/remote/safety/nvidia/__init__.py b/src/llama_stack/providers/remote/safety/nvidia/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/safety/nvidia/__init__.py
rename to src/llama_stack/providers/remote/safety/nvidia/__init__.py
diff --git a/llama_stack/providers/remote/safety/nvidia/config.py b/src/llama_stack/providers/remote/safety/nvidia/config.py
similarity index 100%
rename from llama_stack/providers/remote/safety/nvidia/config.py
rename to src/llama_stack/providers/remote/safety/nvidia/config.py
diff --git a/llama_stack/providers/remote/safety/nvidia/nvidia.py b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py
similarity index 100%
rename from llama_stack/providers/remote/safety/nvidia/nvidia.py
rename to src/llama_stack/providers/remote/safety/nvidia/nvidia.py
diff --git a/llama_stack/providers/remote/safety/sambanova/__init__.py b/src/llama_stack/providers/remote/safety/sambanova/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/safety/sambanova/__init__.py
rename to src/llama_stack/providers/remote/safety/sambanova/__init__.py
diff --git a/llama_stack/providers/remote/safety/sambanova/config.py b/src/llama_stack/providers/remote/safety/sambanova/config.py
similarity index 100%
rename from llama_stack/providers/remote/safety/sambanova/config.py
rename to src/llama_stack/providers/remote/safety/sambanova/config.py
diff --git a/llama_stack/providers/remote/safety/sambanova/sambanova.py b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py
similarity index 100%
rename from llama_stack/providers/remote/safety/sambanova/sambanova.py
rename to src/llama_stack/providers/remote/safety/sambanova/sambanova.py
diff --git a/llama_stack/providers/remote/tool_runtime/__init__.py b/src/llama_stack/providers/remote/tool_runtime/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/__init__.py
rename to src/llama_stack/providers/remote/tool_runtime/__init__.py
diff --git a/llama_stack/providers/remote/tool_runtime/bing_search/__init__.py b/src/llama_stack/providers/remote/tool_runtime/bing_search/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/bing_search/__init__.py
rename to src/llama_stack/providers/remote/tool_runtime/bing_search/__init__.py
diff --git a/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py
rename to src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py
diff --git a/llama_stack/providers/remote/tool_runtime/bing_search/config.py b/src/llama_stack/providers/remote/tool_runtime/bing_search/config.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/bing_search/config.py
rename to src/llama_stack/providers/remote/tool_runtime/bing_search/config.py
diff --git a/llama_stack/providers/remote/tool_runtime/brave_search/__init__.py b/src/llama_stack/providers/remote/tool_runtime/brave_search/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/brave_search/__init__.py
rename to src/llama_stack/providers/remote/tool_runtime/brave_search/__init__.py
diff --git a/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
rename to src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
diff --git a/llama_stack/providers/remote/tool_runtime/brave_search/config.py b/src/llama_stack/providers/remote/tool_runtime/brave_search/config.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/brave_search/config.py
rename to src/llama_stack/providers/remote/tool_runtime/brave_search/config.py
diff --git a/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py
rename to src/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py
diff --git a/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py
rename to src/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py
diff --git a/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py
rename to src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py
diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py b/src/llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py
rename to src/llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py
diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/src/llama_stack/providers/remote/tool_runtime/tavily_search/config.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/tavily_search/config.py
rename to src/llama_stack/providers/remote/tool_runtime/tavily_search/config.py
diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py
rename to src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py
diff --git a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py
rename to src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py
diff --git a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py
rename to src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py
diff --git a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
rename to src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
diff --git a/llama_stack/providers/remote/vector_io/__init__.py b/src/llama_stack/providers/remote/vector_io/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/__init__.py
rename to src/llama_stack/providers/remote/vector_io/__init__.py
diff --git a/llama_stack/providers/remote/vector_io/chroma/__init__.py b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/chroma/__init__.py
rename to src/llama_stack/providers/remote/vector_io/chroma/__init__.py
diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/chroma/chroma.py
rename to src/llama_stack/providers/remote/vector_io/chroma/chroma.py
diff --git a/llama_stack/providers/remote/vector_io/chroma/config.py b/src/llama_stack/providers/remote/vector_io/chroma/config.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/chroma/config.py
rename to src/llama_stack/providers/remote/vector_io/chroma/config.py
diff --git a/llama_stack/providers/remote/vector_io/milvus/__init__.py b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/milvus/__init__.py
rename to src/llama_stack/providers/remote/vector_io/milvus/__init__.py
diff --git a/llama_stack/providers/remote/vector_io/milvus/config.py b/src/llama_stack/providers/remote/vector_io/milvus/config.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/milvus/config.py
rename to src/llama_stack/providers/remote/vector_io/milvus/config.py
diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/milvus/milvus.py
rename to src/llama_stack/providers/remote/vector_io/milvus/milvus.py
diff --git a/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/pgvector/__init__.py
rename to src/llama_stack/providers/remote/vector_io/pgvector/__init__.py
diff --git a/llama_stack/providers/remote/vector_io/pgvector/config.py b/src/llama_stack/providers/remote/vector_io/pgvector/config.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/pgvector/config.py
rename to src/llama_stack/providers/remote/vector_io/pgvector/config.py
diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/pgvector/pgvector.py
rename to src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
diff --git a/llama_stack/providers/remote/vector_io/qdrant/__init__.py b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/qdrant/__init__.py
rename to src/llama_stack/providers/remote/vector_io/qdrant/__init__.py
diff --git a/llama_stack/providers/remote/vector_io/qdrant/config.py b/src/llama_stack/providers/remote/vector_io/qdrant/config.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/qdrant/config.py
rename to src/llama_stack/providers/remote/vector_io/qdrant/config.py
diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/qdrant/qdrant.py
rename to src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
diff --git a/llama_stack/providers/remote/vector_io/weaviate/__init__.py b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/weaviate/__init__.py
rename to src/llama_stack/providers/remote/vector_io/weaviate/__init__.py
diff --git a/llama_stack/providers/remote/vector_io/weaviate/config.py b/src/llama_stack/providers/remote/vector_io/weaviate/config.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/weaviate/config.py
rename to src/llama_stack/providers/remote/vector_io/weaviate/config.py
diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/weaviate/weaviate.py
rename to src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
diff --git a/llama_stack/providers/utils/__init__.py b/src/llama_stack/providers/utils/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/__init__.py
rename to src/llama_stack/providers/utils/__init__.py
diff --git a/llama_stack/providers/utils/bedrock/__init__.py b/src/llama_stack/providers/utils/bedrock/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/bedrock/__init__.py
rename to src/llama_stack/providers/utils/bedrock/__init__.py
diff --git a/llama_stack/providers/utils/bedrock/client.py b/src/llama_stack/providers/utils/bedrock/client.py
similarity index 100%
rename from llama_stack/providers/utils/bedrock/client.py
rename to src/llama_stack/providers/utils/bedrock/client.py
diff --git a/llama_stack/providers/utils/bedrock/config.py b/src/llama_stack/providers/utils/bedrock/config.py
similarity index 100%
rename from llama_stack/providers/utils/bedrock/config.py
rename to src/llama_stack/providers/utils/bedrock/config.py
diff --git a/llama_stack/providers/utils/bedrock/refreshable_boto_session.py b/src/llama_stack/providers/utils/bedrock/refreshable_boto_session.py
similarity index 100%
rename from llama_stack/providers/utils/bedrock/refreshable_boto_session.py
rename to src/llama_stack/providers/utils/bedrock/refreshable_boto_session.py
diff --git a/llama_stack/providers/utils/common/__init__.py b/src/llama_stack/providers/utils/common/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/common/__init__.py
rename to src/llama_stack/providers/utils/common/__init__.py
diff --git a/llama_stack/providers/utils/common/data_schema_validator.py b/src/llama_stack/providers/utils/common/data_schema_validator.py
similarity index 100%
rename from llama_stack/providers/utils/common/data_schema_validator.py
rename to src/llama_stack/providers/utils/common/data_schema_validator.py
diff --git a/llama_stack/providers/utils/datasetio/__init__.py b/src/llama_stack/providers/utils/datasetio/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/datasetio/__init__.py
rename to src/llama_stack/providers/utils/datasetio/__init__.py
diff --git a/llama_stack/providers/utils/datasetio/url_utils.py b/src/llama_stack/providers/utils/datasetio/url_utils.py
similarity index 100%
rename from llama_stack/providers/utils/datasetio/url_utils.py
rename to src/llama_stack/providers/utils/datasetio/url_utils.py
diff --git a/llama_stack/providers/utils/files/__init__.py b/src/llama_stack/providers/utils/files/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/files/__init__.py
rename to src/llama_stack/providers/utils/files/__init__.py
diff --git a/llama_stack/providers/utils/files/form_data.py b/src/llama_stack/providers/utils/files/form_data.py
similarity index 100%
rename from llama_stack/providers/utils/files/form_data.py
rename to src/llama_stack/providers/utils/files/form_data.py
diff --git a/llama_stack/providers/utils/inference/__init__.py b/src/llama_stack/providers/utils/inference/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/inference/__init__.py
rename to src/llama_stack/providers/utils/inference/__init__.py
diff --git a/llama_stack/providers/utils/inference/embedding_mixin.py b/src/llama_stack/providers/utils/inference/embedding_mixin.py
similarity index 100%
rename from llama_stack/providers/utils/inference/embedding_mixin.py
rename to src/llama_stack/providers/utils/inference/embedding_mixin.py
diff --git a/llama_stack/providers/utils/inference/inference_store.py b/src/llama_stack/providers/utils/inference/inference_store.py
similarity index 100%
rename from llama_stack/providers/utils/inference/inference_store.py
rename to src/llama_stack/providers/utils/inference/inference_store.py
diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py
similarity index 100%
rename from llama_stack/providers/utils/inference/litellm_openai_mixin.py
rename to src/llama_stack/providers/utils/inference/litellm_openai_mixin.py
diff --git a/llama_stack/providers/utils/inference/model_registry.py b/src/llama_stack/providers/utils/inference/model_registry.py
similarity index 100%
rename from llama_stack/providers/utils/inference/model_registry.py
rename to src/llama_stack/providers/utils/inference/model_registry.py
diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/src/llama_stack/providers/utils/inference/openai_compat.py
similarity index 100%
rename from llama_stack/providers/utils/inference/openai_compat.py
rename to src/llama_stack/providers/utils/inference/openai_compat.py
diff --git a/llama_stack/providers/utils/inference/openai_mixin.py b/src/llama_stack/providers/utils/inference/openai_mixin.py
similarity index 100%
rename from llama_stack/providers/utils/inference/openai_mixin.py
rename to src/llama_stack/providers/utils/inference/openai_mixin.py
diff --git a/llama_stack/providers/utils/inference/prompt_adapter.py b/src/llama_stack/providers/utils/inference/prompt_adapter.py
similarity index 100%
rename from llama_stack/providers/utils/inference/prompt_adapter.py
rename to src/llama_stack/providers/utils/inference/prompt_adapter.py
diff --git a/llama_stack/providers/utils/kvstore/__init__.py b/src/llama_stack/providers/utils/kvstore/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/__init__.py
rename to src/llama_stack/providers/utils/kvstore/__init__.py
diff --git a/llama_stack/providers/utils/kvstore/api.py b/src/llama_stack/providers/utils/kvstore/api.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/api.py
rename to src/llama_stack/providers/utils/kvstore/api.py
diff --git a/llama_stack/providers/utils/kvstore/config.py b/src/llama_stack/providers/utils/kvstore/config.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/config.py
rename to src/llama_stack/providers/utils/kvstore/config.py
diff --git a/llama_stack/providers/utils/kvstore/kvstore.py b/src/llama_stack/providers/utils/kvstore/kvstore.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/kvstore.py
rename to src/llama_stack/providers/utils/kvstore/kvstore.py
diff --git a/llama_stack/providers/utils/kvstore/mongodb/__init__.py b/src/llama_stack/providers/utils/kvstore/mongodb/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/mongodb/__init__.py
rename to src/llama_stack/providers/utils/kvstore/mongodb/__init__.py
diff --git a/llama_stack/providers/utils/kvstore/mongodb/mongodb.py b/src/llama_stack/providers/utils/kvstore/mongodb/mongodb.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/mongodb/mongodb.py
rename to src/llama_stack/providers/utils/kvstore/mongodb/mongodb.py
diff --git a/llama_stack/providers/utils/kvstore/postgres/__init__.py b/src/llama_stack/providers/utils/kvstore/postgres/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/postgres/__init__.py
rename to src/llama_stack/providers/utils/kvstore/postgres/__init__.py
diff --git a/llama_stack/providers/utils/kvstore/postgres/postgres.py b/src/llama_stack/providers/utils/kvstore/postgres/postgres.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/postgres/postgres.py
rename to src/llama_stack/providers/utils/kvstore/postgres/postgres.py
diff --git a/llama_stack/providers/utils/kvstore/redis/__init__.py b/src/llama_stack/providers/utils/kvstore/redis/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/redis/__init__.py
rename to src/llama_stack/providers/utils/kvstore/redis/__init__.py
diff --git a/llama_stack/providers/utils/kvstore/redis/redis.py b/src/llama_stack/providers/utils/kvstore/redis/redis.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/redis/redis.py
rename to src/llama_stack/providers/utils/kvstore/redis/redis.py
diff --git a/llama_stack/providers/utils/kvstore/sqlite/__init__.py b/src/llama_stack/providers/utils/kvstore/sqlite/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/sqlite/__init__.py
rename to src/llama_stack/providers/utils/kvstore/sqlite/__init__.py
diff --git a/llama_stack/providers/utils/kvstore/sqlite/config.py b/src/llama_stack/providers/utils/kvstore/sqlite/config.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/sqlite/config.py
rename to src/llama_stack/providers/utils/kvstore/sqlite/config.py
diff --git a/llama_stack/providers/utils/kvstore/sqlite/sqlite.py b/src/llama_stack/providers/utils/kvstore/sqlite/sqlite.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/sqlite/sqlite.py
rename to src/llama_stack/providers/utils/kvstore/sqlite/sqlite.py
diff --git a/llama_stack/providers/utils/memory/__init__.py b/src/llama_stack/providers/utils/memory/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/memory/__init__.py
rename to src/llama_stack/providers/utils/memory/__init__.py
diff --git a/llama_stack/providers/utils/memory/file_utils.py b/src/llama_stack/providers/utils/memory/file_utils.py
similarity index 100%
rename from llama_stack/providers/utils/memory/file_utils.py
rename to src/llama_stack/providers/utils/memory/file_utils.py
diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
similarity index 100%
rename from llama_stack/providers/utils/memory/openai_vector_store_mixin.py
rename to src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
diff --git a/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py
similarity index 100%
rename from llama_stack/providers/utils/memory/vector_store.py
rename to src/llama_stack/providers/utils/memory/vector_store.py
diff --git a/llama_stack/providers/utils/pagination.py b/src/llama_stack/providers/utils/pagination.py
similarity index 100%
rename from llama_stack/providers/utils/pagination.py
rename to src/llama_stack/providers/utils/pagination.py
diff --git a/llama_stack/providers/utils/responses/__init__.py b/src/llama_stack/providers/utils/responses/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/responses/__init__.py
rename to src/llama_stack/providers/utils/responses/__init__.py
diff --git a/llama_stack/providers/utils/responses/responses_store.py b/src/llama_stack/providers/utils/responses/responses_store.py
similarity index 100%
rename from llama_stack/providers/utils/responses/responses_store.py
rename to src/llama_stack/providers/utils/responses/responses_store.py
diff --git a/llama_stack/providers/utils/scheduler.py b/src/llama_stack/providers/utils/scheduler.py
similarity index 100%
rename from llama_stack/providers/utils/scheduler.py
rename to src/llama_stack/providers/utils/scheduler.py
diff --git a/llama_stack/providers/utils/scoring/__init__.py b/src/llama_stack/providers/utils/scoring/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/scoring/__init__.py
rename to src/llama_stack/providers/utils/scoring/__init__.py
diff --git a/llama_stack/providers/utils/scoring/aggregation_utils.py b/src/llama_stack/providers/utils/scoring/aggregation_utils.py
similarity index 100%
rename from llama_stack/providers/utils/scoring/aggregation_utils.py
rename to src/llama_stack/providers/utils/scoring/aggregation_utils.py
diff --git a/llama_stack/providers/utils/scoring/base_scoring_fn.py b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/utils/scoring/base_scoring_fn.py
rename to src/llama_stack/providers/utils/scoring/base_scoring_fn.py
diff --git a/llama_stack/providers/utils/scoring/basic_scoring_utils.py b/src/llama_stack/providers/utils/scoring/basic_scoring_utils.py
similarity index 100%
rename from llama_stack/providers/utils/scoring/basic_scoring_utils.py
rename to src/llama_stack/providers/utils/scoring/basic_scoring_utils.py
diff --git a/llama_stack/providers/utils/sqlstore/__init__.py b/src/llama_stack/providers/utils/sqlstore/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/sqlstore/__init__.py
rename to src/llama_stack/providers/utils/sqlstore/__init__.py
diff --git a/llama_stack/providers/utils/sqlstore/api.py b/src/llama_stack/providers/utils/sqlstore/api.py
similarity index 100%
rename from llama_stack/providers/utils/sqlstore/api.py
rename to src/llama_stack/providers/utils/sqlstore/api.py
diff --git a/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py
similarity index 100%
rename from llama_stack/providers/utils/sqlstore/authorized_sqlstore.py
rename to src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py
diff --git a/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py
similarity index 100%
rename from llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py
rename to src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py
diff --git a/llama_stack/providers/utils/sqlstore/sqlstore.py b/src/llama_stack/providers/utils/sqlstore/sqlstore.py
similarity index 100%
rename from llama_stack/providers/utils/sqlstore/sqlstore.py
rename to src/llama_stack/providers/utils/sqlstore/sqlstore.py
diff --git a/llama_stack/providers/utils/tools/__init__.py b/src/llama_stack/providers/utils/tools/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/tools/__init__.py
rename to src/llama_stack/providers/utils/tools/__init__.py
diff --git a/llama_stack/providers/utils/tools/mcp.py b/src/llama_stack/providers/utils/tools/mcp.py
similarity index 100%
rename from llama_stack/providers/utils/tools/mcp.py
rename to src/llama_stack/providers/utils/tools/mcp.py
diff --git a/llama_stack/providers/utils/tools/ttl_dict.py b/src/llama_stack/providers/utils/tools/ttl_dict.py
similarity index 100%
rename from llama_stack/providers/utils/tools/ttl_dict.py
rename to src/llama_stack/providers/utils/tools/ttl_dict.py
diff --git a/llama_stack/providers/utils/vector_io/__init__.py b/src/llama_stack/providers/utils/vector_io/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/vector_io/__init__.py
rename to src/llama_stack/providers/utils/vector_io/__init__.py
diff --git a/llama_stack/providers/utils/vector_io/vector_utils.py b/src/llama_stack/providers/utils/vector_io/vector_utils.py
similarity index 100%
rename from llama_stack/providers/utils/vector_io/vector_utils.py
rename to src/llama_stack/providers/utils/vector_io/vector_utils.py
diff --git a/llama_stack/schema_utils.py b/src/llama_stack/schema_utils.py
similarity index 100%
rename from llama_stack/schema_utils.py
rename to src/llama_stack/schema_utils.py
diff --git a/llama_stack/strong_typing/__init__.py b/src/llama_stack/strong_typing/__init__.py
similarity index 100%
rename from llama_stack/strong_typing/__init__.py
rename to src/llama_stack/strong_typing/__init__.py
diff --git a/llama_stack/strong_typing/auxiliary.py b/src/llama_stack/strong_typing/auxiliary.py
similarity index 89%
rename from llama_stack/strong_typing/auxiliary.py
rename to src/llama_stack/strong_typing/auxiliary.py
index 965ffa079..eb067b38b 100644
--- a/llama_stack/strong_typing/auxiliary.py
+++ b/src/llama_stack/strong_typing/auxiliary.py
@@ -12,23 +12,24 @@ Type-safe data interchange for Python data classes.
 
 import dataclasses
 import sys
+from collections.abc import Callable
 from dataclasses import is_dataclass
-from typing import Callable, Dict, Optional, Type, TypeVar, Union, overload
+from typing import TypeVar, overload
 
 if sys.version_info >= (3, 9):
     from typing import Annotated as Annotated
 else:
-    from typing_extensions import Annotated as Annotated
+    from typing import Annotated as Annotated
 
 if sys.version_info >= (3, 10):
     from typing import TypeAlias as TypeAlias
 else:
-    from typing_extensions import TypeAlias as TypeAlias
+    from typing import TypeAlias as TypeAlias
 
 if sys.version_info >= (3, 11):
     from typing import dataclass_transform as dataclass_transform
 else:
-    from typing_extensions import dataclass_transform as dataclass_transform
+    from typing import dataclass_transform as dataclass_transform
 
 T = TypeVar("T")
 
@@ -56,17 +57,17 @@ class CompactDataClass:
 
 
 @overload
-def typeannotation(cls: Type[T], /) -> Type[T]: ...
+def typeannotation(cls: type[T], /) -> type[T]: ...
 
 
 @overload
-def typeannotation(cls: None, *, eq: bool = True, order: bool = False) -> Callable[[Type[T]], Type[T]]: ...
+def typeannotation(cls: None, *, eq: bool = True, order: bool = False) -> Callable[[type[T]], type[T]]: ...
 
 
 @dataclass_transform(eq_default=True, order_default=False)
 def typeannotation(
-    cls: Optional[Type[T]] = None, *, eq: bool = True, order: bool = False
-) -> Union[Type[T], Callable[[Type[T]], Type[T]]]:
+    cls: type[T] | None = None, *, eq: bool = True, order: bool = False
+) -> type[T] | Callable[[type[T]], type[T]]:
     """
     Returns the same class as was passed in, with dunder methods added based on the fields defined in the class.
 
@@ -76,7 +77,7 @@ def typeannotation(
     :returns: A data-class type, or a wrapper for data-class types.
     """
 
-    def wrap(cls: Type[T]) -> Type[T]:
+    def wrap(cls: type[T]) -> type[T]:
         # mypy fails to equate bound-y functions (first argument interpreted as
         # the bound object) with class methods, hence the `ignore` directive.
         cls.__repr__ = _compact_dataclass_repr  # type: ignore[method-assign]
@@ -213,7 +214,7 @@ float32: TypeAlias = Annotated[float, Storage(4)]
 float64: TypeAlias = Annotated[float, Storage(8)]
 
 # maps globals of type Annotated[T, ...] defined in this module to their string names
-_auxiliary_types: Dict[object, str] = {}
+_auxiliary_types: dict[object, str] = {}
 module = sys.modules[__name__]
 for var in dir(module):
     typ = getattr(module, var)
@@ -222,7 +223,7 @@ for var in dir(module):
         _auxiliary_types[typ] = var
 
 
-def get_auxiliary_format(data_type: object) -> Optional[str]:
+def get_auxiliary_format(data_type: object) -> str | None:
     "Returns the JSON format string corresponding to an auxiliary type."
 
     return _auxiliary_types.get(data_type)
diff --git a/llama_stack/strong_typing/classdef.py b/src/llama_stack/strong_typing/classdef.py
similarity index 86%
rename from llama_stack/strong_typing/classdef.py
rename to src/llama_stack/strong_typing/classdef.py
index 5ead886d4..e54e3a9d6 100644
--- a/llama_stack/strong_typing/classdef.py
+++ b/src/llama_stack/strong_typing/classdef.py
@@ -17,7 +17,7 @@ import types
 import typing
 import uuid
 from dataclasses import dataclass
-from typing import Any, Dict, List, Literal, Optional, Tuple, Type, TypeVar, Union
+from typing import Any, Literal, TypeVar, Union
 
 from .auxiliary import (
     Alias,
@@ -40,57 +40,57 @@ T = TypeVar("T")
 
 @dataclass
 class JsonSchemaNode:
-    title: Optional[str]
-    description: Optional[str]
+    title: str | None
+    description: str | None
 
 
 @dataclass
 class JsonSchemaType(JsonSchemaNode):
     type: str
-    format: Optional[str]
+    format: str | None
 
 
 @dataclass
 class JsonSchemaBoolean(JsonSchemaType):
     type: Literal["boolean"]
-    const: Optional[bool]
-    default: Optional[bool]
-    examples: Optional[List[bool]]
+    const: bool | None
+    default: bool | None
+    examples: list[bool] | None
 
 
 @dataclass
 class JsonSchemaInteger(JsonSchemaType):
     type: Literal["integer"]
-    const: Optional[int]
-    default: Optional[int]
-    examples: Optional[List[int]]
-    enum: Optional[List[int]]
-    minimum: Optional[int]
-    maximum: Optional[int]
+    const: int | None
+    default: int | None
+    examples: list[int] | None
+    enum: list[int] | None
+    minimum: int | None
+    maximum: int | None
 
 
 @dataclass
 class JsonSchemaNumber(JsonSchemaType):
     type: Literal["number"]
-    const: Optional[float]
-    default: Optional[float]
-    examples: Optional[List[float]]
-    minimum: Optional[float]
-    maximum: Optional[float]
-    exclusiveMinimum: Optional[float]
-    exclusiveMaximum: Optional[float]
-    multipleOf: Optional[float]
+    const: float | None
+    default: float | None
+    examples: list[float] | None
+    minimum: float | None
+    maximum: float | None
+    exclusiveMinimum: float | None
+    exclusiveMaximum: float | None
+    multipleOf: float | None
 
 
 @dataclass
 class JsonSchemaString(JsonSchemaType):
     type: Literal["string"]
-    const: Optional[str]
-    default: Optional[str]
-    examples: Optional[List[str]]
-    enum: Optional[List[str]]
-    minLength: Optional[int]
-    maxLength: Optional[int]
+    const: str | None
+    default: str | None
+    examples: list[str] | None
+    enum: list[str] | None
+    minLength: int | None
+    maxLength: int | None
 
 
 @dataclass
@@ -102,9 +102,9 @@ class JsonSchemaArray(JsonSchemaType):
 @dataclass
 class JsonSchemaObject(JsonSchemaType):
     type: Literal["object"]
-    properties: Optional[Dict[str, "JsonSchemaAny"]]
-    additionalProperties: Optional[bool]
-    required: Optional[List[str]]
+    properties: dict[str, "JsonSchemaAny"] | None
+    additionalProperties: bool | None
+    required: list[str] | None
 
 
 @dataclass
@@ -114,24 +114,24 @@ class JsonSchemaRef(JsonSchemaNode):
 
 @dataclass
 class JsonSchemaAllOf(JsonSchemaNode):
-    allOf: List["JsonSchemaAny"]
+    allOf: list["JsonSchemaAny"]
 
 
 @dataclass
 class JsonSchemaAnyOf(JsonSchemaNode):
-    anyOf: List["JsonSchemaAny"]
+    anyOf: list["JsonSchemaAny"]
 
 
 @dataclass
 class Discriminator:
     propertyName: str
-    mapping: Dict[str, str]
+    mapping: dict[str, str]
 
 
 @dataclass
 class JsonSchemaOneOf(JsonSchemaNode):
-    oneOf: List["JsonSchemaAny"]
-    discriminator: Optional[Discriminator]
+    oneOf: list["JsonSchemaAny"]
+    discriminator: Discriminator | None
 
 
 JsonSchemaAny = Union[
@@ -149,7 +149,7 @@ JsonSchemaAny = Union[
 @dataclass
 class JsonSchemaTopLevelObject(JsonSchemaObject):
     schema: Annotated[str, Alias("$schema")]
-    definitions: Optional[Dict[str, JsonSchemaAny]]
+    definitions: dict[str, JsonSchemaAny] | None
 
 
 def integer_range_to_type(min_value: float, max_value: float) -> type:
@@ -173,11 +173,11 @@ def enum_safe_name(name: str) -> str:
 def enum_values_to_type(
     module: types.ModuleType,
     name: str,
-    values: Dict[str, Any],
-    title: Optional[str] = None,
-    description: Optional[str] = None,
-) -> Type[enum.Enum]:
-    enum_class: Type[enum.Enum] = enum.Enum(name, values)  # type: ignore
+    values: dict[str, Any],
+    title: str | None = None,
+    description: str | None = None,
+) -> type[enum.Enum]:
+    enum_class: type[enum.Enum] = enum.Enum(name, values)  # type: ignore
 
     # assign the newly created type to the same module where the defining class is
     enum_class.__module__ = module.__name__
@@ -330,7 +330,7 @@ def node_to_typedef(module: types.ModuleType, context: str, node: JsonSchemaNode
         type_def = node_to_typedef(module, context, node.items)
         if type_def.default is not dataclasses.MISSING:
             raise TypeError("disallowed: `default` for array element type")
-        list_type = List[(type_def.type,)]  # type: ignore
+        list_type = list[(type_def.type,)]  # type: ignore
         return TypeDef(list_type, dataclasses.MISSING)
 
     elif isinstance(node, JsonSchemaObject):
@@ -344,8 +344,8 @@ def node_to_typedef(module: types.ModuleType, context: str, node: JsonSchemaNode
 
         class_name = context
 
-        fields: List[Tuple[str, Any, dataclasses.Field]] = []
-        params: Dict[str, DocstringParam] = {}
+        fields: list[tuple[str, Any, dataclasses.Field]] = []
+        params: dict[str, DocstringParam] = {}
         for prop_name, prop_node in node.properties.items():
             type_def = node_to_typedef(module, f"{class_name}__{prop_name}", prop_node)
             if prop_name in required:
@@ -388,7 +388,7 @@ class SchemaFlatteningOptions:
     recursive: bool = False
 
 
-def flatten_schema(schema: Schema, *, options: Optional[SchemaFlatteningOptions] = None) -> Schema:
+def flatten_schema(schema: Schema, *, options: SchemaFlatteningOptions | None = None) -> Schema:
     top_node = typing.cast(JsonSchemaTopLevelObject, json_to_object(JsonSchemaTopLevelObject, schema))
     flattener = SchemaFlattener(options)
     obj = flattener.flatten(top_node)
@@ -398,7 +398,7 @@ def flatten_schema(schema: Schema, *, options: Optional[SchemaFlatteningOptions]
 class SchemaFlattener:
     options: SchemaFlatteningOptions
 
-    def __init__(self, options: Optional[SchemaFlatteningOptions] = None) -> None:
+    def __init__(self, options: SchemaFlatteningOptions | None = None) -> None:
         self.options = options or SchemaFlatteningOptions()
 
     def flatten(self, source_node: JsonSchemaObject) -> JsonSchemaObject:
@@ -406,10 +406,10 @@ class SchemaFlattener:
             return source_node
 
         source_props = source_node.properties or {}
-        target_props: Dict[str, JsonSchemaAny] = {}
+        target_props: dict[str, JsonSchemaAny] = {}
 
         source_reqs = source_node.required or []
-        target_reqs: List[str] = []
+        target_reqs: list[str] = []
 
         for name, prop in source_props.items():
             if not isinstance(prop, JsonSchemaObject):
diff --git a/llama_stack/strong_typing/core.py b/src/llama_stack/strong_typing/core.py
similarity index 80%
rename from llama_stack/strong_typing/core.py
rename to src/llama_stack/strong_typing/core.py
index 501b6a5db..5f3764aeb 100644
--- a/llama_stack/strong_typing/core.py
+++ b/src/llama_stack/strong_typing/core.py
@@ -10,7 +10,7 @@ Type-safe data interchange for Python data classes.
 :see: https://github.com/hunyadi/strong_typing
 """
 
-from typing import Dict, List, Union
+from typing import Union
 
 
 class JsonObject:
@@ -28,8 +28,8 @@ JsonType = Union[
     int,
     float,
     str,
-    Dict[str, "JsonType"],
-    List["JsonType"],
+    dict[str, "JsonType"],
+    list["JsonType"],
 ]
 
 # a JSON type that cannot contain `null` values
@@ -38,9 +38,9 @@ StrictJsonType = Union[
     int,
     float,
     str,
-    Dict[str, "StrictJsonType"],
-    List["StrictJsonType"],
+    dict[str, "StrictJsonType"],
+    list["StrictJsonType"],
 ]
 
 # a meta-type that captures the object type in a JSON schema
-Schema = Dict[str, JsonType]
+Schema = dict[str, JsonType]
diff --git a/llama_stack/strong_typing/deserializer.py b/src/llama_stack/strong_typing/deserializer.py
similarity index 89%
rename from llama_stack/strong_typing/deserializer.py
rename to src/llama_stack/strong_typing/deserializer.py
index 883590862..58dfe53a4 100644
--- a/llama_stack/strong_typing/deserializer.py
+++ b/src/llama_stack/strong_typing/deserializer.py
@@ -20,19 +20,14 @@ import ipaddress
 import sys
 import typing
 import uuid
+from collections.abc import Callable
 from types import ModuleType
 from typing import (
     Any,
-    Callable,
-    Dict,
     Generic,
-    List,
     Literal,
     NamedTuple,
     Optional,
-    Set,
-    Tuple,
-    Type,
     TypeVar,
     Union,
 )
@@ -70,7 +65,7 @@ V = TypeVar("V")
 class Deserializer(abc.ABC, Generic[T]):
     "Parses a JSON value into a Python type."
 
-    def build(self, context: Optional[ModuleType]) -> None:
+    def build(self, context: ModuleType | None) -> None:
         """
         Creates auxiliary parsers that this parser is depending on.
 
@@ -203,19 +198,19 @@ class IPv6Deserializer(Deserializer[ipaddress.IPv6Address]):
         return ipaddress.IPv6Address(data)
 
 
-class ListDeserializer(Deserializer[List[T]]):
+class ListDeserializer(Deserializer[list[T]]):
     "Recursively de-serializes a JSON array into a Python `list`."
 
-    item_type: Type[T]
+    item_type: type[T]
     item_parser: Deserializer
 
-    def __init__(self, item_type: Type[T]) -> None:
+    def __init__(self, item_type: type[T]) -> None:
         self.item_type = item_type
 
-    def build(self, context: Optional[ModuleType]) -> None:
+    def build(self, context: ModuleType | None) -> None:
         self.item_parser = _get_deserializer(self.item_type, context)
 
-    def parse(self, data: JsonType) -> List[T]:
+    def parse(self, data: JsonType) -> list[T]:
         if not isinstance(data, list):
             type_name = python_type_to_str(self.item_type)
             raise JsonTypeError(f"type `List[{type_name}]` expects JSON `array` data but instead received: {data}")
@@ -223,19 +218,19 @@ class ListDeserializer(Deserializer[List[T]]):
         return [self.item_parser.parse(item) for item in data]
 
 
-class DictDeserializer(Deserializer[Dict[K, V]]):
+class DictDeserializer(Deserializer[dict[K, V]]):
     "Recursively de-serializes a JSON object into a Python `dict`."
 
-    key_type: Type[K]
-    value_type: Type[V]
+    key_type: type[K]
+    value_type: type[V]
     value_parser: Deserializer[V]
 
-    def __init__(self, key_type: Type[K], value_type: Type[V]) -> None:
+    def __init__(self, key_type: type[K], value_type: type[V]) -> None:
         self.key_type = key_type
         self.value_type = value_type
         self._check_key_type()
 
-    def build(self, context: Optional[ModuleType]) -> None:
+    def build(self, context: ModuleType | None) -> None:
         self.value_parser = _get_deserializer(self.value_type, context)
 
     def _check_key_type(self) -> None:
@@ -264,7 +259,7 @@ class DictDeserializer(Deserializer[Dict[K, V]]):
         value_type_name = python_type_to_str(self.value_type)
         return f"Dict[{key_type_name}, {value_type_name}]"
 
-    def parse(self, data: JsonType) -> Dict[K, V]:
+    def parse(self, data: JsonType) -> dict[K, V]:
         if not isinstance(data, dict):
             raise JsonTypeError(
                 f"`type `{self.container_type}` expects JSON `object` data but instead received: {data}"
@@ -276,19 +271,19 @@ class DictDeserializer(Deserializer[Dict[K, V]]):
         )
 
 
-class SetDeserializer(Deserializer[Set[T]]):
+class SetDeserializer(Deserializer[set[T]]):
     "Recursively de-serializes a JSON list into a Python `set`."
 
-    member_type: Type[T]
+    member_type: type[T]
     member_parser: Deserializer
 
-    def __init__(self, member_type: Type[T]) -> None:
+    def __init__(self, member_type: type[T]) -> None:
         self.member_type = member_type
 
-    def build(self, context: Optional[ModuleType]) -> None:
+    def build(self, context: ModuleType | None) -> None:
         self.member_parser = _get_deserializer(self.member_type, context)
 
-    def parse(self, data: JsonType) -> Set[T]:
+    def parse(self, data: JsonType) -> set[T]:
         if not isinstance(data, list):
             type_name = python_type_to_str(self.member_type)
             raise JsonTypeError(f"type `Set[{type_name}]` expects JSON `array` data but instead received: {data}")
@@ -296,16 +291,16 @@ class SetDeserializer(Deserializer[Set[T]]):
         return set(self.member_parser.parse(item) for item in data)
 
 
-class TupleDeserializer(Deserializer[Tuple[Any, ...]]):
+class TupleDeserializer(Deserializer[tuple[Any, ...]]):
     "Recursively de-serializes a JSON list into a Python `tuple`."
 
-    item_types: Tuple[Type[Any], ...]
-    item_parsers: Tuple[Deserializer[Any], ...]
+    item_types: tuple[type[Any], ...]
+    item_parsers: tuple[Deserializer[Any], ...]
 
-    def __init__(self, item_types: Tuple[Type[Any], ...]) -> None:
+    def __init__(self, item_types: tuple[type[Any], ...]) -> None:
         self.item_types = item_types
 
-    def build(self, context: Optional[ModuleType]) -> None:
+    def build(self, context: ModuleType | None) -> None:
         self.item_parsers = tuple(_get_deserializer(item_type, context) for item_type in self.item_types)
 
     @property
@@ -313,7 +308,7 @@ class TupleDeserializer(Deserializer[Tuple[Any, ...]]):
         type_names = ", ".join(python_type_to_str(item_type) for item_type in self.item_types)
         return f"Tuple[{type_names}]"
 
-    def parse(self, data: JsonType) -> Tuple[Any, ...]:
+    def parse(self, data: JsonType) -> tuple[Any, ...]:
         if not isinstance(data, list) or len(data) != len(self.item_parsers):
             if not isinstance(data, list):
                 raise JsonTypeError(
@@ -331,13 +326,13 @@ class TupleDeserializer(Deserializer[Tuple[Any, ...]]):
 class UnionDeserializer(Deserializer):
     "De-serializes a JSON value (of any type) into a Python union type."
 
-    member_types: Tuple[type, ...]
-    member_parsers: Tuple[Deserializer, ...]
+    member_types: tuple[type, ...]
+    member_parsers: tuple[Deserializer, ...]
 
-    def __init__(self, member_types: Tuple[type, ...]) -> None:
+    def __init__(self, member_types: tuple[type, ...]) -> None:
         self.member_types = member_types
 
-    def build(self, context: Optional[ModuleType]) -> None:
+    def build(self, context: ModuleType | None) -> None:
         self.member_parsers = tuple(_get_deserializer(member_type, context) for member_type in self.member_types)
 
     def parse(self, data: JsonType) -> Any:
@@ -354,7 +349,7 @@ class UnionDeserializer(Deserializer):
         raise JsonKeyError(f"type `Union[{type_names}]` could not be instantiated from: {data}")
 
 
-def get_literal_properties(typ: type) -> Set[str]:
+def get_literal_properties(typ: type) -> set[str]:
     "Returns the names of all properties in a class that are of a literal type."
 
     return set(
@@ -362,7 +357,7 @@ def get_literal_properties(typ: type) -> Set[str]:
     )
 
 
-def get_discriminating_properties(types: Tuple[type, ...]) -> Set[str]:
+def get_discriminating_properties(types: tuple[type, ...]) -> set[str]:
     "Returns a set of properties with literal type that are common across all specified classes."
 
     if not types or not all(isinstance(typ, type) for typ in types):
@@ -378,15 +373,15 @@ def get_discriminating_properties(types: Tuple[type, ...]) -> Set[str]:
 class TaggedUnionDeserializer(Deserializer):
     "De-serializes a JSON value with one or more disambiguating properties into a Python union type."
 
-    member_types: Tuple[type, ...]
-    disambiguating_properties: Set[str]
-    member_parsers: Dict[Tuple[str, Any], Deserializer]
+    member_types: tuple[type, ...]
+    disambiguating_properties: set[str]
+    member_parsers: dict[tuple[str, Any], Deserializer]
 
-    def __init__(self, member_types: Tuple[type, ...]) -> None:
+    def __init__(self, member_types: tuple[type, ...]) -> None:
         self.member_types = member_types
         self.disambiguating_properties = get_discriminating_properties(member_types)
 
-    def build(self, context: Optional[ModuleType]) -> None:
+    def build(self, context: ModuleType | None) -> None:
         self.member_parsers = {}
         for member_type in self.member_types:
             for property_name in self.disambiguating_properties:
@@ -435,13 +430,13 @@ class TaggedUnionDeserializer(Deserializer):
 class LiteralDeserializer(Deserializer):
     "De-serializes a JSON value into a Python literal type."
 
-    values: Tuple[Any, ...]
+    values: tuple[Any, ...]
     parser: Deserializer
 
-    def __init__(self, values: Tuple[Any, ...]) -> None:
+    def __init__(self, values: tuple[Any, ...]) -> None:
         self.values = values
 
-    def build(self, context: Optional[ModuleType]) -> None:
+    def build(self, context: ModuleType | None) -> None:
         literal_type_tuple = tuple(type(value) for value in self.values)
         literal_type_set = set(literal_type_tuple)
         if len(literal_type_set) != 1:
@@ -464,9 +459,9 @@ class LiteralDeserializer(Deserializer):
 class EnumDeserializer(Deserializer[E]):
     "Returns an enumeration instance based on the enumeration value read from a JSON value."
 
-    enum_type: Type[E]
+    enum_type: type[E]
 
-    def __init__(self, enum_type: Type[E]) -> None:
+    def __init__(self, enum_type: type[E]) -> None:
         self.enum_type = enum_type
 
     def parse(self, data: JsonType) -> E:
@@ -504,13 +499,13 @@ class FieldDeserializer(abc.ABC, Generic[T, R]):
         self.parser = parser
 
     @abc.abstractmethod
-    def parse_field(self, data: Dict[str, JsonType]) -> R: ...
+    def parse_field(self, data: dict[str, JsonType]) -> R: ...
 
 
 class RequiredFieldDeserializer(FieldDeserializer[T, T]):
     "Deserializes a JSON property into a mandatory Python object field."
 
-    def parse_field(self, data: Dict[str, JsonType]) -> T:
+    def parse_field(self, data: dict[str, JsonType]) -> T:
         if self.property_name not in data:
             raise JsonKeyError(f"missing required property `{self.property_name}` from JSON object: {data}")
 
@@ -520,7 +515,7 @@ class RequiredFieldDeserializer(FieldDeserializer[T, T]):
 class OptionalFieldDeserializer(FieldDeserializer[T, Optional[T]]):
     "Deserializes a JSON property into an optional Python object field with a default value of `None`."
 
-    def parse_field(self, data: Dict[str, JsonType]) -> Optional[T]:
+    def parse_field(self, data: dict[str, JsonType]) -> T | None:
         value = data.get(self.property_name)
         if value is not None:
             return self.parser.parse(value)
@@ -543,7 +538,7 @@ class DefaultFieldDeserializer(FieldDeserializer[T, T]):
         super().__init__(property_name, field_name, parser)
         self.default_value = default_value
 
-    def parse_field(self, data: Dict[str, JsonType]) -> T:
+    def parse_field(self, data: dict[str, JsonType]) -> T:
         value = data.get(self.property_name)
         if value is not None:
             return self.parser.parse(value)
@@ -566,7 +561,7 @@ class DefaultFactoryFieldDeserializer(FieldDeserializer[T, T]):
         super().__init__(property_name, field_name, parser)
         self.default_factory = default_factory
 
-    def parse_field(self, data: Dict[str, JsonType]) -> T:
+    def parse_field(self, data: dict[str, JsonType]) -> T:
         value = data.get(self.property_name)
         if value is not None:
             return self.parser.parse(value)
@@ -578,13 +573,13 @@ class ClassDeserializer(Deserializer[T]):
     "Base class for de-serializing class-like types such as data classes, named tuples and regular classes."
 
     class_type: type
-    property_parsers: List[FieldDeserializer]
-    property_fields: Set[str]
+    property_parsers: list[FieldDeserializer]
+    property_fields: set[str]
 
-    def __init__(self, class_type: Type[T]) -> None:
+    def __init__(self, class_type: type[T]) -> None:
         self.class_type = class_type
 
-    def assign(self, property_parsers: List[FieldDeserializer]) -> None:
+    def assign(self, property_parsers: list[FieldDeserializer]) -> None:
         self.property_parsers = property_parsers
         self.property_fields = set(property_parser.property_name for property_parser in property_parsers)
 
@@ -593,7 +588,7 @@ class ClassDeserializer(Deserializer[T]):
             type_name = python_type_to_str(self.class_type)
             raise JsonTypeError(f"`type `{type_name}` expects JSON `object` data but instead received: {data}")
 
-        object_data: Dict[str, JsonType] = typing.cast(Dict[str, JsonType], data)
+        object_data: dict[str, JsonType] = typing.cast(dict[str, JsonType], data)
 
         field_values = {}
         for property_parser in self.property_parsers:
@@ -619,8 +614,8 @@ class ClassDeserializer(Deserializer[T]):
 class NamedTupleDeserializer(ClassDeserializer[NamedTuple]):
     "De-serializes a named tuple from a JSON `object`."
 
-    def build(self, context: Optional[ModuleType]) -> None:
-        property_parsers: List[FieldDeserializer] = [
+    def build(self, context: ModuleType | None) -> None:
+        property_parsers: list[FieldDeserializer] = [
             RequiredFieldDeserializer(field_name, field_name, _get_deserializer(field_type, context))
             for field_name, field_type in get_resolved_hints(self.class_type).items()
         ]
@@ -634,13 +629,13 @@ class NamedTupleDeserializer(ClassDeserializer[NamedTuple]):
 class DataclassDeserializer(ClassDeserializer[T]):
     "De-serializes a data class from a JSON `object`."
 
-    def __init__(self, class_type: Type[T]) -> None:
+    def __init__(self, class_type: type[T]) -> None:
         if not dataclasses.is_dataclass(class_type):
             raise TypeError("expected: data-class type")
         super().__init__(class_type)  # type: ignore[arg-type]
 
-    def build(self, context: Optional[ModuleType]) -> None:
-        property_parsers: List[FieldDeserializer] = []
+    def build(self, context: ModuleType | None) -> None:
+        property_parsers: list[FieldDeserializer] = []
         resolved_hints = get_resolved_hints(self.class_type)
         for field in dataclasses.fields(self.class_type):
             field_type = resolved_hints[field.name]
@@ -651,7 +646,7 @@ class DataclassDeserializer(ClassDeserializer[T]):
             has_default_factory = field.default_factory is not dataclasses.MISSING
 
             if is_optional:
-                required_type: Type[T] = unwrap_optional_type(field_type)
+                required_type: type[T] = unwrap_optional_type(field_type)
             else:
                 required_type = field_type
 
@@ -691,15 +686,15 @@ class FrozenDataclassDeserializer(DataclassDeserializer[T]):
 class TypedClassDeserializer(ClassDeserializer[T]):
     "De-serializes a class with type annotations from a JSON `object` by iterating over class properties."
 
-    def build(self, context: Optional[ModuleType]) -> None:
-        property_parsers: List[FieldDeserializer] = []
+    def build(self, context: ModuleType | None) -> None:
+        property_parsers: list[FieldDeserializer] = []
         for field_name, field_type in get_resolved_hints(self.class_type).items():
             property_name = python_field_to_json_property(field_name, field_type)
 
             is_optional = is_type_optional(field_type)
 
             if is_optional:
-                required_type: Type[T] = unwrap_optional_type(field_type)
+                required_type: type[T] = unwrap_optional_type(field_type)
             else:
                 required_type = field_type
 
@@ -715,7 +710,7 @@ class TypedClassDeserializer(ClassDeserializer[T]):
         super().assign(property_parsers)
 
 
-def create_deserializer(typ: TypeLike, context: Optional[ModuleType] = None) -> Deserializer:
+def create_deserializer(typ: TypeLike, context: ModuleType | None = None) -> Deserializer:
     """
     Creates a de-serializer engine to produce a Python object from an object obtained from a JSON string.
 
@@ -741,10 +736,10 @@ def create_deserializer(typ: TypeLike, context: Optional[ModuleType] = None) ->
     return _get_deserializer(typ, context)
 
 
-_CACHE: Dict[Tuple[str, str], Deserializer] = {}
+_CACHE: dict[tuple[str, str], Deserializer] = {}
 
 
-def _get_deserializer(typ: TypeLike, context: Optional[ModuleType]) -> Deserializer:
+def _get_deserializer(typ: TypeLike, context: ModuleType | None) -> Deserializer:
     "Creates or re-uses a de-serializer engine to parse an object obtained from a JSON string."
 
     cache_key = None
diff --git a/llama_stack/strong_typing/docstring.py b/src/llama_stack/strong_typing/docstring.py
similarity index 93%
rename from llama_stack/strong_typing/docstring.py
rename to src/llama_stack/strong_typing/docstring.py
index 497c9ea82..4c9ea49e5 100644
--- a/llama_stack/strong_typing/docstring.py
+++ b/src/llama_stack/strong_typing/docstring.py
@@ -18,14 +18,15 @@ import re
 import sys
 import types
 import typing
+from collections.abc import Callable
 from dataclasses import dataclass
 from io import StringIO
-from typing import Any, Callable, Dict, Optional, Protocol, Type, TypeVar
+from typing import Any, Protocol, TypeVar
 
 if sys.version_info >= (3, 10):
     from typing import TypeGuard
 else:
-    from typing_extensions import TypeGuard
+    from typing import TypeGuard
 
 from .inspection import (
     DataclassInstance,
@@ -110,14 +111,14 @@ class Docstring:
     :param returns: The returns declaration extracted from a docstring.
     """
 
-    short_description: Optional[str] = None
-    long_description: Optional[str] = None
-    params: Dict[str, DocstringParam] = dataclasses.field(default_factory=dict)
-    returns: Optional[DocstringReturns] = None
-    raises: Dict[str, DocstringRaises] = dataclasses.field(default_factory=dict)
+    short_description: str | None = None
+    long_description: str | None = None
+    params: dict[str, DocstringParam] = dataclasses.field(default_factory=dict)
+    returns: DocstringReturns | None = None
+    raises: dict[str, DocstringRaises] = dataclasses.field(default_factory=dict)
 
     @property
-    def full_description(self) -> Optional[str]:
+    def full_description(self) -> str | None:
         if self.short_description and self.long_description:
             return f"{self.short_description}\n\n{self.long_description}"
         elif self.short_description:
@@ -158,18 +159,18 @@ class Docstring:
         return s
 
 
-def is_exception(member: object) -> TypeGuard[Type[BaseException]]:
+def is_exception(member: object) -> TypeGuard[type[BaseException]]:
     return isinstance(member, type) and issubclass(member, BaseException)
 
 
-def get_exceptions(module: types.ModuleType) -> Dict[str, Type[BaseException]]:
+def get_exceptions(module: types.ModuleType) -> dict[str, type[BaseException]]:
     "Returns all exception classes declared in a module."
 
     return {name: class_type for name, class_type in inspect.getmembers(module, is_exception)}
 
 
 class SupportsDoc(Protocol):
-    __doc__: Optional[str]
+    __doc__: str | None
 
 
 def _maybe_unwrap_async_iterator(t):
@@ -213,7 +214,7 @@ def parse_type(typ: SupportsDoc) -> Docstring:
     # assign exception types
     defining_module = inspect.getmodule(typ)
     if defining_module:
-        context: Dict[str, type] = {}
+        context: dict[str, type] = {}
         context.update(get_exceptions(builtins))
         context.update(get_exceptions(defining_module))
         for exc_name, exc in docstring.raises.items():
@@ -262,8 +263,8 @@ def parse_text(text: str) -> Docstring:
     else:
         long_description = None
 
-    params: Dict[str, DocstringParam] = {}
-    raises: Dict[str, DocstringRaises] = {}
+    params: dict[str, DocstringParam] = {}
+    raises: dict[str, DocstringRaises] = {}
     returns = None
     for match in re.finditer(r"(^:.*?)(?=^:|\Z)", meta_chunk, flags=re.DOTALL | re.MULTILINE):
         chunk = match.group(0)
@@ -325,7 +326,7 @@ def has_docstring(typ: SupportsDoc) -> bool:
     return bool(typ.__doc__)
 
 
-def get_docstring(typ: SupportsDoc) -> Optional[str]:
+def get_docstring(typ: SupportsDoc) -> str | None:
     if typ.__doc__ is None:
         return None
 
@@ -348,7 +349,7 @@ def check_docstring(typ: SupportsDoc, docstring: Docstring, strict: bool = False
         check_function_docstring(typ, docstring, strict)
 
 
-def check_dataclass_docstring(typ: Type[DataclassInstance], docstring: Docstring, strict: bool = False) -> None:
+def check_dataclass_docstring(typ: type[DataclassInstance], docstring: Docstring, strict: bool = False) -> None:
     """
     Verifies the doc-string of a data-class type.
 
diff --git a/llama_stack/strong_typing/exception.py b/src/llama_stack/strong_typing/exception.py
similarity index 100%
rename from llama_stack/strong_typing/exception.py
rename to src/llama_stack/strong_typing/exception.py
diff --git a/llama_stack/strong_typing/inspection.py b/src/llama_stack/strong_typing/inspection.py
similarity index 91%
rename from llama_stack/strong_typing/inspection.py
rename to src/llama_stack/strong_typing/inspection.py
index f3a4bef90..d3ebc7585 100644
--- a/llama_stack/strong_typing/inspection.py
+++ b/src/llama_stack/strong_typing/inspection.py
@@ -22,19 +22,12 @@ import sys
 import types
 import typing
 import uuid
+from collections.abc import Callable, Iterable
 from typing import (
     Any,
-    Callable,
-    Dict,
-    Iterable,
-    List,
     Literal,
     NamedTuple,
-    Optional,
     Protocol,
-    Set,
-    Tuple,
-    Type,
     TypeVar,
     Union,
     runtime_checkable,
@@ -43,12 +36,12 @@ from typing import (
 if sys.version_info >= (3, 9):
     from typing import Annotated
 else:
-    from typing_extensions import Annotated
+    from typing import Annotated
 
 if sys.version_info >= (3, 10):
     from typing import TypeGuard
 else:
-    from typing_extensions import TypeGuard
+    from typing import TypeGuard
 
 
 from pydantic import BaseModel
@@ -143,10 +136,10 @@ def evaluate_type(typ: Any, module: types.ModuleType) -> Any:
 
 @runtime_checkable
 class DataclassInstance(Protocol):
-    __dataclass_fields__: typing.ClassVar[Dict[str, dataclasses.Field]]
+    __dataclass_fields__: typing.ClassVar[dict[str, dataclasses.Field]]
 
 
-def is_dataclass_type(typ: Any) -> TypeGuard[Type[DataclassInstance]]:
+def is_dataclass_type(typ: Any) -> TypeGuard[type[DataclassInstance]]:
     "True if the argument corresponds to a data class type (but not an instance)."
 
     typ = unwrap_annotated_type(typ)
@@ -171,14 +164,14 @@ class DataclassField:
         self.default = default
 
 
-def dataclass_fields(cls: Type[DataclassInstance]) -> Iterable[DataclassField]:
+def dataclass_fields(cls: type[DataclassInstance]) -> Iterable[DataclassField]:
     "Generates the fields of a data-class resolving forward references."
 
     for field in dataclasses.fields(cls):
         yield DataclassField(field.name, evaluate_member_type(field.type, cls), field.default)
 
 
-def dataclass_field_by_name(cls: Type[DataclassInstance], name: str) -> DataclassField:
+def dataclass_field_by_name(cls: type[DataclassInstance], name: str) -> DataclassField:
     "Looks up a field in a data-class by its field name."
 
     for field in dataclasses.fields(cls):
@@ -194,7 +187,7 @@ def is_named_tuple_instance(obj: Any) -> TypeGuard[NamedTuple]:
     return is_named_tuple_type(type(obj))
 
 
-def is_named_tuple_type(typ: Any) -> TypeGuard[Type[NamedTuple]]:
+def is_named_tuple_type(typ: Any) -> TypeGuard[type[NamedTuple]]:
     """
     True if the argument corresponds to a named tuple type.
 
@@ -223,7 +216,7 @@ def is_named_tuple_type(typ: Any) -> TypeGuard[Type[NamedTuple]]:
 
 if sys.version_info >= (3, 11):
 
-    def is_type_enum(typ: object) -> TypeGuard[Type[enum.Enum]]:
+    def is_type_enum(typ: object) -> TypeGuard[type[enum.Enum]]:
         "True if the specified type is an enumeration type."
 
         typ = unwrap_annotated_type(typ)
@@ -231,7 +224,7 @@ if sys.version_info >= (3, 11):
 
 else:
 
-    def is_type_enum(typ: object) -> TypeGuard[Type[enum.Enum]]:
+    def is_type_enum(typ: object) -> TypeGuard[type[enum.Enum]]:
         "True if the specified type is an enumeration type."
 
         typ = unwrap_annotated_type(typ)
@@ -240,7 +233,7 @@ else:
         return isinstance(typ, type) and issubclass(typ, enum.Enum)
 
 
-def enum_value_types(enum_type: Type[enum.Enum]) -> List[type]:
+def enum_value_types(enum_type: type[enum.Enum]) -> list[type]:
     """
     Returns all unique value types of the `enum.Enum` type in definition order.
     """
@@ -250,8 +243,8 @@ def enum_value_types(enum_type: Type[enum.Enum]) -> List[type]:
 
 
 def extend_enum(
-    source: Type[enum.Enum],
-) -> Callable[[Type[enum.Enum]], Type[enum.Enum]]:
+    source: type[enum.Enum],
+) -> Callable[[type[enum.Enum]], type[enum.Enum]]:
     """
     Creates a new enumeration type extending the set of values in an existing type.
 
@@ -259,13 +252,13 @@ def extend_enum(
     :returns: A new enumeration type with the extended set of values.
     """
 
-    def wrap(extend: Type[enum.Enum]) -> Type[enum.Enum]:
+    def wrap(extend: type[enum.Enum]) -> type[enum.Enum]:
         # create new enumeration type combining the values from both types
-        values: Dict[str, Any] = {}
+        values: dict[str, Any] = {}
         values.update((e.name, e.value) for e in source)
         values.update((e.name, e.value) for e in extend)
         # mypy fails to determine that __name__ is always a string; hence the `ignore` directive.
-        enum_class: Type[enum.Enum] = enum.Enum(extend.__name__, values)  # type: ignore[misc]
+        enum_class: type[enum.Enum] = enum.Enum(extend.__name__, values)  # type: ignore[misc]
 
         # assign the newly created type to the same module where the extending class is defined
         enum_class.__module__ = extend.__module__
@@ -292,7 +285,7 @@ else:
         return typing.get_origin(typ) is Union
 
 
-def is_type_optional(typ: object, strict: bool = False) -> TypeGuard[Type[Optional[Any]]]:
+def is_type_optional(typ: object, strict: bool = False) -> TypeGuard[type[Any | None]]:
     """
     True if the type annotation corresponds to an optional type (e.g. `Optional[T]` or `Union[T1,T2,None]`).
 
@@ -313,7 +306,7 @@ def is_type_optional(typ: object, strict: bool = False) -> TypeGuard[Type[Option
     return False
 
 
-def unwrap_optional_type(typ: Type[Optional[T]]) -> Type[T]:
+def unwrap_optional_type(typ: type[T | None]) -> type[T]:
     """
     Extracts the inner type of an optional type.
 
@@ -324,7 +317,7 @@ def unwrap_optional_type(typ: Type[Optional[T]]) -> Type[T]:
     return rewrap_annotated_type(_unwrap_optional_type, typ)
 
 
-def _unwrap_optional_type(typ: Type[Optional[T]]) -> Type[T]:
+def _unwrap_optional_type(typ: type[T | None]) -> type[T]:
     "Extracts the type qualified as optional (e.g. returns `T` for `Optional[T]`)."
 
     # Optional[T] is represented internally as Union[T, None]
@@ -346,7 +339,7 @@ def is_type_union(typ: object) -> bool:
     return False
 
 
-def unwrap_union_types(typ: object) -> Tuple[object, ...]:
+def unwrap_union_types(typ: object) -> tuple[object, ...]:
     """
     Extracts the inner types of a union type.
 
@@ -358,7 +351,7 @@ def unwrap_union_types(typ: object) -> Tuple[object, ...]:
     return _unwrap_union_types(typ)
 
 
-def _unwrap_union_types(typ: object) -> Tuple[object, ...]:
+def _unwrap_union_types(typ: object) -> tuple[object, ...]:
     "Extracts the types in a union (e.g. returns a tuple of types `T1` and `T2` for `Union[T1, T2]`)."
 
     if not _is_union_like(typ):
@@ -389,7 +382,7 @@ def unwrap_literal_value(typ: object) -> Any:
     return args[0]
 
 
-def unwrap_literal_values(typ: object) -> Tuple[Any, ...]:
+def unwrap_literal_values(typ: object) -> tuple[Any, ...]:
     """
     Extracts the constant values captured by a literal type.
 
@@ -401,7 +394,7 @@ def unwrap_literal_values(typ: object) -> Tuple[Any, ...]:
     return typing.get_args(typ)
 
 
-def unwrap_literal_types(typ: object) -> Tuple[type, ...]:
+def unwrap_literal_types(typ: object) -> tuple[type, ...]:
     """
     Extracts the types of the constant values captured by a literal type.
 
@@ -412,14 +405,14 @@ def unwrap_literal_types(typ: object) -> Tuple[type, ...]:
     return tuple(type(t) for t in unwrap_literal_values(typ))
 
 
-def is_generic_list(typ: object) -> TypeGuard[Type[list]]:
+def is_generic_list(typ: object) -> TypeGuard[type[list]]:
     "True if the specified type is a generic list, i.e. `List[T]`."
 
     typ = unwrap_annotated_type(typ)
     return typing.get_origin(typ) is list
 
 
-def unwrap_generic_list(typ: Type[List[T]]) -> Type[T]:
+def unwrap_generic_list(typ: type[list[T]]) -> type[T]:
     """
     Extracts the item type of a list type.
 
@@ -430,21 +423,21 @@ def unwrap_generic_list(typ: Type[List[T]]) -> Type[T]:
     return rewrap_annotated_type(_unwrap_generic_list, typ)
 
 
-def _unwrap_generic_list(typ: Type[List[T]]) -> Type[T]:
+def _unwrap_generic_list(typ: type[list[T]]) -> type[T]:
     "Extracts the item type of a list type (e.g. returns `T` for `List[T]`)."
 
     (list_type,) = typing.get_args(typ)  # unpack single tuple element
     return list_type  # type: ignore[no-any-return]
 
 
-def is_generic_set(typ: object) -> TypeGuard[Type[set]]:
+def is_generic_set(typ: object) -> TypeGuard[type[set]]:
     "True if the specified type is a generic set, i.e. `Set[T]`."
 
     typ = unwrap_annotated_type(typ)
     return typing.get_origin(typ) is set
 
 
-def unwrap_generic_set(typ: Type[Set[T]]) -> Type[T]:
+def unwrap_generic_set(typ: type[set[T]]) -> type[T]:
     """
     Extracts the item type of a set type.
 
@@ -455,21 +448,21 @@ def unwrap_generic_set(typ: Type[Set[T]]) -> Type[T]:
     return rewrap_annotated_type(_unwrap_generic_set, typ)
 
 
-def _unwrap_generic_set(typ: Type[Set[T]]) -> Type[T]:
+def _unwrap_generic_set(typ: type[set[T]]) -> type[T]:
     "Extracts the item type of a set type (e.g. returns `T` for `Set[T]`)."
 
     (set_type,) = typing.get_args(typ)  # unpack single tuple element
     return set_type  # type: ignore[no-any-return]
 
 
-def is_generic_dict(typ: object) -> TypeGuard[Type[dict]]:
+def is_generic_dict(typ: object) -> TypeGuard[type[dict]]:
     "True if the specified type is a generic dictionary, i.e. `Dict[KeyType, ValueType]`."
 
     typ = unwrap_annotated_type(typ)
     return typing.get_origin(typ) is dict
 
 
-def unwrap_generic_dict(typ: Type[Dict[K, V]]) -> Tuple[Type[K], Type[V]]:
+def unwrap_generic_dict(typ: type[dict[K, V]]) -> tuple[type[K], type[V]]:
     """
     Extracts the key and value types of a dictionary type as a tuple.
 
@@ -480,7 +473,7 @@ def unwrap_generic_dict(typ: Type[Dict[K, V]]) -> Tuple[Type[K], Type[V]]:
     return _unwrap_generic_dict(unwrap_annotated_type(typ))
 
 
-def _unwrap_generic_dict(typ: Type[Dict[K, V]]) -> Tuple[Type[K], Type[V]]:
+def _unwrap_generic_dict(typ: type[dict[K, V]]) -> tuple[type[K], type[V]]:
     "Extracts the key and value types of a dict type (e.g. returns (`K`, `V`) for `Dict[K, V]`)."
 
     key_type, value_type = typing.get_args(typ)
@@ -493,7 +486,7 @@ def is_type_annotated(typ: TypeLike) -> bool:
     return getattr(typ, "__metadata__", None) is not None
 
 
-def get_annotation(data_type: TypeLike, annotation_type: Type[T]) -> Optional[T]:
+def get_annotation(data_type: TypeLike, annotation_type: type[T]) -> T | None:
     """
     Returns the first annotation on a data type that matches the expected annotation type.
 
@@ -522,7 +515,7 @@ def unwrap_annotated_type(typ: T) -> T:
         return typ
 
 
-def rewrap_annotated_type(transform: Callable[[Type[S]], Type[T]], typ: Type[S]) -> Type[T]:
+def rewrap_annotated_type(transform: Callable[[type[S]], type[T]], typ: type[S]) -> type[T]:
     """
     Un-boxes, transforms and re-boxes an optionally annotated type.
 
@@ -546,7 +539,7 @@ def rewrap_annotated_type(transform: Callable[[Type[S]], Type[T]], typ: Type[S])
         return transformed_type
 
 
-def get_module_classes(module: types.ModuleType) -> List[type]:
+def get_module_classes(module: types.ModuleType) -> list[type]:
     "Returns all classes declared directly in a module."
 
     def is_class_member(member: object) -> TypeGuard[type]:
@@ -557,16 +550,16 @@ def get_module_classes(module: types.ModuleType) -> List[type]:
 
 if sys.version_info >= (3, 9):
 
-    def get_resolved_hints(typ: type) -> Dict[str, type]:
+    def get_resolved_hints(typ: type) -> dict[str, type]:
         return typing.get_type_hints(typ, include_extras=True)
 
 else:
 
-    def get_resolved_hints(typ: type) -> Dict[str, type]:
+    def get_resolved_hints(typ: type) -> dict[str, type]:
         return typing.get_type_hints(typ)
 
 
-def get_class_properties(typ: type) -> Iterable[Tuple[str, type | str]]:
+def get_class_properties(typ: type) -> Iterable[tuple[str, type | str]]:
     "Returns all properties of a class."
 
     if is_dataclass_type(typ):
@@ -593,7 +586,7 @@ def get_class_properties(typ: type) -> Iterable[Tuple[str, type | str]]:
         return resolved_hints.items()
 
 
-def get_class_property(typ: type, name: str) -> Optional[type | str]:
+def get_class_property(typ: type, name: str) -> type | str | None:
     "Looks up the annotated type of a property in a class by its property name."
 
     for property_name, property_type in get_class_properties(typ):
@@ -607,7 +600,7 @@ class _ROOT:
     pass
 
 
-def get_referenced_types(typ: TypeLike, module: Optional[types.ModuleType] = None) -> Set[type]:
+def get_referenced_types(typ: TypeLike, module: types.ModuleType | None = None) -> set[type]:
     """
     Extracts types directly or indirectly referenced by this type.
 
@@ -631,10 +624,10 @@ class TypeCollector:
     :param graph: The type dependency graph, linking types to types they depend on.
     """
 
-    graph: Dict[type, Set[type]]
+    graph: dict[type, set[type]]
 
     @property
-    def references(self) -> Set[type]:
+    def references(self) -> set[type]:
         "Types collected by the type collector."
 
         dependencies = set()
@@ -659,8 +652,8 @@ class TypeCollector:
     def run(
         self,
         typ: TypeLike,
-        cls: Type[DataclassInstance],
-        module: Optional[types.ModuleType],
+        cls: type[DataclassInstance],
+        module: types.ModuleType | None,
     ) -> None:
         """
         Extracts types indirectly referenced by this type.
@@ -779,7 +772,7 @@ def create_module(name: str) -> types.ModuleType:
 
 if sys.version_info >= (3, 10):
 
-    def create_data_type(class_name: str, fields: List[Tuple[str, type]]) -> type:
+    def create_data_type(class_name: str, fields: list[tuple[str, type]]) -> type:
         """
         Creates a new data-class type dynamically.
 
@@ -793,7 +786,7 @@ if sys.version_info >= (3, 10):
 
 else:
 
-    def create_data_type(class_name: str, fields: List[Tuple[str, type]]) -> type:
+    def create_data_type(class_name: str, fields: list[tuple[str, type]]) -> type:
         """
         Creates a new data-class type dynamically.
 
@@ -821,7 +814,7 @@ else:
         return cls
 
 
-def create_object(typ: Type[T]) -> T:
+def create_object(typ: type[T]) -> T:
     "Creates an instance of a type."
 
     if issubclass(typ, Exception):
@@ -906,7 +899,7 @@ def is_generic_instance(obj: Any, typ: TypeLike) -> bool:
 
 
 class RecursiveChecker:
-    _pred: Optional[Callable[[type, Any], bool]]
+    _pred: Callable[[type, Any], bool] | None
 
     def __init__(self, pred: Callable[[type, Any], bool]) -> None:
         """
@@ -1018,9 +1011,9 @@ def check_recursive(
     obj: object,
     /,
     *,
-    pred: Optional[Callable[[type, Any], bool]] = None,
-    type_pred: Optional[Callable[[type], bool]] = None,
-    value_pred: Optional[Callable[[Any], bool]] = None,
+    pred: Callable[[type, Any], bool] | None = None,
+    type_pred: Callable[[type], bool] | None = None,
+    value_pred: Callable[[Any], bool] | None = None,
 ) -> bool:
     """
     Checks if a predicate applies to all nested member properties of an object recursively.
@@ -1036,7 +1029,7 @@ def check_recursive(
         if pred is not None:
             raise TypeError("filter predicate not permitted when type and value predicates are present")
 
-        type_p: Callable[[Type[T]], bool] = type_pred
+        type_p: Callable[[type[T]], bool] = type_pred
         value_p: Callable[[T], bool] = value_pred
         pred = lambda typ, obj: not type_p(typ) or value_p(obj)  # noqa: E731
 
diff --git a/llama_stack/strong_typing/mapping.py b/src/llama_stack/strong_typing/mapping.py
similarity index 89%
rename from llama_stack/strong_typing/mapping.py
rename to src/llama_stack/strong_typing/mapping.py
index 408375a9f..d6c1a3172 100644
--- a/llama_stack/strong_typing/mapping.py
+++ b/src/llama_stack/strong_typing/mapping.py
@@ -11,13 +11,12 @@ Type-safe data interchange for Python data classes.
 """
 
 import keyword
-from typing import Optional
 
 from .auxiliary import Alias
 from .inspection import get_annotation
 
 
-def python_field_to_json_property(python_id: str, python_type: Optional[object] = None) -> str:
+def python_field_to_json_property(python_id: str, python_type: object | None = None) -> str:
     """
     Map a Python field identifier to a JSON property name.
 
diff --git a/llama_stack/strong_typing/name.py b/src/llama_stack/strong_typing/name.py
similarity index 95%
rename from llama_stack/strong_typing/name.py
rename to src/llama_stack/strong_typing/name.py
index a1a2ae5f1..00cdc2ae2 100644
--- a/llama_stack/strong_typing/name.py
+++ b/src/llama_stack/strong_typing/name.py
@@ -11,7 +11,7 @@ Type-safe data interchange for Python data classes.
 """
 
 import typing
-from typing import Any, Literal, Optional, Tuple, Union
+from typing import Any, Literal, Union
 
 from .auxiliary import _auxiliary_types
 from .inspection import (
@@ -39,7 +39,7 @@ class TypeFormatter:
     def __init__(self, use_union_operator: bool = False) -> None:
         self.use_union_operator = use_union_operator
 
-    def union_to_str(self, data_type_args: Tuple[TypeLike, ...]) -> str:
+    def union_to_str(self, data_type_args: tuple[TypeLike, ...]) -> str:
         if self.use_union_operator:
             return " | ".join(self.python_type_to_str(t) for t in data_type_args)
         else:
@@ -100,7 +100,7 @@ class TypeFormatter:
         metadata = getattr(data_type, "__metadata__", None)
         if metadata is not None:
             # type is Annotated[T, ...]
-            metatuple: Tuple[Any, ...] = metadata
+            metatuple: tuple[Any, ...] = metadata
             arg = typing.get_args(data_type)[0]
 
             # check for auxiliary types with user-defined annotations
@@ -110,7 +110,7 @@ class TypeFormatter:
                 if arg is not auxiliary_arg:
                     continue
 
-                auxiliary_metatuple: Optional[Tuple[Any, ...]] = getattr(auxiliary_type, "__metadata__", None)
+                auxiliary_metatuple: tuple[Any, ...] | None = getattr(auxiliary_type, "__metadata__", None)
                 if auxiliary_metatuple is None:
                     continue
 
diff --git a/llama_stack/strong_typing/py.typed b/src/llama_stack/strong_typing/py.typed
similarity index 100%
rename from llama_stack/strong_typing/py.typed
rename to src/llama_stack/strong_typing/py.typed
diff --git a/llama_stack/strong_typing/schema.py b/src/llama_stack/strong_typing/schema.py
similarity index 92%
rename from llama_stack/strong_typing/schema.py
rename to src/llama_stack/strong_typing/schema.py
index f911fc41f..15a3bbbfc 100644
--- a/llama_stack/strong_typing/schema.py
+++ b/src/llama_stack/strong_typing/schema.py
@@ -21,24 +21,19 @@ import json
 import types
 import typing
 import uuid
+from collections.abc import Callable
 from copy import deepcopy
 from typing import (
+    Annotated,
     Any,
-    Callable,
     ClassVar,
-    Dict,
-    List,
     Literal,
-    Optional,
-    Tuple,
-    Type,
     TypeVar,
     Union,
     overload,
 )
 
 import jsonschema
-from typing_extensions import Annotated
 
 from . import docstring
 from .auxiliary import (
@@ -71,7 +66,7 @@ OBJECT_ENUM_EXPANSION_LIMIT = 4
 T = TypeVar("T")
 
 
-def get_class_docstrings(data_type: type) -> Tuple[Optional[str], Optional[str]]:
+def get_class_docstrings(data_type: type) -> tuple[str | None, str | None]:
     docstr = docstring.parse_type(data_type)
 
     # check if class has a doc-string other than the auto-generated string assigned by @dataclass
@@ -82,8 +77,8 @@ def get_class_docstrings(data_type: type) -> Tuple[Optional[str], Optional[str]]
 
 
 def get_class_property_docstrings(
-    data_type: type, transform_fun: Optional[Callable[[type, str, str], str]] = None
-) -> Dict[str, str]:
+    data_type: type, transform_fun: Callable[[type, str, str], str] | None = None
+) -> dict[str, str]:
     """
     Extracts the documentation strings associated with the properties of a composite type.
 
@@ -92,7 +87,7 @@ def get_class_property_docstrings(
     :returns: A dictionary mapping property names to descriptions.
     """
 
-    result: Dict[str, str] = {}
+    result: dict[str, str] = {}
     # Only try to get MRO if data_type is actually a class
     # Special types like Literal, Union, etc. don't have MRO
     if not inspect.isclass(data_type):
@@ -125,7 +120,7 @@ def docstring_to_schema(data_type: type) -> Schema:
     return schema
 
 
-def id_from_ref(data_type: Union[typing.ForwardRef, str, type]) -> str:
+def id_from_ref(data_type: typing.ForwardRef | str | type) -> str:
     "Extracts the name of a possibly forward-referenced type."
 
     if isinstance(data_type, typing.ForwardRef):
@@ -137,7 +132,7 @@ def id_from_ref(data_type: Union[typing.ForwardRef, str, type]) -> str:
         return data_type.__name__
 
 
-def type_from_ref(data_type: Union[typing.ForwardRef, str, type]) -> Tuple[str, type]:
+def type_from_ref(data_type: typing.ForwardRef | str | type) -> tuple[str, type]:
     "Creates a type from a forward reference."
 
     if isinstance(data_type, typing.ForwardRef):
@@ -153,16 +148,16 @@ def type_from_ref(data_type: Union[typing.ForwardRef, str, type]) -> Tuple[str,
 
 @dataclasses.dataclass
 class TypeCatalogEntry:
-    schema: Optional[Schema]
+    schema: Schema | None
     identifier: str
-    examples: Optional[JsonType] = None
+    examples: JsonType | None = None
 
 
 class TypeCatalog:
     "Maintains an association of well-known Python types to their JSON schema."
 
-    _by_type: Dict[TypeLike, TypeCatalogEntry]
-    _by_name: Dict[str, TypeCatalogEntry]
+    _by_type: dict[TypeLike, TypeCatalogEntry]
+    _by_name: dict[str, TypeCatalogEntry]
 
     def __init__(self) -> None:
         self._by_type = {}
@@ -179,9 +174,9 @@ class TypeCatalog:
     def add(
         self,
         data_type: TypeLike,
-        schema: Optional[Schema],
+        schema: Schema | None,
         identifier: str,
-        examples: Optional[List[JsonType]] = None,
+        examples: list[JsonType] | None = None,
     ) -> None:
         if isinstance(data_type, typing.ForwardRef):
             raise TypeError("forward references cannot be used to register a type")
@@ -207,17 +202,17 @@ class SchemaOptions:
     definitions_path: str = "#/definitions/"
     use_descriptions: bool = True
     use_examples: bool = True
-    property_description_fun: Optional[Callable[[type, str, str], str]] = None
+    property_description_fun: Callable[[type, str, str], str] | None = None
 
 
 class JsonSchemaGenerator:
     "Creates a JSON schema with user-defined type definitions."
 
     type_catalog: ClassVar[TypeCatalog] = TypeCatalog()
-    types_used: Dict[str, TypeLike]
+    types_used: dict[str, TypeLike]
     options: SchemaOptions
 
-    def __init__(self, options: Optional[SchemaOptions] = None):
+    def __init__(self, options: SchemaOptions | None = None):
         if options is None:
             self.options = SchemaOptions()
         else:
@@ -249,13 +244,13 @@ class JsonSchemaGenerator:
     def _(self, arg: MaxLength) -> Schema:
         return {"maxLength": arg.value}
 
-    def _with_metadata(self, type_schema: Schema, metadata: Optional[Tuple[Any, ...]]) -> Schema:
+    def _with_metadata(self, type_schema: Schema, metadata: tuple[Any, ...] | None) -> Schema:
         if metadata:
             for m in metadata:
                 type_schema.update(self._metadata_to_schema(m))
         return type_schema
 
-    def _simple_type_to_schema(self, typ: TypeLike, json_schema_extra: Optional[dict] = None) -> Optional[Schema]:
+    def _simple_type_to_schema(self, typ: TypeLike, json_schema_extra: dict | None = None) -> Schema | None:
         """
         Returns the JSON schema associated with a simple, unrestricted type.
 
@@ -319,7 +314,7 @@ class JsonSchemaGenerator:
         self,
         data_type: TypeLike,
         force_expand: bool = False,
-        json_schema_extra: Optional[dict] = None,
+        json_schema_extra: dict | None = None,
     ) -> Schema:
         common_info = {}
         if json_schema_extra and "deprecated" in json_schema_extra:
@@ -330,7 +325,7 @@ class JsonSchemaGenerator:
         self,
         data_type: TypeLike,
         force_expand: bool = False,
-        json_schema_extra: Optional[dict] = None,
+        json_schema_extra: dict | None = None,
     ) -> Schema:
         """
         Returns the JSON schema associated with a type.
@@ -386,7 +381,7 @@ class JsonSchemaGenerator:
                 return {"$ref": f"{self.options.definitions_path}{identifier}"}
 
         if is_type_enum(typ):
-            enum_type: Type[enum.Enum] = typ
+            enum_type: type[enum.Enum] = typ
             value_types = enum_value_types(enum_type)
             if len(value_types) != 1:
                 raise ValueError(
@@ -438,7 +433,7 @@ class JsonSchemaGenerator:
                     }
                 else:
                     dict_schema = {
-                        "properties": {value: value_schema for value in enum_values},
+                        "properties": dict.fromkeys(enum_values, value_schema),
                         "additionalProperties": False,
                     }
             else:
@@ -508,8 +503,8 @@ class JsonSchemaGenerator:
         members = dict(inspect.getmembers(typ, lambda a: not inspect.isroutine(a)))
 
         property_docstrings = get_class_property_docstrings(typ, self.options.property_description_fun)
-        properties: Dict[str, Schema] = {}
-        required: List[str] = []
+        properties: dict[str, Schema] = {}
+        required: list[str] = []
         for property_name, property_type in get_class_properties(typ):
             # rename property if an alias name is specified
             alias = get_annotation(property_type, Alias)
@@ -599,7 +594,7 @@ class JsonSchemaGenerator:
 
         return type_schema
 
-    def classdef_to_schema(self, data_type: TypeLike, force_expand: bool = False) -> Tuple[Schema, Dict[str, Schema]]:
+    def classdef_to_schema(self, data_type: TypeLike, force_expand: bool = False) -> tuple[Schema, dict[str, Schema]]:
         """
         Returns the JSON schema associated with a type and any nested types.
 
@@ -616,7 +611,7 @@ class JsonSchemaGenerator:
         try:
             type_schema = self.type_to_schema(data_type, force_expand=force_expand)
 
-            types_defined: Dict[str, Schema] = {}
+            types_defined: dict[str, Schema] = {}
             while len(self.types_used) > len(types_defined):
                 # make a snapshot copy; original collection is going to be modified
                 types_undefined = {
@@ -647,7 +642,7 @@ class Validator(enum.Enum):
 
 def classdef_to_schema(
     data_type: TypeLike,
-    options: Optional[SchemaOptions] = None,
+    options: SchemaOptions | None = None,
     validator: Validator = Validator.Latest,
 ) -> Schema:
     """
@@ -701,7 +696,7 @@ def print_schema(data_type: type) -> None:
     print(json.dumps(s, indent=4))
 
 
-def get_schema_identifier(data_type: type) -> Optional[str]:
+def get_schema_identifier(data_type: type) -> str | None:
     if data_type in JsonSchemaGenerator.type_catalog:
         return JsonSchemaGenerator.type_catalog.get(data_type).identifier
     else:
@@ -710,9 +705,9 @@ def get_schema_identifier(data_type: type) -> Optional[str]:
 
 def register_schema(
     data_type: T,
-    schema: Optional[Schema] = None,
-    name: Optional[str] = None,
-    examples: Optional[List[JsonType]] = None,
+    schema: Schema | None = None,
+    name: str | None = None,
+    examples: list[JsonType] | None = None,
 ) -> T:
     """
     Associates a type with a JSON schema definition.
@@ -733,22 +728,22 @@ def register_schema(
 
 
 @overload
-def json_schema_type(cls: Type[T], /) -> Type[T]: ...
+def json_schema_type(cls: type[T], /) -> type[T]: ...
 
 
 @overload
-def json_schema_type(cls: None, *, schema: Optional[Schema] = None) -> Callable[[Type[T]], Type[T]]: ...
+def json_schema_type(cls: None, *, schema: Schema | None = None) -> Callable[[type[T]], type[T]]: ...
 
 
 def json_schema_type(
-    cls: Optional[Type[T]] = None,
+    cls: type[T] | None = None,
     *,
-    schema: Optional[Schema] = None,
-    examples: Optional[List[JsonType]] = None,
-) -> Union[Type[T], Callable[[Type[T]], Type[T]]]:
+    schema: Schema | None = None,
+    examples: list[JsonType] | None = None,
+) -> type[T] | Callable[[type[T]], type[T]]:
     """Decorator to add user-defined schema definition to a class."""
 
-    def wrap(cls: Type[T]) -> Type[T]:
+    def wrap(cls: type[T]) -> type[T]:
         return register_schema(cls, schema, examples=examples)
 
     # see if decorator is used as @json_schema_type or @json_schema_type()
diff --git a/llama_stack/strong_typing/serialization.py b/src/llama_stack/strong_typing/serialization.py
similarity index 95%
rename from llama_stack/strong_typing/serialization.py
rename to src/llama_stack/strong_typing/serialization.py
index c00a0aad5..3e34945ad 100644
--- a/llama_stack/strong_typing/serialization.py
+++ b/src/llama_stack/strong_typing/serialization.py
@@ -14,7 +14,7 @@ import inspect
 import json
 import sys
 from types import ModuleType
-from typing import Any, Optional, TextIO, TypeVar
+from typing import Any, TextIO, TypeVar
 
 from .core import JsonType
 from .deserializer import create_deserializer
@@ -42,7 +42,7 @@ def object_to_json(obj: Any) -> JsonType:
     return generator.generate(obj)
 
 
-def json_to_object(typ: TypeLike, data: JsonType, *, context: Optional[ModuleType] = None) -> object:
+def json_to_object(typ: TypeLike, data: JsonType, *, context: ModuleType | None = None) -> object:
     """
     Creates an object from a representation that has been de-serialized from JSON.
 
diff --git a/llama_stack/strong_typing/serializer.py b/src/llama_stack/strong_typing/serializer.py
similarity index 87%
rename from llama_stack/strong_typing/serializer.py
rename to src/llama_stack/strong_typing/serializer.py
index 17848c14b..4a12a1f4b 100644
--- a/llama_stack/strong_typing/serializer.py
+++ b/src/llama_stack/strong_typing/serializer.py
@@ -20,19 +20,13 @@ import ipaddress
 import sys
 import typing
 import uuid
+from collections.abc import Callable
 from types import FunctionType, MethodType, ModuleType
 from typing import (
     Any,
-    Callable,
-    Dict,
     Generic,
-    List,
     Literal,
     NamedTuple,
-    Optional,
-    Set,
-    Tuple,
-    Type,
     TypeVar,
     Union,
 )
@@ -133,7 +127,7 @@ class IPv6Serializer(Serializer[ipaddress.IPv6Address]):
 
 
 class EnumSerializer(Serializer[enum.Enum]):
-    def generate(self, obj: enum.Enum) -> Union[int, str]:
+    def generate(self, obj: enum.Enum) -> int | str:
         value = obj.value
         if isinstance(value, int):
             return value
@@ -141,12 +135,12 @@ class EnumSerializer(Serializer[enum.Enum]):
 
 
 class UntypedListSerializer(Serializer[list]):
-    def generate(self, obj: list) -> List[JsonType]:
+    def generate(self, obj: list) -> list[JsonType]:
         return [object_to_json(item) for item in obj]
 
 
 class UntypedDictSerializer(Serializer[dict]):
-    def generate(self, obj: dict) -> Dict[str, JsonType]:
+    def generate(self, obj: dict) -> dict[str, JsonType]:
         if obj and isinstance(next(iter(obj.keys())), enum.Enum):
             iterator = ((key.value, object_to_json(value)) for key, value in obj.items())
         else:
@@ -155,41 +149,41 @@ class UntypedDictSerializer(Serializer[dict]):
 
 
 class UntypedSetSerializer(Serializer[set]):
-    def generate(self, obj: set) -> List[JsonType]:
+    def generate(self, obj: set) -> list[JsonType]:
         return [object_to_json(item) for item in obj]
 
 
 class UntypedTupleSerializer(Serializer[tuple]):
-    def generate(self, obj: tuple) -> List[JsonType]:
+    def generate(self, obj: tuple) -> list[JsonType]:
         return [object_to_json(item) for item in obj]
 
 
 class TypedCollectionSerializer(Serializer, Generic[T]):
     generator: Serializer[T]
 
-    def __init__(self, item_type: Type[T], context: Optional[ModuleType]) -> None:
+    def __init__(self, item_type: type[T], context: ModuleType | None) -> None:
         self.generator = _get_serializer(item_type, context)
 
 
 class TypedListSerializer(TypedCollectionSerializer[T]):
-    def generate(self, obj: List[T]) -> List[JsonType]:
+    def generate(self, obj: list[T]) -> list[JsonType]:
         return [self.generator.generate(item) for item in obj]
 
 
 class TypedStringDictSerializer(TypedCollectionSerializer[T]):
-    def __init__(self, value_type: Type[T], context: Optional[ModuleType]) -> None:
+    def __init__(self, value_type: type[T], context: ModuleType | None) -> None:
         super().__init__(value_type, context)
 
-    def generate(self, obj: Dict[str, T]) -> Dict[str, JsonType]:
+    def generate(self, obj: dict[str, T]) -> dict[str, JsonType]:
         return {key: self.generator.generate(value) for key, value in obj.items()}
 
 
 class TypedEnumDictSerializer(TypedCollectionSerializer[T]):
     def __init__(
         self,
-        key_type: Type[enum.Enum],
-        value_type: Type[T],
-        context: Optional[ModuleType],
+        key_type: type[enum.Enum],
+        value_type: type[T],
+        context: ModuleType | None,
     ) -> None:
         super().__init__(value_type, context)
 
@@ -203,22 +197,22 @@ class TypedEnumDictSerializer(TypedCollectionSerializer[T]):
         if value_type is not str:
             raise JsonTypeError("invalid enumeration key type, expected `enum.Enum` with string values")
 
-    def generate(self, obj: Dict[enum.Enum, T]) -> Dict[str, JsonType]:
+    def generate(self, obj: dict[enum.Enum, T]) -> dict[str, JsonType]:
         return {key.value: self.generator.generate(value) for key, value in obj.items()}
 
 
 class TypedSetSerializer(TypedCollectionSerializer[T]):
-    def generate(self, obj: Set[T]) -> JsonType:
+    def generate(self, obj: set[T]) -> JsonType:
         return [self.generator.generate(item) for item in obj]
 
 
 class TypedTupleSerializer(Serializer[tuple]):
-    item_generators: Tuple[Serializer, ...]
+    item_generators: tuple[Serializer, ...]
 
-    def __init__(self, item_types: Tuple[type, ...], context: Optional[ModuleType]) -> None:
+    def __init__(self, item_types: tuple[type, ...], context: ModuleType | None) -> None:
         self.item_generators = tuple(_get_serializer(item_type, context) for item_type in item_types)
 
-    def generate(self, obj: tuple) -> List[JsonType]:
+    def generate(self, obj: tuple) -> list[JsonType]:
         return [item_generator.generate(item) for item_generator, item in zip(self.item_generators, obj, strict=False)]
 
 
@@ -250,16 +244,16 @@ class FieldSerializer(Generic[T]):
         self.property_name = property_name
         self.generator = generator
 
-    def generate_field(self, obj: object, object_dict: Dict[str, JsonType]) -> None:
+    def generate_field(self, obj: object, object_dict: dict[str, JsonType]) -> None:
         value = getattr(obj, self.field_name)
         if value is not None:
             object_dict[self.property_name] = self.generator.generate(value)
 
 
 class TypedClassSerializer(Serializer[T]):
-    property_generators: List[FieldSerializer]
+    property_generators: list[FieldSerializer]
 
-    def __init__(self, class_type: Type[T], context: Optional[ModuleType]) -> None:
+    def __init__(self, class_type: type[T], context: ModuleType | None) -> None:
         self.property_generators = [
             FieldSerializer(
                 field_name,
@@ -269,8 +263,8 @@ class TypedClassSerializer(Serializer[T]):
             for field_name, field_type in get_class_properties(class_type)
         ]
 
-    def generate(self, obj: T) -> Dict[str, JsonType]:
-        object_dict: Dict[str, JsonType] = {}
+    def generate(self, obj: T) -> dict[str, JsonType]:
+        object_dict: dict[str, JsonType] = {}
         for property_generator in self.property_generators:
             property_generator.generate_field(obj, object_dict)
 
@@ -278,12 +272,12 @@ class TypedClassSerializer(Serializer[T]):
 
 
 class TypedNamedTupleSerializer(TypedClassSerializer[NamedTuple]):
-    def __init__(self, class_type: Type[NamedTuple], context: Optional[ModuleType]) -> None:
+    def __init__(self, class_type: type[NamedTuple], context: ModuleType | None) -> None:
         super().__init__(class_type, context)
 
 
 class DataclassSerializer(TypedClassSerializer[T]):
-    def __init__(self, class_type: Type[T], context: Optional[ModuleType]) -> None:
+    def __init__(self, class_type: type[T], context: ModuleType | None) -> None:
         super().__init__(class_type, context)
 
 
@@ -295,7 +289,7 @@ class UnionSerializer(Serializer):
 class LiteralSerializer(Serializer):
     generator: Serializer
 
-    def __init__(self, values: Tuple[Any, ...], context: Optional[ModuleType]) -> None:
+    def __init__(self, values: tuple[Any, ...], context: ModuleType | None) -> None:
         literal_type_tuple = tuple(type(value) for value in values)
         literal_type_set = set(literal_type_tuple)
         if len(literal_type_set) != 1:
@@ -312,12 +306,12 @@ class LiteralSerializer(Serializer):
 
 
 class UntypedNamedTupleSerializer(Serializer):
-    fields: Dict[str, str]
+    fields: dict[str, str]
 
-    def __init__(self, class_type: Type[NamedTuple]) -> None:
+    def __init__(self, class_type: type[NamedTuple]) -> None:
         # named tuples are also instances of tuple
         self.fields = {}
-        field_names: Tuple[str, ...] = class_type._fields
+        field_names: tuple[str, ...] = class_type._fields
         for field_name in field_names:
             self.fields[field_name] = python_field_to_json_property(field_name)
 
@@ -351,7 +345,7 @@ class UntypedClassSerializer(Serializer):
         return object_dict
 
 
-def create_serializer(typ: TypeLike, context: Optional[ModuleType] = None) -> Serializer:
+def create_serializer(typ: TypeLike, context: ModuleType | None = None) -> Serializer:
     """
     Creates a serializer engine to produce an object that can be directly converted into a JSON string.
 
@@ -376,7 +370,7 @@ def create_serializer(typ: TypeLike, context: Optional[ModuleType] = None) -> Se
     return _get_serializer(typ, context)
 
 
-def _get_serializer(typ: TypeLike, context: Optional[ModuleType]) -> Serializer:
+def _get_serializer(typ: TypeLike, context: ModuleType | None) -> Serializer:
     if isinstance(typ, (str, typing.ForwardRef)):
         if context is None:
             raise TypeError(f"missing context for evaluating type: {typ}")
@@ -390,13 +384,13 @@ def _get_serializer(typ: TypeLike, context: Optional[ModuleType]) -> Serializer:
         return _create_serializer(typ, context)
 
 
-@functools.lru_cache(maxsize=None)
+@functools.cache
 def _fetch_serializer(typ: type) -> Serializer:
     context = sys.modules[typ.__module__]
     return _create_serializer(typ, context)
 
 
-def _create_serializer(typ: TypeLike, context: Optional[ModuleType]) -> Serializer:
+def _create_serializer(typ: TypeLike, context: ModuleType | None) -> Serializer:
     # check for well-known types
     if typ is type(None):
         return NoneSerializer()
diff --git a/llama_stack/strong_typing/slots.py b/src/llama_stack/strong_typing/slots.py
similarity index 71%
rename from llama_stack/strong_typing/slots.py
rename to src/llama_stack/strong_typing/slots.py
index c1a3293d8..772834140 100644
--- a/llama_stack/strong_typing/slots.py
+++ b/src/llama_stack/strong_typing/slots.py
@@ -4,18 +4,18 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from typing import Any, Dict, Tuple, Type, TypeVar
+from typing import Any, TypeVar
 
 T = TypeVar("T")
 
 
 class SlotsMeta(type):
-    def __new__(cls: Type[T], name: str, bases: Tuple[type, ...], ns: Dict[str, Any]) -> T:
+    def __new__(cls: type[T], name: str, bases: tuple[type, ...], ns: dict[str, Any]) -> T:
         # caller may have already provided slots, in which case just retain them and keep going
-        slots: Tuple[str, ...] = ns.get("__slots__", ())
+        slots: tuple[str, ...] = ns.get("__slots__", ())
 
         # add fields with type annotations to slots
-        annotations: Dict[str, Any] = ns.get("__annotations__", {})
+        annotations: dict[str, Any] = ns.get("__annotations__", {})
         members = tuple(member for member in annotations.keys() if member not in slots)
 
         # assign slots
diff --git a/llama_stack/strong_typing/topological.py b/src/llama_stack/strong_typing/topological.py
similarity index 88%
rename from llama_stack/strong_typing/topological.py
rename to src/llama_stack/strong_typing/topological.py
index 28bf4bd0f..9502a5887 100644
--- a/llama_stack/strong_typing/topological.py
+++ b/src/llama_stack/strong_typing/topological.py
@@ -10,14 +10,15 @@ Type-safe data interchange for Python data classes.
 :see: https://github.com/hunyadi/strong_typing
 """
 
-from typing import Callable, Dict, Iterable, List, Optional, Set, TypeVar
+from collections.abc import Callable, Iterable
+from typing import TypeVar
 
 from .inspection import TypeCollector
 
 T = TypeVar("T")
 
 
-def topological_sort(graph: Dict[T, Set[T]]) -> List[T]:
+def topological_sort(graph: dict[T, set[T]]) -> list[T]:
     """
     Performs a topological sort of a graph.
 
@@ -29,9 +30,9 @@ def topological_sort(graph: Dict[T, Set[T]]) -> List[T]:
     """
 
     # empty list that will contain the sorted nodes (in reverse order)
-    ordered: List[T] = []
+    ordered: list[T] = []
 
-    seen: Dict[T, bool] = {}
+    seen: dict[T, bool] = {}
 
     def _visit(n: T) -> None:
         status = seen.get(n)
@@ -57,8 +58,8 @@ def topological_sort(graph: Dict[T, Set[T]]) -> List[T]:
 
 def type_topological_sort(
     types: Iterable[type],
-    dependency_fn: Optional[Callable[[type], Iterable[type]]] = None,
-) -> List[type]:
+    dependency_fn: Callable[[type], Iterable[type]] | None = None,
+) -> list[type]:
     """
     Performs a topological sort of a list of types.
 
@@ -78,7 +79,7 @@ def type_topological_sort(
     graph = collector.graph
 
     if dependency_fn:
-        new_types: Set[type] = set()
+        new_types: set[type] = set()
         for source_type, references in graph.items():
             dependent_types = dependency_fn(source_type)
             references.update(dependent_types)
diff --git a/llama_stack/testing/__init__.py b/src/llama_stack/testing/__init__.py
similarity index 100%
rename from llama_stack/testing/__init__.py
rename to src/llama_stack/testing/__init__.py
diff --git a/llama_stack/testing/api_recorder.py b/src/llama_stack/testing/api_recorder.py
similarity index 100%
rename from llama_stack/testing/api_recorder.py
rename to src/llama_stack/testing/api_recorder.py
diff --git a/llama_stack/ui/.gitignore b/src/llama_stack/ui/.gitignore
similarity index 100%
rename from llama_stack/ui/.gitignore
rename to src/llama_stack/ui/.gitignore
diff --git a/llama_stack/ui/.nvmrc b/src/llama_stack/ui/.nvmrc
similarity index 100%
rename from llama_stack/ui/.nvmrc
rename to src/llama_stack/ui/.nvmrc
diff --git a/llama_stack/ui/.prettierignore b/src/llama_stack/ui/.prettierignore
similarity index 100%
rename from llama_stack/ui/.prettierignore
rename to src/llama_stack/ui/.prettierignore
diff --git a/llama_stack/ui/.prettierrc b/src/llama_stack/ui/.prettierrc
similarity index 100%
rename from llama_stack/ui/.prettierrc
rename to src/llama_stack/ui/.prettierrc
diff --git a/llama_stack/ui/README.md b/src/llama_stack/ui/README.md
similarity index 100%
rename from llama_stack/ui/README.md
rename to src/llama_stack/ui/README.md
diff --git a/llama_stack/ui/app/api/auth/[...nextauth]/route.ts b/src/llama_stack/ui/app/api/auth/[...nextauth]/route.ts
similarity index 100%
rename from llama_stack/ui/app/api/auth/[...nextauth]/route.ts
rename to src/llama_stack/ui/app/api/auth/[...nextauth]/route.ts
diff --git a/llama_stack/ui/app/api/v1/[...path]/route.ts b/src/llama_stack/ui/app/api/v1/[...path]/route.ts
similarity index 100%
rename from llama_stack/ui/app/api/v1/[...path]/route.ts
rename to src/llama_stack/ui/app/api/v1/[...path]/route.ts
diff --git a/llama_stack/ui/app/auth/signin/page.tsx b/src/llama_stack/ui/app/auth/signin/page.tsx
similarity index 100%
rename from llama_stack/ui/app/auth/signin/page.tsx
rename to src/llama_stack/ui/app/auth/signin/page.tsx
diff --git a/llama_stack/ui/app/chat-playground/chunk-processor.test.tsx b/src/llama_stack/ui/app/chat-playground/chunk-processor.test.tsx
similarity index 100%
rename from llama_stack/ui/app/chat-playground/chunk-processor.test.tsx
rename to src/llama_stack/ui/app/chat-playground/chunk-processor.test.tsx
diff --git a/llama_stack/ui/app/chat-playground/page.test.tsx b/src/llama_stack/ui/app/chat-playground/page.test.tsx
similarity index 100%
rename from llama_stack/ui/app/chat-playground/page.test.tsx
rename to src/llama_stack/ui/app/chat-playground/page.test.tsx
diff --git a/llama_stack/ui/app/chat-playground/page.tsx b/src/llama_stack/ui/app/chat-playground/page.tsx
similarity index 100%
rename from llama_stack/ui/app/chat-playground/page.tsx
rename to src/llama_stack/ui/app/chat-playground/page.tsx
diff --git a/llama_stack/ui/app/globals.css b/src/llama_stack/ui/app/globals.css
similarity index 100%
rename from llama_stack/ui/app/globals.css
rename to src/llama_stack/ui/app/globals.css
diff --git a/llama_stack/ui/app/layout.tsx b/src/llama_stack/ui/app/layout.tsx
similarity index 100%
rename from llama_stack/ui/app/layout.tsx
rename to src/llama_stack/ui/app/layout.tsx
diff --git a/llama_stack/ui/app/logs/chat-completions/[id]/page.tsx b/src/llama_stack/ui/app/logs/chat-completions/[id]/page.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/chat-completions/[id]/page.tsx
rename to src/llama_stack/ui/app/logs/chat-completions/[id]/page.tsx
diff --git a/llama_stack/ui/app/logs/chat-completions/layout.tsx b/src/llama_stack/ui/app/logs/chat-completions/layout.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/chat-completions/layout.tsx
rename to src/llama_stack/ui/app/logs/chat-completions/layout.tsx
diff --git a/llama_stack/ui/app/logs/chat-completions/page.tsx b/src/llama_stack/ui/app/logs/chat-completions/page.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/chat-completions/page.tsx
rename to src/llama_stack/ui/app/logs/chat-completions/page.tsx
diff --git a/llama_stack/ui/app/logs/responses/[id]/page.tsx b/src/llama_stack/ui/app/logs/responses/[id]/page.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/responses/[id]/page.tsx
rename to src/llama_stack/ui/app/logs/responses/[id]/page.tsx
diff --git a/llama_stack/ui/app/logs/responses/layout.tsx b/src/llama_stack/ui/app/logs/responses/layout.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/responses/layout.tsx
rename to src/llama_stack/ui/app/logs/responses/layout.tsx
diff --git a/llama_stack/ui/app/logs/responses/page.tsx b/src/llama_stack/ui/app/logs/responses/page.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/responses/page.tsx
rename to src/llama_stack/ui/app/logs/responses/page.tsx
diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.test.tsx b/src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.test.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.test.tsx
rename to src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.test.tsx
diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx b/src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx
rename to src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx
diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.test.tsx b/src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.test.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.test.tsx
rename to src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.test.tsx
diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.tsx b/src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.tsx
rename to src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.tsx
diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.test.tsx b/src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.test.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.test.tsx
rename to src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.test.tsx
diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.tsx b/src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.tsx
rename to src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.tsx
diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/page.tsx b/src/llama_stack/ui/app/logs/vector-stores/[id]/page.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/vector-stores/[id]/page.tsx
rename to src/llama_stack/ui/app/logs/vector-stores/[id]/page.tsx
diff --git a/llama_stack/ui/app/logs/vector-stores/layout.tsx b/src/llama_stack/ui/app/logs/vector-stores/layout.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/vector-stores/layout.tsx
rename to src/llama_stack/ui/app/logs/vector-stores/layout.tsx
diff --git a/llama_stack/ui/app/logs/vector-stores/page.tsx b/src/llama_stack/ui/app/logs/vector-stores/page.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/vector-stores/page.tsx
rename to src/llama_stack/ui/app/logs/vector-stores/page.tsx
diff --git a/llama_stack/ui/app/page.tsx b/src/llama_stack/ui/app/page.tsx
similarity index 100%
rename from llama_stack/ui/app/page.tsx
rename to src/llama_stack/ui/app/page.tsx
diff --git a/llama_stack/ui/components.json b/src/llama_stack/ui/components.json
similarity index 100%
rename from llama_stack/ui/components.json
rename to src/llama_stack/ui/components.json
diff --git a/llama_stack/ui/components/chat-completions/chat-completion-detail.test.tsx b/src/llama_stack/ui/components/chat-completions/chat-completion-detail.test.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-completions/chat-completion-detail.test.tsx
rename to src/llama_stack/ui/components/chat-completions/chat-completion-detail.test.tsx
diff --git a/llama_stack/ui/components/chat-completions/chat-completion-detail.tsx b/src/llama_stack/ui/components/chat-completions/chat-completion-detail.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-completions/chat-completion-detail.tsx
rename to src/llama_stack/ui/components/chat-completions/chat-completion-detail.tsx
diff --git a/llama_stack/ui/components/chat-completions/chat-completion-table.test.tsx b/src/llama_stack/ui/components/chat-completions/chat-completion-table.test.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-completions/chat-completion-table.test.tsx
rename to src/llama_stack/ui/components/chat-completions/chat-completion-table.test.tsx
diff --git a/llama_stack/ui/components/chat-completions/chat-completions-table.tsx b/src/llama_stack/ui/components/chat-completions/chat-completions-table.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-completions/chat-completions-table.tsx
rename to src/llama_stack/ui/components/chat-completions/chat-completions-table.tsx
diff --git a/llama_stack/ui/components/chat-completions/chat-messasge-item.tsx b/src/llama_stack/ui/components/chat-completions/chat-messasge-item.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-completions/chat-messasge-item.tsx
rename to src/llama_stack/ui/components/chat-completions/chat-messasge-item.tsx
diff --git a/llama_stack/ui/components/chat-playground/chat-message.tsx b/src/llama_stack/ui/components/chat-playground/chat-message.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/chat-message.tsx
rename to src/llama_stack/ui/components/chat-playground/chat-message.tsx
diff --git a/llama_stack/ui/components/chat-playground/chat.tsx b/src/llama_stack/ui/components/chat-playground/chat.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/chat.tsx
rename to src/llama_stack/ui/components/chat-playground/chat.tsx
diff --git a/llama_stack/ui/components/chat-playground/conversations.test.tsx b/src/llama_stack/ui/components/chat-playground/conversations.test.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/conversations.test.tsx
rename to src/llama_stack/ui/components/chat-playground/conversations.test.tsx
diff --git a/llama_stack/ui/components/chat-playground/conversations.tsx b/src/llama_stack/ui/components/chat-playground/conversations.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/conversations.tsx
rename to src/llama_stack/ui/components/chat-playground/conversations.tsx
diff --git a/llama_stack/ui/components/chat-playground/interrupt-prompt.tsx b/src/llama_stack/ui/components/chat-playground/interrupt-prompt.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/interrupt-prompt.tsx
rename to src/llama_stack/ui/components/chat-playground/interrupt-prompt.tsx
diff --git a/llama_stack/ui/components/chat-playground/markdown-renderer.tsx b/src/llama_stack/ui/components/chat-playground/markdown-renderer.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/markdown-renderer.tsx
rename to src/llama_stack/ui/components/chat-playground/markdown-renderer.tsx
diff --git a/llama_stack/ui/components/chat-playground/message-components.tsx b/src/llama_stack/ui/components/chat-playground/message-components.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/message-components.tsx
rename to src/llama_stack/ui/components/chat-playground/message-components.tsx
diff --git a/llama_stack/ui/components/chat-playground/message-input.tsx b/src/llama_stack/ui/components/chat-playground/message-input.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/message-input.tsx
rename to src/llama_stack/ui/components/chat-playground/message-input.tsx
diff --git a/llama_stack/ui/components/chat-playground/message-list.tsx b/src/llama_stack/ui/components/chat-playground/message-list.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/message-list.tsx
rename to src/llama_stack/ui/components/chat-playground/message-list.tsx
diff --git a/llama_stack/ui/components/chat-playground/prompt-suggestions.tsx b/src/llama_stack/ui/components/chat-playground/prompt-suggestions.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/prompt-suggestions.tsx
rename to src/llama_stack/ui/components/chat-playground/prompt-suggestions.tsx
diff --git a/llama_stack/ui/components/chat-playground/typing-indicator.tsx b/src/llama_stack/ui/components/chat-playground/typing-indicator.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/typing-indicator.tsx
rename to src/llama_stack/ui/components/chat-playground/typing-indicator.tsx
diff --git a/llama_stack/ui/components/chat-playground/vector-db-creator.tsx b/src/llama_stack/ui/components/chat-playground/vector-db-creator.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/vector-db-creator.tsx
rename to src/llama_stack/ui/components/chat-playground/vector-db-creator.tsx
diff --git a/llama_stack/ui/components/layout/app-sidebar.tsx b/src/llama_stack/ui/components/layout/app-sidebar.tsx
similarity index 100%
rename from llama_stack/ui/components/layout/app-sidebar.tsx
rename to src/llama_stack/ui/components/layout/app-sidebar.tsx
diff --git a/llama_stack/ui/components/layout/detail-layout.tsx b/src/llama_stack/ui/components/layout/detail-layout.tsx
similarity index 100%
rename from llama_stack/ui/components/layout/detail-layout.tsx
rename to src/llama_stack/ui/components/layout/detail-layout.tsx
diff --git a/llama_stack/ui/components/layout/logs-layout.tsx b/src/llama_stack/ui/components/layout/logs-layout.tsx
similarity index 100%
rename from llama_stack/ui/components/layout/logs-layout.tsx
rename to src/llama_stack/ui/components/layout/logs-layout.tsx
diff --git a/llama_stack/ui/components/layout/page-breadcrumb.tsx b/src/llama_stack/ui/components/layout/page-breadcrumb.tsx
similarity index 100%
rename from llama_stack/ui/components/layout/page-breadcrumb.tsx
rename to src/llama_stack/ui/components/layout/page-breadcrumb.tsx
diff --git a/llama_stack/ui/components/logs/logs-table-scroll.test.tsx b/src/llama_stack/ui/components/logs/logs-table-scroll.test.tsx
similarity index 100%
rename from llama_stack/ui/components/logs/logs-table-scroll.test.tsx
rename to src/llama_stack/ui/components/logs/logs-table-scroll.test.tsx
diff --git a/llama_stack/ui/components/logs/logs-table.test.tsx b/src/llama_stack/ui/components/logs/logs-table.test.tsx
similarity index 100%
rename from llama_stack/ui/components/logs/logs-table.test.tsx
rename to src/llama_stack/ui/components/logs/logs-table.test.tsx
diff --git a/llama_stack/ui/components/logs/logs-table.tsx b/src/llama_stack/ui/components/logs/logs-table.tsx
similarity index 100%
rename from llama_stack/ui/components/logs/logs-table.tsx
rename to src/llama_stack/ui/components/logs/logs-table.tsx
diff --git a/llama_stack/ui/components/providers/session-provider.tsx b/src/llama_stack/ui/components/providers/session-provider.tsx
similarity index 100%
rename from llama_stack/ui/components/providers/session-provider.tsx
rename to src/llama_stack/ui/components/providers/session-provider.tsx
diff --git a/llama_stack/ui/components/responses/grouping/grouped-items-display.tsx b/src/llama_stack/ui/components/responses/grouping/grouped-items-display.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/grouping/grouped-items-display.tsx
rename to src/llama_stack/ui/components/responses/grouping/grouped-items-display.tsx
diff --git a/llama_stack/ui/components/responses/hooks/function-call-grouping.ts b/src/llama_stack/ui/components/responses/hooks/function-call-grouping.ts
similarity index 100%
rename from llama_stack/ui/components/responses/hooks/function-call-grouping.ts
rename to src/llama_stack/ui/components/responses/hooks/function-call-grouping.ts
diff --git a/llama_stack/ui/components/responses/items/function-call-item.tsx b/src/llama_stack/ui/components/responses/items/function-call-item.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/items/function-call-item.tsx
rename to src/llama_stack/ui/components/responses/items/function-call-item.tsx
diff --git a/llama_stack/ui/components/responses/items/generic-item.tsx b/src/llama_stack/ui/components/responses/items/generic-item.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/items/generic-item.tsx
rename to src/llama_stack/ui/components/responses/items/generic-item.tsx
diff --git a/llama_stack/ui/components/responses/items/grouped-function-call-item.tsx b/src/llama_stack/ui/components/responses/items/grouped-function-call-item.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/items/grouped-function-call-item.tsx
rename to src/llama_stack/ui/components/responses/items/grouped-function-call-item.tsx
diff --git a/llama_stack/ui/components/responses/items/index.ts b/src/llama_stack/ui/components/responses/items/index.ts
similarity index 100%
rename from llama_stack/ui/components/responses/items/index.ts
rename to src/llama_stack/ui/components/responses/items/index.ts
diff --git a/llama_stack/ui/components/responses/items/item-renderer.tsx b/src/llama_stack/ui/components/responses/items/item-renderer.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/items/item-renderer.tsx
rename to src/llama_stack/ui/components/responses/items/item-renderer.tsx
diff --git a/llama_stack/ui/components/responses/items/message-item.tsx b/src/llama_stack/ui/components/responses/items/message-item.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/items/message-item.tsx
rename to src/llama_stack/ui/components/responses/items/message-item.tsx
diff --git a/llama_stack/ui/components/responses/items/web-search-item.tsx b/src/llama_stack/ui/components/responses/items/web-search-item.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/items/web-search-item.tsx
rename to src/llama_stack/ui/components/responses/items/web-search-item.tsx
diff --git a/llama_stack/ui/components/responses/responses-detail.test.tsx b/src/llama_stack/ui/components/responses/responses-detail.test.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/responses-detail.test.tsx
rename to src/llama_stack/ui/components/responses/responses-detail.test.tsx
diff --git a/llama_stack/ui/components/responses/responses-detail.tsx b/src/llama_stack/ui/components/responses/responses-detail.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/responses-detail.tsx
rename to src/llama_stack/ui/components/responses/responses-detail.tsx
diff --git a/llama_stack/ui/components/responses/responses-table.test.tsx b/src/llama_stack/ui/components/responses/responses-table.test.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/responses-table.test.tsx
rename to src/llama_stack/ui/components/responses/responses-table.test.tsx
diff --git a/llama_stack/ui/components/responses/responses-table.tsx b/src/llama_stack/ui/components/responses/responses-table.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/responses-table.tsx
rename to src/llama_stack/ui/components/responses/responses-table.tsx
diff --git a/llama_stack/ui/components/responses/utils/item-types.ts b/src/llama_stack/ui/components/responses/utils/item-types.ts
similarity index 100%
rename from llama_stack/ui/components/responses/utils/item-types.ts
rename to src/llama_stack/ui/components/responses/utils/item-types.ts
diff --git a/llama_stack/ui/components/ui/audio-visualizer.tsx b/src/llama_stack/ui/components/ui/audio-visualizer.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/audio-visualizer.tsx
rename to src/llama_stack/ui/components/ui/audio-visualizer.tsx
diff --git a/llama_stack/ui/components/ui/breadcrumb.tsx b/src/llama_stack/ui/components/ui/breadcrumb.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/breadcrumb.tsx
rename to src/llama_stack/ui/components/ui/breadcrumb.tsx
diff --git a/llama_stack/ui/components/ui/button.tsx b/src/llama_stack/ui/components/ui/button.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/button.tsx
rename to src/llama_stack/ui/components/ui/button.tsx
diff --git a/llama_stack/ui/components/ui/card.tsx b/src/llama_stack/ui/components/ui/card.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/card.tsx
rename to src/llama_stack/ui/components/ui/card.tsx
diff --git a/llama_stack/ui/components/ui/collapsible.tsx b/src/llama_stack/ui/components/ui/collapsible.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/collapsible.tsx
rename to src/llama_stack/ui/components/ui/collapsible.tsx
diff --git a/llama_stack/ui/components/ui/copy-button.tsx b/src/llama_stack/ui/components/ui/copy-button.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/copy-button.tsx
rename to src/llama_stack/ui/components/ui/copy-button.tsx
diff --git a/llama_stack/ui/components/ui/dropdown-menu.tsx b/src/llama_stack/ui/components/ui/dropdown-menu.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/dropdown-menu.tsx
rename to src/llama_stack/ui/components/ui/dropdown-menu.tsx
diff --git a/llama_stack/ui/components/ui/file-preview.tsx b/src/llama_stack/ui/components/ui/file-preview.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/file-preview.tsx
rename to src/llama_stack/ui/components/ui/file-preview.tsx
diff --git a/llama_stack/ui/components/ui/input.tsx b/src/llama_stack/ui/components/ui/input.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/input.tsx
rename to src/llama_stack/ui/components/ui/input.tsx
diff --git a/llama_stack/ui/components/ui/mode-toggle.tsx b/src/llama_stack/ui/components/ui/mode-toggle.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/mode-toggle.tsx
rename to src/llama_stack/ui/components/ui/mode-toggle.tsx
diff --git a/llama_stack/ui/components/ui/select.tsx b/src/llama_stack/ui/components/ui/select.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/select.tsx
rename to src/llama_stack/ui/components/ui/select.tsx
diff --git a/llama_stack/ui/components/ui/separator.tsx b/src/llama_stack/ui/components/ui/separator.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/separator.tsx
rename to src/llama_stack/ui/components/ui/separator.tsx
diff --git a/llama_stack/ui/components/ui/sheet.tsx b/src/llama_stack/ui/components/ui/sheet.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/sheet.tsx
rename to src/llama_stack/ui/components/ui/sheet.tsx
diff --git a/llama_stack/ui/components/ui/sidebar.tsx b/src/llama_stack/ui/components/ui/sidebar.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/sidebar.tsx
rename to src/llama_stack/ui/components/ui/sidebar.tsx
diff --git a/llama_stack/ui/components/ui/sign-in-button.tsx b/src/llama_stack/ui/components/ui/sign-in-button.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/sign-in-button.tsx
rename to src/llama_stack/ui/components/ui/sign-in-button.tsx
diff --git a/llama_stack/ui/components/ui/skeleton.tsx b/src/llama_stack/ui/components/ui/skeleton.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/skeleton.tsx
rename to src/llama_stack/ui/components/ui/skeleton.tsx
diff --git a/llama_stack/ui/components/ui/sonner.tsx b/src/llama_stack/ui/components/ui/sonner.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/sonner.tsx
rename to src/llama_stack/ui/components/ui/sonner.tsx
diff --git a/llama_stack/ui/components/ui/table.tsx b/src/llama_stack/ui/components/ui/table.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/table.tsx
rename to src/llama_stack/ui/components/ui/table.tsx
diff --git a/llama_stack/ui/components/ui/theme-provider.tsx b/src/llama_stack/ui/components/ui/theme-provider.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/theme-provider.tsx
rename to src/llama_stack/ui/components/ui/theme-provider.tsx
diff --git a/llama_stack/ui/components/ui/tooltip.tsx b/src/llama_stack/ui/components/ui/tooltip.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/tooltip.tsx
rename to src/llama_stack/ui/components/ui/tooltip.tsx
diff --git a/llama_stack/ui/components/vector-stores/vector-store-detail.test.tsx b/src/llama_stack/ui/components/vector-stores/vector-store-detail.test.tsx
similarity index 100%
rename from llama_stack/ui/components/vector-stores/vector-store-detail.test.tsx
rename to src/llama_stack/ui/components/vector-stores/vector-store-detail.test.tsx
diff --git a/llama_stack/ui/components/vector-stores/vector-store-detail.tsx b/src/llama_stack/ui/components/vector-stores/vector-store-detail.tsx
similarity index 100%
rename from llama_stack/ui/components/vector-stores/vector-store-detail.tsx
rename to src/llama_stack/ui/components/vector-stores/vector-store-detail.tsx
diff --git a/llama_stack/ui/e2e/logs-table-scroll.spec.ts b/src/llama_stack/ui/e2e/logs-table-scroll.spec.ts
similarity index 100%
rename from llama_stack/ui/e2e/logs-table-scroll.spec.ts
rename to src/llama_stack/ui/e2e/logs-table-scroll.spec.ts
diff --git a/llama_stack/ui/eslint.config.mjs b/src/llama_stack/ui/eslint.config.mjs
similarity index 100%
rename from llama_stack/ui/eslint.config.mjs
rename to src/llama_stack/ui/eslint.config.mjs
diff --git a/llama_stack/ui/hooks/use-audio-recording.ts b/src/llama_stack/ui/hooks/use-audio-recording.ts
similarity index 100%
rename from llama_stack/ui/hooks/use-audio-recording.ts
rename to src/llama_stack/ui/hooks/use-audio-recording.ts
diff --git a/llama_stack/ui/hooks/use-auth-client.ts b/src/llama_stack/ui/hooks/use-auth-client.ts
similarity index 100%
rename from llama_stack/ui/hooks/use-auth-client.ts
rename to src/llama_stack/ui/hooks/use-auth-client.ts
diff --git a/llama_stack/ui/hooks/use-auto-scroll.ts b/src/llama_stack/ui/hooks/use-auto-scroll.ts
similarity index 100%
rename from llama_stack/ui/hooks/use-auto-scroll.ts
rename to src/llama_stack/ui/hooks/use-auto-scroll.ts
diff --git a/llama_stack/ui/hooks/use-autosize-textarea.ts b/src/llama_stack/ui/hooks/use-autosize-textarea.ts
similarity index 100%
rename from llama_stack/ui/hooks/use-autosize-textarea.ts
rename to src/llama_stack/ui/hooks/use-autosize-textarea.ts
diff --git a/llama_stack/ui/hooks/use-copy-to-clipboard.ts b/src/llama_stack/ui/hooks/use-copy-to-clipboard.ts
similarity index 100%
rename from llama_stack/ui/hooks/use-copy-to-clipboard.ts
rename to src/llama_stack/ui/hooks/use-copy-to-clipboard.ts
diff --git a/llama_stack/ui/hooks/use-infinite-scroll.ts b/src/llama_stack/ui/hooks/use-infinite-scroll.ts
similarity index 100%
rename from llama_stack/ui/hooks/use-infinite-scroll.ts
rename to src/llama_stack/ui/hooks/use-infinite-scroll.ts
diff --git a/llama_stack/ui/hooks/use-mobile.ts b/src/llama_stack/ui/hooks/use-mobile.ts
similarity index 100%
rename from llama_stack/ui/hooks/use-mobile.ts
rename to src/llama_stack/ui/hooks/use-mobile.ts
diff --git a/llama_stack/ui/hooks/use-pagination.ts b/src/llama_stack/ui/hooks/use-pagination.ts
similarity index 100%
rename from llama_stack/ui/hooks/use-pagination.ts
rename to src/llama_stack/ui/hooks/use-pagination.ts
diff --git a/llama_stack/ui/instrumentation.ts b/src/llama_stack/ui/instrumentation.ts
similarity index 100%
rename from llama_stack/ui/instrumentation.ts
rename to src/llama_stack/ui/instrumentation.ts
diff --git a/llama_stack/ui/jest.config.ts b/src/llama_stack/ui/jest.config.ts
similarity index 100%
rename from llama_stack/ui/jest.config.ts
rename to src/llama_stack/ui/jest.config.ts
diff --git a/llama_stack/ui/jest.setup.ts b/src/llama_stack/ui/jest.setup.ts
similarity index 100%
rename from llama_stack/ui/jest.setup.ts
rename to src/llama_stack/ui/jest.setup.ts
diff --git a/llama_stack/ui/lib/audio-utils.ts b/src/llama_stack/ui/lib/audio-utils.ts
similarity index 100%
rename from llama_stack/ui/lib/audio-utils.ts
rename to src/llama_stack/ui/lib/audio-utils.ts
diff --git a/llama_stack/ui/lib/auth.ts b/src/llama_stack/ui/lib/auth.ts
similarity index 100%
rename from llama_stack/ui/lib/auth.ts
rename to src/llama_stack/ui/lib/auth.ts
diff --git a/llama_stack/ui/lib/config-validator.ts b/src/llama_stack/ui/lib/config-validator.ts
similarity index 100%
rename from llama_stack/ui/lib/config-validator.ts
rename to src/llama_stack/ui/lib/config-validator.ts
diff --git a/llama_stack/ui/lib/contents-api.ts b/src/llama_stack/ui/lib/contents-api.ts
similarity index 100%
rename from llama_stack/ui/lib/contents-api.ts
rename to src/llama_stack/ui/lib/contents-api.ts
diff --git a/llama_stack/ui/lib/format-message-content.test.ts b/src/llama_stack/ui/lib/format-message-content.test.ts
similarity index 100%
rename from llama_stack/ui/lib/format-message-content.test.ts
rename to src/llama_stack/ui/lib/format-message-content.test.ts
diff --git a/llama_stack/ui/lib/format-message-content.ts b/src/llama_stack/ui/lib/format-message-content.ts
similarity index 100%
rename from llama_stack/ui/lib/format-message-content.ts
rename to src/llama_stack/ui/lib/format-message-content.ts
diff --git a/llama_stack/ui/lib/format-tool-call.tsx b/src/llama_stack/ui/lib/format-tool-call.tsx
similarity index 100%
rename from llama_stack/ui/lib/format-tool-call.tsx
rename to src/llama_stack/ui/lib/format-tool-call.tsx
diff --git a/llama_stack/ui/lib/message-content-utils.ts b/src/llama_stack/ui/lib/message-content-utils.ts
similarity index 100%
rename from llama_stack/ui/lib/message-content-utils.ts
rename to src/llama_stack/ui/lib/message-content-utils.ts
diff --git a/llama_stack/ui/lib/truncate-text.ts b/src/llama_stack/ui/lib/truncate-text.ts
similarity index 100%
rename from llama_stack/ui/lib/truncate-text.ts
rename to src/llama_stack/ui/lib/truncate-text.ts
diff --git a/llama_stack/ui/lib/types.ts b/src/llama_stack/ui/lib/types.ts
similarity index 100%
rename from llama_stack/ui/lib/types.ts
rename to src/llama_stack/ui/lib/types.ts
diff --git a/llama_stack/ui/lib/utils.tsx b/src/llama_stack/ui/lib/utils.tsx
similarity index 100%
rename from llama_stack/ui/lib/utils.tsx
rename to src/llama_stack/ui/lib/utils.tsx
diff --git a/llama_stack/ui/next.config.ts b/src/llama_stack/ui/next.config.ts
similarity index 100%
rename from llama_stack/ui/next.config.ts
rename to src/llama_stack/ui/next.config.ts
diff --git a/llama_stack/ui/package-lock.json b/src/llama_stack/ui/package-lock.json
similarity index 100%
rename from llama_stack/ui/package-lock.json
rename to src/llama_stack/ui/package-lock.json
diff --git a/llama_stack/ui/package.json b/src/llama_stack/ui/package.json
similarity index 100%
rename from llama_stack/ui/package.json
rename to src/llama_stack/ui/package.json
diff --git a/llama_stack/ui/playwright.config.ts b/src/llama_stack/ui/playwright.config.ts
similarity index 100%
rename from llama_stack/ui/playwright.config.ts
rename to src/llama_stack/ui/playwright.config.ts
diff --git a/llama_stack/ui/postcss.config.mjs b/src/llama_stack/ui/postcss.config.mjs
similarity index 100%
rename from llama_stack/ui/postcss.config.mjs
rename to src/llama_stack/ui/postcss.config.mjs
diff --git a/llama_stack/ui/public/favicon.ico b/src/llama_stack/ui/public/favicon.ico
similarity index 100%
rename from llama_stack/ui/public/favicon.ico
rename to src/llama_stack/ui/public/favicon.ico
diff --git a/llama_stack/ui/public/file.svg b/src/llama_stack/ui/public/file.svg
similarity index 100%
rename from llama_stack/ui/public/file.svg
rename to src/llama_stack/ui/public/file.svg
diff --git a/llama_stack/ui/public/globe.svg b/src/llama_stack/ui/public/globe.svg
similarity index 100%
rename from llama_stack/ui/public/globe.svg
rename to src/llama_stack/ui/public/globe.svg
diff --git a/llama_stack/ui/public/logo.webp b/src/llama_stack/ui/public/logo.webp
similarity index 100%
rename from llama_stack/ui/public/logo.webp
rename to src/llama_stack/ui/public/logo.webp
diff --git a/llama_stack/ui/public/next.svg b/src/llama_stack/ui/public/next.svg
similarity index 100%
rename from llama_stack/ui/public/next.svg
rename to src/llama_stack/ui/public/next.svg
diff --git a/llama_stack/ui/public/vercel.svg b/src/llama_stack/ui/public/vercel.svg
similarity index 100%
rename from llama_stack/ui/public/vercel.svg
rename to src/llama_stack/ui/public/vercel.svg
diff --git a/llama_stack/ui/public/window.svg b/src/llama_stack/ui/public/window.svg
similarity index 100%
rename from llama_stack/ui/public/window.svg
rename to src/llama_stack/ui/public/window.svg
diff --git a/llama_stack/ui/tsconfig.json b/src/llama_stack/ui/tsconfig.json
similarity index 100%
rename from llama_stack/ui/tsconfig.json
rename to src/llama_stack/ui/tsconfig.json
diff --git a/llama_stack/ui/types/next-auth.d.ts b/src/llama_stack/ui/types/next-auth.d.ts
similarity index 100%
rename from llama_stack/ui/types/next-auth.d.ts
rename to src/llama_stack/ui/types/next-auth.d.ts