From 471b1b248b47dce66305c14acc46e35440520632 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe <ashwin.bharambe@gmail.com>
Date: Mon, 27 Oct 2025 12:02:21 -0700
Subject: [PATCH] chore(package): migrate to src/ layout (#3920)

Migrates package structure to src/ layout following Python packaging
best practices.

All code has moved from `llama_stack/` to `src/llama_stack/`. The public
API is unchanged: imports remain of the form `import llama_stack.*`.

Updated build configs, pre-commit hooks, scripts, and GitHub workflows
accordingly. All hooks pass and the package builds cleanly.

**Developer note**: Because the package directory has moved, reinstall after pulling with `pip install -e .`
---
 .github/workflows/integration-auth-tests.yml  |   4 +-
 .../workflows/integration-sql-store-tests.yml |   2 +-
 .github/workflows/integration-tests.yml       |   4 +-
 .../workflows/integration-vector-io-tests.yml |   4 +-
 .github/workflows/pre-commit.yml              |   4 +-
 .github/workflows/precommit-trigger.yml       |   4 +-
 .github/workflows/providers-build.yml         |  32 +-
 .github/workflows/providers-list-deps.yml     |  24 +-
 .github/workflows/python-build-test.yml       |   2 +-
 .../test-external-provider-module.yml         |   2 +-
 .github/workflows/test-external.yml           |   4 +-
 .github/workflows/ui-unit-tests.yml           |  12 +-
 .github/workflows/unit-tests.yml              |   4 +-
 .pre-commit-config.yaml                       |  12 +-
 MANIFEST.in                                   |  18 +-
 pyproject.toml                                | 152 +++++-----
 scripts/check-init-py.sh                      |   2 +-
 scripts/distro_codegen.py                     |   2 +-
 scripts/run-ui-linter.sh                      |   2 +-
 scripts/unit-tests.sh                         |   2 +-
 {llama_stack => src/llama_stack}/__init__.py  |   0
 .../llama_stack}/apis/__init__.py             |   0
 .../llama_stack}/apis/agents/__init__.py      |   0
 .../llama_stack}/apis/agents/agents.py        |   0
 .../apis/agents/openai_responses.py           |   0
 .../llama_stack}/apis/batches/__init__.py     |   0
 .../llama_stack}/apis/batches/batches.py      |   0
 .../llama_stack}/apis/benchmarks/__init__.py  |   0
 .../apis/benchmarks/benchmarks.py             |   0
 .../llama_stack}/apis/common/__init__.py      |   0
 .../llama_stack}/apis/common/content_types.py |   0
 .../llama_stack}/apis/common/errors.py        |   0
 .../llama_stack}/apis/common/job_types.py     |   0
 .../llama_stack}/apis/common/responses.py     |   0
 .../apis/common/training_types.py             |   0
 .../llama_stack}/apis/common/type_system.py   |   0
 .../apis/conversations/__init__.py            |   0
 .../apis/conversations/conversations.py       |   0
 .../llama_stack}/apis/datasetio/__init__.py   |   0
 .../llama_stack}/apis/datasetio/datasetio.py  |   0
 .../llama_stack}/apis/datasets/__init__.py    |   0
 .../llama_stack}/apis/datasets/datasets.py    |   0
 .../llama_stack}/apis/datatypes.py            |   0
 .../llama_stack}/apis/eval/__init__.py        |   0
 .../llama_stack}/apis/eval/eval.py            |   0
 .../llama_stack}/apis/files/__init__.py       |   0
 .../llama_stack}/apis/files/files.py          |   0
 .../llama_stack}/apis/inference/__init__.py   |   0
 .../apis/inference/event_logger.py            |   0
 .../llama_stack}/apis/inference/inference.py  |   0
 .../llama_stack}/apis/inspect/__init__.py     |   0
 .../llama_stack}/apis/inspect/inspect.py      |   0
 .../llama_stack}/apis/models/__init__.py      |   0
 .../llama_stack}/apis/models/models.py        |   0
 .../apis/post_training/__init__.py            |   0
 .../apis/post_training/post_training.py       |   0
 .../llama_stack}/apis/prompts/__init__.py     |   0
 .../llama_stack}/apis/prompts/prompts.py      |   0
 .../llama_stack}/apis/providers/__init__.py   |   0
 .../llama_stack}/apis/providers/providers.py  |   0
 .../llama_stack}/apis/resource.py             |   0
 .../llama_stack}/apis/safety/__init__.py      |   0
 .../llama_stack}/apis/safety/safety.py        |   0
 .../llama_stack}/apis/scoring/__init__.py     |   0
 .../llama_stack}/apis/scoring/scoring.py      |   0
 .../apis/scoring_functions/__init__.py        |   0
 .../scoring_functions/scoring_functions.py    |   0
 .../llama_stack}/apis/shields/__init__.py     |   0
 .../llama_stack}/apis/shields/shields.py      |   0
 .../synthetic_data_generation/__init__.py     |   0
 .../synthetic_data_generation.py              |   0
 .../llama_stack}/apis/telemetry/__init__.py   |   0
 .../llama_stack}/apis/telemetry/telemetry.py  |   0
 .../llama_stack}/apis/tools/__init__.py       |   0
 .../llama_stack}/apis/tools/rag_tool.py       |   0
 .../llama_stack}/apis/tools/tools.py          |   0
 .../llama_stack}/apis/vector_io/__init__.py   |   0
 .../llama_stack}/apis/vector_io/vector_io.py  |   0
 .../apis/vector_stores/__init__.py            |   0
 .../apis/vector_stores/vector_stores.py       |   0
 .../llama_stack}/apis/version.py              |   0
 .../llama_stack}/cli/__init__.py              |   0
 {llama_stack => src/llama_stack}/cli/llama.py |   0
 .../llama_stack}/cli/scripts/__init__.py      |   0
 .../scripts/install-wheel-from-presigned.sh   |   0
 .../llama_stack}/cli/scripts/run.py           |   0
 .../llama_stack}/cli/stack/__init__.py        |   0
 .../llama_stack}/cli/stack/_list_deps.py      |   0
 .../llama_stack}/cli/stack/list_apis.py       |   0
 .../llama_stack}/cli/stack/list_deps.py       |   0
 .../llama_stack}/cli/stack/list_providers.py  |   0
 .../llama_stack}/cli/stack/list_stacks.py     |   0
 .../llama_stack}/cli/stack/remove.py          |   0
 .../llama_stack}/cli/stack/run.py             |   0
 .../llama_stack}/cli/stack/stack.py           |   0
 .../llama_stack}/cli/stack/utils.py           |   0
 .../llama_stack}/cli/subcommand.py            |   0
 {llama_stack => src/llama_stack}/cli/table.py |   0
 {llama_stack => src/llama_stack}/cli/utils.py |   0
 .../llama_stack}/core/__init__.py             |   0
 .../core/access_control/__init__.py           |   0
 .../core/access_control/access_control.py     |   0
 .../core/access_control/conditions.py         |   0
 .../core/access_control/datatypes.py          |   0
 .../llama_stack}/core/build.py                |   0
 .../llama_stack}/core/client.py               |   0
 .../llama_stack}/core/common.sh               |   0
 .../llama_stack}/core/configure.py            |   0
 .../core/conversations/__init__.py            |   0
 .../core/conversations/conversations.py       |   0
 .../llama_stack}/core/datatypes.py            |   0
 .../llama_stack}/core/distribution.py         |   0
 .../llama_stack}/core/external.py             |   0
 .../llama_stack}/core/id_generation.py        |   0
 .../llama_stack}/core/inspect.py              |   0
 .../llama_stack}/core/library_client.py       |   0
 .../llama_stack}/core/prompts/__init__.py     |   0
 .../llama_stack}/core/prompts/prompts.py      |   0
 .../llama_stack}/core/providers.py            |   0
 .../llama_stack}/core/request_headers.py      |   0
 .../llama_stack}/core/resolver.py             |   0
 .../llama_stack}/core/routers/__init__.py     |   0
 .../llama_stack}/core/routers/datasets.py     |   0
 .../llama_stack}/core/routers/eval_scoring.py |   0
 .../llama_stack}/core/routers/inference.py    |   0
 .../llama_stack}/core/routers/safety.py       |   0
 .../llama_stack}/core/routers/tool_runtime.py |   0
 .../llama_stack}/core/routers/vector_io.py    |   0
 .../core/routing_tables/__init__.py           |   0
 .../core/routing_tables/benchmarks.py         |   0
 .../core/routing_tables/common.py             |   0
 .../core/routing_tables/datasets.py           |   0
 .../core/routing_tables/models.py             |   0
 .../core/routing_tables/scoring_functions.py  |   0
 .../core/routing_tables/shields.py            |   0
 .../core/routing_tables/toolgroups.py         |   0
 .../core/routing_tables/vector_stores.py      |   0
 .../llama_stack}/core/server/__init__.py      |   0
 .../llama_stack}/core/server/auth.py          |   0
 .../core/server/auth_providers.py             |   0
 .../llama_stack}/core/server/quota.py         |   0
 .../llama_stack}/core/server/routes.py        |   0
 .../llama_stack}/core/server/server.py        |   0
 .../llama_stack}/core/server/tracing.py       |   0
 .../llama_stack}/core/stack.py                |   0
 .../llama_stack}/core/start_stack.sh          |   0
 .../llama_stack}/core/storage/__init__.py     |   0
 .../llama_stack}/core/storage/datatypes.py    |   0
 .../llama_stack}/core/store/__init__.py       |   0
 .../llama_stack}/core/store/registry.py       |   0
 .../llama_stack}/core/telemetry/__init__.py   |   0
 .../llama_stack}/core/telemetry/telemetry.py  |   0
 .../core/telemetry/trace_protocol.py          |   0
 .../llama_stack}/core/telemetry/tracing.py    |   0
 .../llama_stack}/core/testing_context.py      |   0
 .../llama_stack}/core/ui/Containerfile        |   0
 .../llama_stack}/core/ui/README.md            |   0
 .../llama_stack}/core/ui/__init__.py          |   0
 .../llama_stack}/core/ui/app.py               |   0
 .../llama_stack}/core/ui/modules/__init__.py  |   0
 .../llama_stack}/core/ui/modules/api.py       |   0
 .../llama_stack}/core/ui/modules/utils.py     |   0
 .../llama_stack}/core/ui/page/__init__.py     |   0
 .../core/ui/page/distribution/__init__.py     |   0
 .../core/ui/page/distribution/datasets.py     |   0
 .../core/ui/page/distribution/eval_tasks.py   |   0
 .../core/ui/page/distribution/models.py       |   0
 .../core/ui/page/distribution/providers.py    |   0
 .../core/ui/page/distribution/resources.py    |   0
 .../ui/page/distribution/scoring_functions.py |   0
 .../core/ui/page/distribution/shields.py      |   0
 .../core/ui/page/evaluations/__init__.py      |   0
 .../core/ui/page/evaluations/app_eval.py      |   0
 .../core/ui/page/evaluations/native_eval.py   |   0
 .../core/ui/page/playground/__init__.py       |   0
 .../core/ui/page/playground/chat.py           |   0
 .../core/ui/page/playground/tools.py          |   0
 .../llama_stack}/core/ui/requirements.txt     |   0
 .../llama_stack}/core/utils/__init__.py       |   0
 .../llama_stack}/core/utils/config.py         |   0
 .../llama_stack}/core/utils/config_dirs.py    |   0
 .../core/utils/config_resolution.py           |   0
 .../llama_stack}/core/utils/context.py        |   0
 .../llama_stack}/core/utils/dynamic.py        |   0
 .../llama_stack}/core/utils/exec.py           |   0
 .../llama_stack}/core/utils/image_types.py    |   0
 .../llama_stack}/core/utils/model_utils.py    |   0
 .../core/utils/prompt_for_config.py           |   0
 .../llama_stack}/core/utils/serialize.py      |   0
 .../llama_stack}/distributions/__init__.py    |   0
 .../distributions/ci-tests/__init__.py        |   0
 .../distributions/ci-tests/build.yaml         |  59 ++++
 .../distributions/ci-tests/ci_tests.py        |   0
 .../distributions/ci-tests/run.yaml           | 281 +++++++++++++++++
 .../distributions/dell/__init__.py            |   0
 src/llama_stack/distributions/dell/build.yaml |  33 ++
 .../llama_stack}/distributions/dell/dell.py   |   0
 .../distributions/dell/doc_template.md        |   0
 .../distributions/dell/run-with-safety.yaml   | 141 +++++++++
 src/llama_stack/distributions/dell/run.yaml   | 135 +++++++++
 .../meta-reference-gpu/__init__.py            |   0
 .../meta-reference-gpu/build.yaml             |  32 ++
 .../meta-reference-gpu/doc_template.md        |   0
 .../meta-reference-gpu/meta_reference.py      |   0
 .../meta-reference-gpu/run-with-safety.yaml   | 154 ++++++++++
 .../distributions/meta-reference-gpu/run.yaml | 142 +++++++++
 .../distributions/nvidia/__init__.py          |   0
 .../distributions/nvidia/build.yaml           |  29 ++
 .../distributions/nvidia/doc_template.md      |   0
 .../distributions/nvidia/nvidia.py            |   0
 .../distributions/nvidia/run-with-safety.yaml | 137 +++++++++
 src/llama_stack/distributions/nvidia/run.yaml | 119 ++++++++
 .../distributions/open-benchmark/__init__.py  |   0
 .../distributions/open-benchmark/build.yaml   |  36 +++
 .../open-benchmark/open_benchmark.py          |   0
 .../distributions/open-benchmark/run.yaml     | 255 ++++++++++++++++
 .../distributions/postgres-demo/__init__.py   |   0
 .../distributions/postgres-demo/build.yaml    |  23 ++
 .../postgres-demo/postgres_demo.py            |   0
 .../distributions/postgres-demo/run.yaml      | 118 ++++++++
 .../distributions/starter-gpu/__init__.py     |   0
 .../distributions/starter-gpu/build.yaml      |  60 ++++
 .../distributions/starter-gpu/run.yaml        | 284 ++++++++++++++++++
 .../distributions/starter-gpu/starter_gpu.py  |   0
 .../distributions/starter/__init__.py         |   0
 .../distributions/starter/build.yaml          |  60 ++++
 .../distributions/starter/run.yaml            | 281 +++++++++++++++++
 .../distributions/starter/starter.py          |   0
 .../llama_stack}/distributions/template.py    |   0
 .../distributions/watsonx/__init__.py         |   0
 .../distributions/watsonx/build.yaml          |  33 ++
 .../distributions/watsonx/run.yaml            | 136 +++++++++
 .../distributions/watsonx/watsonx.py          |   0
 {llama_stack => src/llama_stack}/env.py       |   0
 {llama_stack => src/llama_stack}/log.py       |   0
 .../llama_stack}/models/__init__.py           |   0
 .../llama_stack}/models/llama/__init__.py     |   0
 .../llama_stack}/models/llama/checkpoint.py   |   0
 .../llama_stack}/models/llama/datatypes.py    |   0
 .../models/llama/hadamard_utils.py            |   0
 .../models/llama/llama3/__init__.py           |   0
 .../llama_stack}/models/llama/llama3/args.py  |   0
 .../models/llama/llama3/chat_format.py        |   0
 .../llama_stack}/models/llama/llama3/dog.jpg  | Bin
 .../models/llama/llama3/generation.py         |   0
 .../models/llama/llama3/interface.py          |   0
 .../llama_stack}/models/llama/llama3/model.py |   0
 .../llama/llama3/multimodal/__init__.py       |   0
 .../llama/llama3/multimodal/encoder_utils.py  |   0
 .../llama3/multimodal/image_transform.py      |   0
 .../models/llama/llama3/multimodal/model.py   |   0
 .../models/llama/llama3/multimodal/utils.py   |   0
 .../models/llama/llama3/pasta.jpeg            | Bin
 .../llama/llama3/prompt_templates/__init__.py |   0
 .../llama/llama3/prompt_templates/base.py     |   0
 .../llama3/prompt_templates/system_prompts.py |   0
 .../llama3/prompt_templates/tool_response.py  |   0
 .../llama/llama3/quantization/__init__.py     |   0
 .../llama/llama3/quantization/loader.py       |   0
 .../models/llama/llama3/template_data.py      |   0
 .../models/llama/llama3/tokenizer.model       |   0
 .../models/llama/llama3/tokenizer.py          |   0
 .../models/llama/llama3/tool_utils.py         |   0
 .../models/llama/llama3_1/__init__.py         |   0
 .../models/llama/llama3_1/prompt_format.md    |   0
 .../models/llama/llama3_1/prompts.py          |   0
 .../models/llama/llama3_2/__init__.py         |   0
 .../models/llama/llama3_2/prompts_text.py     |   0
 .../models/llama/llama3_2/prompts_vision.py   |   0
 .../llama/llama3_2/text_prompt_format.md      |   0
 .../llama/llama3_2/vision_prompt_format.md    |   0
 .../models/llama/llama3_3/__init__.py         |   0
 .../models/llama/llama3_3/prompts.py          |   0
 .../models/llama/llama4/__init__.py           |   0
 .../llama_stack}/models/llama/llama4/args.py  |   0
 .../models/llama/llama4/chat_format.py        |   0
 .../models/llama/llama4/datatypes.py          |   0
 .../llama_stack}/models/llama/llama4/ffn.py   |   0
 .../models/llama/llama4/generation.py         |   0
 .../llama_stack}/models/llama/llama4/model.py |   0
 .../llama_stack}/models/llama/llama4/moe.py   |   0
 .../models/llama/llama4/preprocess.py         |   0
 .../models/llama/llama4/prompt_format.md      |   0
 .../llama/llama4/prompt_templates/__init__.py |   0
 .../llama4/prompt_templates/system_prompts.py |   0
 .../models/llama/llama4/prompts.py            |   0
 .../llama/llama4/quantization/__init__.py     |   0
 .../llama/llama4/quantization/loader.py       |   0
 .../models/llama/llama4/tokenizer.model       |   0
 .../models/llama/llama4/tokenizer.py          |   0
 .../models/llama/llama4/vision/__init__.py    |   0
 .../models/llama/llama4/vision/embedding.py   |   0
 .../models/llama/llama4/vision/encoder.py     |   0
 .../models/llama/prompt_format.py             |   0
 .../models/llama/quantize_impls.py            |   0
 .../models/llama/resources/dog.jpg            | Bin
 .../models/llama/resources/pasta.jpeg         | Bin
 .../models/llama/resources/small_dog.jpg      | Bin
 .../llama_stack}/models/llama/sku_list.py     |   0
 .../llama_stack}/models/llama/sku_types.py    |   0
 .../models/llama/tokenizer_utils.py           |   0
 .../llama_stack}/providers/__init__.py        |   0
 .../llama_stack}/providers/datatypes.py       |   0
 .../llama_stack}/providers/inline/__init__.py |   0
 .../providers/inline/agents/__init__.py       |   0
 .../inline/agents/meta_reference/__init__.py  |   0
 .../agents/meta_reference/agent_instance.py   |   0
 .../inline/agents/meta_reference/agents.py    |   0
 .../inline/agents/meta_reference/config.py    |   0
 .../agents/meta_reference/persistence.py      |   0
 .../meta_reference/responses/__init__.py      |   0
 .../responses/openai_responses.py             |   0
 .../meta_reference/responses/streaming.py     |   0
 .../meta_reference/responses/tool_executor.py |   0
 .../agents/meta_reference/responses/types.py  |   0
 .../agents/meta_reference/responses/utils.py  |   0
 .../inline/agents/meta_reference/safety.py    |   0
 .../providers/inline/batches/__init__.py      |   0
 .../inline/batches/reference/__init__.py      |   0
 .../inline/batches/reference/batches.py       |   0
 .../inline/batches/reference/config.py        |   0
 .../providers/inline/datasetio/__init__.py    |   0
 .../inline/datasetio/localfs/__init__.py      |   0
 .../inline/datasetio/localfs/config.py        |   0
 .../inline/datasetio/localfs/datasetio.py     |   0
 .../providers/inline/eval/__init__.py         |   0
 .../inline/eval/meta_reference/__init__.py    |   0
 .../inline/eval/meta_reference/config.py      |   0
 .../inline/eval/meta_reference/eval.py        |   0
 .../inline/files/localfs/__init__.py          |   0
 .../providers/inline/files/localfs/config.py  |   0
 .../providers/inline/files/localfs/files.py   |   0
 .../providers/inline/inference/__init__.py    |   0
 .../inference/meta_reference/__init__.py      |   0
 .../inline/inference/meta_reference/common.py |   0
 .../inline/inference/meta_reference/config.py |   0
 .../inference/meta_reference/generators.py    |   0
 .../inference/meta_reference/inference.py     |   0
 .../meta_reference/model_parallel.py          |   0
 .../meta_reference/parallel_utils.py          |   0
 .../sentence_transformers/__init__.py         |   0
 .../inference/sentence_transformers/config.py |   0
 .../sentence_transformers.py                  |   0
 .../project.pbxproj                           |   0
 .../contents.xcworkspacedata                  |   0
 .../xcshareddata/IDEWorkspaceChecks.plist     |   0
 .../LocalInferenceImpl/LocalInference.h       |   0
 .../LocalInferenceImpl/LocalInference.swift   |   0
 .../LocalInferenceImpl/Parsing.swift          |   0
 .../LocalInferenceImpl/PromptTemplate.swift   |   0
 .../LocalInferenceImpl/SystemPrompts.swift    |   0
 .../inline/post_training/__init__.py          |   0
 .../inline/post_training/common/__init__.py   |   0
 .../inline/post_training/common/utils.py      |   0
 .../inline/post_training/common/validator.py  |   0
 .../post_training/huggingface/__init__.py     |   0
 .../post_training/huggingface/config.py       |   0
 .../huggingface/post_training.py              |   0
 .../huggingface/recipes/__init__.py           |   0
 .../recipes/finetune_single_device.py         |   0
 .../recipes/finetune_single_device_dpo.py     |   0
 .../inline/post_training/huggingface/utils.py |   0
 .../post_training/torchtune/__init__.py       |   0
 .../torchtune/common/__init__.py              |   0
 .../torchtune/common/checkpointer.py          |   0
 .../post_training/torchtune/common/utils.py   |   0
 .../inline/post_training/torchtune/config.py  |   0
 .../torchtune/datasets/__init__.py            |   0
 .../torchtune/datasets/format_adapter.py      |   0
 .../post_training/torchtune/datasets/sft.py   |   0
 .../post_training/torchtune/post_training.py  |   0
 .../torchtune/recipes/__init__.py             |   0
 .../recipes/lora_finetuning_single_device.py  |   0
 .../providers/inline/safety/__init__.py       |   0
 .../inline/safety/code_scanner/__init__.py    |   0
 .../safety/code_scanner/code_scanner.py       |   0
 .../inline/safety/code_scanner/config.py      |   0
 .../inline/safety/llama_guard/__init__.py     |   0
 .../inline/safety/llama_guard/config.py       |   0
 .../inline/safety/llama_guard/llama_guard.py  |   0
 .../inline/safety/prompt_guard/__init__.py    |   0
 .../inline/safety/prompt_guard/config.py      |   0
 .../safety/prompt_guard/prompt_guard.py       |   0
 .../providers/inline/scoring/__init__.py      |   0
 .../inline/scoring/basic/__init__.py          |   0
 .../providers/inline/scoring/basic/config.py  |   0
 .../providers/inline/scoring/basic/scoring.py |   0
 .../scoring/basic/scoring_fn/__init__.py      |   0
 .../basic/scoring_fn/docvqa_scoring_fn.py     |   0
 .../basic/scoring_fn/equality_scoring_fn.py   |   0
 .../basic/scoring_fn/fn_defs/__init__.py      |   0
 .../basic/scoring_fn/fn_defs/docvqa.py        |   0
 .../basic/scoring_fn/fn_defs/equality.py      |   0
 .../basic/scoring_fn/fn_defs/ifeval.py        |   0
 .../fn_defs/regex_parser_math_response.py     |   0
 .../regex_parser_multiple_choice_answer.py    |   0
 .../basic/scoring_fn/fn_defs/subset_of.py     |   0
 .../basic/scoring_fn/ifeval_scoring_fn.py     |   0
 .../regex_parser_math_response_scoring_fn.py  |   0
 .../scoring_fn/regex_parser_scoring_fn.py     |   0
 .../basic/scoring_fn/subset_of_scoring_fn.py  |   0
 .../inline/scoring/basic/utils/__init__.py    |   0
 .../scoring/basic/utils/ifeval_utils.py       |   0
 .../inline/scoring/basic/utils/math_utils.py  |   0
 .../inline/scoring/braintrust/__init__.py     |   0
 .../inline/scoring/braintrust/braintrust.py   |   0
 .../inline/scoring/braintrust/config.py       |   0
 .../scoring/braintrust/scoring_fn/__init__.py |   0
 .../braintrust/scoring_fn/fn_defs/__init__.py |   0
 .../scoring_fn/fn_defs/answer_correctness.py  |   0
 .../scoring_fn/fn_defs/answer_relevancy.py    |   0
 .../scoring_fn/fn_defs/answer_similarity.py   |   0
 .../fn_defs/context_entity_recall.py          |   0
 .../scoring_fn/fn_defs/context_precision.py   |   0
 .../scoring_fn/fn_defs/context_recall.py      |   0
 .../scoring_fn/fn_defs/context_relevancy.py   |   0
 .../scoring_fn/fn_defs/factuality.py          |   0
 .../scoring_fn/fn_defs/faithfulness.py        |   0
 .../inline/scoring/llm_as_judge/__init__.py   |   0
 .../inline/scoring/llm_as_judge/config.py     |   0
 .../inline/scoring/llm_as_judge/scoring.py    |   0
 .../llm_as_judge/scoring_fn/__init__.py       |   0
 .../scoring_fn/fn_defs/__init__.py            |   0
 .../fn_defs/llm_as_judge_405b_simpleqa.py     |   0
 .../scoring_fn/fn_defs/llm_as_judge_base.py   |   0
 .../scoring_fn/llm_as_judge_scoring_fn.py     |   0
 .../providers/inline/tool_runtime/__init__.py |   0
 .../inline/tool_runtime/rag/__init__.py       |   0
 .../inline/tool_runtime/rag/config.py         |   0
 .../tool_runtime/rag/context_retriever.py     |   0
 .../inline/tool_runtime/rag/memory.py         |   0
 .../providers/inline/vector_io/__init__.py    |   0
 .../inline/vector_io/chroma/__init__.py       |   0
 .../inline/vector_io/chroma/config.py         |   0
 .../inline/vector_io/faiss/__init__.py        |   0
 .../inline/vector_io/faiss/config.py          |   0
 .../providers/inline/vector_io/faiss/faiss.py |   0
 .../inline/vector_io/milvus/__init__.py       |   0
 .../inline/vector_io/milvus/config.py         |   0
 .../inline/vector_io/qdrant/__init__.py       |   0
 .../inline/vector_io/qdrant/config.py         |   0
 .../inline/vector_io/sqlite_vec/__init__.py   |   0
 .../inline/vector_io/sqlite_vec/config.py     |   0
 .../inline/vector_io/sqlite_vec/sqlite_vec.py |   0
 .../providers/registry/__init__.py            |   0
 .../llama_stack}/providers/registry/agents.py |   0
 .../providers/registry/batches.py             |   0
 .../providers/registry/datasetio.py           |   0
 .../llama_stack}/providers/registry/eval.py   |   0
 .../llama_stack}/providers/registry/files.py  |   0
 .../providers/registry/inference.py           |   0
 .../providers/registry/post_training.py       |   0
 .../llama_stack}/providers/registry/safety.py |   0
 .../providers/registry/scoring.py             |   0
 .../providers/registry/tool_runtime.py        |   0
 .../providers/registry/vector_io.py           |   0
 .../llama_stack}/providers/remote/__init__.py |   0
 .../providers/remote/agents/__init__.py       |   0
 .../providers/remote/datasetio/__init__.py    |   0
 .../remote/datasetio/huggingface/__init__.py  |   0
 .../remote/datasetio/huggingface/config.py    |   0
 .../datasetio/huggingface/huggingface.py      |   0
 .../remote/datasetio/nvidia/README.md         |   0
 .../remote/datasetio/nvidia/__init__.py       |   0
 .../remote/datasetio/nvidia/config.py         |   0
 .../remote/datasetio/nvidia/datasetio.py      |   0
 .../providers/remote/eval/__init__.py         |   0
 .../providers/remote/eval/nvidia/README.md    |   0
 .../providers/remote/eval/nvidia/__init__.py  |   0
 .../providers/remote/eval/nvidia/config.py    |   0
 .../providers/remote/eval/nvidia/eval.py      |   0
 .../providers/remote/files/s3/README.md       |   0
 .../providers/remote/files/s3/__init__.py     |   0
 .../providers/remote/files/s3/config.py       |   0
 .../providers/remote/files/s3/files.py        |   0
 .../providers/remote/inference/__init__.py    |   0
 .../remote/inference/anthropic/__init__.py    |   0
 .../remote/inference/anthropic/anthropic.py   |   0
 .../remote/inference/anthropic/config.py      |   0
 .../remote/inference/azure/__init__.py        |   0
 .../providers/remote/inference/azure/azure.py |   0
 .../remote/inference/azure/config.py          |   0
 .../remote/inference/bedrock/__init__.py      |   0
 .../remote/inference/bedrock/bedrock.py       |   0
 .../remote/inference/bedrock/config.py        |   0
 .../remote/inference/bedrock/models.py        |   0
 .../remote/inference/cerebras/__init__.py     |   0
 .../remote/inference/cerebras/cerebras.py     |   0
 .../remote/inference/cerebras/config.py       |   0
 .../remote/inference/databricks/__init__.py   |   0
 .../remote/inference/databricks/config.py     |   0
 .../remote/inference/databricks/databricks.py |   0
 .../remote/inference/fireworks/__init__.py    |   0
 .../remote/inference/fireworks/config.py      |   0
 .../remote/inference/fireworks/fireworks.py   |   0
 .../remote/inference/gemini/__init__.py       |   0
 .../remote/inference/gemini/config.py         |   0
 .../remote/inference/gemini/gemini.py         |   0
 .../remote/inference/groq/__init__.py         |   0
 .../providers/remote/inference/groq/config.py |   0
 .../providers/remote/inference/groq/groq.py   |   0
 .../inference/llama_openai_compat/__init__.py |   0
 .../inference/llama_openai_compat/config.py   |   0
 .../inference/llama_openai_compat/llama.py    |   0
 .../remote/inference/nvidia/NVIDIA.md         |   0
 .../remote/inference/nvidia/__init__.py       |   0
 .../remote/inference/nvidia/config.py         |   0
 .../remote/inference/nvidia/nvidia.py         |   0
 .../remote/inference/nvidia/utils.py          |   0
 .../remote/inference/ollama/__init__.py       |   0
 .../remote/inference/ollama/config.py         |   0
 .../remote/inference/ollama/ollama.py         |   0
 .../remote/inference/openai/__init__.py       |   0
 .../remote/inference/openai/config.py         |   0
 .../remote/inference/openai/openai.py         |   0
 .../remote/inference/passthrough/__init__.py  |   0
 .../remote/inference/passthrough/config.py    |   0
 .../inference/passthrough/passthrough.py      |   0
 .../remote/inference/runpod/__init__.py       |   0
 .../remote/inference/runpod/config.py         |   0
 .../remote/inference/runpod/runpod.py         |   0
 .../remote/inference/sambanova/__init__.py    |   0
 .../remote/inference/sambanova/config.py      |   0
 .../remote/inference/sambanova/sambanova.py   |   0
 .../remote/inference/tgi/__init__.py          |   0
 .../providers/remote/inference/tgi/config.py  |   0
 .../providers/remote/inference/tgi/tgi.py     |   0
 .../remote/inference/together/__init__.py     |   0
 .../remote/inference/together/config.py       |   0
 .../remote/inference/together/together.py     |   0
 .../remote/inference/vertexai/__init__.py     |   0
 .../remote/inference/vertexai/config.py       |   0
 .../remote/inference/vertexai/vertexai.py     |   0
 .../remote/inference/vllm/__init__.py         |   0
 .../providers/remote/inference/vllm/config.py |   0
 .../providers/remote/inference/vllm/vllm.py   |   0
 .../remote/inference/watsonx/__init__.py      |   0
 .../remote/inference/watsonx/config.py        |   0
 .../remote/inference/watsonx/watsonx.py       |   0
 .../remote/post_training/__init__.py          |   0
 .../remote/post_training/nvidia/README.md     |   0
 .../remote/post_training/nvidia/__init__.py   |   0
 .../remote/post_training/nvidia/config.py     |   0
 .../remote/post_training/nvidia/models.py     |   0
 .../post_training/nvidia/post_training.py     |   0
 .../remote/post_training/nvidia/utils.py      |   0
 .../providers/remote/safety/__init__.py       |   0
 .../remote/safety/bedrock/__init__.py         |   0
 .../remote/safety/bedrock/bedrock.py          |   0
 .../providers/remote/safety/bedrock/config.py |   0
 .../providers/remote/safety/nvidia/README.md  |   0
 .../remote/safety/nvidia/__init__.py          |   0
 .../providers/remote/safety/nvidia/config.py  |   0
 .../providers/remote/safety/nvidia/nvidia.py  |   0
 .../remote/safety/sambanova/__init__.py       |   0
 .../remote/safety/sambanova/config.py         |   0
 .../remote/safety/sambanova/sambanova.py      |   0
 .../providers/remote/tool_runtime/__init__.py |   0
 .../tool_runtime/bing_search/__init__.py      |   0
 .../tool_runtime/bing_search/bing_search.py   |   0
 .../remote/tool_runtime/bing_search/config.py |   0
 .../tool_runtime/brave_search/__init__.py     |   0
 .../tool_runtime/brave_search/brave_search.py |   0
 .../tool_runtime/brave_search/config.py       |   0
 .../model_context_protocol/__init__.py        |   0
 .../model_context_protocol/config.py          |   0
 .../model_context_protocol.py                 |   0
 .../tool_runtime/tavily_search/__init__.py    |   0
 .../tool_runtime/tavily_search/config.py      |   0
 .../tavily_search/tavily_search.py            |   0
 .../tool_runtime/wolfram_alpha/__init__.py    |   0
 .../tool_runtime/wolfram_alpha/config.py      |   0
 .../wolfram_alpha/wolfram_alpha.py            |   0
 .../providers/remote/vector_io/__init__.py    |   0
 .../remote/vector_io/chroma/__init__.py       |   0
 .../remote/vector_io/chroma/chroma.py         |   0
 .../remote/vector_io/chroma/config.py         |   0
 .../remote/vector_io/milvus/__init__.py       |   0
 .../remote/vector_io/milvus/config.py         |   0
 .../remote/vector_io/milvus/milvus.py         |   0
 .../remote/vector_io/pgvector/__init__.py     |   0
 .../remote/vector_io/pgvector/config.py       |   0
 .../remote/vector_io/pgvector/pgvector.py     |   0
 .../remote/vector_io/qdrant/__init__.py       |   0
 .../remote/vector_io/qdrant/config.py         |   0
 .../remote/vector_io/qdrant/qdrant.py         |   0
 .../remote/vector_io/weaviate/__init__.py     |   0
 .../remote/vector_io/weaviate/config.py       |   0
 .../remote/vector_io/weaviate/weaviate.py     |   0
 .../llama_stack}/providers/utils/__init__.py  |   0
 .../providers/utils/bedrock/__init__.py       |   0
 .../providers/utils/bedrock/client.py         |   0
 .../providers/utils/bedrock/config.py         |   0
 .../utils/bedrock/refreshable_boto_session.py |   0
 .../providers/utils/common/__init__.py        |   0
 .../utils/common/data_schema_validator.py     |   0
 .../providers/utils/datasetio/__init__.py     |   0
 .../providers/utils/datasetio/url_utils.py    |   0
 .../providers/utils/files/__init__.py         |   0
 .../providers/utils/files/form_data.py        |   0
 .../providers/utils/inference/__init__.py     |   0
 .../utils/inference/embedding_mixin.py        |   0
 .../utils/inference/inference_store.py        |   0
 .../utils/inference/litellm_openai_mixin.py   |   0
 .../utils/inference/model_registry.py         |   0
 .../utils/inference/openai_compat.py          |   0
 .../providers/utils/inference/openai_mixin.py |   0
 .../utils/inference/prompt_adapter.py         |   0
 .../providers/utils/kvstore/__init__.py       |   0
 .../providers/utils/kvstore/api.py            |   0
 .../providers/utils/kvstore/config.py         |   0
 .../providers/utils/kvstore/kvstore.py        |   0
 .../utils/kvstore/mongodb/__init__.py         |   0
 .../utils/kvstore/mongodb/mongodb.py          |   0
 .../utils/kvstore/postgres/__init__.py        |   0
 .../utils/kvstore/postgres/postgres.py        |   0
 .../providers/utils/kvstore/redis/__init__.py |   0
 .../providers/utils/kvstore/redis/redis.py    |   0
 .../utils/kvstore/sqlite/__init__.py          |   0
 .../providers/utils/kvstore/sqlite/config.py  |   0
 .../providers/utils/kvstore/sqlite/sqlite.py  |   0
 .../providers/utils/memory/__init__.py        |   0
 .../providers/utils/memory/file_utils.py      |   0
 .../utils/memory/openai_vector_store_mixin.py |   0
 .../providers/utils/memory/vector_store.py    |   0
 .../providers/utils/pagination.py             |   0
 .../providers/utils/responses/__init__.py     |   0
 .../utils/responses/responses_store.py        |   0
 .../llama_stack}/providers/utils/scheduler.py |   0
 .../providers/utils/scoring/__init__.py       |   0
 .../utils/scoring/aggregation_utils.py        |   0
 .../utils/scoring/base_scoring_fn.py          |   0
 .../utils/scoring/basic_scoring_utils.py      |   0
 .../providers/utils/sqlstore/__init__.py      |   0
 .../providers/utils/sqlstore/api.py           |   0
 .../utils/sqlstore/authorized_sqlstore.py     |   0
 .../utils/sqlstore/sqlalchemy_sqlstore.py     |   0
 .../providers/utils/sqlstore/sqlstore.py      |   0
 .../providers/utils/tools/__init__.py         |   0
 .../llama_stack}/providers/utils/tools/mcp.py |   0
 .../providers/utils/tools/ttl_dict.py         |   0
 .../providers/utils/vector_io/__init__.py     |   0
 .../providers/utils/vector_io/vector_utils.py |   0
 .../llama_stack}/schema_utils.py              |   0
 .../llama_stack}/strong_typing/__init__.py    |   0
 .../llama_stack}/strong_typing/auxiliary.py   |  23 +-
 .../llama_stack}/strong_typing/classdef.py    |  96 +++---
 .../llama_stack}/strong_typing/core.py        |  12 +-
 .../strong_typing/deserializer.py             | 129 ++++----
 .../llama_stack}/strong_typing/docstring.py   |  33 +-
 .../llama_stack}/strong_typing/exception.py   |   0
 .../llama_stack}/strong_typing/inspection.py  | 111 ++++---
 .../llama_stack}/strong_typing/mapping.py     |   3 +-
 .../llama_stack}/strong_typing/name.py        |   8 +-
 .../llama_stack}/strong_typing/py.typed       |   0
 .../llama_stack}/strong_typing/schema.py      |  83 +++--
 .../strong_typing/serialization.py            |   4 +-
 .../llama_stack}/strong_typing/serializer.py  |  72 ++---
 .../llama_stack}/strong_typing/slots.py       |   8 +-
 .../llama_stack}/strong_typing/topological.py |  15 +-
 .../llama_stack}/testing/__init__.py          |   0
 .../llama_stack}/testing/api_recorder.py      |   2 +-
 .../llama_stack}/ui/.gitignore                |   0
 {llama_stack => src/llama_stack}/ui/.nvmrc    |   0
 .../llama_stack}/ui/.prettierignore           |   0
 .../llama_stack}/ui/.prettierrc               |   0
 {llama_stack => src/llama_stack}/ui/README.md |   0
 .../ui/app/api/auth/[...nextauth]/route.ts    |   0
 .../ui/app/api/v1/[...path]/route.ts          |   0
 .../llama_stack}/ui/app/auth/signin/page.tsx  |   0
 .../chat-playground/chunk-processor.test.tsx  |   0
 .../ui/app/chat-playground/page.test.tsx      |   0
 .../ui/app/chat-playground/page.tsx           |   0
 .../llama_stack}/ui/app/globals.css           |   0
 .../llama_stack}/ui/app/layout.tsx            |   0
 .../app/logs/chat-completions/[id]/page.tsx   |   0
 .../ui/app/logs/chat-completions/layout.tsx   |   0
 .../ui/app/logs/chat-completions/page.tsx     |   0
 .../ui/app/logs/responses/[id]/page.tsx       |   0
 .../ui/app/logs/responses/layout.tsx          |   0
 .../ui/app/logs/responses/page.tsx            |   0
 .../contents/[contentId]/page.test.tsx        |   0
 .../[fileId]/contents/[contentId]/page.tsx    |   0
 .../files/[fileId]/contents/page.test.tsx     |   0
 .../[id]/files/[fileId]/contents/page.tsx     |   0
 .../[id]/files/[fileId]/page.test.tsx         |   0
 .../[id]/files/[fileId]/page.tsx              |   0
 .../ui/app/logs/vector-stores/[id]/page.tsx   |   0
 .../ui/app/logs/vector-stores/layout.tsx      |   0
 .../ui/app/logs/vector-stores/page.tsx        |   0
 .../llama_stack}/ui/app/page.tsx              |   0
 .../llama_stack}/ui/components.json           |   0
 .../chat-completion-detail.test.tsx           |   0
 .../chat-completion-detail.tsx                |   0
 .../chat-completion-table.test.tsx            |   0
 .../chat-completions-table.tsx                |   0
 .../chat-completions/chat-messasge-item.tsx   |   0
 .../chat-playground/chat-message.tsx          |   0
 .../ui/components/chat-playground/chat.tsx    |   0
 .../chat-playground/conversations.test.tsx    |   0
 .../chat-playground/conversations.tsx         |   0
 .../chat-playground/interrupt-prompt.tsx      |   0
 .../chat-playground/markdown-renderer.tsx     |   0
 .../chat-playground/message-components.tsx    |   0
 .../chat-playground/message-input.tsx         |   0
 .../chat-playground/message-list.tsx          |   0
 .../chat-playground/prompt-suggestions.tsx    |   0
 .../chat-playground/typing-indicator.tsx      |   0
 .../chat-playground/vector-db-creator.tsx     |   0
 .../ui/components/layout/app-sidebar.tsx      |   0
 .../ui/components/layout/detail-layout.tsx    |   0
 .../ui/components/layout/logs-layout.tsx      |   0
 .../ui/components/layout/page-breadcrumb.tsx  |   0
 .../logs/logs-table-scroll.test.tsx           |   0
 .../ui/components/logs/logs-table.test.tsx    |   0
 .../ui/components/logs/logs-table.tsx         |   0
 .../components/providers/session-provider.tsx |   0
 .../grouping/grouped-items-display.tsx        |   0
 .../responses/hooks/function-call-grouping.ts |   0
 .../responses/items/function-call-item.tsx    |   0
 .../responses/items/generic-item.tsx          |   0
 .../items/grouped-function-call-item.tsx      |   0
 .../ui/components/responses/items/index.ts    |   0
 .../responses/items/item-renderer.tsx         |   0
 .../responses/items/message-item.tsx          |   0
 .../responses/items/web-search-item.tsx       |   0
 .../responses/responses-detail.test.tsx       |   0
 .../components/responses/responses-detail.tsx |   0
 .../responses/responses-table.test.tsx        |   0
 .../components/responses/responses-table.tsx  |   0
 .../components/responses/utils/item-types.ts  |   0
 .../ui/components/ui/audio-visualizer.tsx     |   0
 .../ui/components/ui/breadcrumb.tsx           |   0
 .../llama_stack}/ui/components/ui/button.tsx  |   0
 .../llama_stack}/ui/components/ui/card.tsx    |   0
 .../ui/components/ui/collapsible.tsx          |   0
 .../ui/components/ui/copy-button.tsx          |   0
 .../ui/components/ui/dropdown-menu.tsx        |   0
 .../ui/components/ui/file-preview.tsx         |   0
 .../llama_stack}/ui/components/ui/input.tsx   |   0
 .../ui/components/ui/mode-toggle.tsx          |   0
 .../llama_stack}/ui/components/ui/select.tsx  |   0
 .../ui/components/ui/separator.tsx            |   0
 .../llama_stack}/ui/components/ui/sheet.tsx   |   0
 .../llama_stack}/ui/components/ui/sidebar.tsx |   0
 .../ui/components/ui/sign-in-button.tsx       |   0
 .../ui/components/ui/skeleton.tsx             |   0
 .../llama_stack}/ui/components/ui/sonner.tsx  |   0
 .../llama_stack}/ui/components/ui/table.tsx   |   0
 .../ui/components/ui/theme-provider.tsx       |   0
 .../llama_stack}/ui/components/ui/tooltip.tsx |   0
 .../vector-store-detail.test.tsx              |   0
 .../vector-stores/vector-store-detail.tsx     |   0
 .../ui/e2e/logs-table-scroll.spec.ts          |   0
 .../llama_stack}/ui/eslint.config.mjs         |   0
 .../ui/hooks/use-audio-recording.ts           |   0
 .../llama_stack}/ui/hooks/use-auth-client.ts  |   0
 .../llama_stack}/ui/hooks/use-auto-scroll.ts  |   0
 .../ui/hooks/use-autosize-textarea.ts         |   0
 .../ui/hooks/use-copy-to-clipboard.ts         |   0
 .../ui/hooks/use-infinite-scroll.ts           |   0
 .../llama_stack}/ui/hooks/use-mobile.ts       |   0
 .../llama_stack}/ui/hooks/use-pagination.ts   |   0
 .../llama_stack}/ui/instrumentation.ts        |   0
 .../llama_stack}/ui/jest.config.ts            |   0
 .../llama_stack}/ui/jest.setup.ts             |   0
 .../llama_stack}/ui/lib/audio-utils.ts        |   0
 .../llama_stack}/ui/lib/auth.ts               |   0
 .../llama_stack}/ui/lib/config-validator.ts   |   0
 .../llama_stack}/ui/lib/contents-api.ts       |   0
 .../ui/lib/format-message-content.test.ts     |   0
 .../ui/lib/format-message-content.ts          |   0
 .../llama_stack}/ui/lib/format-tool-call.tsx  |   0
 .../ui/lib/message-content-utils.ts           |   0
 .../llama_stack}/ui/lib/truncate-text.ts      |   0
 .../llama_stack}/ui/lib/types.ts              |   0
 .../llama_stack}/ui/lib/utils.tsx             |   0
 .../llama_stack}/ui/next.config.ts            |   0
 .../llama_stack}/ui/package-lock.json         |   0
 .../llama_stack}/ui/package.json              |   0
 .../llama_stack}/ui/playwright.config.ts      |   0
 .../llama_stack}/ui/postcss.config.mjs        |   0
 .../llama_stack}/ui/public/favicon.ico        | Bin
 .../llama_stack}/ui/public/file.svg           |   0
 .../llama_stack}/ui/public/globe.svg          |   0
 .../llama_stack}/ui/public/logo.webp          | Bin
 .../llama_stack}/ui/public/next.svg           |   0
 .../llama_stack}/ui/public/vercel.svg         |   0
 .../llama_stack}/ui/public/window.svg         |   0
 .../llama_stack}/ui/tsconfig.json             |   0
 .../llama_stack}/ui/types/next-auth.d.ts      |   0
 791 files changed, 2983 insertions(+), 456 deletions(-)
 rename {llama_stack => src/llama_stack}/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/agents/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/agents/agents.py (100%)
 rename {llama_stack => src/llama_stack}/apis/agents/openai_responses.py (100%)
 rename {llama_stack => src/llama_stack}/apis/batches/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/batches/batches.py (100%)
 rename {llama_stack => src/llama_stack}/apis/benchmarks/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/benchmarks/benchmarks.py (100%)
 rename {llama_stack => src/llama_stack}/apis/common/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/common/content_types.py (100%)
 rename {llama_stack => src/llama_stack}/apis/common/errors.py (100%)
 rename {llama_stack => src/llama_stack}/apis/common/job_types.py (100%)
 rename {llama_stack => src/llama_stack}/apis/common/responses.py (100%)
 rename {llama_stack => src/llama_stack}/apis/common/training_types.py (100%)
 rename {llama_stack => src/llama_stack}/apis/common/type_system.py (100%)
 rename {llama_stack => src/llama_stack}/apis/conversations/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/conversations/conversations.py (100%)
 rename {llama_stack => src/llama_stack}/apis/datasetio/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/datasetio/datasetio.py (100%)
 rename {llama_stack => src/llama_stack}/apis/datasets/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/datasets/datasets.py (100%)
 rename {llama_stack => src/llama_stack}/apis/datatypes.py (100%)
 rename {llama_stack => src/llama_stack}/apis/eval/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/eval/eval.py (100%)
 rename {llama_stack => src/llama_stack}/apis/files/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/files/files.py (100%)
 rename {llama_stack => src/llama_stack}/apis/inference/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/inference/event_logger.py (100%)
 rename {llama_stack => src/llama_stack}/apis/inference/inference.py (100%)
 rename {llama_stack => src/llama_stack}/apis/inspect/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/inspect/inspect.py (100%)
 rename {llama_stack => src/llama_stack}/apis/models/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/models/models.py (100%)
 rename {llama_stack => src/llama_stack}/apis/post_training/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/post_training/post_training.py (100%)
 rename {llama_stack => src/llama_stack}/apis/prompts/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/prompts/prompts.py (100%)
 rename {llama_stack => src/llama_stack}/apis/providers/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/providers/providers.py (100%)
 rename {llama_stack => src/llama_stack}/apis/resource.py (100%)
 rename {llama_stack => src/llama_stack}/apis/safety/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/safety/safety.py (100%)
 rename {llama_stack => src/llama_stack}/apis/scoring/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/scoring/scoring.py (100%)
 rename {llama_stack => src/llama_stack}/apis/scoring_functions/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/scoring_functions/scoring_functions.py (100%)
 rename {llama_stack => src/llama_stack}/apis/shields/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/shields/shields.py (100%)
 rename {llama_stack => src/llama_stack}/apis/synthetic_data_generation/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/synthetic_data_generation/synthetic_data_generation.py (100%)
 rename {llama_stack => src/llama_stack}/apis/telemetry/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/telemetry/telemetry.py (100%)
 rename {llama_stack => src/llama_stack}/apis/tools/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/tools/rag_tool.py (100%)
 rename {llama_stack => src/llama_stack}/apis/tools/tools.py (100%)
 rename {llama_stack => src/llama_stack}/apis/vector_io/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/vector_io/vector_io.py (100%)
 rename {llama_stack => src/llama_stack}/apis/vector_stores/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/apis/vector_stores/vector_stores.py (100%)
 rename {llama_stack => src/llama_stack}/apis/version.py (100%)
 rename {llama_stack => src/llama_stack}/cli/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/cli/llama.py (100%)
 rename {llama_stack => src/llama_stack}/cli/scripts/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/cli/scripts/install-wheel-from-presigned.sh (100%)
 rename {llama_stack => src/llama_stack}/cli/scripts/run.py (100%)
 rename {llama_stack => src/llama_stack}/cli/stack/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/cli/stack/_list_deps.py (100%)
 rename {llama_stack => src/llama_stack}/cli/stack/list_apis.py (100%)
 rename {llama_stack => src/llama_stack}/cli/stack/list_deps.py (100%)
 rename {llama_stack => src/llama_stack}/cli/stack/list_providers.py (100%)
 rename {llama_stack => src/llama_stack}/cli/stack/list_stacks.py (100%)
 rename {llama_stack => src/llama_stack}/cli/stack/remove.py (100%)
 rename {llama_stack => src/llama_stack}/cli/stack/run.py (100%)
 rename {llama_stack => src/llama_stack}/cli/stack/stack.py (100%)
 rename {llama_stack => src/llama_stack}/cli/stack/utils.py (100%)
 rename {llama_stack => src/llama_stack}/cli/subcommand.py (100%)
 rename {llama_stack => src/llama_stack}/cli/table.py (100%)
 rename {llama_stack => src/llama_stack}/cli/utils.py (100%)
 rename {llama_stack => src/llama_stack}/core/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/access_control/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/access_control/access_control.py (100%)
 rename {llama_stack => src/llama_stack}/core/access_control/conditions.py (100%)
 rename {llama_stack => src/llama_stack}/core/access_control/datatypes.py (100%)
 rename {llama_stack => src/llama_stack}/core/build.py (100%)
 rename {llama_stack => src/llama_stack}/core/client.py (100%)
 rename {llama_stack => src/llama_stack}/core/common.sh (100%)
 rename {llama_stack => src/llama_stack}/core/configure.py (100%)
 rename {llama_stack => src/llama_stack}/core/conversations/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/conversations/conversations.py (100%)
 rename {llama_stack => src/llama_stack}/core/datatypes.py (100%)
 rename {llama_stack => src/llama_stack}/core/distribution.py (100%)
 rename {llama_stack => src/llama_stack}/core/external.py (100%)
 rename {llama_stack => src/llama_stack}/core/id_generation.py (100%)
 rename {llama_stack => src/llama_stack}/core/inspect.py (100%)
 rename {llama_stack => src/llama_stack}/core/library_client.py (100%)
 rename {llama_stack => src/llama_stack}/core/prompts/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/prompts/prompts.py (100%)
 rename {llama_stack => src/llama_stack}/core/providers.py (100%)
 rename {llama_stack => src/llama_stack}/core/request_headers.py (100%)
 rename {llama_stack => src/llama_stack}/core/resolver.py (100%)
 rename {llama_stack => src/llama_stack}/core/routers/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/routers/datasets.py (100%)
 rename {llama_stack => src/llama_stack}/core/routers/eval_scoring.py (100%)
 rename {llama_stack => src/llama_stack}/core/routers/inference.py (100%)
 rename {llama_stack => src/llama_stack}/core/routers/safety.py (100%)
 rename {llama_stack => src/llama_stack}/core/routers/tool_runtime.py (100%)
 rename {llama_stack => src/llama_stack}/core/routers/vector_io.py (100%)
 rename {llama_stack => src/llama_stack}/core/routing_tables/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/routing_tables/benchmarks.py (100%)
 rename {llama_stack => src/llama_stack}/core/routing_tables/common.py (100%)
 rename {llama_stack => src/llama_stack}/core/routing_tables/datasets.py (100%)
 rename {llama_stack => src/llama_stack}/core/routing_tables/models.py (100%)
 rename {llama_stack => src/llama_stack}/core/routing_tables/scoring_functions.py (100%)
 rename {llama_stack => src/llama_stack}/core/routing_tables/shields.py (100%)
 rename {llama_stack => src/llama_stack}/core/routing_tables/toolgroups.py (100%)
 rename {llama_stack => src/llama_stack}/core/routing_tables/vector_stores.py (100%)
 rename {llama_stack => src/llama_stack}/core/server/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/server/auth.py (100%)
 rename {llama_stack => src/llama_stack}/core/server/auth_providers.py (100%)
 rename {llama_stack => src/llama_stack}/core/server/quota.py (100%)
 rename {llama_stack => src/llama_stack}/core/server/routes.py (100%)
 rename {llama_stack => src/llama_stack}/core/server/server.py (100%)
 rename {llama_stack => src/llama_stack}/core/server/tracing.py (100%)
 rename {llama_stack => src/llama_stack}/core/stack.py (100%)
 rename {llama_stack => src/llama_stack}/core/start_stack.sh (100%)
 rename {llama_stack => src/llama_stack}/core/storage/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/storage/datatypes.py (100%)
 rename {llama_stack => src/llama_stack}/core/store/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/store/registry.py (100%)
 rename {llama_stack => src/llama_stack}/core/telemetry/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/telemetry/telemetry.py (100%)
 rename {llama_stack => src/llama_stack}/core/telemetry/trace_protocol.py (100%)
 rename {llama_stack => src/llama_stack}/core/telemetry/tracing.py (100%)
 rename {llama_stack => src/llama_stack}/core/testing_context.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/Containerfile (100%)
 rename {llama_stack => src/llama_stack}/core/ui/README.md (100%)
 rename {llama_stack => src/llama_stack}/core/ui/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/app.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/modules/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/modules/api.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/modules/utils.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/distribution/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/distribution/datasets.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/distribution/eval_tasks.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/distribution/models.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/distribution/providers.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/distribution/resources.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/distribution/scoring_functions.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/distribution/shields.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/evaluations/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/evaluations/app_eval.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/evaluations/native_eval.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/playground/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/playground/chat.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/page/playground/tools.py (100%)
 rename {llama_stack => src/llama_stack}/core/ui/requirements.txt (100%)
 rename {llama_stack => src/llama_stack}/core/utils/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/core/utils/config.py (100%)
 rename {llama_stack => src/llama_stack}/core/utils/config_dirs.py (100%)
 rename {llama_stack => src/llama_stack}/core/utils/config_resolution.py (100%)
 rename {llama_stack => src/llama_stack}/core/utils/context.py (100%)
 rename {llama_stack => src/llama_stack}/core/utils/dynamic.py (100%)
 rename {llama_stack => src/llama_stack}/core/utils/exec.py (100%)
 rename {llama_stack => src/llama_stack}/core/utils/image_types.py (100%)
 rename {llama_stack => src/llama_stack}/core/utils/model_utils.py (100%)
 rename {llama_stack => src/llama_stack}/core/utils/prompt_for_config.py (100%)
 rename {llama_stack => src/llama_stack}/core/utils/serialize.py (100%)
 rename {llama_stack => src/llama_stack}/distributions/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/distributions/ci-tests/__init__.py (100%)
 create mode 100644 src/llama_stack/distributions/ci-tests/build.yaml
 rename {llama_stack => src/llama_stack}/distributions/ci-tests/ci_tests.py (100%)
 create mode 100644 src/llama_stack/distributions/ci-tests/run.yaml
 rename {llama_stack => src/llama_stack}/distributions/dell/__init__.py (100%)
 create mode 100644 src/llama_stack/distributions/dell/build.yaml
 rename {llama_stack => src/llama_stack}/distributions/dell/dell.py (100%)
 rename {llama_stack => src/llama_stack}/distributions/dell/doc_template.md (100%)
 create mode 100644 src/llama_stack/distributions/dell/run-with-safety.yaml
 create mode 100644 src/llama_stack/distributions/dell/run.yaml
 rename {llama_stack => src/llama_stack}/distributions/meta-reference-gpu/__init__.py (100%)
 create mode 100644 src/llama_stack/distributions/meta-reference-gpu/build.yaml
 rename {llama_stack => src/llama_stack}/distributions/meta-reference-gpu/doc_template.md (100%)
 rename {llama_stack => src/llama_stack}/distributions/meta-reference-gpu/meta_reference.py (100%)
 create mode 100644 src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml
 create mode 100644 src/llama_stack/distributions/meta-reference-gpu/run.yaml
 rename {llama_stack => src/llama_stack}/distributions/nvidia/__init__.py (100%)
 create mode 100644 src/llama_stack/distributions/nvidia/build.yaml
 rename {llama_stack => src/llama_stack}/distributions/nvidia/doc_template.md (100%)
 rename {llama_stack => src/llama_stack}/distributions/nvidia/nvidia.py (100%)
 create mode 100644 src/llama_stack/distributions/nvidia/run-with-safety.yaml
 create mode 100644 src/llama_stack/distributions/nvidia/run.yaml
 rename {llama_stack => src/llama_stack}/distributions/open-benchmark/__init__.py (100%)
 create mode 100644 src/llama_stack/distributions/open-benchmark/build.yaml
 rename {llama_stack => src/llama_stack}/distributions/open-benchmark/open_benchmark.py (100%)
 create mode 100644 src/llama_stack/distributions/open-benchmark/run.yaml
 rename {llama_stack => src/llama_stack}/distributions/postgres-demo/__init__.py (100%)
 create mode 100644 src/llama_stack/distributions/postgres-demo/build.yaml
 rename {llama_stack => src/llama_stack}/distributions/postgres-demo/postgres_demo.py (100%)
 create mode 100644 src/llama_stack/distributions/postgres-demo/run.yaml
 rename {llama_stack => src/llama_stack}/distributions/starter-gpu/__init__.py (100%)
 create mode 100644 src/llama_stack/distributions/starter-gpu/build.yaml
 create mode 100644 src/llama_stack/distributions/starter-gpu/run.yaml
 rename {llama_stack => src/llama_stack}/distributions/starter-gpu/starter_gpu.py (100%)
 rename {llama_stack => src/llama_stack}/distributions/starter/__init__.py (100%)
 create mode 100644 src/llama_stack/distributions/starter/build.yaml
 create mode 100644 src/llama_stack/distributions/starter/run.yaml
 rename {llama_stack => src/llama_stack}/distributions/starter/starter.py (100%)
 rename {llama_stack => src/llama_stack}/distributions/template.py (100%)
 rename {llama_stack => src/llama_stack}/distributions/watsonx/__init__.py (100%)
 create mode 100644 src/llama_stack/distributions/watsonx/build.yaml
 create mode 100644 src/llama_stack/distributions/watsonx/run.yaml
 rename {llama_stack => src/llama_stack}/distributions/watsonx/watsonx.py (100%)
 rename {llama_stack => src/llama_stack}/env.py (100%)
 rename {llama_stack => src/llama_stack}/log.py (100%)
 rename {llama_stack => src/llama_stack}/models/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/checkpoint.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/datatypes.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/hadamard_utils.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/args.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/chat_format.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/dog.jpg (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/generation.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/interface.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/model.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/multimodal/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/multimodal/encoder_utils.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/multimodal/image_transform.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/multimodal/model.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/multimodal/utils.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/pasta.jpeg (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/prompt_templates/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/prompt_templates/base.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/prompt_templates/system_prompts.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/prompt_templates/tool_response.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/quantization/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/quantization/loader.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/template_data.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/tokenizer.model (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/tokenizer.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3/tool_utils.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3_1/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3_1/prompt_format.md (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3_1/prompts.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3_2/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3_2/prompts_text.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3_2/prompts_vision.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3_2/text_prompt_format.md (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3_2/vision_prompt_format.md (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3_3/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama3_3/prompts.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/args.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/chat_format.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/datatypes.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/ffn.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/generation.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/model.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/moe.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/preprocess.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/prompt_format.md (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/prompt_templates/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/prompt_templates/system_prompts.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/prompts.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/quantization/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/quantization/loader.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/tokenizer.model (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/tokenizer.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/vision/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/vision/embedding.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/llama4/vision/encoder.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/prompt_format.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/quantize_impls.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/resources/dog.jpg (100%)
 rename {llama_stack => src/llama_stack}/models/llama/resources/pasta.jpeg (100%)
 rename {llama_stack => src/llama_stack}/models/llama/resources/small_dog.jpg (100%)
 rename {llama_stack => src/llama_stack}/models/llama/sku_list.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/sku_types.py (100%)
 rename {llama_stack => src/llama_stack}/models/llama/tokenizer_utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/datatypes.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/agent_instance.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/agents.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/persistence.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/responses/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/responses/openai_responses.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/responses/streaming.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/responses/tool_executor.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/responses/types.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/responses/utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/agents/meta_reference/safety.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/batches/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/batches/reference/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/batches/reference/batches.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/batches/reference/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/datasetio/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/datasetio/localfs/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/datasetio/localfs/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/datasetio/localfs/datasetio.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/eval/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/eval/meta_reference/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/eval/meta_reference/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/eval/meta_reference/eval.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/files/localfs/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/files/localfs/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/files/localfs/files.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/meta_reference/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/meta_reference/common.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/meta_reference/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/meta_reference/generators.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/meta_reference/inference.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/meta_reference/model_parallel.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/meta_reference/parallel_utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/sentence_transformers/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/sentence_transformers/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/inference/sentence_transformers/sentence_transformers.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/common/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/common/utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/common/validator.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/huggingface/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/huggingface/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/huggingface/post_training.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/huggingface/recipes/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/huggingface/recipes/finetune_single_device.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/huggingface/utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/common/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/common/checkpointer.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/common/utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/datasets/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/datasets/format_adapter.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/datasets/sft.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/post_training.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/recipes/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/safety/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/safety/code_scanner/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/safety/code_scanner/code_scanner.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/safety/code_scanner/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/safety/llama_guard/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/safety/llama_guard/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/safety/llama_guard/llama_guard.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/safety/prompt_guard/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/safety/prompt_guard/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/safety/prompt_guard/prompt_guard.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/utils/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/utils/ifeval_utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/basic/utils/math_utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/braintrust.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/llm_as_judge/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/llm_as_judge/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/llm_as_judge/scoring.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/tool_runtime/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/tool_runtime/rag/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/tool_runtime/rag/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/tool_runtime/rag/context_retriever.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/tool_runtime/rag/memory.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/chroma/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/chroma/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/faiss/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/faiss/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/faiss/faiss.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/milvus/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/milvus/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/qdrant/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/qdrant/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/sqlite_vec/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/sqlite_vec/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/inline/vector_io/sqlite_vec/sqlite_vec.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/agents.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/batches.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/datasetio.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/eval.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/files.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/inference.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/post_training.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/safety.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/scoring.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/tool_runtime.py (100%)
 rename {llama_stack => src/llama_stack}/providers/registry/vector_io.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/agents/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/datasetio/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/datasetio/huggingface/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/datasetio/huggingface/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/datasetio/huggingface/huggingface.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/datasetio/nvidia/README.md (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/datasetio/nvidia/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/datasetio/nvidia/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/datasetio/nvidia/datasetio.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/eval/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/eval/nvidia/README.md (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/eval/nvidia/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/eval/nvidia/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/eval/nvidia/eval.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/files/s3/README.md (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/files/s3/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/files/s3/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/files/s3/files.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/anthropic/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/anthropic/anthropic.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/anthropic/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/azure/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/azure/azure.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/azure/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/bedrock/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/bedrock/bedrock.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/bedrock/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/bedrock/models.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/cerebras/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/cerebras/cerebras.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/cerebras/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/databricks/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/databricks/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/databricks/databricks.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/fireworks/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/fireworks/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/fireworks/fireworks.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/gemini/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/gemini/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/gemini/gemini.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/groq/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/groq/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/groq/groq.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/llama_openai_compat/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/llama_openai_compat/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/llama_openai_compat/llama.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/nvidia/NVIDIA.md (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/nvidia/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/nvidia/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/nvidia/nvidia.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/nvidia/utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/ollama/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/ollama/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/ollama/ollama.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/openai/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/openai/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/openai/openai.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/passthrough/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/passthrough/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/passthrough/passthrough.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/runpod/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/runpod/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/runpod/runpod.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/sambanova/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/sambanova/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/sambanova/sambanova.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/tgi/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/tgi/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/tgi/tgi.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/together/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/together/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/together/together.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/vertexai/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/vertexai/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/vertexai/vertexai.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/vllm/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/vllm/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/vllm/vllm.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/watsonx/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/watsonx/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/inference/watsonx/watsonx.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/post_training/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/post_training/nvidia/README.md (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/post_training/nvidia/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/post_training/nvidia/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/post_training/nvidia/models.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/post_training/nvidia/post_training.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/post_training/nvidia/utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/bedrock/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/bedrock/bedrock.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/bedrock/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/nvidia/README.md (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/nvidia/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/nvidia/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/nvidia/nvidia.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/sambanova/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/sambanova/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/safety/sambanova/sambanova.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/bing_search/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/bing_search/bing_search.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/bing_search/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/brave_search/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/brave_search/brave_search.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/brave_search/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/model_context_protocol/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/model_context_protocol/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/tavily_search/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/tavily_search/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/tavily_search/tavily_search.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/wolfram_alpha/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/wolfram_alpha/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/chroma/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/chroma/chroma.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/chroma/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/milvus/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/milvus/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/milvus/milvus.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/pgvector/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/pgvector/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/pgvector/pgvector.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/qdrant/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/qdrant/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/qdrant/qdrant.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/weaviate/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/weaviate/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/remote/vector_io/weaviate/weaviate.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/bedrock/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/bedrock/client.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/bedrock/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/bedrock/refreshable_boto_session.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/common/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/common/data_schema_validator.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/datasetio/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/datasetio/url_utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/files/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/files/form_data.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/inference/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/inference/embedding_mixin.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/inference/inference_store.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/inference/litellm_openai_mixin.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/inference/model_registry.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/inference/openai_compat.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/inference/openai_mixin.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/inference/prompt_adapter.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/api.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/kvstore.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/mongodb/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/mongodb/mongodb.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/postgres/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/postgres/postgres.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/redis/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/redis/redis.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/sqlite/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/sqlite/config.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/kvstore/sqlite/sqlite.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/memory/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/memory/file_utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/memory/openai_vector_store_mixin.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/memory/vector_store.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/pagination.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/responses/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/responses/responses_store.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/scheduler.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/scoring/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/scoring/aggregation_utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/scoring/base_scoring_fn.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/scoring/basic_scoring_utils.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/sqlstore/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/sqlstore/api.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/sqlstore/authorized_sqlstore.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/sqlstore/sqlalchemy_sqlstore.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/sqlstore/sqlstore.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/tools/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/tools/mcp.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/tools/ttl_dict.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/vector_io/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/providers/utils/vector_io/vector_utils.py (100%)
 rename {llama_stack => src/llama_stack}/schema_utils.py (100%)
 rename {llama_stack => src/llama_stack}/strong_typing/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/strong_typing/auxiliary.py (89%)
 rename {llama_stack => src/llama_stack}/strong_typing/classdef.py (86%)
 rename {llama_stack => src/llama_stack}/strong_typing/core.py (80%)
 rename {llama_stack => src/llama_stack}/strong_typing/deserializer.py (89%)
 rename {llama_stack => src/llama_stack}/strong_typing/docstring.py (93%)
 rename {llama_stack => src/llama_stack}/strong_typing/exception.py (100%)
 rename {llama_stack => src/llama_stack}/strong_typing/inspection.py (91%)
 rename {llama_stack => src/llama_stack}/strong_typing/mapping.py (89%)
 rename {llama_stack => src/llama_stack}/strong_typing/name.py (95%)
 rename {llama_stack => src/llama_stack}/strong_typing/py.typed (100%)
 rename {llama_stack => src/llama_stack}/strong_typing/schema.py (92%)
 rename {llama_stack => src/llama_stack}/strong_typing/serialization.py (95%)
 rename {llama_stack => src/llama_stack}/strong_typing/serializer.py (87%)
 rename {llama_stack => src/llama_stack}/strong_typing/slots.py (71%)
 rename {llama_stack => src/llama_stack}/strong_typing/topological.py (88%)
 rename {llama_stack => src/llama_stack}/testing/__init__.py (100%)
 rename {llama_stack => src/llama_stack}/testing/api_recorder.py (99%)
 rename {llama_stack => src/llama_stack}/ui/.gitignore (100%)
 rename {llama_stack => src/llama_stack}/ui/.nvmrc (100%)
 rename {llama_stack => src/llama_stack}/ui/.prettierignore (100%)
 rename {llama_stack => src/llama_stack}/ui/.prettierrc (100%)
 rename {llama_stack => src/llama_stack}/ui/README.md (100%)
 rename {llama_stack => src/llama_stack}/ui/app/api/auth/[...nextauth]/route.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/app/api/v1/[...path]/route.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/app/auth/signin/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/chat-playground/chunk-processor.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/chat-playground/page.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/chat-playground/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/globals.css (100%)
 rename {llama_stack => src/llama_stack}/ui/app/layout.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/chat-completions/[id]/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/chat-completions/layout.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/chat-completions/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/responses/[id]/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/responses/layout.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/responses/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/vector-stores/[id]/files/[fileId]/page.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/vector-stores/[id]/files/[fileId]/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/vector-stores/[id]/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/vector-stores/layout.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/logs/vector-stores/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/app/page.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components.json (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-completions/chat-completion-detail.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-completions/chat-completion-detail.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-completions/chat-completion-table.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-completions/chat-completions-table.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-completions/chat-messasge-item.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/chat-message.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/chat.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/conversations.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/conversations.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/interrupt-prompt.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/markdown-renderer.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/message-components.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/message-input.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/message-list.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/prompt-suggestions.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/typing-indicator.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/chat-playground/vector-db-creator.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/layout/app-sidebar.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/layout/detail-layout.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/layout/logs-layout.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/layout/page-breadcrumb.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/logs/logs-table-scroll.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/logs/logs-table.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/logs/logs-table.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/providers/session-provider.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/grouping/grouped-items-display.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/hooks/function-call-grouping.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/items/function-call-item.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/items/generic-item.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/items/grouped-function-call-item.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/items/index.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/items/item-renderer.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/items/message-item.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/items/web-search-item.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/responses-detail.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/responses-detail.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/responses-table.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/responses-table.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/responses/utils/item-types.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/audio-visualizer.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/breadcrumb.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/button.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/card.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/collapsible.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/copy-button.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/dropdown-menu.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/file-preview.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/input.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/mode-toggle.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/select.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/separator.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/sheet.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/sidebar.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/sign-in-button.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/skeleton.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/sonner.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/table.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/theme-provider.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/ui/tooltip.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/vector-stores/vector-store-detail.test.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/components/vector-stores/vector-store-detail.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/e2e/logs-table-scroll.spec.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/eslint.config.mjs (100%)
 rename {llama_stack => src/llama_stack}/ui/hooks/use-audio-recording.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/hooks/use-auth-client.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/hooks/use-auto-scroll.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/hooks/use-autosize-textarea.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/hooks/use-copy-to-clipboard.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/hooks/use-infinite-scroll.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/hooks/use-mobile.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/hooks/use-pagination.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/instrumentation.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/jest.config.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/jest.setup.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/audio-utils.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/auth.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/config-validator.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/contents-api.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/format-message-content.test.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/format-message-content.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/format-tool-call.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/message-content-utils.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/truncate-text.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/types.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/lib/utils.tsx (100%)
 rename {llama_stack => src/llama_stack}/ui/next.config.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/package-lock.json (100%)
 rename {llama_stack => src/llama_stack}/ui/package.json (100%)
 rename {llama_stack => src/llama_stack}/ui/playwright.config.ts (100%)
 rename {llama_stack => src/llama_stack}/ui/postcss.config.mjs (100%)
 rename {llama_stack => src/llama_stack}/ui/public/favicon.ico (100%)
 rename {llama_stack => src/llama_stack}/ui/public/file.svg (100%)
 rename {llama_stack => src/llama_stack}/ui/public/globe.svg (100%)
 rename {llama_stack => src/llama_stack}/ui/public/logo.webp (100%)
 rename {llama_stack => src/llama_stack}/ui/public/next.svg (100%)
 rename {llama_stack => src/llama_stack}/ui/public/vercel.svg (100%)
 rename {llama_stack => src/llama_stack}/ui/public/window.svg (100%)
 rename {llama_stack => src/llama_stack}/ui/tsconfig.json (100%)
 rename {llama_stack => src/llama_stack}/ui/types/next-auth.d.ts (100%)

diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows/integration-auth-tests.yml
index c13ed6cbe..2de3fe9df 100644
--- a/.github/workflows/integration-auth-tests.yml
+++ b/.github/workflows/integration-auth-tests.yml
@@ -9,8 +9,8 @@ on:
     branches: [ main ]
     paths:
       - 'distributions/**'
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack/ui/**'
       - 'tests/integration/**'
       - 'uv.lock'
       - 'pyproject.toml'
diff --git a/.github/workflows/integration-sql-store-tests.yml b/.github/workflows/integration-sql-store-tests.yml
index 47f6d546a..0653b3fa8 100644
--- a/.github/workflows/integration-sql-store-tests.yml
+++ b/.github/workflows/integration-sql-store-tests.yml
@@ -8,7 +8,7 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/providers/utils/sqlstore/**'
+      - 'src/llama_stack/providers/utils/sqlstore/**'
       - 'tests/integration/sqlstore/**'
       - 'uv.lock'
       - 'pyproject.toml'
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
index d38e8337b..2b8965aad 100644
--- a/.github/workflows/integration-tests.yml
+++ b/.github/workflows/integration-tests.yml
@@ -9,8 +9,8 @@ on:
     branches: [ main ]
     types: [opened, synchronize, reopened]
     paths:
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack/ui/**'
       - 'tests/**'
       - 'uv.lock'
       - 'pyproject.toml'
diff --git a/.github/workflows/integration-vector-io-tests.yml b/.github/workflows/integration-vector-io-tests.yml
index ee837a159..0b4e174bc 100644
--- a/.github/workflows/integration-vector-io-tests.yml
+++ b/.github/workflows/integration-vector-io-tests.yml
@@ -8,8 +8,8 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack/ui/**'
       - 'tests/integration/vector_io/**'
       - 'uv.lock'
       - 'pyproject.toml'
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index 0fdd50acc..99ef87196 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -41,11 +41,11 @@ jobs:
         with:
           node-version: '20'
           cache: 'npm'
-          cache-dependency-path: 'llama_stack/ui/'
+          cache-dependency-path: 'src/llama_stack/ui/'
 
       - name: Install npm dependencies
         run: npm ci
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack/ui
 
       - name: Run pre-commit
         id: precommit
diff --git a/.github/workflows/precommit-trigger.yml b/.github/workflows/precommit-trigger.yml
index b05898d29..502230448 100644
--- a/.github/workflows/precommit-trigger.yml
+++ b/.github/workflows/precommit-trigger.yml
@@ -145,12 +145,12 @@ jobs:
         with:
           node-version: '20'
           cache: 'npm'
-          cache-dependency-path: 'llama_stack/ui/'
+          cache-dependency-path: 'src/llama_stack/ui/'
 
       - name: Install npm dependencies
         if: steps.check_author.outputs.authorized == 'true'
         run: npm ci
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack/ui
 
       - name: Run pre-commit
         if: steps.check_author.outputs.authorized == 'true'
diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml
index ffc44f9c1..2b2ca6330 100644
--- a/.github/workflows/providers-build.yml
+++ b/.github/workflows/providers-build.yml
@@ -7,24 +7,24 @@ on:
     branches:
       - main
     paths:
-      - 'llama_stack/cli/stack/build.py'
-      - 'llama_stack/cli/stack/_build.py'
-      - 'llama_stack/core/build.*'
-      - 'llama_stack/core/*.sh'
+      - 'src/llama_stack/cli/stack/build.py'
+      - 'src/llama_stack/cli/stack/_build.py'
+      - 'src/llama_stack/core/build.*'
+      - 'src/llama_stack/core/*.sh'
       - '.github/workflows/providers-build.yml'
-      - 'llama_stack/distributions/**'
+      - 'src/llama_stack/distributions/**'
       - 'pyproject.toml'
       - 'containers/Containerfile'
       - '.dockerignore'
 
   pull_request:
     paths:
-      - 'llama_stack/cli/stack/build.py'
-      - 'llama_stack/cli/stack/_build.py'
-      - 'llama_stack/core/build.*'
-      - 'llama_stack/core/*.sh'
+      - 'src/llama_stack/cli/stack/build.py'
+      - 'src/llama_stack/cli/stack/_build.py'
+      - 'src/llama_stack/core/build.*'
+      - 'src/llama_stack/core/*.sh'
       - '.github/workflows/providers-build.yml'
-      - 'llama_stack/distributions/**'
+      - 'src/llama_stack/distributions/**'
       - 'pyproject.toml'
       - 'containers/Containerfile'
       - '.dockerignore'
@@ -45,7 +45,7 @@ jobs:
       - name: Generate Distribution List
         id: set-matrix
         run: |
-          distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
+          distros=$(ls src/llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
           echo "distros=$distros" >> "$GITHUB_OUTPUT"
 
   build:
@@ -107,13 +107,13 @@ jobs:
 
       - name: Build container image
         run: |
-          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' llama_stack/distributions/ci-tests/build.yaml)
+          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' src/llama_stack/distributions/ci-tests/build.yaml)
           docker build . \
             -f containers/Containerfile \
             --build-arg INSTALL_MODE=editable \
             --build-arg DISTRO_NAME=ci-tests \
             --build-arg BASE_IMAGE="$BASE_IMAGE" \
-            --build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
+            --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml \
             -t llama-stack:ci-tests
 
       - name: Inspect the container image entrypoint
@@ -143,17 +143,17 @@ jobs:
         run: |
           yq -i '
             .distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest"
-          ' llama_stack/distributions/ci-tests/build.yaml
+          ' src/llama_stack/distributions/ci-tests/build.yaml
 
       - name: Build UBI9 container image
         run: |
-          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' llama_stack/distributions/ci-tests/build.yaml)
+          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' src/llama_stack/distributions/ci-tests/build.yaml)
           docker build . \
             -f containers/Containerfile \
             --build-arg INSTALL_MODE=editable \
             --build-arg DISTRO_NAME=ci-tests \
             --build-arg BASE_IMAGE="$BASE_IMAGE" \
-            --build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
+            --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml \
             -t llama-stack:ci-tests-ubi9
 
       - name: Inspect UBI9 image
diff --git a/.github/workflows/providers-list-deps.yml b/.github/workflows/providers-list-deps.yml
index e30e1e5fb..88659dbe3 100644
--- a/.github/workflows/providers-list-deps.yml
+++ b/.github/workflows/providers-list-deps.yml
@@ -7,22 +7,22 @@ on:
     branches:
       - main
     paths:
-      - 'llama_stack/cli/stack/list_deps.py'
-      - 'llama_stack/cli/stack/_list_deps.py'
-      - 'llama_stack/core/build.*'
-      - 'llama_stack/core/*.sh'
+      - 'src/llama_stack/cli/stack/list_deps.py'
+      - 'src/llama_stack/cli/stack/_list_deps.py'
+      - 'src/llama_stack/core/build.*'
+      - 'src/llama_stack/core/*.sh'
       - '.github/workflows/providers-list-deps.yml'
-      - 'llama_stack/templates/**'
+      - 'src/llama_stack/templates/**'
       - 'pyproject.toml'
 
   pull_request:
     paths:
-      - 'llama_stack/cli/stack/list_deps.py'
-      - 'llama_stack/cli/stack/_list_deps.py'
-      - 'llama_stack/core/build.*'
-      - 'llama_stack/core/*.sh'
+      - 'src/llama_stack/cli/stack/list_deps.py'
+      - 'src/llama_stack/cli/stack/_list_deps.py'
+      - 'src/llama_stack/core/build.*'
+      - 'src/llama_stack/core/*.sh'
       - '.github/workflows/providers-list-deps.yml'
-      - 'llama_stack/templates/**'
+      - 'src/llama_stack/templates/**'
       - 'pyproject.toml'
 
 concurrency:
@@ -41,7 +41,7 @@ jobs:
       - name: Generate Distribution List
         id: set-matrix
         run: |
-          distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
+          distros=$(ls src/llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
           echo "distros=$distros" >> "$GITHUB_OUTPUT"
 
   list-deps:
@@ -102,4 +102,4 @@ jobs:
           USE_COPY_NOT_MOUNT: "true"
           LLAMA_STACK_DIR: "."
         run: |
-          uv run llama stack list-deps llama_stack/distributions/ci-tests/build.yaml
+          uv run llama stack list-deps src/llama_stack/distributions/ci-tests/build.yaml
diff --git a/.github/workflows/python-build-test.yml b/.github/workflows/python-build-test.yml
index e36ea8780..49caea6b3 100644
--- a/.github/workflows/python-build-test.yml
+++ b/.github/workflows/python-build-test.yml
@@ -10,7 +10,7 @@ on:
     branches:
       - main
     paths-ignore:
-        - 'llama_stack/ui/**'
+        - 'src/llama_stack/ui/**'
 
 jobs:
   build:
diff --git a/.github/workflows/test-external-provider-module.yml b/.github/workflows/test-external-provider-module.yml
index ded29dc8f..39f2356aa 100644
--- a/.github/workflows/test-external-provider-module.yml
+++ b/.github/workflows/test-external-provider-module.yml
@@ -8,7 +8,7 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/**'
+      - 'src/llama_stack/**'
       - 'tests/integration/**'
       - 'uv.lock'
       - 'pyproject.toml'
diff --git a/.github/workflows/test-external.yml b/.github/workflows/test-external.yml
index 19cc2057d..d1d88c688 100644
--- a/.github/workflows/test-external.yml
+++ b/.github/workflows/test-external.yml
@@ -8,8 +8,8 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack/ui/**'
       - 'tests/integration/**'
       - 'uv.lock'
       - 'pyproject.toml'
diff --git a/.github/workflows/ui-unit-tests.yml b/.github/workflows/ui-unit-tests.yml
index e8f318b8e..a2ae1c2c3 100644
--- a/.github/workflows/ui-unit-tests.yml
+++ b/.github/workflows/ui-unit-tests.yml
@@ -8,7 +8,7 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/ui/**'
+      - 'src/llama_stack/ui/**'
       - '.github/workflows/ui-unit-tests.yml' # This workflow
   workflow_dispatch:
 
@@ -33,22 +33,22 @@ jobs:
         with:
           node-version: ${{ matrix.node-version }}
           cache: 'npm'
-          cache-dependency-path: 'llama_stack/ui/package-lock.json'
+          cache-dependency-path: 'src/llama_stack/ui/package-lock.json'
 
       - name: Install dependencies
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack/ui
         run: npm ci
 
       - name: Run linting
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack/ui
         run: npm run lint
 
       - name: Run format check
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack/ui
         run: npm run format:check
 
       - name: Run unit tests
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack/ui
         env:
           CI: true
 
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
index 4c3b68624..182643721 100644
--- a/.github/workflows/unit-tests.yml
+++ b/.github/workflows/unit-tests.yml
@@ -8,8 +8,8 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack/ui/**'
       - 'tests/unit/**'
       - 'uv.lock'
       - 'pyproject.toml'
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b7880a9fc..1aac22f6c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -42,7 +42,7 @@ repos:
     hooks:
     -   id: ruff
         args: [ --fix ]
-        exclude: ^llama_stack/strong_typing/.*$
+        exclude: ^src/llama_stack/strong_typing/.*$
     -   id: ruff-format
 
 -   repo: https://github.com/adamchainz/blacken-docs
@@ -86,7 +86,7 @@ repos:
         language: python
         pass_filenames: false
         require_serial: true
-        files: ^llama_stack/distributions/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
+        files: ^src/llama_stack/distributions/.*$|^src/llama_stack/providers/.*/inference/.*/models\.py$
       - id: provider-codegen
         name: Provider Codegen
         additional_dependencies:
@@ -95,7 +95,7 @@ repos:
         language: python
         pass_filenames: false
         require_serial: true
-        files: ^llama_stack/providers/.*$
+        files: ^src/llama_stack/providers/.*$
       - id: openapi-codegen
         name: API Spec Codegen
         additional_dependencies:
@@ -104,7 +104,7 @@ repos:
         language: python
         pass_filenames: false
         require_serial: true
-        files: ^llama_stack/apis/|^docs/openapi_generator/
+        files: ^src/llama_stack/apis/|^docs/openapi_generator/
       - id: check-workflows-use-hashes
         name: Check GitHub Actions use SHA-pinned actions
         entry: ./scripts/check-workflows-use-hashes.sh
@@ -120,7 +120,7 @@ repos:
         pass_filenames: false
         require_serial: true
         always_run: true
-        files: ^llama_stack/.*$
+        files: ^src/llama_stack/.*$
       - id: forbid-pytest-asyncio
         name: Block @pytest.mark.asyncio and @pytest_asyncio.fixture
         entry: bash
@@ -150,7 +150,7 @@ repos:
         name: Format & Lint UI
         entry: bash ./scripts/run-ui-linter.sh
         language: system
-        files: ^llama_stack/ui/.*\.(ts|tsx)$
+        files: ^src/llama_stack/ui/.*\.(ts|tsx)$
         pass_filenames: false
         require_serial: true
 
diff --git a/MANIFEST.in b/MANIFEST.in
index b10795c92..09206f2fb 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,11 +1,11 @@
 include pyproject.toml
-include llama_stack/models/llama/llama3/tokenizer.model
-include llama_stack/models/llama/llama4/tokenizer.model
-include llama_stack/core/*.sh
-include llama_stack/cli/scripts/*.sh
-include llama_stack/distributions/*/*.yaml
-exclude llama_stack/distributions/ci-tests
+include src/llama_stack/models/llama/llama3/tokenizer.model
+include src/llama_stack/models/llama/llama4/tokenizer.model
+include src/llama_stack/core/*.sh
+include src/llama_stack/cli/scripts/*.sh
+include src/llama_stack/distributions/*/*.yaml
+exclude src/llama_stack/distributions/ci-tests
 include tests/integration/test_cases/inference/*.json
-include llama_stack/models/llama/*/*.md
-include llama_stack/tests/integration/*.jpg
-prune llama_stack/distributions/ci-tests
+include src/llama_stack/models/llama/*/*.md
+include src/llama_stack/tests/integration/*.jpg
+prune src/llama_stack/distributions/ci-tests
diff --git a/pyproject.toml b/pyproject.toml
index 741dd17e5..9b26f7ae8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -150,7 +150,7 @@ llama = "llama_stack.cli.llama:main"
 install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_presigned"
 
 [tool.setuptools.packages.find]
-where = ["."]
+where = ["src"]
 include = ["llama_stack", "llama_stack.*"]
 
 [[tool.uv.index]]
@@ -217,17 +217,17 @@ unfixable = [
 # Ignore the following errors for the following files
 [tool.ruff.lint.per-file-ignores]
 "tests/**/*.py" = ["DTZ"] # Ignore datetime rules for tests
-"llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py" = ["RUF001"]
-"llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py" = [
+"src/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py" = ["RUF001"]
+"src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py" = [
     "RUF001",
     "PLE2515",
 ]
-"llama_stack/apis/**/__init__.py" = [
+"src/llama_stack/apis/**/__init__.py" = [
     "F403",
 ] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API
 
 [tool.mypy]
-mypy_path = ["llama_stack"]
+mypy_path = ["src"]
 packages = ["llama_stack"]
 plugins = ['pydantic.mypy']
 disable_error_code = []
@@ -239,77 +239,77 @@ follow_imports = "silent"
 # to exclude the entire directory.
 exclude = [
     # As we fix more and more of these, we should remove them from the list
-    "^llama_stack.core/build\\.py$",
-    "^llama_stack.core/client\\.py$",
-    "^llama_stack.core/request_headers\\.py$",
-    "^llama_stack.core/routers/",
-    "^llama_stack.core/routing_tables/",
-    "^llama_stack.core/server/endpoints\\.py$",
-    "^llama_stack.core/server/server\\.py$",
-    "^llama_stack.core/stack\\.py$",
-    "^llama_stack.core/store/registry\\.py$",
-    "^llama_stack.core/utils/exec\\.py$",
-    "^llama_stack.core/utils/prompt_for_config\\.py$",
-    "^llama_stack/models/llama/llama3/interface\\.py$",
-    "^llama_stack/models/llama/llama3/tokenizer\\.py$",
-    "^llama_stack/models/llama/llama3/tool_utils\\.py$",
-    "^llama_stack/providers/inline/agents/meta_reference/",
-    "^llama_stack/providers/inline/datasetio/localfs/",
-    "^llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
-    "^llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
-    "^llama_stack/models/llama/llama3/generation\\.py$",
-    "^llama_stack/models/llama/llama3/multimodal/model\\.py$",
-    "^llama_stack/models/llama/llama4/",
-    "^llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$",
-    "^llama_stack/providers/inline/post_training/common/validator\\.py$",
-    "^llama_stack/providers/inline/safety/code_scanner/",
-    "^llama_stack/providers/inline/safety/llama_guard/",
-    "^llama_stack/providers/inline/scoring/basic/",
-    "^llama_stack/providers/inline/scoring/braintrust/",
-    "^llama_stack/providers/inline/scoring/llm_as_judge/",
-    "^llama_stack/providers/remote/agents/sample/",
-    "^llama_stack/providers/remote/datasetio/huggingface/",
-    "^llama_stack/providers/remote/datasetio/nvidia/",
-    "^llama_stack/providers/remote/inference/bedrock/",
-    "^llama_stack/providers/remote/inference/nvidia/",
-    "^llama_stack/providers/remote/inference/passthrough/",
-    "^llama_stack/providers/remote/inference/runpod/",
-    "^llama_stack/providers/remote/inference/tgi/",
-    "^llama_stack/providers/remote/inference/watsonx/",
-    "^llama_stack/providers/remote/safety/bedrock/",
-    "^llama_stack/providers/remote/safety/nvidia/",
-    "^llama_stack/providers/remote/safety/sambanova/",
-    "^llama_stack/providers/remote/safety/sample/",
-    "^llama_stack/providers/remote/tool_runtime/bing_search/",
-    "^llama_stack/providers/remote/tool_runtime/brave_search/",
-    "^llama_stack/providers/remote/tool_runtime/model_context_protocol/",
-    "^llama_stack/providers/remote/tool_runtime/tavily_search/",
-    "^llama_stack/providers/remote/tool_runtime/wolfram_alpha/",
-    "^llama_stack/providers/remote/post_training/nvidia/",
-    "^llama_stack/providers/remote/vector_io/chroma/",
-    "^llama_stack/providers/remote/vector_io/milvus/",
-    "^llama_stack/providers/remote/vector_io/pgvector/",
-    "^llama_stack/providers/remote/vector_io/qdrant/",
-    "^llama_stack/providers/remote/vector_io/sample/",
-    "^llama_stack/providers/remote/vector_io/weaviate/",
-    "^llama_stack/providers/utils/bedrock/client\\.py$",
-    "^llama_stack/providers/utils/bedrock/refreshable_boto_session\\.py$",
-    "^llama_stack/providers/utils/inference/embedding_mixin\\.py$",
-    "^llama_stack/providers/utils/inference/litellm_openai_mixin\\.py$",
-    "^llama_stack/providers/utils/inference/model_registry\\.py$",
-    "^llama_stack/providers/utils/inference/openai_compat\\.py$",
-    "^llama_stack/providers/utils/inference/prompt_adapter\\.py$",
-    "^llama_stack/providers/utils/kvstore/kvstore\\.py$",
-    "^llama_stack/providers/utils/kvstore/postgres/postgres\\.py$",
-    "^llama_stack/providers/utils/kvstore/redis/redis\\.py$",
-    "^llama_stack/providers/utils/memory/vector_store\\.py$",
-    "^llama_stack/providers/utils/scoring/aggregation_utils\\.py$",
-    "^llama_stack/providers/utils/scoring/base_scoring_fn\\.py$",
-    "^llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
-    "^llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
-    "^llama_stack/providers/utils/telemetry/tracing\\.py$",
-    "^llama_stack/strong_typing/auxiliary\\.py$",
-    "^llama_stack/distributions/template\\.py$",
+    "^src/llama_stack/core/build\\.py$",
+    "^src/llama_stack/core/client\\.py$",
+    "^src/llama_stack/core/request_headers\\.py$",
+    "^src/llama_stack/core/routers/",
+    "^src/llama_stack/core/routing_tables/",
+    "^src/llama_stack/core/server/endpoints\\.py$",
+    "^src/llama_stack/core/server/server\\.py$",
+    "^src/llama_stack/core/stack\\.py$",
+    "^src/llama_stack/core/store/registry\\.py$",
+    "^src/llama_stack/core/utils/exec\\.py$",
+    "^src/llama_stack/core/utils/prompt_for_config\\.py$",
+    "^src/llama_stack/models/llama/llama3/interface\\.py$",
+    "^src/llama_stack/models/llama/llama3/tokenizer\\.py$",
+    "^src/llama_stack/models/llama/llama3/tool_utils\\.py$",
+    "^src/llama_stack/providers/inline/agents/meta_reference/",
+    "^src/llama_stack/providers/inline/datasetio/localfs/",
+    "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
+    "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
+    "^src/llama_stack/models/llama/llama3/generation\\.py$",
+    "^src/llama_stack/models/llama/llama3/multimodal/model\\.py$",
+    "^src/llama_stack/models/llama/llama4/",
+    "^src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$",
+    "^src/llama_stack/providers/inline/post_training/common/validator\\.py$",
+    "^src/llama_stack/providers/inline/safety/code_scanner/",
+    "^src/llama_stack/providers/inline/safety/llama_guard/",
+    "^src/llama_stack/providers/inline/scoring/basic/",
+    "^src/llama_stack/providers/inline/scoring/braintrust/",
+    "^src/llama_stack/providers/inline/scoring/llm_as_judge/",
+    "^src/llama_stack/providers/remote/agents/sample/",
+    "^src/llama_stack/providers/remote/datasetio/huggingface/",
+    "^src/llama_stack/providers/remote/datasetio/nvidia/",
+    "^src/llama_stack/providers/remote/inference/bedrock/",
+    "^src/llama_stack/providers/remote/inference/nvidia/",
+    "^src/llama_stack/providers/remote/inference/passthrough/",
+    "^src/llama_stack/providers/remote/inference/runpod/",
+    "^src/llama_stack/providers/remote/inference/tgi/",
+    "^src/llama_stack/providers/remote/inference/watsonx/",
+    "^src/llama_stack/providers/remote/safety/bedrock/",
+    "^src/llama_stack/providers/remote/safety/nvidia/",
+    "^src/llama_stack/providers/remote/safety/sambanova/",
+    "^src/llama_stack/providers/remote/safety/sample/",
+    "^src/llama_stack/providers/remote/tool_runtime/bing_search/",
+    "^src/llama_stack/providers/remote/tool_runtime/brave_search/",
+    "^src/llama_stack/providers/remote/tool_runtime/model_context_protocol/",
+    "^src/llama_stack/providers/remote/tool_runtime/tavily_search/",
+    "^src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/",
+    "^src/llama_stack/providers/remote/post_training/nvidia/",
+    "^src/llama_stack/providers/remote/vector_io/chroma/",
+    "^src/llama_stack/providers/remote/vector_io/milvus/",
+    "^src/llama_stack/providers/remote/vector_io/pgvector/",
+    "^src/llama_stack/providers/remote/vector_io/qdrant/",
+    "^src/llama_stack/providers/remote/vector_io/sample/",
+    "^src/llama_stack/providers/remote/vector_io/weaviate/",
+    "^src/llama_stack/providers/utils/bedrock/client\\.py$",
+    "^src/llama_stack/providers/utils/bedrock/refreshable_boto_session\\.py$",
+    "^src/llama_stack/providers/utils/inference/embedding_mixin\\.py$",
+    "^src/llama_stack/providers/utils/inference/litellm_openai_mixin\\.py$",
+    "^src/llama_stack/providers/utils/inference/model_registry\\.py$",
+    "^src/llama_stack/providers/utils/inference/openai_compat\\.py$",
+    "^src/llama_stack/providers/utils/inference/prompt_adapter\\.py$",
+    "^src/llama_stack/providers/utils/kvstore/kvstore\\.py$",
+    "^src/llama_stack/providers/utils/kvstore/postgres/postgres\\.py$",
+    "^src/llama_stack/providers/utils/kvstore/redis/redis\\.py$",
+    "^src/llama_stack/providers/utils/memory/vector_store\\.py$",
+    "^src/llama_stack/providers/utils/scoring/aggregation_utils\\.py$",
+    "^src/llama_stack/providers/utils/scoring/base_scoring_fn\\.py$",
+    "^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
+    "^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
+    "^src/llama_stack/providers/utils/telemetry/tracing\\.py$",
+    "^src/llama_stack/strong_typing/auxiliary\\.py$",
+    "^src/llama_stack/distributions/template\\.py$",
 ]
 
 [[tool.mypy.overrides]]
diff --git a/scripts/check-init-py.sh b/scripts/check-init-py.sh
index c6e8fd417..76b3ed8e2 100755
--- a/scripts/check-init-py.sh
+++ b/scripts/check-init-py.sh
@@ -16,7 +16,7 @@ if (( BASH_VERSINFO[0] < 4 )); then
     exit 1
 fi
 
-PACKAGE_DIR="${1:-llama_stack}"
+PACKAGE_DIR="${1:-src/llama_stack}"
 
 if [ ! -d "$PACKAGE_DIR" ]; then
     echo "ERROR: Package directory '$PACKAGE_DIR' does not exist"
diff --git a/scripts/distro_codegen.py b/scripts/distro_codegen.py
index ff5025b78..68190c7f5 100755
--- a/scripts/distro_codegen.py
+++ b/scripts/distro_codegen.py
@@ -93,7 +93,7 @@ def pre_import_distros(distro_dirs: list[Path]) -> None:
 
 
 def main():
-    distros_dir = REPO_ROOT / "llama_stack" / "distributions"
+    distros_dir = REPO_ROOT / "src" / "llama_stack" / "distributions"
     change_tracker = ChangedPathTracker()
 
     with Progress(
diff --git a/scripts/run-ui-linter.sh b/scripts/run-ui-linter.sh
index 3ced4483b..b63c44e7a 100755
--- a/scripts/run-ui-linter.sh
+++ b/scripts/run-ui-linter.sh
@@ -6,7 +6,7 @@
 # the root directory of this source tree.
 
 set -e
-cd llama_stack/ui
+cd src/llama_stack/ui
 
 if [ ! -d node_modules ] || [ ! -x node_modules/.bin/prettier ] || [ ! -x node_modules/.bin/eslint ]; then
   echo "UI dependencies not installed, skipping prettier/linter check"
diff --git a/scripts/unit-tests.sh b/scripts/unit-tests.sh
index ff42d3039..481c6fc95 100755
--- a/scripts/unit-tests.sh
+++ b/scripts/unit-tests.sh
@@ -27,4 +27,4 @@ fi
 
 # Run unit tests with coverage
 uv run --python "$PYTHON_VERSION" --with-editable . --group unit \
-    coverage run --source=llama_stack -m pytest -s -v tests/unit/ "$@"
+    coverage run --source=src/llama_stack -m pytest -s -v tests/unit/ "$@"
diff --git a/llama_stack/__init__.py b/src/llama_stack/__init__.py
similarity index 100%
rename from llama_stack/__init__.py
rename to src/llama_stack/__init__.py
diff --git a/llama_stack/apis/__init__.py b/src/llama_stack/apis/__init__.py
similarity index 100%
rename from llama_stack/apis/__init__.py
rename to src/llama_stack/apis/__init__.py
diff --git a/llama_stack/apis/agents/__init__.py b/src/llama_stack/apis/agents/__init__.py
similarity index 100%
rename from llama_stack/apis/agents/__init__.py
rename to src/llama_stack/apis/agents/__init__.py
diff --git a/llama_stack/apis/agents/agents.py b/src/llama_stack/apis/agents/agents.py
similarity index 100%
rename from llama_stack/apis/agents/agents.py
rename to src/llama_stack/apis/agents/agents.py
diff --git a/llama_stack/apis/agents/openai_responses.py b/src/llama_stack/apis/agents/openai_responses.py
similarity index 100%
rename from llama_stack/apis/agents/openai_responses.py
rename to src/llama_stack/apis/agents/openai_responses.py
diff --git a/llama_stack/apis/batches/__init__.py b/src/llama_stack/apis/batches/__init__.py
similarity index 100%
rename from llama_stack/apis/batches/__init__.py
rename to src/llama_stack/apis/batches/__init__.py
diff --git a/llama_stack/apis/batches/batches.py b/src/llama_stack/apis/batches/batches.py
similarity index 100%
rename from llama_stack/apis/batches/batches.py
rename to src/llama_stack/apis/batches/batches.py
diff --git a/llama_stack/apis/benchmarks/__init__.py b/src/llama_stack/apis/benchmarks/__init__.py
similarity index 100%
rename from llama_stack/apis/benchmarks/__init__.py
rename to src/llama_stack/apis/benchmarks/__init__.py
diff --git a/llama_stack/apis/benchmarks/benchmarks.py b/src/llama_stack/apis/benchmarks/benchmarks.py
similarity index 100%
rename from llama_stack/apis/benchmarks/benchmarks.py
rename to src/llama_stack/apis/benchmarks/benchmarks.py
diff --git a/llama_stack/apis/common/__init__.py b/src/llama_stack/apis/common/__init__.py
similarity index 100%
rename from llama_stack/apis/common/__init__.py
rename to src/llama_stack/apis/common/__init__.py
diff --git a/llama_stack/apis/common/content_types.py b/src/llama_stack/apis/common/content_types.py
similarity index 100%
rename from llama_stack/apis/common/content_types.py
rename to src/llama_stack/apis/common/content_types.py
diff --git a/llama_stack/apis/common/errors.py b/src/llama_stack/apis/common/errors.py
similarity index 100%
rename from llama_stack/apis/common/errors.py
rename to src/llama_stack/apis/common/errors.py
diff --git a/llama_stack/apis/common/job_types.py b/src/llama_stack/apis/common/job_types.py
similarity index 100%
rename from llama_stack/apis/common/job_types.py
rename to src/llama_stack/apis/common/job_types.py
diff --git a/llama_stack/apis/common/responses.py b/src/llama_stack/apis/common/responses.py
similarity index 100%
rename from llama_stack/apis/common/responses.py
rename to src/llama_stack/apis/common/responses.py
diff --git a/llama_stack/apis/common/training_types.py b/src/llama_stack/apis/common/training_types.py
similarity index 100%
rename from llama_stack/apis/common/training_types.py
rename to src/llama_stack/apis/common/training_types.py
diff --git a/llama_stack/apis/common/type_system.py b/src/llama_stack/apis/common/type_system.py
similarity index 100%
rename from llama_stack/apis/common/type_system.py
rename to src/llama_stack/apis/common/type_system.py
diff --git a/llama_stack/apis/conversations/__init__.py b/src/llama_stack/apis/conversations/__init__.py
similarity index 100%
rename from llama_stack/apis/conversations/__init__.py
rename to src/llama_stack/apis/conversations/__init__.py
diff --git a/llama_stack/apis/conversations/conversations.py b/src/llama_stack/apis/conversations/conversations.py
similarity index 100%
rename from llama_stack/apis/conversations/conversations.py
rename to src/llama_stack/apis/conversations/conversations.py
diff --git a/llama_stack/apis/datasetio/__init__.py b/src/llama_stack/apis/datasetio/__init__.py
similarity index 100%
rename from llama_stack/apis/datasetio/__init__.py
rename to src/llama_stack/apis/datasetio/__init__.py
diff --git a/llama_stack/apis/datasetio/datasetio.py b/src/llama_stack/apis/datasetio/datasetio.py
similarity index 100%
rename from llama_stack/apis/datasetio/datasetio.py
rename to src/llama_stack/apis/datasetio/datasetio.py
diff --git a/llama_stack/apis/datasets/__init__.py b/src/llama_stack/apis/datasets/__init__.py
similarity index 100%
rename from llama_stack/apis/datasets/__init__.py
rename to src/llama_stack/apis/datasets/__init__.py
diff --git a/llama_stack/apis/datasets/datasets.py b/src/llama_stack/apis/datasets/datasets.py
similarity index 100%
rename from llama_stack/apis/datasets/datasets.py
rename to src/llama_stack/apis/datasets/datasets.py
diff --git a/llama_stack/apis/datatypes.py b/src/llama_stack/apis/datatypes.py
similarity index 100%
rename from llama_stack/apis/datatypes.py
rename to src/llama_stack/apis/datatypes.py
diff --git a/llama_stack/apis/eval/__init__.py b/src/llama_stack/apis/eval/__init__.py
similarity index 100%
rename from llama_stack/apis/eval/__init__.py
rename to src/llama_stack/apis/eval/__init__.py
diff --git a/llama_stack/apis/eval/eval.py b/src/llama_stack/apis/eval/eval.py
similarity index 100%
rename from llama_stack/apis/eval/eval.py
rename to src/llama_stack/apis/eval/eval.py
diff --git a/llama_stack/apis/files/__init__.py b/src/llama_stack/apis/files/__init__.py
similarity index 100%
rename from llama_stack/apis/files/__init__.py
rename to src/llama_stack/apis/files/__init__.py
diff --git a/llama_stack/apis/files/files.py b/src/llama_stack/apis/files/files.py
similarity index 100%
rename from llama_stack/apis/files/files.py
rename to src/llama_stack/apis/files/files.py
diff --git a/llama_stack/apis/inference/__init__.py b/src/llama_stack/apis/inference/__init__.py
similarity index 100%
rename from llama_stack/apis/inference/__init__.py
rename to src/llama_stack/apis/inference/__init__.py
diff --git a/llama_stack/apis/inference/event_logger.py b/src/llama_stack/apis/inference/event_logger.py
similarity index 100%
rename from llama_stack/apis/inference/event_logger.py
rename to src/llama_stack/apis/inference/event_logger.py
diff --git a/llama_stack/apis/inference/inference.py b/src/llama_stack/apis/inference/inference.py
similarity index 100%
rename from llama_stack/apis/inference/inference.py
rename to src/llama_stack/apis/inference/inference.py
diff --git a/llama_stack/apis/inspect/__init__.py b/src/llama_stack/apis/inspect/__init__.py
similarity index 100%
rename from llama_stack/apis/inspect/__init__.py
rename to src/llama_stack/apis/inspect/__init__.py
diff --git a/llama_stack/apis/inspect/inspect.py b/src/llama_stack/apis/inspect/inspect.py
similarity index 100%
rename from llama_stack/apis/inspect/inspect.py
rename to src/llama_stack/apis/inspect/inspect.py
diff --git a/llama_stack/apis/models/__init__.py b/src/llama_stack/apis/models/__init__.py
similarity index 100%
rename from llama_stack/apis/models/__init__.py
rename to src/llama_stack/apis/models/__init__.py
diff --git a/llama_stack/apis/models/models.py b/src/llama_stack/apis/models/models.py
similarity index 100%
rename from llama_stack/apis/models/models.py
rename to src/llama_stack/apis/models/models.py
diff --git a/llama_stack/apis/post_training/__init__.py b/src/llama_stack/apis/post_training/__init__.py
similarity index 100%
rename from llama_stack/apis/post_training/__init__.py
rename to src/llama_stack/apis/post_training/__init__.py
diff --git a/llama_stack/apis/post_training/post_training.py b/src/llama_stack/apis/post_training/post_training.py
similarity index 100%
rename from llama_stack/apis/post_training/post_training.py
rename to src/llama_stack/apis/post_training/post_training.py
diff --git a/llama_stack/apis/prompts/__init__.py b/src/llama_stack/apis/prompts/__init__.py
similarity index 100%
rename from llama_stack/apis/prompts/__init__.py
rename to src/llama_stack/apis/prompts/__init__.py
diff --git a/llama_stack/apis/prompts/prompts.py b/src/llama_stack/apis/prompts/prompts.py
similarity index 100%
rename from llama_stack/apis/prompts/prompts.py
rename to src/llama_stack/apis/prompts/prompts.py
diff --git a/llama_stack/apis/providers/__init__.py b/src/llama_stack/apis/providers/__init__.py
similarity index 100%
rename from llama_stack/apis/providers/__init__.py
rename to src/llama_stack/apis/providers/__init__.py
diff --git a/llama_stack/apis/providers/providers.py b/src/llama_stack/apis/providers/providers.py
similarity index 100%
rename from llama_stack/apis/providers/providers.py
rename to src/llama_stack/apis/providers/providers.py
diff --git a/llama_stack/apis/resource.py b/src/llama_stack/apis/resource.py
similarity index 100%
rename from llama_stack/apis/resource.py
rename to src/llama_stack/apis/resource.py
diff --git a/llama_stack/apis/safety/__init__.py b/src/llama_stack/apis/safety/__init__.py
similarity index 100%
rename from llama_stack/apis/safety/__init__.py
rename to src/llama_stack/apis/safety/__init__.py
diff --git a/llama_stack/apis/safety/safety.py b/src/llama_stack/apis/safety/safety.py
similarity index 100%
rename from llama_stack/apis/safety/safety.py
rename to src/llama_stack/apis/safety/safety.py
diff --git a/llama_stack/apis/scoring/__init__.py b/src/llama_stack/apis/scoring/__init__.py
similarity index 100%
rename from llama_stack/apis/scoring/__init__.py
rename to src/llama_stack/apis/scoring/__init__.py
diff --git a/llama_stack/apis/scoring/scoring.py b/src/llama_stack/apis/scoring/scoring.py
similarity index 100%
rename from llama_stack/apis/scoring/scoring.py
rename to src/llama_stack/apis/scoring/scoring.py
diff --git a/llama_stack/apis/scoring_functions/__init__.py b/src/llama_stack/apis/scoring_functions/__init__.py
similarity index 100%
rename from llama_stack/apis/scoring_functions/__init__.py
rename to src/llama_stack/apis/scoring_functions/__init__.py
diff --git a/llama_stack/apis/scoring_functions/scoring_functions.py b/src/llama_stack/apis/scoring_functions/scoring_functions.py
similarity index 100%
rename from llama_stack/apis/scoring_functions/scoring_functions.py
rename to src/llama_stack/apis/scoring_functions/scoring_functions.py
diff --git a/llama_stack/apis/shields/__init__.py b/src/llama_stack/apis/shields/__init__.py
similarity index 100%
rename from llama_stack/apis/shields/__init__.py
rename to src/llama_stack/apis/shields/__init__.py
diff --git a/llama_stack/apis/shields/shields.py b/src/llama_stack/apis/shields/shields.py
similarity index 100%
rename from llama_stack/apis/shields/shields.py
rename to src/llama_stack/apis/shields/shields.py
diff --git a/llama_stack/apis/synthetic_data_generation/__init__.py b/src/llama_stack/apis/synthetic_data_generation/__init__.py
similarity index 100%
rename from llama_stack/apis/synthetic_data_generation/__init__.py
rename to src/llama_stack/apis/synthetic_data_generation/__init__.py
diff --git a/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py b/src/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
similarity index 100%
rename from llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
rename to src/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
diff --git a/llama_stack/apis/telemetry/__init__.py b/src/llama_stack/apis/telemetry/__init__.py
similarity index 100%
rename from llama_stack/apis/telemetry/__init__.py
rename to src/llama_stack/apis/telemetry/__init__.py
diff --git a/llama_stack/apis/telemetry/telemetry.py b/src/llama_stack/apis/telemetry/telemetry.py
similarity index 100%
rename from llama_stack/apis/telemetry/telemetry.py
rename to src/llama_stack/apis/telemetry/telemetry.py
diff --git a/llama_stack/apis/tools/__init__.py b/src/llama_stack/apis/tools/__init__.py
similarity index 100%
rename from llama_stack/apis/tools/__init__.py
rename to src/llama_stack/apis/tools/__init__.py
diff --git a/llama_stack/apis/tools/rag_tool.py b/src/llama_stack/apis/tools/rag_tool.py
similarity index 100%
rename from llama_stack/apis/tools/rag_tool.py
rename to src/llama_stack/apis/tools/rag_tool.py
diff --git a/llama_stack/apis/tools/tools.py b/src/llama_stack/apis/tools/tools.py
similarity index 100%
rename from llama_stack/apis/tools/tools.py
rename to src/llama_stack/apis/tools/tools.py
diff --git a/llama_stack/apis/vector_io/__init__.py b/src/llama_stack/apis/vector_io/__init__.py
similarity index 100%
rename from llama_stack/apis/vector_io/__init__.py
rename to src/llama_stack/apis/vector_io/__init__.py
diff --git a/llama_stack/apis/vector_io/vector_io.py b/src/llama_stack/apis/vector_io/vector_io.py
similarity index 100%
rename from llama_stack/apis/vector_io/vector_io.py
rename to src/llama_stack/apis/vector_io/vector_io.py
diff --git a/llama_stack/apis/vector_stores/__init__.py b/src/llama_stack/apis/vector_stores/__init__.py
similarity index 100%
rename from llama_stack/apis/vector_stores/__init__.py
rename to src/llama_stack/apis/vector_stores/__init__.py
diff --git a/llama_stack/apis/vector_stores/vector_stores.py b/src/llama_stack/apis/vector_stores/vector_stores.py
similarity index 100%
rename from llama_stack/apis/vector_stores/vector_stores.py
rename to src/llama_stack/apis/vector_stores/vector_stores.py
diff --git a/llama_stack/apis/version.py b/src/llama_stack/apis/version.py
similarity index 100%
rename from llama_stack/apis/version.py
rename to src/llama_stack/apis/version.py
diff --git a/llama_stack/cli/__init__.py b/src/llama_stack/cli/__init__.py
similarity index 100%
rename from llama_stack/cli/__init__.py
rename to src/llama_stack/cli/__init__.py
diff --git a/llama_stack/cli/llama.py b/src/llama_stack/cli/llama.py
similarity index 100%
rename from llama_stack/cli/llama.py
rename to src/llama_stack/cli/llama.py
diff --git a/llama_stack/cli/scripts/__init__.py b/src/llama_stack/cli/scripts/__init__.py
similarity index 100%
rename from llama_stack/cli/scripts/__init__.py
rename to src/llama_stack/cli/scripts/__init__.py
diff --git a/llama_stack/cli/scripts/install-wheel-from-presigned.sh b/src/llama_stack/cli/scripts/install-wheel-from-presigned.sh
similarity index 100%
rename from llama_stack/cli/scripts/install-wheel-from-presigned.sh
rename to src/llama_stack/cli/scripts/install-wheel-from-presigned.sh
diff --git a/llama_stack/cli/scripts/run.py b/src/llama_stack/cli/scripts/run.py
similarity index 100%
rename from llama_stack/cli/scripts/run.py
rename to src/llama_stack/cli/scripts/run.py
diff --git a/llama_stack/cli/stack/__init__.py b/src/llama_stack/cli/stack/__init__.py
similarity index 100%
rename from llama_stack/cli/stack/__init__.py
rename to src/llama_stack/cli/stack/__init__.py
diff --git a/llama_stack/cli/stack/_list_deps.py b/src/llama_stack/cli/stack/_list_deps.py
similarity index 100%
rename from llama_stack/cli/stack/_list_deps.py
rename to src/llama_stack/cli/stack/_list_deps.py
diff --git a/llama_stack/cli/stack/list_apis.py b/src/llama_stack/cli/stack/list_apis.py
similarity index 100%
rename from llama_stack/cli/stack/list_apis.py
rename to src/llama_stack/cli/stack/list_apis.py
diff --git a/llama_stack/cli/stack/list_deps.py b/src/llama_stack/cli/stack/list_deps.py
similarity index 100%
rename from llama_stack/cli/stack/list_deps.py
rename to src/llama_stack/cli/stack/list_deps.py
diff --git a/llama_stack/cli/stack/list_providers.py b/src/llama_stack/cli/stack/list_providers.py
similarity index 100%
rename from llama_stack/cli/stack/list_providers.py
rename to src/llama_stack/cli/stack/list_providers.py
diff --git a/llama_stack/cli/stack/list_stacks.py b/src/llama_stack/cli/stack/list_stacks.py
similarity index 100%
rename from llama_stack/cli/stack/list_stacks.py
rename to src/llama_stack/cli/stack/list_stacks.py
diff --git a/llama_stack/cli/stack/remove.py b/src/llama_stack/cli/stack/remove.py
similarity index 100%
rename from llama_stack/cli/stack/remove.py
rename to src/llama_stack/cli/stack/remove.py
diff --git a/llama_stack/cli/stack/run.py b/src/llama_stack/cli/stack/run.py
similarity index 100%
rename from llama_stack/cli/stack/run.py
rename to src/llama_stack/cli/stack/run.py
diff --git a/llama_stack/cli/stack/stack.py b/src/llama_stack/cli/stack/stack.py
similarity index 100%
rename from llama_stack/cli/stack/stack.py
rename to src/llama_stack/cli/stack/stack.py
diff --git a/llama_stack/cli/stack/utils.py b/src/llama_stack/cli/stack/utils.py
similarity index 100%
rename from llama_stack/cli/stack/utils.py
rename to src/llama_stack/cli/stack/utils.py
diff --git a/llama_stack/cli/subcommand.py b/src/llama_stack/cli/subcommand.py
similarity index 100%
rename from llama_stack/cli/subcommand.py
rename to src/llama_stack/cli/subcommand.py
diff --git a/llama_stack/cli/table.py b/src/llama_stack/cli/table.py
similarity index 100%
rename from llama_stack/cli/table.py
rename to src/llama_stack/cli/table.py
diff --git a/llama_stack/cli/utils.py b/src/llama_stack/cli/utils.py
similarity index 100%
rename from llama_stack/cli/utils.py
rename to src/llama_stack/cli/utils.py
diff --git a/llama_stack/core/__init__.py b/src/llama_stack/core/__init__.py
similarity index 100%
rename from llama_stack/core/__init__.py
rename to src/llama_stack/core/__init__.py
diff --git a/llama_stack/core/access_control/__init__.py b/src/llama_stack/core/access_control/__init__.py
similarity index 100%
rename from llama_stack/core/access_control/__init__.py
rename to src/llama_stack/core/access_control/__init__.py
diff --git a/llama_stack/core/access_control/access_control.py b/src/llama_stack/core/access_control/access_control.py
similarity index 100%
rename from llama_stack/core/access_control/access_control.py
rename to src/llama_stack/core/access_control/access_control.py
diff --git a/llama_stack/core/access_control/conditions.py b/src/llama_stack/core/access_control/conditions.py
similarity index 100%
rename from llama_stack/core/access_control/conditions.py
rename to src/llama_stack/core/access_control/conditions.py
diff --git a/llama_stack/core/access_control/datatypes.py b/src/llama_stack/core/access_control/datatypes.py
similarity index 100%
rename from llama_stack/core/access_control/datatypes.py
rename to src/llama_stack/core/access_control/datatypes.py
diff --git a/llama_stack/core/build.py b/src/llama_stack/core/build.py
similarity index 100%
rename from llama_stack/core/build.py
rename to src/llama_stack/core/build.py
diff --git a/llama_stack/core/client.py b/src/llama_stack/core/client.py
similarity index 100%
rename from llama_stack/core/client.py
rename to src/llama_stack/core/client.py
diff --git a/llama_stack/core/common.sh b/src/llama_stack/core/common.sh
similarity index 100%
rename from llama_stack/core/common.sh
rename to src/llama_stack/core/common.sh
diff --git a/llama_stack/core/configure.py b/src/llama_stack/core/configure.py
similarity index 100%
rename from llama_stack/core/configure.py
rename to src/llama_stack/core/configure.py
diff --git a/llama_stack/core/conversations/__init__.py b/src/llama_stack/core/conversations/__init__.py
similarity index 100%
rename from llama_stack/core/conversations/__init__.py
rename to src/llama_stack/core/conversations/__init__.py
diff --git a/llama_stack/core/conversations/conversations.py b/src/llama_stack/core/conversations/conversations.py
similarity index 100%
rename from llama_stack/core/conversations/conversations.py
rename to src/llama_stack/core/conversations/conversations.py
diff --git a/llama_stack/core/datatypes.py b/src/llama_stack/core/datatypes.py
similarity index 100%
rename from llama_stack/core/datatypes.py
rename to src/llama_stack/core/datatypes.py
diff --git a/llama_stack/core/distribution.py b/src/llama_stack/core/distribution.py
similarity index 100%
rename from llama_stack/core/distribution.py
rename to src/llama_stack/core/distribution.py
diff --git a/llama_stack/core/external.py b/src/llama_stack/core/external.py
similarity index 100%
rename from llama_stack/core/external.py
rename to src/llama_stack/core/external.py
diff --git a/llama_stack/core/id_generation.py b/src/llama_stack/core/id_generation.py
similarity index 100%
rename from llama_stack/core/id_generation.py
rename to src/llama_stack/core/id_generation.py
diff --git a/llama_stack/core/inspect.py b/src/llama_stack/core/inspect.py
similarity index 100%
rename from llama_stack/core/inspect.py
rename to src/llama_stack/core/inspect.py
diff --git a/llama_stack/core/library_client.py b/src/llama_stack/core/library_client.py
similarity index 100%
rename from llama_stack/core/library_client.py
rename to src/llama_stack/core/library_client.py
diff --git a/llama_stack/core/prompts/__init__.py b/src/llama_stack/core/prompts/__init__.py
similarity index 100%
rename from llama_stack/core/prompts/__init__.py
rename to src/llama_stack/core/prompts/__init__.py
diff --git a/llama_stack/core/prompts/prompts.py b/src/llama_stack/core/prompts/prompts.py
similarity index 100%
rename from llama_stack/core/prompts/prompts.py
rename to src/llama_stack/core/prompts/prompts.py
diff --git a/llama_stack/core/providers.py b/src/llama_stack/core/providers.py
similarity index 100%
rename from llama_stack/core/providers.py
rename to src/llama_stack/core/providers.py
diff --git a/llama_stack/core/request_headers.py b/src/llama_stack/core/request_headers.py
similarity index 100%
rename from llama_stack/core/request_headers.py
rename to src/llama_stack/core/request_headers.py
diff --git a/llama_stack/core/resolver.py b/src/llama_stack/core/resolver.py
similarity index 100%
rename from llama_stack/core/resolver.py
rename to src/llama_stack/core/resolver.py
diff --git a/llama_stack/core/routers/__init__.py b/src/llama_stack/core/routers/__init__.py
similarity index 100%
rename from llama_stack/core/routers/__init__.py
rename to src/llama_stack/core/routers/__init__.py
diff --git a/llama_stack/core/routers/datasets.py b/src/llama_stack/core/routers/datasets.py
similarity index 100%
rename from llama_stack/core/routers/datasets.py
rename to src/llama_stack/core/routers/datasets.py
diff --git a/llama_stack/core/routers/eval_scoring.py b/src/llama_stack/core/routers/eval_scoring.py
similarity index 100%
rename from llama_stack/core/routers/eval_scoring.py
rename to src/llama_stack/core/routers/eval_scoring.py
diff --git a/llama_stack/core/routers/inference.py b/src/llama_stack/core/routers/inference.py
similarity index 100%
rename from llama_stack/core/routers/inference.py
rename to src/llama_stack/core/routers/inference.py
diff --git a/llama_stack/core/routers/safety.py b/src/llama_stack/core/routers/safety.py
similarity index 100%
rename from llama_stack/core/routers/safety.py
rename to src/llama_stack/core/routers/safety.py
diff --git a/llama_stack/core/routers/tool_runtime.py b/src/llama_stack/core/routers/tool_runtime.py
similarity index 100%
rename from llama_stack/core/routers/tool_runtime.py
rename to src/llama_stack/core/routers/tool_runtime.py
diff --git a/llama_stack/core/routers/vector_io.py b/src/llama_stack/core/routers/vector_io.py
similarity index 100%
rename from llama_stack/core/routers/vector_io.py
rename to src/llama_stack/core/routers/vector_io.py
diff --git a/llama_stack/core/routing_tables/__init__.py b/src/llama_stack/core/routing_tables/__init__.py
similarity index 100%
rename from llama_stack/core/routing_tables/__init__.py
rename to src/llama_stack/core/routing_tables/__init__.py
diff --git a/llama_stack/core/routing_tables/benchmarks.py b/src/llama_stack/core/routing_tables/benchmarks.py
similarity index 100%
rename from llama_stack/core/routing_tables/benchmarks.py
rename to src/llama_stack/core/routing_tables/benchmarks.py
diff --git a/llama_stack/core/routing_tables/common.py b/src/llama_stack/core/routing_tables/common.py
similarity index 100%
rename from llama_stack/core/routing_tables/common.py
rename to src/llama_stack/core/routing_tables/common.py
diff --git a/llama_stack/core/routing_tables/datasets.py b/src/llama_stack/core/routing_tables/datasets.py
similarity index 100%
rename from llama_stack/core/routing_tables/datasets.py
rename to src/llama_stack/core/routing_tables/datasets.py
diff --git a/llama_stack/core/routing_tables/models.py b/src/llama_stack/core/routing_tables/models.py
similarity index 100%
rename from llama_stack/core/routing_tables/models.py
rename to src/llama_stack/core/routing_tables/models.py
diff --git a/llama_stack/core/routing_tables/scoring_functions.py b/src/llama_stack/core/routing_tables/scoring_functions.py
similarity index 100%
rename from llama_stack/core/routing_tables/scoring_functions.py
rename to src/llama_stack/core/routing_tables/scoring_functions.py
diff --git a/llama_stack/core/routing_tables/shields.py b/src/llama_stack/core/routing_tables/shields.py
similarity index 100%
rename from llama_stack/core/routing_tables/shields.py
rename to src/llama_stack/core/routing_tables/shields.py
diff --git a/llama_stack/core/routing_tables/toolgroups.py b/src/llama_stack/core/routing_tables/toolgroups.py
similarity index 100%
rename from llama_stack/core/routing_tables/toolgroups.py
rename to src/llama_stack/core/routing_tables/toolgroups.py
diff --git a/llama_stack/core/routing_tables/vector_stores.py b/src/llama_stack/core/routing_tables/vector_stores.py
similarity index 100%
rename from llama_stack/core/routing_tables/vector_stores.py
rename to src/llama_stack/core/routing_tables/vector_stores.py
diff --git a/llama_stack/core/server/__init__.py b/src/llama_stack/core/server/__init__.py
similarity index 100%
rename from llama_stack/core/server/__init__.py
rename to src/llama_stack/core/server/__init__.py
diff --git a/llama_stack/core/server/auth.py b/src/llama_stack/core/server/auth.py
similarity index 100%
rename from llama_stack/core/server/auth.py
rename to src/llama_stack/core/server/auth.py
diff --git a/llama_stack/core/server/auth_providers.py b/src/llama_stack/core/server/auth_providers.py
similarity index 100%
rename from llama_stack/core/server/auth_providers.py
rename to src/llama_stack/core/server/auth_providers.py
diff --git a/llama_stack/core/server/quota.py b/src/llama_stack/core/server/quota.py
similarity index 100%
rename from llama_stack/core/server/quota.py
rename to src/llama_stack/core/server/quota.py
diff --git a/llama_stack/core/server/routes.py b/src/llama_stack/core/server/routes.py
similarity index 100%
rename from llama_stack/core/server/routes.py
rename to src/llama_stack/core/server/routes.py
diff --git a/llama_stack/core/server/server.py b/src/llama_stack/core/server/server.py
similarity index 100%
rename from llama_stack/core/server/server.py
rename to src/llama_stack/core/server/server.py
diff --git a/llama_stack/core/server/tracing.py b/src/llama_stack/core/server/tracing.py
similarity index 100%
rename from llama_stack/core/server/tracing.py
rename to src/llama_stack/core/server/tracing.py
diff --git a/llama_stack/core/stack.py b/src/llama_stack/core/stack.py
similarity index 100%
rename from llama_stack/core/stack.py
rename to src/llama_stack/core/stack.py
diff --git a/llama_stack/core/start_stack.sh b/src/llama_stack/core/start_stack.sh
similarity index 100%
rename from llama_stack/core/start_stack.sh
rename to src/llama_stack/core/start_stack.sh
diff --git a/llama_stack/core/storage/__init__.py b/src/llama_stack/core/storage/__init__.py
similarity index 100%
rename from llama_stack/core/storage/__init__.py
rename to src/llama_stack/core/storage/__init__.py
diff --git a/llama_stack/core/storage/datatypes.py b/src/llama_stack/core/storage/datatypes.py
similarity index 100%
rename from llama_stack/core/storage/datatypes.py
rename to src/llama_stack/core/storage/datatypes.py
diff --git a/llama_stack/core/store/__init__.py b/src/llama_stack/core/store/__init__.py
similarity index 100%
rename from llama_stack/core/store/__init__.py
rename to src/llama_stack/core/store/__init__.py
diff --git a/llama_stack/core/store/registry.py b/src/llama_stack/core/store/registry.py
similarity index 100%
rename from llama_stack/core/store/registry.py
rename to src/llama_stack/core/store/registry.py
diff --git a/llama_stack/core/telemetry/__init__.py b/src/llama_stack/core/telemetry/__init__.py
similarity index 100%
rename from llama_stack/core/telemetry/__init__.py
rename to src/llama_stack/core/telemetry/__init__.py
diff --git a/llama_stack/core/telemetry/telemetry.py b/src/llama_stack/core/telemetry/telemetry.py
similarity index 100%
rename from llama_stack/core/telemetry/telemetry.py
rename to src/llama_stack/core/telemetry/telemetry.py
diff --git a/llama_stack/core/telemetry/trace_protocol.py b/src/llama_stack/core/telemetry/trace_protocol.py
similarity index 100%
rename from llama_stack/core/telemetry/trace_protocol.py
rename to src/llama_stack/core/telemetry/trace_protocol.py
diff --git a/llama_stack/core/telemetry/tracing.py b/src/llama_stack/core/telemetry/tracing.py
similarity index 100%
rename from llama_stack/core/telemetry/tracing.py
rename to src/llama_stack/core/telemetry/tracing.py
diff --git a/llama_stack/core/testing_context.py b/src/llama_stack/core/testing_context.py
similarity index 100%
rename from llama_stack/core/testing_context.py
rename to src/llama_stack/core/testing_context.py
diff --git a/llama_stack/core/ui/Containerfile b/src/llama_stack/core/ui/Containerfile
similarity index 100%
rename from llama_stack/core/ui/Containerfile
rename to src/llama_stack/core/ui/Containerfile
diff --git a/llama_stack/core/ui/README.md b/src/llama_stack/core/ui/README.md
similarity index 100%
rename from llama_stack/core/ui/README.md
rename to src/llama_stack/core/ui/README.md
diff --git a/llama_stack/core/ui/__init__.py b/src/llama_stack/core/ui/__init__.py
similarity index 100%
rename from llama_stack/core/ui/__init__.py
rename to src/llama_stack/core/ui/__init__.py
diff --git a/llama_stack/core/ui/app.py b/src/llama_stack/core/ui/app.py
similarity index 100%
rename from llama_stack/core/ui/app.py
rename to src/llama_stack/core/ui/app.py
diff --git a/llama_stack/core/ui/modules/__init__.py b/src/llama_stack/core/ui/modules/__init__.py
similarity index 100%
rename from llama_stack/core/ui/modules/__init__.py
rename to src/llama_stack/core/ui/modules/__init__.py
diff --git a/llama_stack/core/ui/modules/api.py b/src/llama_stack/core/ui/modules/api.py
similarity index 100%
rename from llama_stack/core/ui/modules/api.py
rename to src/llama_stack/core/ui/modules/api.py
diff --git a/llama_stack/core/ui/modules/utils.py b/src/llama_stack/core/ui/modules/utils.py
similarity index 100%
rename from llama_stack/core/ui/modules/utils.py
rename to src/llama_stack/core/ui/modules/utils.py
diff --git a/llama_stack/core/ui/page/__init__.py b/src/llama_stack/core/ui/page/__init__.py
similarity index 100%
rename from llama_stack/core/ui/page/__init__.py
rename to src/llama_stack/core/ui/page/__init__.py
diff --git a/llama_stack/core/ui/page/distribution/__init__.py b/src/llama_stack/core/ui/page/distribution/__init__.py
similarity index 100%
rename from llama_stack/core/ui/page/distribution/__init__.py
rename to src/llama_stack/core/ui/page/distribution/__init__.py
diff --git a/llama_stack/core/ui/page/distribution/datasets.py b/src/llama_stack/core/ui/page/distribution/datasets.py
similarity index 100%
rename from llama_stack/core/ui/page/distribution/datasets.py
rename to src/llama_stack/core/ui/page/distribution/datasets.py
diff --git a/llama_stack/core/ui/page/distribution/eval_tasks.py b/src/llama_stack/core/ui/page/distribution/eval_tasks.py
similarity index 100%
rename from llama_stack/core/ui/page/distribution/eval_tasks.py
rename to src/llama_stack/core/ui/page/distribution/eval_tasks.py
diff --git a/llama_stack/core/ui/page/distribution/models.py b/src/llama_stack/core/ui/page/distribution/models.py
similarity index 100%
rename from llama_stack/core/ui/page/distribution/models.py
rename to src/llama_stack/core/ui/page/distribution/models.py
diff --git a/llama_stack/core/ui/page/distribution/providers.py b/src/llama_stack/core/ui/page/distribution/providers.py
similarity index 100%
rename from llama_stack/core/ui/page/distribution/providers.py
rename to src/llama_stack/core/ui/page/distribution/providers.py
diff --git a/llama_stack/core/ui/page/distribution/resources.py b/src/llama_stack/core/ui/page/distribution/resources.py
similarity index 100%
rename from llama_stack/core/ui/page/distribution/resources.py
rename to src/llama_stack/core/ui/page/distribution/resources.py
diff --git a/llama_stack/core/ui/page/distribution/scoring_functions.py b/src/llama_stack/core/ui/page/distribution/scoring_functions.py
similarity index 100%
rename from llama_stack/core/ui/page/distribution/scoring_functions.py
rename to src/llama_stack/core/ui/page/distribution/scoring_functions.py
diff --git a/llama_stack/core/ui/page/distribution/shields.py b/src/llama_stack/core/ui/page/distribution/shields.py
similarity index 100%
rename from llama_stack/core/ui/page/distribution/shields.py
rename to src/llama_stack/core/ui/page/distribution/shields.py
diff --git a/llama_stack/core/ui/page/evaluations/__init__.py b/src/llama_stack/core/ui/page/evaluations/__init__.py
similarity index 100%
rename from llama_stack/core/ui/page/evaluations/__init__.py
rename to src/llama_stack/core/ui/page/evaluations/__init__.py
diff --git a/llama_stack/core/ui/page/evaluations/app_eval.py b/src/llama_stack/core/ui/page/evaluations/app_eval.py
similarity index 100%
rename from llama_stack/core/ui/page/evaluations/app_eval.py
rename to src/llama_stack/core/ui/page/evaluations/app_eval.py
diff --git a/llama_stack/core/ui/page/evaluations/native_eval.py b/src/llama_stack/core/ui/page/evaluations/native_eval.py
similarity index 100%
rename from llama_stack/core/ui/page/evaluations/native_eval.py
rename to src/llama_stack/core/ui/page/evaluations/native_eval.py
diff --git a/llama_stack/core/ui/page/playground/__init__.py b/src/llama_stack/core/ui/page/playground/__init__.py
similarity index 100%
rename from llama_stack/core/ui/page/playground/__init__.py
rename to src/llama_stack/core/ui/page/playground/__init__.py
diff --git a/llama_stack/core/ui/page/playground/chat.py b/src/llama_stack/core/ui/page/playground/chat.py
similarity index 100%
rename from llama_stack/core/ui/page/playground/chat.py
rename to src/llama_stack/core/ui/page/playground/chat.py
diff --git a/llama_stack/core/ui/page/playground/tools.py b/src/llama_stack/core/ui/page/playground/tools.py
similarity index 100%
rename from llama_stack/core/ui/page/playground/tools.py
rename to src/llama_stack/core/ui/page/playground/tools.py
diff --git a/llama_stack/core/ui/requirements.txt b/src/llama_stack/core/ui/requirements.txt
similarity index 100%
rename from llama_stack/core/ui/requirements.txt
rename to src/llama_stack/core/ui/requirements.txt
diff --git a/llama_stack/core/utils/__init__.py b/src/llama_stack/core/utils/__init__.py
similarity index 100%
rename from llama_stack/core/utils/__init__.py
rename to src/llama_stack/core/utils/__init__.py
diff --git a/llama_stack/core/utils/config.py b/src/llama_stack/core/utils/config.py
similarity index 100%
rename from llama_stack/core/utils/config.py
rename to src/llama_stack/core/utils/config.py
diff --git a/llama_stack/core/utils/config_dirs.py b/src/llama_stack/core/utils/config_dirs.py
similarity index 100%
rename from llama_stack/core/utils/config_dirs.py
rename to src/llama_stack/core/utils/config_dirs.py
diff --git a/llama_stack/core/utils/config_resolution.py b/src/llama_stack/core/utils/config_resolution.py
similarity index 100%
rename from llama_stack/core/utils/config_resolution.py
rename to src/llama_stack/core/utils/config_resolution.py
diff --git a/llama_stack/core/utils/context.py b/src/llama_stack/core/utils/context.py
similarity index 100%
rename from llama_stack/core/utils/context.py
rename to src/llama_stack/core/utils/context.py
diff --git a/llama_stack/core/utils/dynamic.py b/src/llama_stack/core/utils/dynamic.py
similarity index 100%
rename from llama_stack/core/utils/dynamic.py
rename to src/llama_stack/core/utils/dynamic.py
diff --git a/llama_stack/core/utils/exec.py b/src/llama_stack/core/utils/exec.py
similarity index 100%
rename from llama_stack/core/utils/exec.py
rename to src/llama_stack/core/utils/exec.py
diff --git a/llama_stack/core/utils/image_types.py b/src/llama_stack/core/utils/image_types.py
similarity index 100%
rename from llama_stack/core/utils/image_types.py
rename to src/llama_stack/core/utils/image_types.py
diff --git a/llama_stack/core/utils/model_utils.py b/src/llama_stack/core/utils/model_utils.py
similarity index 100%
rename from llama_stack/core/utils/model_utils.py
rename to src/llama_stack/core/utils/model_utils.py
diff --git a/llama_stack/core/utils/prompt_for_config.py b/src/llama_stack/core/utils/prompt_for_config.py
similarity index 100%
rename from llama_stack/core/utils/prompt_for_config.py
rename to src/llama_stack/core/utils/prompt_for_config.py
diff --git a/llama_stack/core/utils/serialize.py b/src/llama_stack/core/utils/serialize.py
similarity index 100%
rename from llama_stack/core/utils/serialize.py
rename to src/llama_stack/core/utils/serialize.py
diff --git a/llama_stack/distributions/__init__.py b/src/llama_stack/distributions/__init__.py
similarity index 100%
rename from llama_stack/distributions/__init__.py
rename to src/llama_stack/distributions/__init__.py
diff --git a/llama_stack/distributions/ci-tests/__init__.py b/src/llama_stack/distributions/ci-tests/__init__.py
similarity index 100%
rename from llama_stack/distributions/ci-tests/__init__.py
rename to src/llama_stack/distributions/ci-tests/__init__.py
diff --git a/src/llama_stack/distributions/ci-tests/build.yaml b/src/llama_stack/distributions/ci-tests/build.yaml
new file mode 100644
index 000000000..c01e415a9
--- /dev/null
+++ b/src/llama_stack/distributions/ci-tests/build.yaml
@@ -0,0 +1,59 @@
+version: 2
+distribution_spec:
+  description: CI tests for Llama Stack
+  providers:
+    inference:
+    - provider_type: remote::cerebras
+    - provider_type: remote::ollama
+    - provider_type: remote::vllm
+    - provider_type: remote::tgi
+    - provider_type: remote::fireworks
+    - provider_type: remote::together
+    - provider_type: remote::bedrock
+    - provider_type: remote::nvidia
+    - provider_type: remote::openai
+    - provider_type: remote::anthropic
+    - provider_type: remote::gemini
+    - provider_type: remote::vertexai
+    - provider_type: remote::groq
+    - provider_type: remote::sambanova
+    - provider_type: remote::azure
+    - provider_type: inline::sentence-transformers
+    vector_io:
+    - provider_type: inline::faiss
+    - provider_type: inline::sqlite-vec
+    - provider_type: inline::milvus
+    - provider_type: remote::chromadb
+    - provider_type: remote::pgvector
+    - provider_type: remote::qdrant
+    - provider_type: remote::weaviate
+    files:
+    - provider_type: inline::localfs
+    safety:
+    - provider_type: inline::llama-guard
+    - provider_type: inline::code-scanner
+    agents:
+    - provider_type: inline::meta-reference
+    post_training:
+    - provider_type: inline::torchtune-cpu
+    eval:
+    - provider_type: inline::meta-reference
+    datasetio:
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
+    scoring:
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+    - provider_type: remote::model-context-protocol
+    batches:
+    - provider_type: inline::reference
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- asyncpg
+- sqlalchemy[asyncio]
diff --git a/llama_stack/distributions/ci-tests/ci_tests.py b/src/llama_stack/distributions/ci-tests/ci_tests.py
similarity index 100%
rename from llama_stack/distributions/ci-tests/ci_tests.py
rename to src/llama_stack/distributions/ci-tests/ci_tests.py
diff --git a/src/llama_stack/distributions/ci-tests/run.yaml b/src/llama_stack/distributions/ci-tests/run.yaml
new file mode 100644
index 000000000..702acff8e
--- /dev/null
+++ b/src/llama_stack/distributions/ci-tests/run.yaml
@@ -0,0 +1,281 @@
+version: 2
+image_name: ci-tests
+apis:
+- agents
+- batches
+- datasetio
+- eval
+- files
+- inference
+- post_training
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
+    provider_type: remote::cerebras
+    config:
+      base_url: https://api.cerebras.ai
+      api_key: ${env.CEREBRAS_API_KEY:=}
+  - provider_id: ${env.OLLAMA_URL:+ollama}
+    provider_type: remote::ollama
+    config:
+      url: ${env.OLLAMA_URL:=http://localhost:11434}
+  - provider_id: ${env.VLLM_URL:+vllm}
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL:=}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: ${env.TGI_URL:+tgi}
+    provider_type: remote::tgi
+    config:
+      url: ${env.TGI_URL:=}
+  - provider_id: fireworks
+    provider_type: remote::fireworks
+    config:
+      url: https://api.fireworks.ai/inference/v1
+      api_key: ${env.FIREWORKS_API_KEY:=}
+  - provider_id: together
+    provider_type: remote::together
+    config:
+      url: https://api.together.xyz/v1
+      api_key: ${env.TOGETHER_API_KEY:=}
+  - provider_id: bedrock
+    provider_type: remote::bedrock
+  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
+    provider_type: remote::nvidia
+    config:
+      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      api_key: ${env.NVIDIA_API_KEY:=}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY:=}
+      base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
+  - provider_id: anthropic
+    provider_type: remote::anthropic
+    config:
+      api_key: ${env.ANTHROPIC_API_KEY:=}
+  - provider_id: gemini
+    provider_type: remote::gemini
+    config:
+      api_key: ${env.GEMINI_API_KEY:=}
+  - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
+    provider_type: remote::vertexai
+    config:
+      project: ${env.VERTEX_AI_PROJECT:=}
+      location: ${env.VERTEX_AI_LOCATION:=us-central1}
+  - provider_id: groq
+    provider_type: remote::groq
+    config:
+      url: https://api.groq.com
+      api_key: ${env.GROQ_API_KEY:=}
+  - provider_id: sambanova
+    provider_type: remote::sambanova
+    config:
+      url: https://api.sambanova.ai/v1
+      api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
+    config:
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db
+      persistence:
+        namespace: vector_io::sqlite_vec
+        backend: kv_default
+  - provider_id: ${env.MILVUS_URL:+milvus}
+    provider_type: inline::milvus
+    config:
+      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db
+      persistence:
+        namespace: vector_io::milvus
+        backend: kv_default
+  - provider_id: ${env.CHROMADB_URL:+chromadb}
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  - provider_id: ${env.PGVECTOR_DB:+pgvector}
+    provider_type: remote::pgvector
+    config:
+      host: ${env.PGVECTOR_HOST:=localhost}
+      port: ${env.PGVECTOR_PORT:=5432}
+      db: ${env.PGVECTOR_DB:=}
+      user: ${env.PGVECTOR_USER:=}
+      password: ${env.PGVECTOR_PASSWORD:=}
+      persistence:
+        namespace: vector_io::pgvector
+        backend: kv_default
+  - provider_id: ${env.QDRANT_URL:+qdrant}
+    provider_type: remote::qdrant
+    config:
+      api_key: ${env.QDRANT_API_KEY:=}
+      persistence:
+        namespace: vector_io::qdrant_remote
+        backend: kv_default
+  - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
+    provider_type: remote::weaviate
+    config:
+      weaviate_api_key: null
+      weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
+      persistence:
+        namespace: vector_io::weaviate
+        backend: kv_default
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  - provider_id: code-scanner
+    provider_type: inline::code-scanner
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  post_training:
+  - provider_id: torchtune-cpu
+    provider_type: inline::torchtune-cpu
+    config:
+      checkpoint_format: meta
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+  batches:
+  - provider_id: reference
+    provider_type: inline::reference
+    config:
+      kvstore:
+        namespace: batches
+        backend: kv_default
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
+registered_resources:
+  models: []
+  shields:
+  - shield_id: llama-guard
+    provider_id: ${env.SAFETY_MODEL:+llama-guard}
+    provider_shield_id: ${env.SAFETY_MODEL:=}
+  - shield_id: code-scanner
+    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
+    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
+safety:
+  default_shield_id: llama-guard
diff --git a/llama_stack/distributions/dell/__init__.py b/src/llama_stack/distributions/dell/__init__.py
similarity index 100%
rename from llama_stack/distributions/dell/__init__.py
rename to src/llama_stack/distributions/dell/__init__.py
diff --git a/src/llama_stack/distributions/dell/build.yaml b/src/llama_stack/distributions/dell/build.yaml
new file mode 100644
index 000000000..7bc26ca9e
--- /dev/null
+++ b/src/llama_stack/distributions/dell/build.yaml
@@ -0,0 +1,33 @@
+version: 2
+distribution_spec:
+  description: Dell's distribution of Llama Stack. TGI inference via Dell's custom
+    container
+  providers:
+    inference:
+    - provider_type: remote::tgi
+    - provider_type: inline::sentence-transformers
+    vector_io:
+    - provider_type: inline::faiss
+    - provider_type: remote::chromadb
+    - provider_type: remote::pgvector
+    safety:
+    - provider_type: inline::llama-guard
+    agents:
+    - provider_type: inline::meta-reference
+    eval:
+    - provider_type: inline::meta-reference
+    datasetio:
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
+    scoring:
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- sqlalchemy[asyncio]
diff --git a/llama_stack/distributions/dell/dell.py b/src/llama_stack/distributions/dell/dell.py
similarity index 100%
rename from llama_stack/distributions/dell/dell.py
rename to src/llama_stack/distributions/dell/dell.py
diff --git a/llama_stack/distributions/dell/doc_template.md b/src/llama_stack/distributions/dell/doc_template.md
similarity index 100%
rename from llama_stack/distributions/dell/doc_template.md
rename to src/llama_stack/distributions/dell/doc_template.md
diff --git a/src/llama_stack/distributions/dell/run-with-safety.yaml b/src/llama_stack/distributions/dell/run-with-safety.yaml
new file mode 100644
index 000000000..2563f2f4b
--- /dev/null
+++ b/src/llama_stack/distributions/dell/run-with-safety.yaml
@@ -0,0 +1,141 @@
+version: 2
+image_name: dell
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: tgi0
+    provider_type: remote::tgi
+    config:
+      url: ${env.DEH_URL}
+  - provider_id: tgi1
+    provider_type: remote::tgi
+    config:
+      url: ${env.DEH_SAFETY_URL}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: chromadb
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
+    provider_id: tgi0
+    model_type: llm
+  - metadata: {}
+    model_id: ${env.SAFETY_MODEL}
+    provider_id: tgi1
+    model_type: llm
+  - metadata:
+      embedding_dimension: 768
+    model_id: nomic-embed-text-v1.5
+    provider_id: sentence-transformers
+    model_type: embedding
+  shields:
+  - shield_id: ${env.SAFETY_MODEL}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: brave-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/src/llama_stack/distributions/dell/run.yaml b/src/llama_stack/distributions/dell/run.yaml
new file mode 100644
index 000000000..bc3117d88
--- /dev/null
+++ b/src/llama_stack/distributions/dell/run.yaml
@@ -0,0 +1,135 @@
+version: 2
+image_name: dell
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: tgi0
+    provider_type: remote::tgi
+    config:
+      url: ${env.DEH_URL}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: chromadb
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
+    provider_id: tgi0
+    model_type: llm
+  - metadata:
+      embedding_dimension: 768
+    model_id: nomic-embed-text-v1.5
+    provider_id: sentence-transformers
+    model_type: embedding
+  shields: []
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: brave-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/llama_stack/distributions/meta-reference-gpu/__init__.py b/src/llama_stack/distributions/meta-reference-gpu/__init__.py
similarity index 100%
rename from llama_stack/distributions/meta-reference-gpu/__init__.py
rename to src/llama_stack/distributions/meta-reference-gpu/__init__.py
diff --git a/src/llama_stack/distributions/meta-reference-gpu/build.yaml b/src/llama_stack/distributions/meta-reference-gpu/build.yaml
new file mode 100644
index 000000000..1513742a7
--- /dev/null
+++ b/src/llama_stack/distributions/meta-reference-gpu/build.yaml
@@ -0,0 +1,32 @@
+version: 2
+distribution_spec:
+  description: Use Meta Reference for running LLM inference
+  providers:
+    inference:
+    - provider_type: inline::meta-reference
+    vector_io:
+    - provider_type: inline::faiss
+    - provider_type: remote::chromadb
+    - provider_type: remote::pgvector
+    safety:
+    - provider_type: inline::llama-guard
+    agents:
+    - provider_type: inline::meta-reference
+    eval:
+    - provider_type: inline::meta-reference
+    datasetio:
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
+    scoring:
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+    - provider_type: remote::model-context-protocol
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- sqlalchemy[asyncio]
diff --git a/llama_stack/distributions/meta-reference-gpu/doc_template.md b/src/llama_stack/distributions/meta-reference-gpu/doc_template.md
similarity index 100%
rename from llama_stack/distributions/meta-reference-gpu/doc_template.md
rename to src/llama_stack/distributions/meta-reference-gpu/doc_template.md
diff --git a/llama_stack/distributions/meta-reference-gpu/meta_reference.py b/src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
similarity index 100%
rename from llama_stack/distributions/meta-reference-gpu/meta_reference.py
rename to src/llama_stack/distributions/meta-reference-gpu/meta_reference.py
diff --git a/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml b/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml
new file mode 100644
index 000000000..01b5db4f9
--- /dev/null
+++ b/src/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml
@@ -0,0 +1,154 @@
+version: 2
+image_name: meta-reference-gpu
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: meta-reference-inference
+    provider_type: inline::meta-reference
+    config:
+      model: ${env.INFERENCE_MODEL}
+      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:=null}
+      quantization:
+        type: ${env.QUANTIZATION_TYPE:=bf16}
+      model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}
+      max_batch_size: ${env.MAX_BATCH_SIZE:=1}
+      max_seq_len: ${env.MAX_SEQ_LEN:=4096}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  - provider_id: meta-reference-safety
+    provider_type: inline::meta-reference
+    config:
+      model: ${env.SAFETY_MODEL}
+      checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:=null}
+      quantization:
+        type: ${env.QUANTIZATION_TYPE:=bf16}
+      model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}
+      max_batch_size: ${env.MAX_BATCH_SIZE:=1}
+      max_seq_len: ${env.MAX_SEQ_LEN:=4096}
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
+    provider_id: meta-reference-inference
+    model_type: llm
+  - metadata: {}
+    model_id: ${env.SAFETY_MODEL}
+    provider_id: meta-reference-safety
+    model_type: llm
+  - metadata:
+      embedding_dimension: 768
+    model_id: nomic-embed-text-v1.5
+    provider_id: sentence-transformers
+    model_type: embedding
+  shields:
+  - shield_id: ${env.SAFETY_MODEL}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/src/llama_stack/distributions/meta-reference-gpu/run.yaml b/src/llama_stack/distributions/meta-reference-gpu/run.yaml
new file mode 100644
index 000000000..5c7f75ca8
--- /dev/null
+++ b/src/llama_stack/distributions/meta-reference-gpu/run.yaml
@@ -0,0 +1,142 @@
+version: 2
+image_name: meta-reference-gpu
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: meta-reference-inference
+    provider_type: inline::meta-reference
+    config:
+      model: ${env.INFERENCE_MODEL}
+      checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:=null}
+      quantization:
+        type: ${env.QUANTIZATION_TYPE:=bf16}
+      model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}
+      max_batch_size: ${env.MAX_BATCH_SIZE:=1}
+      max_seq_len: ${env.MAX_SEQ_LEN:=4096}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
+    provider_id: meta-reference-inference
+    model_type: llm
+  - metadata:
+      embedding_dimension: 768
+    model_id: nomic-embed-text-v1.5
+    provider_id: sentence-transformers
+    model_type: embedding
+  shields: []
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/llama_stack/distributions/nvidia/__init__.py b/src/llama_stack/distributions/nvidia/__init__.py
similarity index 100%
rename from llama_stack/distributions/nvidia/__init__.py
rename to src/llama_stack/distributions/nvidia/__init__.py
diff --git a/src/llama_stack/distributions/nvidia/build.yaml b/src/llama_stack/distributions/nvidia/build.yaml
new file mode 100644
index 000000000..8ddd12439
--- /dev/null
+++ b/src/llama_stack/distributions/nvidia/build.yaml
@@ -0,0 +1,29 @@
+version: 2
+distribution_spec:
+  description: Use NVIDIA NIM for running LLM inference, evaluation and safety
+  providers:
+    inference:
+    - provider_type: remote::nvidia
+    vector_io:
+    - provider_type: inline::faiss
+    safety:
+    - provider_type: remote::nvidia
+    agents:
+    - provider_type: inline::meta-reference
+    eval:
+    - provider_type: remote::nvidia
+    post_training:
+    - provider_type: remote::nvidia
+    datasetio:
+    - provider_type: inline::localfs
+    - provider_type: remote::nvidia
+    scoring:
+    - provider_type: inline::basic
+    tool_runtime:
+    - provider_type: inline::rag-runtime
+    files:
+    - provider_type: inline::localfs
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- sqlalchemy[asyncio]
diff --git a/llama_stack/distributions/nvidia/doc_template.md b/src/llama_stack/distributions/nvidia/doc_template.md
similarity index 100%
rename from llama_stack/distributions/nvidia/doc_template.md
rename to src/llama_stack/distributions/nvidia/doc_template.md
diff --git a/llama_stack/distributions/nvidia/nvidia.py b/src/llama_stack/distributions/nvidia/nvidia.py
similarity index 100%
rename from llama_stack/distributions/nvidia/nvidia.py
rename to src/llama_stack/distributions/nvidia/nvidia.py
diff --git a/src/llama_stack/distributions/nvidia/run-with-safety.yaml b/src/llama_stack/distributions/nvidia/run-with-safety.yaml
new file mode 100644
index 000000000..c23d0f9cb
--- /dev/null
+++ b/src/llama_stack/distributions/nvidia/run-with-safety.yaml
@@ -0,0 +1,137 @@
+version: 2
+image_name: nvidia
+apis:
+- agents
+- datasetio
+- eval
+- files
+- inference
+- post_training
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      api_key: ${env.NVIDIA_API_KEY:=}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}
+      config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  safety:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}
+      config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  eval:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}
+  post_training:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      api_key: ${env.NVIDIA_API_KEY:=}
+      dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default}
+      project_id: ${env.NVIDIA_PROJECT_ID:=test-project}
+      customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test}
+  datasetio:
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      api_key: ${env.NVIDIA_API_KEY:=}
+      dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default}
+      project_id: ${env.NVIDIA_PROJECT_ID:=test-project}
+      datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test}
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  tool_runtime:
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
+    provider_id: nvidia
+    model_type: llm
+  - metadata: {}
+    model_id: ${env.SAFETY_MODEL}
+    provider_id: nvidia
+    model_type: llm
+  shields:
+  - shield_id: ${env.SAFETY_MODEL}
+    provider_id: nvidia
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/src/llama_stack/distributions/nvidia/run.yaml b/src/llama_stack/distributions/nvidia/run.yaml
new file mode 100644
index 000000000..8c50b8bfb
--- /dev/null
+++ b/src/llama_stack/distributions/nvidia/run.yaml
@@ -0,0 +1,119 @@
+version: 2
+image_name: nvidia
+apis:
+- agents
+- datasetio
+- eval
+- files
+- inference
+- post_training
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      api_key: ${env.NVIDIA_API_KEY:=}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  safety:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}
+      config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  eval:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}
+  post_training:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      api_key: ${env.NVIDIA_API_KEY:=}
+      dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default}
+      project_id: ${env.NVIDIA_PROJECT_ID:=test-project}
+      customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test}
+  datasetio:
+  - provider_id: nvidia
+    provider_type: remote::nvidia
+    config:
+      api_key: ${env.NVIDIA_API_KEY:=}
+      dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default}
+      project_id: ${env.NVIDIA_PROJECT_ID:=test-project}
+      datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test}
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  tool_runtime:
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
+registered_resources:
+  models: []
+  shields: []
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/llama_stack/distributions/open-benchmark/__init__.py b/src/llama_stack/distributions/open-benchmark/__init__.py
similarity index 100%
rename from llama_stack/distributions/open-benchmark/__init__.py
rename to src/llama_stack/distributions/open-benchmark/__init__.py
diff --git a/src/llama_stack/distributions/open-benchmark/build.yaml b/src/llama_stack/distributions/open-benchmark/build.yaml
new file mode 100644
index 000000000..05acd98e3
--- /dev/null
+++ b/src/llama_stack/distributions/open-benchmark/build.yaml
@@ -0,0 +1,36 @@
+version: 2
+distribution_spec:
+  description: Distribution for running open benchmarks
+  providers:
+    inference:
+    - provider_type: remote::openai
+    - provider_type: remote::anthropic
+    - provider_type: remote::gemini
+    - provider_type: remote::groq
+    - provider_type: remote::together
+    vector_io:
+    - provider_type: inline::sqlite-vec
+    - provider_type: remote::chromadb
+    - provider_type: remote::pgvector
+    safety:
+    - provider_type: inline::llama-guard
+    agents:
+    - provider_type: inline::meta-reference
+    eval:
+    - provider_type: inline::meta-reference
+    datasetio:
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
+    scoring:
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+    - provider_type: remote::model-context-protocol
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- sqlalchemy[asyncio]
diff --git a/llama_stack/distributions/open-benchmark/open_benchmark.py b/src/llama_stack/distributions/open-benchmark/open_benchmark.py
similarity index 100%
rename from llama_stack/distributions/open-benchmark/open_benchmark.py
rename to src/llama_stack/distributions/open-benchmark/open_benchmark.py
diff --git a/src/llama_stack/distributions/open-benchmark/run.yaml b/src/llama_stack/distributions/open-benchmark/run.yaml
new file mode 100644
index 000000000..912e48dd3
--- /dev/null
+++ b/src/llama_stack/distributions/open-benchmark/run.yaml
@@ -0,0 +1,255 @@
+version: 2
+image_name: open-benchmark
+apis:
+- agents
+- datasetio
+- eval
+- inference
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY:=}
+      base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
+  - provider_id: anthropic
+    provider_type: remote::anthropic
+    config:
+      api_key: ${env.ANTHROPIC_API_KEY:=}
+  - provider_id: gemini
+    provider_type: remote::gemini
+    config:
+      api_key: ${env.GEMINI_API_KEY:=}
+  - provider_id: groq
+    provider_type: remote::groq
+    config:
+      url: https://api.groq.com
+      api_key: ${env.GROQ_API_KEY:=}
+  - provider_id: together
+    provider_type: remote::together
+    config:
+      url: https://api.together.xyz/v1
+      api_key: ${env.TOGETHER_API_KEY:=}
+  vector_io:
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
+    config:
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec.db
+      persistence:
+        namespace: vector_io::sqlite_vec
+        backend: kv_default
+  - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  - provider_id: ${env.ENABLE_PGVECTOR:+pgvector}
+    provider_type: remote::pgvector
+    config:
+      host: ${env.PGVECTOR_HOST:=localhost}
+      port: ${env.PGVECTOR_PORT:=5432}
+      db: ${env.PGVECTOR_DB:=}
+      user: ${env.PGVECTOR_USER:=}
+      password: ${env.PGVECTOR_PASSWORD:=}
+      persistence:
+        namespace: vector_io::pgvector
+        backend: kv_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: gpt-4o
+    provider_id: openai
+    provider_model_id: gpt-4o
+    model_type: llm
+  - metadata: {}
+    model_id: claude-3-5-sonnet-latest
+    provider_id: anthropic
+    provider_model_id: claude-3-5-sonnet-latest
+    model_type: llm
+  - metadata: {}
+    model_id: gemini/gemini-1.5-flash
+    provider_id: gemini
+    provider_model_id: gemini/gemini-1.5-flash
+    model_type: llm
+  - metadata: {}
+    model_id: meta-llama/Llama-3.3-70B-Instruct
+    provider_id: groq
+    provider_model_id: groq/llama-3.3-70b-versatile
+    model_type: llm
+  - metadata: {}
+    model_id: meta-llama/Llama-3.1-405B-Instruct
+    provider_id: together
+    provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+    model_type: llm
+  shields:
+  - shield_id: meta-llama/Llama-Guard-3-8B
+  vector_dbs: []
+  datasets:
+  - purpose: eval/messages-answer
+    source:
+      type: uri
+      uri: huggingface://datasets/llamastack/simpleqa?split=train
+    metadata: {}
+    dataset_id: simpleqa
+  - purpose: eval/messages-answer
+    source:
+      type: uri
+      uri: huggingface://datasets/llamastack/mmlu_cot?split=test&name=all
+    metadata: {}
+    dataset_id: mmlu_cot
+  - purpose: eval/messages-answer
+    source:
+      type: uri
+      uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main
+    metadata: {}
+    dataset_id: gpqa_cot
+  - purpose: eval/messages-answer
+    source:
+      type: uri
+      uri: huggingface://datasets/llamastack/math_500?split=test
+    metadata: {}
+    dataset_id: math_500
+  - purpose: eval/messages-answer
+    source:
+      type: uri
+      uri: huggingface://datasets/llamastack/IfEval?split=train
+    metadata: {}
+    dataset_id: ifeval
+  - purpose: eval/messages-answer
+    source:
+      type: uri
+      uri: huggingface://datasets/llamastack/docvqa?split=val
+    metadata: {}
+    dataset_id: docvqa
+  scoring_fns: []
+  benchmarks:
+  - dataset_id: simpleqa
+    scoring_functions:
+    - llm-as-judge::405b-simpleqa
+    metadata: {}
+    benchmark_id: meta-reference-simpleqa
+  - dataset_id: mmlu_cot
+    scoring_functions:
+    - basic::regex_parser_multiple_choice_answer
+    metadata: {}
+    benchmark_id: meta-reference-mmlu-cot
+  - dataset_id: gpqa_cot
+    scoring_functions:
+    - basic::regex_parser_multiple_choice_answer
+    metadata: {}
+    benchmark_id: meta-reference-gpqa-cot
+  - dataset_id: math_500
+    scoring_functions:
+    - basic::regex_parser_math_response
+    metadata: {}
+    benchmark_id: meta-reference-math-500
+  - dataset_id: ifeval
+    scoring_functions:
+    - basic::ifeval
+    metadata: {}
+    benchmark_id: meta-reference-ifeval
+  - dataset_id: docvqa
+    scoring_functions:
+    - basic::docvqa
+    metadata: {}
+    benchmark_id: meta-reference-docvqa
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/llama_stack/distributions/postgres-demo/__init__.py b/src/llama_stack/distributions/postgres-demo/__init__.py
similarity index 100%
rename from llama_stack/distributions/postgres-demo/__init__.py
rename to src/llama_stack/distributions/postgres-demo/__init__.py
diff --git a/src/llama_stack/distributions/postgres-demo/build.yaml b/src/llama_stack/distributions/postgres-demo/build.yaml
new file mode 100644
index 000000000..063dc3999
--- /dev/null
+++ b/src/llama_stack/distributions/postgres-demo/build.yaml
@@ -0,0 +1,23 @@
+version: 2
+distribution_spec:
+  description: Quick start template for running Llama Stack with several popular providers
+  providers:
+    inference:
+    - provider_type: remote::vllm
+    - provider_type: inline::sentence-transformers
+    vector_io:
+    - provider_type: remote::chromadb
+    safety:
+    - provider_type: inline::llama-guard
+    agents:
+    - provider_type: inline::meta-reference
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+    - provider_type: remote::model-context-protocol
+image_type: venv
+additional_pip_packages:
+- asyncpg
+- psycopg2-binary
+- sqlalchemy[asyncio]
diff --git a/llama_stack/distributions/postgres-demo/postgres_demo.py b/src/llama_stack/distributions/postgres-demo/postgres_demo.py
similarity index 100%
rename from llama_stack/distributions/postgres-demo/postgres_demo.py
rename to src/llama_stack/distributions/postgres-demo/postgres_demo.py
diff --git a/src/llama_stack/distributions/postgres-demo/run.yaml b/src/llama_stack/distributions/postgres-demo/run.yaml
new file mode 100644
index 000000000..dd1c2bc7f
--- /dev/null
+++ b/src/llama_stack/distributions/postgres-demo/run.yaml
@@ -0,0 +1,118 @@
+version: 2
+image_name: postgres-demo
+apis:
+- agents
+- inference
+- safety
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: vllm-inference
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL:=http://localhost:8000/v1}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: ${env.ENABLE_CHROMADB:+chromadb}
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+storage:
+  backends:
+    kv_default:
+      type: kv_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+      table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+    sql_default:
+      type: sql_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
+registered_resources:
+  models:
+  - metadata: {}
+    model_id: ${env.INFERENCE_MODEL}
+    provider_id: vllm-inference
+    model_type: llm
+  - metadata:
+      embedding_dimension: 768
+    model_id: nomic-embed-text-v1.5
+    provider_id: sentence-transformers
+    model_type: embedding
+  shields:
+  - shield_id: meta-llama/Llama-Guard-3-8B
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/llama_stack/distributions/starter-gpu/__init__.py b/src/llama_stack/distributions/starter-gpu/__init__.py
similarity index 100%
rename from llama_stack/distributions/starter-gpu/__init__.py
rename to src/llama_stack/distributions/starter-gpu/__init__.py
diff --git a/src/llama_stack/distributions/starter-gpu/build.yaml b/src/llama_stack/distributions/starter-gpu/build.yaml
new file mode 100644
index 000000000..b2e2a0c85
--- /dev/null
+++ b/src/llama_stack/distributions/starter-gpu/build.yaml
@@ -0,0 +1,60 @@
+version: 2
+distribution_spec:
+  description: Quick start template for running Llama Stack with several popular providers.
+    This distribution is intended for GPU-enabled environments.
+  providers:
+    inference:
+    - provider_type: remote::cerebras
+    - provider_type: remote::ollama
+    - provider_type: remote::vllm
+    - provider_type: remote::tgi
+    - provider_type: remote::fireworks
+    - provider_type: remote::together
+    - provider_type: remote::bedrock
+    - provider_type: remote::nvidia
+    - provider_type: remote::openai
+    - provider_type: remote::anthropic
+    - provider_type: remote::gemini
+    - provider_type: remote::vertexai
+    - provider_type: remote::groq
+    - provider_type: remote::sambanova
+    - provider_type: remote::azure
+    - provider_type: inline::sentence-transformers
+    vector_io:
+    - provider_type: inline::faiss
+    - provider_type: inline::sqlite-vec
+    - provider_type: inline::milvus
+    - provider_type: remote::chromadb
+    - provider_type: remote::pgvector
+    - provider_type: remote::qdrant
+    - provider_type: remote::weaviate
+    files:
+    - provider_type: inline::localfs
+    safety:
+    - provider_type: inline::llama-guard
+    - provider_type: inline::code-scanner
+    agents:
+    - provider_type: inline::meta-reference
+    post_training:
+    - provider_type: inline::huggingface-gpu
+    eval:
+    - provider_type: inline::meta-reference
+    datasetio:
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
+    scoring:
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+    - provider_type: remote::model-context-protocol
+    batches:
+    - provider_type: inline::reference
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- asyncpg
+- sqlalchemy[asyncio]
diff --git a/src/llama_stack/distributions/starter-gpu/run.yaml b/src/llama_stack/distributions/starter-gpu/run.yaml
new file mode 100644
index 000000000..807f0d678
--- /dev/null
+++ b/src/llama_stack/distributions/starter-gpu/run.yaml
@@ -0,0 +1,284 @@
+version: 2
+image_name: starter-gpu
+apis:
+- agents
+- batches
+- datasetio
+- eval
+- files
+- inference
+- post_training
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
+    provider_type: remote::cerebras
+    config:
+      base_url: https://api.cerebras.ai
+      api_key: ${env.CEREBRAS_API_KEY:=}
+  - provider_id: ${env.OLLAMA_URL:+ollama}
+    provider_type: remote::ollama
+    config:
+      url: ${env.OLLAMA_URL:=http://localhost:11434}
+  - provider_id: ${env.VLLM_URL:+vllm}
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL:=}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: ${env.TGI_URL:+tgi}
+    provider_type: remote::tgi
+    config:
+      url: ${env.TGI_URL:=}
+  - provider_id: fireworks
+    provider_type: remote::fireworks
+    config:
+      url: https://api.fireworks.ai/inference/v1
+      api_key: ${env.FIREWORKS_API_KEY:=}
+  - provider_id: together
+    provider_type: remote::together
+    config:
+      url: https://api.together.xyz/v1
+      api_key: ${env.TOGETHER_API_KEY:=}
+  - provider_id: bedrock
+    provider_type: remote::bedrock
+  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
+    provider_type: remote::nvidia
+    config:
+      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      api_key: ${env.NVIDIA_API_KEY:=}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY:=}
+      base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
+  - provider_id: anthropic
+    provider_type: remote::anthropic
+    config:
+      api_key: ${env.ANTHROPIC_API_KEY:=}
+  - provider_id: gemini
+    provider_type: remote::gemini
+    config:
+      api_key: ${env.GEMINI_API_KEY:=}
+  - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
+    provider_type: remote::vertexai
+    config:
+      project: ${env.VERTEX_AI_PROJECT:=}
+      location: ${env.VERTEX_AI_LOCATION:=us-central1}
+  - provider_id: groq
+    provider_type: remote::groq
+    config:
+      url: https://api.groq.com
+      api_key: ${env.GROQ_API_KEY:=}
+  - provider_id: sambanova
+    provider_type: remote::sambanova
+    config:
+      url: https://api.sambanova.ai/v1
+      api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
+    config:
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db
+      persistence:
+        namespace: vector_io::sqlite_vec
+        backend: kv_default
+  - provider_id: ${env.MILVUS_URL:+milvus}
+    provider_type: inline::milvus
+    config:
+      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db
+      persistence:
+        namespace: vector_io::milvus
+        backend: kv_default
+  - provider_id: ${env.CHROMADB_URL:+chromadb}
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  - provider_id: ${env.PGVECTOR_DB:+pgvector}
+    provider_type: remote::pgvector
+    config:
+      host: ${env.PGVECTOR_HOST:=localhost}
+      port: ${env.PGVECTOR_PORT:=5432}
+      db: ${env.PGVECTOR_DB:=}
+      user: ${env.PGVECTOR_USER:=}
+      password: ${env.PGVECTOR_PASSWORD:=}
+      persistence:
+        namespace: vector_io::pgvector
+        backend: kv_default
+  - provider_id: ${env.QDRANT_URL:+qdrant}
+    provider_type: remote::qdrant
+    config:
+      api_key: ${env.QDRANT_API_KEY:=}
+      persistence:
+        namespace: vector_io::qdrant_remote
+        backend: kv_default
+  - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
+    provider_type: remote::weaviate
+    config:
+      weaviate_api_key: null
+      weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
+      persistence:
+        namespace: vector_io::weaviate
+        backend: kv_default
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  - provider_id: code-scanner
+    provider_type: inline::code-scanner
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  post_training:
+  - provider_id: huggingface-gpu
+    provider_type: inline::huggingface-gpu
+    config:
+      checkpoint_format: huggingface
+      distributed_backend: null
+      device: cpu
+      dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+  batches:
+  - provider_id: reference
+    provider_type: inline::reference
+    config:
+      kvstore:
+        namespace: batches
+        backend: kv_default
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
+registered_resources:
+  models: []
+  shields:
+  - shield_id: llama-guard
+    provider_id: ${env.SAFETY_MODEL:+llama-guard}
+    provider_shield_id: ${env.SAFETY_MODEL:=}
+  - shield_id: code-scanner
+    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
+    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
+safety:
+  default_shield_id: llama-guard
diff --git a/llama_stack/distributions/starter-gpu/starter_gpu.py b/src/llama_stack/distributions/starter-gpu/starter_gpu.py
similarity index 100%
rename from llama_stack/distributions/starter-gpu/starter_gpu.py
rename to src/llama_stack/distributions/starter-gpu/starter_gpu.py
diff --git a/llama_stack/distributions/starter/__init__.py b/src/llama_stack/distributions/starter/__init__.py
similarity index 100%
rename from llama_stack/distributions/starter/__init__.py
rename to src/llama_stack/distributions/starter/__init__.py
diff --git a/src/llama_stack/distributions/starter/build.yaml b/src/llama_stack/distributions/starter/build.yaml
new file mode 100644
index 000000000..baa80ef3e
--- /dev/null
+++ b/src/llama_stack/distributions/starter/build.yaml
@@ -0,0 +1,60 @@
+version: 2
+distribution_spec:
+  description: Quick start template for running Llama Stack with several popular providers.
+    This distribution is intended for CPU-only environments.
+  providers:
+    inference:
+    - provider_type: remote::cerebras
+    - provider_type: remote::ollama
+    - provider_type: remote::vllm
+    - provider_type: remote::tgi
+    - provider_type: remote::fireworks
+    - provider_type: remote::together
+    - provider_type: remote::bedrock
+    - provider_type: remote::nvidia
+    - provider_type: remote::openai
+    - provider_type: remote::anthropic
+    - provider_type: remote::gemini
+    - provider_type: remote::vertexai
+    - provider_type: remote::groq
+    - provider_type: remote::sambanova
+    - provider_type: remote::azure
+    - provider_type: inline::sentence-transformers
+    vector_io:
+    - provider_type: inline::faiss
+    - provider_type: inline::sqlite-vec
+    - provider_type: inline::milvus
+    - provider_type: remote::chromadb
+    - provider_type: remote::pgvector
+    - provider_type: remote::qdrant
+    - provider_type: remote::weaviate
+    files:
+    - provider_type: inline::localfs
+    safety:
+    - provider_type: inline::llama-guard
+    - provider_type: inline::code-scanner
+    agents:
+    - provider_type: inline::meta-reference
+    post_training:
+    - provider_type: inline::torchtune-cpu
+    eval:
+    - provider_type: inline::meta-reference
+    datasetio:
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
+    scoring:
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+    - provider_type: remote::model-context-protocol
+    batches:
+    - provider_type: inline::reference
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- asyncpg
+- sqlalchemy[asyncio]
diff --git a/src/llama_stack/distributions/starter/run.yaml b/src/llama_stack/distributions/starter/run.yaml
new file mode 100644
index 000000000..eb4652af0
--- /dev/null
+++ b/src/llama_stack/distributions/starter/run.yaml
@@ -0,0 +1,281 @@
+version: 2
+image_name: starter
+apis:
+- agents
+- batches
+- datasetio
+- eval
+- files
+- inference
+- post_training
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
+    provider_type: remote::cerebras
+    config:
+      base_url: https://api.cerebras.ai
+      api_key: ${env.CEREBRAS_API_KEY:=}
+  - provider_id: ${env.OLLAMA_URL:+ollama}
+    provider_type: remote::ollama
+    config:
+      url: ${env.OLLAMA_URL:=http://localhost:11434}
+  - provider_id: ${env.VLLM_URL:+vllm}
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL:=}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: ${env.TGI_URL:+tgi}
+    provider_type: remote::tgi
+    config:
+      url: ${env.TGI_URL:=}
+  - provider_id: fireworks
+    provider_type: remote::fireworks
+    config:
+      url: https://api.fireworks.ai/inference/v1
+      api_key: ${env.FIREWORKS_API_KEY:=}
+  - provider_id: together
+    provider_type: remote::together
+    config:
+      url: https://api.together.xyz/v1
+      api_key: ${env.TOGETHER_API_KEY:=}
+  - provider_id: bedrock
+    provider_type: remote::bedrock
+  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
+    provider_type: remote::nvidia
+    config:
+      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      api_key: ${env.NVIDIA_API_KEY:=}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY:=}
+      base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
+  - provider_id: anthropic
+    provider_type: remote::anthropic
+    config:
+      api_key: ${env.ANTHROPIC_API_KEY:=}
+  - provider_id: gemini
+    provider_type: remote::gemini
+    config:
+      api_key: ${env.GEMINI_API_KEY:=}
+  - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
+    provider_type: remote::vertexai
+    config:
+      project: ${env.VERTEX_AI_PROJECT:=}
+      location: ${env.VERTEX_AI_LOCATION:=us-central1}
+  - provider_id: groq
+    provider_type: remote::groq
+    config:
+      url: https://api.groq.com
+      api_key: ${env.GROQ_API_KEY:=}
+  - provider_id: sambanova
+    provider_type: remote::sambanova
+    config:
+      url: https://api.sambanova.ai/v1
+      api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
+    config:
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
+      persistence:
+        namespace: vector_io::sqlite_vec
+        backend: kv_default
+  - provider_id: ${env.MILVUS_URL:+milvus}
+    provider_type: inline::milvus
+    config:
+      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
+      persistence:
+        namespace: vector_io::milvus
+        backend: kv_default
+  - provider_id: ${env.CHROMADB_URL:+chromadb}
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  - provider_id: ${env.PGVECTOR_DB:+pgvector}
+    provider_type: remote::pgvector
+    config:
+      host: ${env.PGVECTOR_HOST:=localhost}
+      port: ${env.PGVECTOR_PORT:=5432}
+      db: ${env.PGVECTOR_DB:=}
+      user: ${env.PGVECTOR_USER:=}
+      password: ${env.PGVECTOR_PASSWORD:=}
+      persistence:
+        namespace: vector_io::pgvector
+        backend: kv_default
+  - provider_id: ${env.QDRANT_URL:+qdrant}
+    provider_type: remote::qdrant
+    config:
+      api_key: ${env.QDRANT_API_KEY:=}
+      persistence:
+        namespace: vector_io::qdrant_remote
+        backend: kv_default
+  - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
+    provider_type: remote::weaviate
+    config:
+      weaviate_api_key: null
+      weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
+      persistence:
+        namespace: vector_io::weaviate
+        backend: kv_default
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  - provider_id: code-scanner
+    provider_type: inline::code-scanner
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  post_training:
+  - provider_id: torchtune-cpu
+    provider_type: inline::torchtune-cpu
+    config:
+      checkpoint_format: meta
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+  batches:
+  - provider_id: reference
+    provider_type: inline::reference
+    config:
+      kvstore:
+        namespace: batches
+        backend: kv_default
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
+registered_resources:
+  models: []
+  shields:
+  - shield_id: llama-guard
+    provider_id: ${env.SAFETY_MODEL:+llama-guard}
+    provider_shield_id: ${env.SAFETY_MODEL:=}
+  - shield_id: code-scanner
+    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
+    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
+safety:
+  default_shield_id: llama-guard
diff --git a/llama_stack/distributions/starter/starter.py b/src/llama_stack/distributions/starter/starter.py
similarity index 100%
rename from llama_stack/distributions/starter/starter.py
rename to src/llama_stack/distributions/starter/starter.py
diff --git a/llama_stack/distributions/template.py b/src/llama_stack/distributions/template.py
similarity index 100%
rename from llama_stack/distributions/template.py
rename to src/llama_stack/distributions/template.py
diff --git a/llama_stack/distributions/watsonx/__init__.py b/src/llama_stack/distributions/watsonx/__init__.py
similarity index 100%
rename from llama_stack/distributions/watsonx/__init__.py
rename to src/llama_stack/distributions/watsonx/__init__.py
diff --git a/src/llama_stack/distributions/watsonx/build.yaml b/src/llama_stack/distributions/watsonx/build.yaml
new file mode 100644
index 000000000..dba1a94e2
--- /dev/null
+++ b/src/llama_stack/distributions/watsonx/build.yaml
@@ -0,0 +1,33 @@
+version: 2
+distribution_spec:
+  description: Use watsonx for running LLM inference
+  providers:
+    inference:
+    - provider_type: remote::watsonx
+    - provider_type: inline::sentence-transformers
+    vector_io:
+    - provider_type: inline::faiss
+    safety:
+    - provider_type: inline::llama-guard
+    agents:
+    - provider_type: inline::meta-reference
+    eval:
+    - provider_type: inline::meta-reference
+    datasetio:
+    - provider_type: remote::huggingface
+    - provider_type: inline::localfs
+    scoring:
+    - provider_type: inline::basic
+    - provider_type: inline::llm-as-judge
+    - provider_type: inline::braintrust
+    tool_runtime:
+    - provider_type: remote::brave-search
+    - provider_type: remote::tavily-search
+    - provider_type: inline::rag-runtime
+    - provider_type: remote::model-context-protocol
+    files:
+    - provider_type: inline::localfs
+image_type: venv
+additional_pip_packages:
+- aiosqlite
+- sqlalchemy[asyncio]
diff --git a/src/llama_stack/distributions/watsonx/run.yaml b/src/llama_stack/distributions/watsonx/run.yaml
new file mode 100644
index 000000000..8456115d2
--- /dev/null
+++ b/src/llama_stack/distributions/watsonx/run.yaml
@@ -0,0 +1,136 @@
+version: 2
+image_name: watsonx
+apis:
+- agents
+- datasetio
+- eval
+- files
+- inference
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: watsonx
+    provider_type: remote::watsonx
+    config:
+      url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
+      api_key: ${env.WATSONX_API_KEY:=}
+      project_id: ${env.WATSONX_PROJECT_ID:=}
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/watsonx/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+storage:
+  backends:
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/kvstore.db
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/sql_store.db
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
+registered_resources:
+  models: []
+  shields: []
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
diff --git a/llama_stack/distributions/watsonx/watsonx.py b/src/llama_stack/distributions/watsonx/watsonx.py
similarity index 100%
rename from llama_stack/distributions/watsonx/watsonx.py
rename to src/llama_stack/distributions/watsonx/watsonx.py
diff --git a/llama_stack/env.py b/src/llama_stack/env.py
similarity index 100%
rename from llama_stack/env.py
rename to src/llama_stack/env.py
diff --git a/llama_stack/log.py b/src/llama_stack/log.py
similarity index 100%
rename from llama_stack/log.py
rename to src/llama_stack/log.py
diff --git a/llama_stack/models/__init__.py b/src/llama_stack/models/__init__.py
similarity index 100%
rename from llama_stack/models/__init__.py
rename to src/llama_stack/models/__init__.py
diff --git a/llama_stack/models/llama/__init__.py b/src/llama_stack/models/llama/__init__.py
similarity index 100%
rename from llama_stack/models/llama/__init__.py
rename to src/llama_stack/models/llama/__init__.py
diff --git a/llama_stack/models/llama/checkpoint.py b/src/llama_stack/models/llama/checkpoint.py
similarity index 100%
rename from llama_stack/models/llama/checkpoint.py
rename to src/llama_stack/models/llama/checkpoint.py
diff --git a/llama_stack/models/llama/datatypes.py b/src/llama_stack/models/llama/datatypes.py
similarity index 100%
rename from llama_stack/models/llama/datatypes.py
rename to src/llama_stack/models/llama/datatypes.py
diff --git a/llama_stack/models/llama/hadamard_utils.py b/src/llama_stack/models/llama/hadamard_utils.py
similarity index 100%
rename from llama_stack/models/llama/hadamard_utils.py
rename to src/llama_stack/models/llama/hadamard_utils.py
diff --git a/llama_stack/models/llama/llama3/__init__.py b/src/llama_stack/models/llama/llama3/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama3/__init__.py
rename to src/llama_stack/models/llama/llama3/__init__.py
diff --git a/llama_stack/models/llama/llama3/args.py b/src/llama_stack/models/llama/llama3/args.py
similarity index 100%
rename from llama_stack/models/llama/llama3/args.py
rename to src/llama_stack/models/llama/llama3/args.py
diff --git a/llama_stack/models/llama/llama3/chat_format.py b/src/llama_stack/models/llama/llama3/chat_format.py
similarity index 100%
rename from llama_stack/models/llama/llama3/chat_format.py
rename to src/llama_stack/models/llama/llama3/chat_format.py
diff --git a/llama_stack/models/llama/llama3/dog.jpg b/src/llama_stack/models/llama/llama3/dog.jpg
similarity index 100%
rename from llama_stack/models/llama/llama3/dog.jpg
rename to src/llama_stack/models/llama/llama3/dog.jpg
diff --git a/llama_stack/models/llama/llama3/generation.py b/src/llama_stack/models/llama/llama3/generation.py
similarity index 100%
rename from llama_stack/models/llama/llama3/generation.py
rename to src/llama_stack/models/llama/llama3/generation.py
diff --git a/llama_stack/models/llama/llama3/interface.py b/src/llama_stack/models/llama/llama3/interface.py
similarity index 100%
rename from llama_stack/models/llama/llama3/interface.py
rename to src/llama_stack/models/llama/llama3/interface.py
diff --git a/llama_stack/models/llama/llama3/model.py b/src/llama_stack/models/llama/llama3/model.py
similarity index 100%
rename from llama_stack/models/llama/llama3/model.py
rename to src/llama_stack/models/llama/llama3/model.py
diff --git a/llama_stack/models/llama/llama3/multimodal/__init__.py b/src/llama_stack/models/llama/llama3/multimodal/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama3/multimodal/__init__.py
rename to src/llama_stack/models/llama/llama3/multimodal/__init__.py
diff --git a/llama_stack/models/llama/llama3/multimodal/encoder_utils.py b/src/llama_stack/models/llama/llama3/multimodal/encoder_utils.py
similarity index 100%
rename from llama_stack/models/llama/llama3/multimodal/encoder_utils.py
rename to src/llama_stack/models/llama/llama3/multimodal/encoder_utils.py
diff --git a/llama_stack/models/llama/llama3/multimodal/image_transform.py b/src/llama_stack/models/llama/llama3/multimodal/image_transform.py
similarity index 100%
rename from llama_stack/models/llama/llama3/multimodal/image_transform.py
rename to src/llama_stack/models/llama/llama3/multimodal/image_transform.py
diff --git a/llama_stack/models/llama/llama3/multimodal/model.py b/src/llama_stack/models/llama/llama3/multimodal/model.py
similarity index 100%
rename from llama_stack/models/llama/llama3/multimodal/model.py
rename to src/llama_stack/models/llama/llama3/multimodal/model.py
diff --git a/llama_stack/models/llama/llama3/multimodal/utils.py b/src/llama_stack/models/llama/llama3/multimodal/utils.py
similarity index 100%
rename from llama_stack/models/llama/llama3/multimodal/utils.py
rename to src/llama_stack/models/llama/llama3/multimodal/utils.py
diff --git a/llama_stack/models/llama/llama3/pasta.jpeg b/src/llama_stack/models/llama/llama3/pasta.jpeg
similarity index 100%
rename from llama_stack/models/llama/llama3/pasta.jpeg
rename to src/llama_stack/models/llama/llama3/pasta.jpeg
diff --git a/llama_stack/models/llama/llama3/prompt_templates/__init__.py b/src/llama_stack/models/llama/llama3/prompt_templates/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama3/prompt_templates/__init__.py
rename to src/llama_stack/models/llama/llama3/prompt_templates/__init__.py
diff --git a/llama_stack/models/llama/llama3/prompt_templates/base.py b/src/llama_stack/models/llama/llama3/prompt_templates/base.py
similarity index 100%
rename from llama_stack/models/llama/llama3/prompt_templates/base.py
rename to src/llama_stack/models/llama/llama3/prompt_templates/base.py
diff --git a/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py b/src/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py
similarity index 100%
rename from llama_stack/models/llama/llama3/prompt_templates/system_prompts.py
rename to src/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py
diff --git a/llama_stack/models/llama/llama3/prompt_templates/tool_response.py b/src/llama_stack/models/llama/llama3/prompt_templates/tool_response.py
similarity index 100%
rename from llama_stack/models/llama/llama3/prompt_templates/tool_response.py
rename to src/llama_stack/models/llama/llama3/prompt_templates/tool_response.py
diff --git a/llama_stack/models/llama/llama3/quantization/__init__.py b/src/llama_stack/models/llama/llama3/quantization/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama3/quantization/__init__.py
rename to src/llama_stack/models/llama/llama3/quantization/__init__.py
diff --git a/llama_stack/models/llama/llama3/quantization/loader.py b/src/llama_stack/models/llama/llama3/quantization/loader.py
similarity index 100%
rename from llama_stack/models/llama/llama3/quantization/loader.py
rename to src/llama_stack/models/llama/llama3/quantization/loader.py
diff --git a/llama_stack/models/llama/llama3/template_data.py b/src/llama_stack/models/llama/llama3/template_data.py
similarity index 100%
rename from llama_stack/models/llama/llama3/template_data.py
rename to src/llama_stack/models/llama/llama3/template_data.py
diff --git a/llama_stack/models/llama/llama3/tokenizer.model b/src/llama_stack/models/llama/llama3/tokenizer.model
similarity index 100%
rename from llama_stack/models/llama/llama3/tokenizer.model
rename to src/llama_stack/models/llama/llama3/tokenizer.model
diff --git a/llama_stack/models/llama/llama3/tokenizer.py b/src/llama_stack/models/llama/llama3/tokenizer.py
similarity index 100%
rename from llama_stack/models/llama/llama3/tokenizer.py
rename to src/llama_stack/models/llama/llama3/tokenizer.py
diff --git a/llama_stack/models/llama/llama3/tool_utils.py b/src/llama_stack/models/llama/llama3/tool_utils.py
similarity index 100%
rename from llama_stack/models/llama/llama3/tool_utils.py
rename to src/llama_stack/models/llama/llama3/tool_utils.py
diff --git a/llama_stack/models/llama/llama3_1/__init__.py b/src/llama_stack/models/llama/llama3_1/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama3_1/__init__.py
rename to src/llama_stack/models/llama/llama3_1/__init__.py
diff --git a/llama_stack/models/llama/llama3_1/prompt_format.md b/src/llama_stack/models/llama/llama3_1/prompt_format.md
similarity index 100%
rename from llama_stack/models/llama/llama3_1/prompt_format.md
rename to src/llama_stack/models/llama/llama3_1/prompt_format.md
diff --git a/llama_stack/models/llama/llama3_1/prompts.py b/src/llama_stack/models/llama/llama3_1/prompts.py
similarity index 100%
rename from llama_stack/models/llama/llama3_1/prompts.py
rename to src/llama_stack/models/llama/llama3_1/prompts.py
diff --git a/llama_stack/models/llama/llama3_2/__init__.py b/src/llama_stack/models/llama/llama3_2/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama3_2/__init__.py
rename to src/llama_stack/models/llama/llama3_2/__init__.py
diff --git a/llama_stack/models/llama/llama3_2/prompts_text.py b/src/llama_stack/models/llama/llama3_2/prompts_text.py
similarity index 100%
rename from llama_stack/models/llama/llama3_2/prompts_text.py
rename to src/llama_stack/models/llama/llama3_2/prompts_text.py
diff --git a/llama_stack/models/llama/llama3_2/prompts_vision.py b/src/llama_stack/models/llama/llama3_2/prompts_vision.py
similarity index 100%
rename from llama_stack/models/llama/llama3_2/prompts_vision.py
rename to src/llama_stack/models/llama/llama3_2/prompts_vision.py
diff --git a/llama_stack/models/llama/llama3_2/text_prompt_format.md b/src/llama_stack/models/llama/llama3_2/text_prompt_format.md
similarity index 100%
rename from llama_stack/models/llama/llama3_2/text_prompt_format.md
rename to src/llama_stack/models/llama/llama3_2/text_prompt_format.md
diff --git a/llama_stack/models/llama/llama3_2/vision_prompt_format.md b/src/llama_stack/models/llama/llama3_2/vision_prompt_format.md
similarity index 100%
rename from llama_stack/models/llama/llama3_2/vision_prompt_format.md
rename to src/llama_stack/models/llama/llama3_2/vision_prompt_format.md
diff --git a/llama_stack/models/llama/llama3_3/__init__.py b/src/llama_stack/models/llama/llama3_3/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama3_3/__init__.py
rename to src/llama_stack/models/llama/llama3_3/__init__.py
diff --git a/llama_stack/models/llama/llama3_3/prompts.py b/src/llama_stack/models/llama/llama3_3/prompts.py
similarity index 100%
rename from llama_stack/models/llama/llama3_3/prompts.py
rename to src/llama_stack/models/llama/llama3_3/prompts.py
diff --git a/llama_stack/models/llama/llama4/__init__.py b/src/llama_stack/models/llama/llama4/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama4/__init__.py
rename to src/llama_stack/models/llama/llama4/__init__.py
diff --git a/llama_stack/models/llama/llama4/args.py b/src/llama_stack/models/llama/llama4/args.py
similarity index 100%
rename from llama_stack/models/llama/llama4/args.py
rename to src/llama_stack/models/llama/llama4/args.py
diff --git a/llama_stack/models/llama/llama4/chat_format.py b/src/llama_stack/models/llama/llama4/chat_format.py
similarity index 100%
rename from llama_stack/models/llama/llama4/chat_format.py
rename to src/llama_stack/models/llama/llama4/chat_format.py
diff --git a/llama_stack/models/llama/llama4/datatypes.py b/src/llama_stack/models/llama/llama4/datatypes.py
similarity index 100%
rename from llama_stack/models/llama/llama4/datatypes.py
rename to src/llama_stack/models/llama/llama4/datatypes.py
diff --git a/llama_stack/models/llama/llama4/ffn.py b/src/llama_stack/models/llama/llama4/ffn.py
similarity index 100%
rename from llama_stack/models/llama/llama4/ffn.py
rename to src/llama_stack/models/llama/llama4/ffn.py
diff --git a/llama_stack/models/llama/llama4/generation.py b/src/llama_stack/models/llama/llama4/generation.py
similarity index 100%
rename from llama_stack/models/llama/llama4/generation.py
rename to src/llama_stack/models/llama/llama4/generation.py
diff --git a/llama_stack/models/llama/llama4/model.py b/src/llama_stack/models/llama/llama4/model.py
similarity index 100%
rename from llama_stack/models/llama/llama4/model.py
rename to src/llama_stack/models/llama/llama4/model.py
diff --git a/llama_stack/models/llama/llama4/moe.py b/src/llama_stack/models/llama/llama4/moe.py
similarity index 100%
rename from llama_stack/models/llama/llama4/moe.py
rename to src/llama_stack/models/llama/llama4/moe.py
diff --git a/llama_stack/models/llama/llama4/preprocess.py b/src/llama_stack/models/llama/llama4/preprocess.py
similarity index 100%
rename from llama_stack/models/llama/llama4/preprocess.py
rename to src/llama_stack/models/llama/llama4/preprocess.py
diff --git a/llama_stack/models/llama/llama4/prompt_format.md b/src/llama_stack/models/llama/llama4/prompt_format.md
similarity index 100%
rename from llama_stack/models/llama/llama4/prompt_format.md
rename to src/llama_stack/models/llama/llama4/prompt_format.md
diff --git a/llama_stack/models/llama/llama4/prompt_templates/__init__.py b/src/llama_stack/models/llama/llama4/prompt_templates/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama4/prompt_templates/__init__.py
rename to src/llama_stack/models/llama/llama4/prompt_templates/__init__.py
diff --git a/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py b/src/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py
similarity index 100%
rename from llama_stack/models/llama/llama4/prompt_templates/system_prompts.py
rename to src/llama_stack/models/llama/llama4/prompt_templates/system_prompts.py
diff --git a/llama_stack/models/llama/llama4/prompts.py b/src/llama_stack/models/llama/llama4/prompts.py
similarity index 100%
rename from llama_stack/models/llama/llama4/prompts.py
rename to src/llama_stack/models/llama/llama4/prompts.py
diff --git a/llama_stack/models/llama/llama4/quantization/__init__.py b/src/llama_stack/models/llama/llama4/quantization/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama4/quantization/__init__.py
rename to src/llama_stack/models/llama/llama4/quantization/__init__.py
diff --git a/llama_stack/models/llama/llama4/quantization/loader.py b/src/llama_stack/models/llama/llama4/quantization/loader.py
similarity index 100%
rename from llama_stack/models/llama/llama4/quantization/loader.py
rename to src/llama_stack/models/llama/llama4/quantization/loader.py
diff --git a/llama_stack/models/llama/llama4/tokenizer.model b/src/llama_stack/models/llama/llama4/tokenizer.model
similarity index 100%
rename from llama_stack/models/llama/llama4/tokenizer.model
rename to src/llama_stack/models/llama/llama4/tokenizer.model
diff --git a/llama_stack/models/llama/llama4/tokenizer.py b/src/llama_stack/models/llama/llama4/tokenizer.py
similarity index 100%
rename from llama_stack/models/llama/llama4/tokenizer.py
rename to src/llama_stack/models/llama/llama4/tokenizer.py
diff --git a/llama_stack/models/llama/llama4/vision/__init__.py b/src/llama_stack/models/llama/llama4/vision/__init__.py
similarity index 100%
rename from llama_stack/models/llama/llama4/vision/__init__.py
rename to src/llama_stack/models/llama/llama4/vision/__init__.py
diff --git a/llama_stack/models/llama/llama4/vision/embedding.py b/src/llama_stack/models/llama/llama4/vision/embedding.py
similarity index 100%
rename from llama_stack/models/llama/llama4/vision/embedding.py
rename to src/llama_stack/models/llama/llama4/vision/embedding.py
diff --git a/llama_stack/models/llama/llama4/vision/encoder.py b/src/llama_stack/models/llama/llama4/vision/encoder.py
similarity index 100%
rename from llama_stack/models/llama/llama4/vision/encoder.py
rename to src/llama_stack/models/llama/llama4/vision/encoder.py
diff --git a/llama_stack/models/llama/prompt_format.py b/src/llama_stack/models/llama/prompt_format.py
similarity index 100%
rename from llama_stack/models/llama/prompt_format.py
rename to src/llama_stack/models/llama/prompt_format.py
diff --git a/llama_stack/models/llama/quantize_impls.py b/src/llama_stack/models/llama/quantize_impls.py
similarity index 100%
rename from llama_stack/models/llama/quantize_impls.py
rename to src/llama_stack/models/llama/quantize_impls.py
diff --git a/llama_stack/models/llama/resources/dog.jpg b/src/llama_stack/models/llama/resources/dog.jpg
similarity index 100%
rename from llama_stack/models/llama/resources/dog.jpg
rename to src/llama_stack/models/llama/resources/dog.jpg
diff --git a/llama_stack/models/llama/resources/pasta.jpeg b/src/llama_stack/models/llama/resources/pasta.jpeg
similarity index 100%
rename from llama_stack/models/llama/resources/pasta.jpeg
rename to src/llama_stack/models/llama/resources/pasta.jpeg
diff --git a/llama_stack/models/llama/resources/small_dog.jpg b/src/llama_stack/models/llama/resources/small_dog.jpg
similarity index 100%
rename from llama_stack/models/llama/resources/small_dog.jpg
rename to src/llama_stack/models/llama/resources/small_dog.jpg
diff --git a/llama_stack/models/llama/sku_list.py b/src/llama_stack/models/llama/sku_list.py
similarity index 100%
rename from llama_stack/models/llama/sku_list.py
rename to src/llama_stack/models/llama/sku_list.py
diff --git a/llama_stack/models/llama/sku_types.py b/src/llama_stack/models/llama/sku_types.py
similarity index 100%
rename from llama_stack/models/llama/sku_types.py
rename to src/llama_stack/models/llama/sku_types.py
diff --git a/llama_stack/models/llama/tokenizer_utils.py b/src/llama_stack/models/llama/tokenizer_utils.py
similarity index 100%
rename from llama_stack/models/llama/tokenizer_utils.py
rename to src/llama_stack/models/llama/tokenizer_utils.py
diff --git a/llama_stack/providers/__init__.py b/src/llama_stack/providers/__init__.py
similarity index 100%
rename from llama_stack/providers/__init__.py
rename to src/llama_stack/providers/__init__.py
diff --git a/llama_stack/providers/datatypes.py b/src/llama_stack/providers/datatypes.py
similarity index 100%
rename from llama_stack/providers/datatypes.py
rename to src/llama_stack/providers/datatypes.py
diff --git a/llama_stack/providers/inline/__init__.py b/src/llama_stack/providers/inline/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/__init__.py
rename to src/llama_stack/providers/inline/__init__.py
diff --git a/llama_stack/providers/inline/agents/__init__.py b/src/llama_stack/providers/inline/agents/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/agents/__init__.py
rename to src/llama_stack/providers/inline/agents/__init__.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/__init__.py b/src/llama_stack/providers/inline/agents/meta_reference/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/__init__.py
rename to src/llama_stack/providers/inline/agents/meta_reference/__init__.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/src/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/agent_instance.py
rename to src/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/src/llama_stack/providers/inline/agents/meta_reference/agents.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/agents.py
rename to src/llama_stack/providers/inline/agents/meta_reference/agents.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/config.py b/src/llama_stack/providers/inline/agents/meta_reference/config.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/config.py
rename to src/llama_stack/providers/inline/agents/meta_reference/config.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/persistence.py b/src/llama_stack/providers/inline/agents/meta_reference/persistence.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/persistence.py
rename to src/llama_stack/providers/inline/agents/meta_reference/persistence.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/__init__.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/responses/__init__.py
rename to src/llama_stack/providers/inline/agents/meta_reference/responses/__init__.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
rename to src/llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
rename to src/llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
rename to src/llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/types.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/responses/types.py
rename to src/llama_stack/providers/inline/agents/meta_reference/responses/types.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/responses/utils.py b/src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/responses/utils.py
rename to src/llama_stack/providers/inline/agents/meta_reference/responses/utils.py
diff --git a/llama_stack/providers/inline/agents/meta_reference/safety.py b/src/llama_stack/providers/inline/agents/meta_reference/safety.py
similarity index 100%
rename from llama_stack/providers/inline/agents/meta_reference/safety.py
rename to src/llama_stack/providers/inline/agents/meta_reference/safety.py
diff --git a/llama_stack/providers/inline/batches/__init__.py b/src/llama_stack/providers/inline/batches/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/batches/__init__.py
rename to src/llama_stack/providers/inline/batches/__init__.py
diff --git a/llama_stack/providers/inline/batches/reference/__init__.py b/src/llama_stack/providers/inline/batches/reference/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/batches/reference/__init__.py
rename to src/llama_stack/providers/inline/batches/reference/__init__.py
diff --git a/llama_stack/providers/inline/batches/reference/batches.py b/src/llama_stack/providers/inline/batches/reference/batches.py
similarity index 100%
rename from llama_stack/providers/inline/batches/reference/batches.py
rename to src/llama_stack/providers/inline/batches/reference/batches.py
diff --git a/llama_stack/providers/inline/batches/reference/config.py b/src/llama_stack/providers/inline/batches/reference/config.py
similarity index 100%
rename from llama_stack/providers/inline/batches/reference/config.py
rename to src/llama_stack/providers/inline/batches/reference/config.py
diff --git a/llama_stack/providers/inline/datasetio/__init__.py b/src/llama_stack/providers/inline/datasetio/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/datasetio/__init__.py
rename to src/llama_stack/providers/inline/datasetio/__init__.py
diff --git a/llama_stack/providers/inline/datasetio/localfs/__init__.py b/src/llama_stack/providers/inline/datasetio/localfs/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/datasetio/localfs/__init__.py
rename to src/llama_stack/providers/inline/datasetio/localfs/__init__.py
diff --git a/llama_stack/providers/inline/datasetio/localfs/config.py b/src/llama_stack/providers/inline/datasetio/localfs/config.py
similarity index 100%
rename from llama_stack/providers/inline/datasetio/localfs/config.py
rename to src/llama_stack/providers/inline/datasetio/localfs/config.py
diff --git a/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
similarity index 100%
rename from llama_stack/providers/inline/datasetio/localfs/datasetio.py
rename to src/llama_stack/providers/inline/datasetio/localfs/datasetio.py
diff --git a/llama_stack/providers/inline/eval/__init__.py b/src/llama_stack/providers/inline/eval/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/eval/__init__.py
rename to src/llama_stack/providers/inline/eval/__init__.py
diff --git a/llama_stack/providers/inline/eval/meta_reference/__init__.py b/src/llama_stack/providers/inline/eval/meta_reference/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/eval/meta_reference/__init__.py
rename to src/llama_stack/providers/inline/eval/meta_reference/__init__.py
diff --git a/llama_stack/providers/inline/eval/meta_reference/config.py b/src/llama_stack/providers/inline/eval/meta_reference/config.py
similarity index 100%
rename from llama_stack/providers/inline/eval/meta_reference/config.py
rename to src/llama_stack/providers/inline/eval/meta_reference/config.py
diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/src/llama_stack/providers/inline/eval/meta_reference/eval.py
similarity index 100%
rename from llama_stack/providers/inline/eval/meta_reference/eval.py
rename to src/llama_stack/providers/inline/eval/meta_reference/eval.py
diff --git a/llama_stack/providers/inline/files/localfs/__init__.py b/src/llama_stack/providers/inline/files/localfs/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/files/localfs/__init__.py
rename to src/llama_stack/providers/inline/files/localfs/__init__.py
diff --git a/llama_stack/providers/inline/files/localfs/config.py b/src/llama_stack/providers/inline/files/localfs/config.py
similarity index 100%
rename from llama_stack/providers/inline/files/localfs/config.py
rename to src/llama_stack/providers/inline/files/localfs/config.py
diff --git a/llama_stack/providers/inline/files/localfs/files.py b/src/llama_stack/providers/inline/files/localfs/files.py
similarity index 100%
rename from llama_stack/providers/inline/files/localfs/files.py
rename to src/llama_stack/providers/inline/files/localfs/files.py
diff --git a/llama_stack/providers/inline/inference/__init__.py b/src/llama_stack/providers/inline/inference/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/inference/__init__.py
rename to src/llama_stack/providers/inline/inference/__init__.py
diff --git a/llama_stack/providers/inline/inference/meta_reference/__init__.py b/src/llama_stack/providers/inline/inference/meta_reference/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/inference/meta_reference/__init__.py
rename to src/llama_stack/providers/inline/inference/meta_reference/__init__.py
diff --git a/llama_stack/providers/inline/inference/meta_reference/common.py b/src/llama_stack/providers/inline/inference/meta_reference/common.py
similarity index 100%
rename from llama_stack/providers/inline/inference/meta_reference/common.py
rename to src/llama_stack/providers/inline/inference/meta_reference/common.py
diff --git a/llama_stack/providers/inline/inference/meta_reference/config.py b/src/llama_stack/providers/inline/inference/meta_reference/config.py
similarity index 100%
rename from llama_stack/providers/inline/inference/meta_reference/config.py
rename to src/llama_stack/providers/inline/inference/meta_reference/config.py
diff --git a/llama_stack/providers/inline/inference/meta_reference/generators.py b/src/llama_stack/providers/inline/inference/meta_reference/generators.py
similarity index 100%
rename from llama_stack/providers/inline/inference/meta_reference/generators.py
rename to src/llama_stack/providers/inline/inference/meta_reference/generators.py
diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/src/llama_stack/providers/inline/inference/meta_reference/inference.py
similarity index 100%
rename from llama_stack/providers/inline/inference/meta_reference/inference.py
rename to src/llama_stack/providers/inline/inference/meta_reference/inference.py
diff --git a/llama_stack/providers/inline/inference/meta_reference/model_parallel.py b/src/llama_stack/providers/inline/inference/meta_reference/model_parallel.py
similarity index 100%
rename from llama_stack/providers/inline/inference/meta_reference/model_parallel.py
rename to src/llama_stack/providers/inline/inference/meta_reference/model_parallel.py
diff --git a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py b/src/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py
similarity index 100%
rename from llama_stack/providers/inline/inference/meta_reference/parallel_utils.py
rename to src/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py
diff --git a/llama_stack/providers/inline/inference/sentence_transformers/__init__.py b/src/llama_stack/providers/inline/inference/sentence_transformers/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/inference/sentence_transformers/__init__.py
rename to src/llama_stack/providers/inline/inference/sentence_transformers/__init__.py
diff --git a/llama_stack/providers/inline/inference/sentence_transformers/config.py b/src/llama_stack/providers/inline/inference/sentence_transformers/config.py
similarity index 100%
rename from llama_stack/providers/inline/inference/sentence_transformers/config.py
rename to src/llama_stack/providers/inline/inference/sentence_transformers/config.py
diff --git a/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py b/src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
similarity index 100%
rename from llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
rename to src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers.py
diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj
similarity index 100%
rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj
rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj
diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata
similarity index 100%
rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata
rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata
diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
similarity index 100%
rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h
similarity index 100%
rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h
rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h
diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift
similarity index 100%
rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift
rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift
diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift
similarity index 100%
rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift
rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift
diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift
similarity index 100%
rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift
rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift
diff --git a/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift b/src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift
similarity index 100%
rename from llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift
rename to src/llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift
diff --git a/llama_stack/providers/inline/post_training/__init__.py b/src/llama_stack/providers/inline/post_training/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/__init__.py
rename to src/llama_stack/providers/inline/post_training/__init__.py
diff --git a/llama_stack/providers/inline/post_training/common/__init__.py b/src/llama_stack/providers/inline/post_training/common/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/common/__init__.py
rename to src/llama_stack/providers/inline/post_training/common/__init__.py
diff --git a/llama_stack/providers/inline/post_training/common/utils.py b/src/llama_stack/providers/inline/post_training/common/utils.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/common/utils.py
rename to src/llama_stack/providers/inline/post_training/common/utils.py
diff --git a/llama_stack/providers/inline/post_training/common/validator.py b/src/llama_stack/providers/inline/post_training/common/validator.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/common/validator.py
rename to src/llama_stack/providers/inline/post_training/common/validator.py
diff --git a/llama_stack/providers/inline/post_training/huggingface/__init__.py b/src/llama_stack/providers/inline/post_training/huggingface/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/huggingface/__init__.py
rename to src/llama_stack/providers/inline/post_training/huggingface/__init__.py
diff --git a/llama_stack/providers/inline/post_training/huggingface/config.py b/src/llama_stack/providers/inline/post_training/huggingface/config.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/huggingface/config.py
rename to src/llama_stack/providers/inline/post_training/huggingface/config.py
diff --git a/llama_stack/providers/inline/post_training/huggingface/post_training.py b/src/llama_stack/providers/inline/post_training/huggingface/post_training.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/huggingface/post_training.py
rename to src/llama_stack/providers/inline/post_training/huggingface/post_training.py
diff --git a/llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py
rename to src/llama_stack/providers/inline/post_training/huggingface/recipes/__init__.py
diff --git a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
rename to src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
diff --git a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py b/src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
rename to src/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
diff --git a/llama_stack/providers/inline/post_training/huggingface/utils.py b/src/llama_stack/providers/inline/post_training/huggingface/utils.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/huggingface/utils.py
rename to src/llama_stack/providers/inline/post_training/huggingface/utils.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/__init__.py b/src/llama_stack/providers/inline/post_training/torchtune/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/__init__.py
rename to src/llama_stack/providers/inline/post_training/torchtune/__init__.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/common/__init__.py b/src/llama_stack/providers/inline/post_training/torchtune/common/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/common/__init__.py
rename to src/llama_stack/providers/inline/post_training/torchtune/common/__init__.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py b/src/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py
rename to src/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/common/utils.py b/src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/common/utils.py
rename to src/llama_stack/providers/inline/post_training/torchtune/common/utils.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/config.py b/src/llama_stack/providers/inline/post_training/torchtune/config.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/config.py
rename to src/llama_stack/providers/inline/post_training/torchtune/config.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py b/src/llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py
rename to src/llama_stack/providers/inline/post_training/torchtune/datasets/__init__.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py b/src/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
rename to src/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/datasets/sft.py b/src/llama_stack/providers/inline/post_training/torchtune/datasets/sft.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/datasets/sft.py
rename to src/llama_stack/providers/inline/post_training/torchtune/datasets/sft.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/post_training.py b/src/llama_stack/providers/inline/post_training/torchtune/post_training.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/post_training.py
rename to src/llama_stack/providers/inline/post_training/torchtune/post_training.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py b/src/llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py
rename to src/llama_stack/providers/inline/post_training/torchtune/recipes/__init__.py
diff --git a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
similarity index 100%
rename from llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
rename to src/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py
diff --git a/llama_stack/providers/inline/safety/__init__.py b/src/llama_stack/providers/inline/safety/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/safety/__init__.py
rename to src/llama_stack/providers/inline/safety/__init__.py
diff --git a/llama_stack/providers/inline/safety/code_scanner/__init__.py b/src/llama_stack/providers/inline/safety/code_scanner/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/safety/code_scanner/__init__.py
rename to src/llama_stack/providers/inline/safety/code_scanner/__init__.py
diff --git a/llama_stack/providers/inline/safety/code_scanner/code_scanner.py b/src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
similarity index 100%
rename from llama_stack/providers/inline/safety/code_scanner/code_scanner.py
rename to src/llama_stack/providers/inline/safety/code_scanner/code_scanner.py
diff --git a/llama_stack/providers/inline/safety/code_scanner/config.py b/src/llama_stack/providers/inline/safety/code_scanner/config.py
similarity index 100%
rename from llama_stack/providers/inline/safety/code_scanner/config.py
rename to src/llama_stack/providers/inline/safety/code_scanner/config.py
diff --git a/llama_stack/providers/inline/safety/llama_guard/__init__.py b/src/llama_stack/providers/inline/safety/llama_guard/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/safety/llama_guard/__init__.py
rename to src/llama_stack/providers/inline/safety/llama_guard/__init__.py
diff --git a/llama_stack/providers/inline/safety/llama_guard/config.py b/src/llama_stack/providers/inline/safety/llama_guard/config.py
similarity index 100%
rename from llama_stack/providers/inline/safety/llama_guard/config.py
rename to src/llama_stack/providers/inline/safety/llama_guard/config.py
diff --git a/llama_stack/providers/inline/safety/llama_guard/llama_guard.py b/src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
similarity index 100%
rename from llama_stack/providers/inline/safety/llama_guard/llama_guard.py
rename to src/llama_stack/providers/inline/safety/llama_guard/llama_guard.py
diff --git a/llama_stack/providers/inline/safety/prompt_guard/__init__.py b/src/llama_stack/providers/inline/safety/prompt_guard/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/safety/prompt_guard/__init__.py
rename to src/llama_stack/providers/inline/safety/prompt_guard/__init__.py
diff --git a/llama_stack/providers/inline/safety/prompt_guard/config.py b/src/llama_stack/providers/inline/safety/prompt_guard/config.py
similarity index 100%
rename from llama_stack/providers/inline/safety/prompt_guard/config.py
rename to src/llama_stack/providers/inline/safety/prompt_guard/config.py
diff --git a/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py b/src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
similarity index 100%
rename from llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
rename to src/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py
diff --git a/llama_stack/providers/inline/scoring/__init__.py b/src/llama_stack/providers/inline/scoring/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/__init__.py
rename to src/llama_stack/providers/inline/scoring/__init__.py
diff --git a/llama_stack/providers/inline/scoring/basic/__init__.py b/src/llama_stack/providers/inline/scoring/basic/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/__init__.py
rename to src/llama_stack/providers/inline/scoring/basic/__init__.py
diff --git a/llama_stack/providers/inline/scoring/basic/config.py b/src/llama_stack/providers/inline/scoring/basic/config.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/config.py
rename to src/llama_stack/providers/inline/scoring/basic/config.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring.py b/src/llama_stack/providers/inline/scoring/basic/scoring.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/__init__.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/docvqa_scoring_fn.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/equality_scoring_fn.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/__init__.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/docvqa.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/equality.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/ifeval.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_math_response.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/subset_of.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/ifeval_scoring_fn.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_math_response_scoring_fn.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/regex_parser_scoring_fn.py
diff --git a/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py b/src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
rename to src/llama_stack/providers/inline/scoring/basic/scoring_fn/subset_of_scoring_fn.py
diff --git a/llama_stack/providers/inline/scoring/basic/utils/__init__.py b/src/llama_stack/providers/inline/scoring/basic/utils/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/utils/__init__.py
rename to src/llama_stack/providers/inline/scoring/basic/utils/__init__.py
diff --git a/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py b/src/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py
rename to src/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py
diff --git a/llama_stack/providers/inline/scoring/basic/utils/math_utils.py b/src/llama_stack/providers/inline/scoring/basic/utils/math_utils.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/basic/utils/math_utils.py
rename to src/llama_stack/providers/inline/scoring/basic/utils/math_utils.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/__init__.py b/src/llama_stack/providers/inline/scoring/braintrust/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/__init__.py
rename to src/llama_stack/providers/inline/scoring/braintrust/__init__.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/braintrust.py b/src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/braintrust.py
rename to src/llama_stack/providers/inline/scoring/braintrust/braintrust.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/config.py b/src/llama_stack/providers/inline/scoring/braintrust/config.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/config.py
rename to src/llama_stack/providers/inline/scoring/braintrust/config.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/__init__.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/__init__.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_correctness.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_relevancy.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/answer_similarity.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_entity_recall.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_precision.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_recall.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/context_relevancy.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/factuality.py
diff --git a/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py b/src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
rename to src/llama_stack/providers/inline/scoring/braintrust/scoring_fn/fn_defs/faithfulness.py
diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/llm_as_judge/__init__.py
rename to src/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py
diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/config.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/config.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/llm_as_judge/config.py
rename to src/llama_stack/providers/inline/scoring/llm_as_judge/config.py
diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
rename to src/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py
diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py
rename to src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/__init__.py
diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py
rename to src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/__init__.py
diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
rename to src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_405b_simpleqa.py
diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
rename to src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/fn_defs/llm_as_judge_base.py
diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py b/src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
rename to src/llama_stack/providers/inline/scoring/llm_as_judge/scoring_fn/llm_as_judge_scoring_fn.py
diff --git a/llama_stack/providers/inline/tool_runtime/__init__.py b/src/llama_stack/providers/inline/tool_runtime/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/tool_runtime/__init__.py
rename to src/llama_stack/providers/inline/tool_runtime/__init__.py
diff --git a/llama_stack/providers/inline/tool_runtime/rag/__init__.py b/src/llama_stack/providers/inline/tool_runtime/rag/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/tool_runtime/rag/__init__.py
rename to src/llama_stack/providers/inline/tool_runtime/rag/__init__.py
diff --git a/llama_stack/providers/inline/tool_runtime/rag/config.py b/src/llama_stack/providers/inline/tool_runtime/rag/config.py
similarity index 100%
rename from llama_stack/providers/inline/tool_runtime/rag/config.py
rename to src/llama_stack/providers/inline/tool_runtime/rag/config.py
diff --git a/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py b/src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
similarity index 100%
rename from llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
rename to src/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/src/llama_stack/providers/inline/tool_runtime/rag/memory.py
similarity index 100%
rename from llama_stack/providers/inline/tool_runtime/rag/memory.py
rename to src/llama_stack/providers/inline/tool_runtime/rag/memory.py
diff --git a/llama_stack/providers/inline/vector_io/__init__.py b/src/llama_stack/providers/inline/vector_io/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/__init__.py
rename to src/llama_stack/providers/inline/vector_io/__init__.py
diff --git a/llama_stack/providers/inline/vector_io/chroma/__init__.py b/src/llama_stack/providers/inline/vector_io/chroma/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/chroma/__init__.py
rename to src/llama_stack/providers/inline/vector_io/chroma/__init__.py
diff --git a/llama_stack/providers/inline/vector_io/chroma/config.py b/src/llama_stack/providers/inline/vector_io/chroma/config.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/chroma/config.py
rename to src/llama_stack/providers/inline/vector_io/chroma/config.py
diff --git a/llama_stack/providers/inline/vector_io/faiss/__init__.py b/src/llama_stack/providers/inline/vector_io/faiss/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/faiss/__init__.py
rename to src/llama_stack/providers/inline/vector_io/faiss/__init__.py
diff --git a/llama_stack/providers/inline/vector_io/faiss/config.py b/src/llama_stack/providers/inline/vector_io/faiss/config.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/faiss/config.py
rename to src/llama_stack/providers/inline/vector_io/faiss/config.py
diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/src/llama_stack/providers/inline/vector_io/faiss/faiss.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/faiss/faiss.py
rename to src/llama_stack/providers/inline/vector_io/faiss/faiss.py
diff --git a/llama_stack/providers/inline/vector_io/milvus/__init__.py b/src/llama_stack/providers/inline/vector_io/milvus/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/milvus/__init__.py
rename to src/llama_stack/providers/inline/vector_io/milvus/__init__.py
diff --git a/llama_stack/providers/inline/vector_io/milvus/config.py b/src/llama_stack/providers/inline/vector_io/milvus/config.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/milvus/config.py
rename to src/llama_stack/providers/inline/vector_io/milvus/config.py
diff --git a/llama_stack/providers/inline/vector_io/qdrant/__init__.py b/src/llama_stack/providers/inline/vector_io/qdrant/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/qdrant/__init__.py
rename to src/llama_stack/providers/inline/vector_io/qdrant/__init__.py
diff --git a/llama_stack/providers/inline/vector_io/qdrant/config.py b/src/llama_stack/providers/inline/vector_io/qdrant/config.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/qdrant/config.py
rename to src/llama_stack/providers/inline/vector_io/qdrant/config.py
diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
rename to src/llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py
diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/config.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/config.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/sqlite_vec/config.py
rename to src/llama_stack/providers/inline/vector_io/sqlite_vec/config.py
diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
similarity index 100%
rename from llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
rename to src/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
diff --git a/llama_stack/providers/registry/__init__.py b/src/llama_stack/providers/registry/__init__.py
similarity index 100%
rename from llama_stack/providers/registry/__init__.py
rename to src/llama_stack/providers/registry/__init__.py
diff --git a/llama_stack/providers/registry/agents.py b/src/llama_stack/providers/registry/agents.py
similarity index 100%
rename from llama_stack/providers/registry/agents.py
rename to src/llama_stack/providers/registry/agents.py
diff --git a/llama_stack/providers/registry/batches.py b/src/llama_stack/providers/registry/batches.py
similarity index 100%
rename from llama_stack/providers/registry/batches.py
rename to src/llama_stack/providers/registry/batches.py
diff --git a/llama_stack/providers/registry/datasetio.py b/src/llama_stack/providers/registry/datasetio.py
similarity index 100%
rename from llama_stack/providers/registry/datasetio.py
rename to src/llama_stack/providers/registry/datasetio.py
diff --git a/llama_stack/providers/registry/eval.py b/src/llama_stack/providers/registry/eval.py
similarity index 100%
rename from llama_stack/providers/registry/eval.py
rename to src/llama_stack/providers/registry/eval.py
diff --git a/llama_stack/providers/registry/files.py b/src/llama_stack/providers/registry/files.py
similarity index 100%
rename from llama_stack/providers/registry/files.py
rename to src/llama_stack/providers/registry/files.py
diff --git a/llama_stack/providers/registry/inference.py b/src/llama_stack/providers/registry/inference.py
similarity index 100%
rename from llama_stack/providers/registry/inference.py
rename to src/llama_stack/providers/registry/inference.py
diff --git a/llama_stack/providers/registry/post_training.py b/src/llama_stack/providers/registry/post_training.py
similarity index 100%
rename from llama_stack/providers/registry/post_training.py
rename to src/llama_stack/providers/registry/post_training.py
diff --git a/llama_stack/providers/registry/safety.py b/src/llama_stack/providers/registry/safety.py
similarity index 100%
rename from llama_stack/providers/registry/safety.py
rename to src/llama_stack/providers/registry/safety.py
diff --git a/llama_stack/providers/registry/scoring.py b/src/llama_stack/providers/registry/scoring.py
similarity index 100%
rename from llama_stack/providers/registry/scoring.py
rename to src/llama_stack/providers/registry/scoring.py
diff --git a/llama_stack/providers/registry/tool_runtime.py b/src/llama_stack/providers/registry/tool_runtime.py
similarity index 100%
rename from llama_stack/providers/registry/tool_runtime.py
rename to src/llama_stack/providers/registry/tool_runtime.py
diff --git a/llama_stack/providers/registry/vector_io.py b/src/llama_stack/providers/registry/vector_io.py
similarity index 100%
rename from llama_stack/providers/registry/vector_io.py
rename to src/llama_stack/providers/registry/vector_io.py
diff --git a/llama_stack/providers/remote/__init__.py b/src/llama_stack/providers/remote/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/__init__.py
rename to src/llama_stack/providers/remote/__init__.py
diff --git a/llama_stack/providers/remote/agents/__init__.py b/src/llama_stack/providers/remote/agents/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/agents/__init__.py
rename to src/llama_stack/providers/remote/agents/__init__.py
diff --git a/llama_stack/providers/remote/datasetio/__init__.py b/src/llama_stack/providers/remote/datasetio/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/datasetio/__init__.py
rename to src/llama_stack/providers/remote/datasetio/__init__.py
diff --git a/llama_stack/providers/remote/datasetio/huggingface/__init__.py b/src/llama_stack/providers/remote/datasetio/huggingface/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/datasetio/huggingface/__init__.py
rename to src/llama_stack/providers/remote/datasetio/huggingface/__init__.py
diff --git a/llama_stack/providers/remote/datasetio/huggingface/config.py b/src/llama_stack/providers/remote/datasetio/huggingface/config.py
similarity index 100%
rename from llama_stack/providers/remote/datasetio/huggingface/config.py
rename to src/llama_stack/providers/remote/datasetio/huggingface/config.py
diff --git a/llama_stack/providers/remote/datasetio/huggingface/huggingface.py b/src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
similarity index 100%
rename from llama_stack/providers/remote/datasetio/huggingface/huggingface.py
rename to src/llama_stack/providers/remote/datasetio/huggingface/huggingface.py
diff --git a/llama_stack/providers/remote/datasetio/nvidia/README.md b/src/llama_stack/providers/remote/datasetio/nvidia/README.md
similarity index 100%
rename from llama_stack/providers/remote/datasetio/nvidia/README.md
rename to src/llama_stack/providers/remote/datasetio/nvidia/README.md
diff --git a/llama_stack/providers/remote/datasetio/nvidia/__init__.py b/src/llama_stack/providers/remote/datasetio/nvidia/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/datasetio/nvidia/__init__.py
rename to src/llama_stack/providers/remote/datasetio/nvidia/__init__.py
diff --git a/llama_stack/providers/remote/datasetio/nvidia/config.py b/src/llama_stack/providers/remote/datasetio/nvidia/config.py
similarity index 100%
rename from llama_stack/providers/remote/datasetio/nvidia/config.py
rename to src/llama_stack/providers/remote/datasetio/nvidia/config.py
diff --git a/llama_stack/providers/remote/datasetio/nvidia/datasetio.py b/src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
similarity index 100%
rename from llama_stack/providers/remote/datasetio/nvidia/datasetio.py
rename to src/llama_stack/providers/remote/datasetio/nvidia/datasetio.py
diff --git a/llama_stack/providers/remote/eval/__init__.py b/src/llama_stack/providers/remote/eval/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/eval/__init__.py
rename to src/llama_stack/providers/remote/eval/__init__.py
diff --git a/llama_stack/providers/remote/eval/nvidia/README.md b/src/llama_stack/providers/remote/eval/nvidia/README.md
similarity index 100%
rename from llama_stack/providers/remote/eval/nvidia/README.md
rename to src/llama_stack/providers/remote/eval/nvidia/README.md
diff --git a/llama_stack/providers/remote/eval/nvidia/__init__.py b/src/llama_stack/providers/remote/eval/nvidia/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/eval/nvidia/__init__.py
rename to src/llama_stack/providers/remote/eval/nvidia/__init__.py
diff --git a/llama_stack/providers/remote/eval/nvidia/config.py b/src/llama_stack/providers/remote/eval/nvidia/config.py
similarity index 100%
rename from llama_stack/providers/remote/eval/nvidia/config.py
rename to src/llama_stack/providers/remote/eval/nvidia/config.py
diff --git a/llama_stack/providers/remote/eval/nvidia/eval.py b/src/llama_stack/providers/remote/eval/nvidia/eval.py
similarity index 100%
rename from llama_stack/providers/remote/eval/nvidia/eval.py
rename to src/llama_stack/providers/remote/eval/nvidia/eval.py
diff --git a/llama_stack/providers/remote/files/s3/README.md b/src/llama_stack/providers/remote/files/s3/README.md
similarity index 100%
rename from llama_stack/providers/remote/files/s3/README.md
rename to src/llama_stack/providers/remote/files/s3/README.md
diff --git a/llama_stack/providers/remote/files/s3/__init__.py b/src/llama_stack/providers/remote/files/s3/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/files/s3/__init__.py
rename to src/llama_stack/providers/remote/files/s3/__init__.py
diff --git a/llama_stack/providers/remote/files/s3/config.py b/src/llama_stack/providers/remote/files/s3/config.py
similarity index 100%
rename from llama_stack/providers/remote/files/s3/config.py
rename to src/llama_stack/providers/remote/files/s3/config.py
diff --git a/llama_stack/providers/remote/files/s3/files.py b/src/llama_stack/providers/remote/files/s3/files.py
similarity index 100%
rename from llama_stack/providers/remote/files/s3/files.py
rename to src/llama_stack/providers/remote/files/s3/files.py
diff --git a/llama_stack/providers/remote/inference/__init__.py b/src/llama_stack/providers/remote/inference/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/__init__.py
rename to src/llama_stack/providers/remote/inference/__init__.py
diff --git a/llama_stack/providers/remote/inference/anthropic/__init__.py b/src/llama_stack/providers/remote/inference/anthropic/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/anthropic/__init__.py
rename to src/llama_stack/providers/remote/inference/anthropic/__init__.py
diff --git a/llama_stack/providers/remote/inference/anthropic/anthropic.py b/src/llama_stack/providers/remote/inference/anthropic/anthropic.py
similarity index 100%
rename from llama_stack/providers/remote/inference/anthropic/anthropic.py
rename to src/llama_stack/providers/remote/inference/anthropic/anthropic.py
diff --git a/llama_stack/providers/remote/inference/anthropic/config.py b/src/llama_stack/providers/remote/inference/anthropic/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/anthropic/config.py
rename to src/llama_stack/providers/remote/inference/anthropic/config.py
diff --git a/llama_stack/providers/remote/inference/azure/__init__.py b/src/llama_stack/providers/remote/inference/azure/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/azure/__init__.py
rename to src/llama_stack/providers/remote/inference/azure/__init__.py
diff --git a/llama_stack/providers/remote/inference/azure/azure.py b/src/llama_stack/providers/remote/inference/azure/azure.py
similarity index 100%
rename from llama_stack/providers/remote/inference/azure/azure.py
rename to src/llama_stack/providers/remote/inference/azure/azure.py
diff --git a/llama_stack/providers/remote/inference/azure/config.py b/src/llama_stack/providers/remote/inference/azure/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/azure/config.py
rename to src/llama_stack/providers/remote/inference/azure/config.py
diff --git a/llama_stack/providers/remote/inference/bedrock/__init__.py b/src/llama_stack/providers/remote/inference/bedrock/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/bedrock/__init__.py
rename to src/llama_stack/providers/remote/inference/bedrock/__init__.py
diff --git a/llama_stack/providers/remote/inference/bedrock/bedrock.py b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py
similarity index 100%
rename from llama_stack/providers/remote/inference/bedrock/bedrock.py
rename to src/llama_stack/providers/remote/inference/bedrock/bedrock.py
diff --git a/llama_stack/providers/remote/inference/bedrock/config.py b/src/llama_stack/providers/remote/inference/bedrock/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/bedrock/config.py
rename to src/llama_stack/providers/remote/inference/bedrock/config.py
diff --git a/llama_stack/providers/remote/inference/bedrock/models.py b/src/llama_stack/providers/remote/inference/bedrock/models.py
similarity index 100%
rename from llama_stack/providers/remote/inference/bedrock/models.py
rename to src/llama_stack/providers/remote/inference/bedrock/models.py
diff --git a/llama_stack/providers/remote/inference/cerebras/__init__.py b/src/llama_stack/providers/remote/inference/cerebras/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/cerebras/__init__.py
rename to src/llama_stack/providers/remote/inference/cerebras/__init__.py
diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/src/llama_stack/providers/remote/inference/cerebras/cerebras.py
similarity index 100%
rename from llama_stack/providers/remote/inference/cerebras/cerebras.py
rename to src/llama_stack/providers/remote/inference/cerebras/cerebras.py
diff --git a/llama_stack/providers/remote/inference/cerebras/config.py b/src/llama_stack/providers/remote/inference/cerebras/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/cerebras/config.py
rename to src/llama_stack/providers/remote/inference/cerebras/config.py
diff --git a/llama_stack/providers/remote/inference/databricks/__init__.py b/src/llama_stack/providers/remote/inference/databricks/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/databricks/__init__.py
rename to src/llama_stack/providers/remote/inference/databricks/__init__.py
diff --git a/llama_stack/providers/remote/inference/databricks/config.py b/src/llama_stack/providers/remote/inference/databricks/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/databricks/config.py
rename to src/llama_stack/providers/remote/inference/databricks/config.py
diff --git a/llama_stack/providers/remote/inference/databricks/databricks.py b/src/llama_stack/providers/remote/inference/databricks/databricks.py
similarity index 100%
rename from llama_stack/providers/remote/inference/databricks/databricks.py
rename to src/llama_stack/providers/remote/inference/databricks/databricks.py
diff --git a/llama_stack/providers/remote/inference/fireworks/__init__.py b/src/llama_stack/providers/remote/inference/fireworks/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/fireworks/__init__.py
rename to src/llama_stack/providers/remote/inference/fireworks/__init__.py
diff --git a/llama_stack/providers/remote/inference/fireworks/config.py b/src/llama_stack/providers/remote/inference/fireworks/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/fireworks/config.py
rename to src/llama_stack/providers/remote/inference/fireworks/config.py
diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/src/llama_stack/providers/remote/inference/fireworks/fireworks.py
similarity index 100%
rename from llama_stack/providers/remote/inference/fireworks/fireworks.py
rename to src/llama_stack/providers/remote/inference/fireworks/fireworks.py
diff --git a/llama_stack/providers/remote/inference/gemini/__init__.py b/src/llama_stack/providers/remote/inference/gemini/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/gemini/__init__.py
rename to src/llama_stack/providers/remote/inference/gemini/__init__.py
diff --git a/llama_stack/providers/remote/inference/gemini/config.py b/src/llama_stack/providers/remote/inference/gemini/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/gemini/config.py
rename to src/llama_stack/providers/remote/inference/gemini/config.py
diff --git a/llama_stack/providers/remote/inference/gemini/gemini.py b/src/llama_stack/providers/remote/inference/gemini/gemini.py
similarity index 100%
rename from llama_stack/providers/remote/inference/gemini/gemini.py
rename to src/llama_stack/providers/remote/inference/gemini/gemini.py
diff --git a/llama_stack/providers/remote/inference/groq/__init__.py b/src/llama_stack/providers/remote/inference/groq/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/groq/__init__.py
rename to src/llama_stack/providers/remote/inference/groq/__init__.py
diff --git a/llama_stack/providers/remote/inference/groq/config.py b/src/llama_stack/providers/remote/inference/groq/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/groq/config.py
rename to src/llama_stack/providers/remote/inference/groq/config.py
diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/src/llama_stack/providers/remote/inference/groq/groq.py
similarity index 100%
rename from llama_stack/providers/remote/inference/groq/groq.py
rename to src/llama_stack/providers/remote/inference/groq/groq.py
diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/__init__.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/llama_openai_compat/__init__.py
rename to src/llama_stack/providers/remote/inference/llama_openai_compat/__init__.py
diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/config.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/llama_openai_compat/config.py
rename to src/llama_stack/providers/remote/inference/llama_openai_compat/config.py
diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
similarity index 100%
rename from llama_stack/providers/remote/inference/llama_openai_compat/llama.py
rename to src/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
similarity index 100%
rename from llama_stack/providers/remote/inference/nvidia/NVIDIA.md
rename to src/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
diff --git a/llama_stack/providers/remote/inference/nvidia/__init__.py b/src/llama_stack/providers/remote/inference/nvidia/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/nvidia/__init__.py
rename to src/llama_stack/providers/remote/inference/nvidia/__init__.py
diff --git a/llama_stack/providers/remote/inference/nvidia/config.py b/src/llama_stack/providers/remote/inference/nvidia/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/nvidia/config.py
rename to src/llama_stack/providers/remote/inference/nvidia/config.py
diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/src/llama_stack/providers/remote/inference/nvidia/nvidia.py
similarity index 100%
rename from llama_stack/providers/remote/inference/nvidia/nvidia.py
rename to src/llama_stack/providers/remote/inference/nvidia/nvidia.py
diff --git a/llama_stack/providers/remote/inference/nvidia/utils.py b/src/llama_stack/providers/remote/inference/nvidia/utils.py
similarity index 100%
rename from llama_stack/providers/remote/inference/nvidia/utils.py
rename to src/llama_stack/providers/remote/inference/nvidia/utils.py
diff --git a/llama_stack/providers/remote/inference/ollama/__init__.py b/src/llama_stack/providers/remote/inference/ollama/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/ollama/__init__.py
rename to src/llama_stack/providers/remote/inference/ollama/__init__.py
diff --git a/llama_stack/providers/remote/inference/ollama/config.py b/src/llama_stack/providers/remote/inference/ollama/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/ollama/config.py
rename to src/llama_stack/providers/remote/inference/ollama/config.py
diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/src/llama_stack/providers/remote/inference/ollama/ollama.py
similarity index 100%
rename from llama_stack/providers/remote/inference/ollama/ollama.py
rename to src/llama_stack/providers/remote/inference/ollama/ollama.py
diff --git a/llama_stack/providers/remote/inference/openai/__init__.py b/src/llama_stack/providers/remote/inference/openai/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/openai/__init__.py
rename to src/llama_stack/providers/remote/inference/openai/__init__.py
diff --git a/llama_stack/providers/remote/inference/openai/config.py b/src/llama_stack/providers/remote/inference/openai/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/openai/config.py
rename to src/llama_stack/providers/remote/inference/openai/config.py
diff --git a/llama_stack/providers/remote/inference/openai/openai.py b/src/llama_stack/providers/remote/inference/openai/openai.py
similarity index 100%
rename from llama_stack/providers/remote/inference/openai/openai.py
rename to src/llama_stack/providers/remote/inference/openai/openai.py
diff --git a/llama_stack/providers/remote/inference/passthrough/__init__.py b/src/llama_stack/providers/remote/inference/passthrough/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/passthrough/__init__.py
rename to src/llama_stack/providers/remote/inference/passthrough/__init__.py
diff --git a/llama_stack/providers/remote/inference/passthrough/config.py b/src/llama_stack/providers/remote/inference/passthrough/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/passthrough/config.py
rename to src/llama_stack/providers/remote/inference/passthrough/config.py
diff --git a/llama_stack/providers/remote/inference/passthrough/passthrough.py b/src/llama_stack/providers/remote/inference/passthrough/passthrough.py
similarity index 100%
rename from llama_stack/providers/remote/inference/passthrough/passthrough.py
rename to src/llama_stack/providers/remote/inference/passthrough/passthrough.py
diff --git a/llama_stack/providers/remote/inference/runpod/__init__.py b/src/llama_stack/providers/remote/inference/runpod/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/runpod/__init__.py
rename to src/llama_stack/providers/remote/inference/runpod/__init__.py
diff --git a/llama_stack/providers/remote/inference/runpod/config.py b/src/llama_stack/providers/remote/inference/runpod/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/runpod/config.py
rename to src/llama_stack/providers/remote/inference/runpod/config.py
diff --git a/llama_stack/providers/remote/inference/runpod/runpod.py b/src/llama_stack/providers/remote/inference/runpod/runpod.py
similarity index 100%
rename from llama_stack/providers/remote/inference/runpod/runpod.py
rename to src/llama_stack/providers/remote/inference/runpod/runpod.py
diff --git a/llama_stack/providers/remote/inference/sambanova/__init__.py b/src/llama_stack/providers/remote/inference/sambanova/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/sambanova/__init__.py
rename to src/llama_stack/providers/remote/inference/sambanova/__init__.py
diff --git a/llama_stack/providers/remote/inference/sambanova/config.py b/src/llama_stack/providers/remote/inference/sambanova/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/sambanova/config.py
rename to src/llama_stack/providers/remote/inference/sambanova/config.py
diff --git a/llama_stack/providers/remote/inference/sambanova/sambanova.py b/src/llama_stack/providers/remote/inference/sambanova/sambanova.py
similarity index 100%
rename from llama_stack/providers/remote/inference/sambanova/sambanova.py
rename to src/llama_stack/providers/remote/inference/sambanova/sambanova.py
diff --git a/llama_stack/providers/remote/inference/tgi/__init__.py b/src/llama_stack/providers/remote/inference/tgi/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/tgi/__init__.py
rename to src/llama_stack/providers/remote/inference/tgi/__init__.py
diff --git a/llama_stack/providers/remote/inference/tgi/config.py b/src/llama_stack/providers/remote/inference/tgi/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/tgi/config.py
rename to src/llama_stack/providers/remote/inference/tgi/config.py
diff --git a/llama_stack/providers/remote/inference/tgi/tgi.py b/src/llama_stack/providers/remote/inference/tgi/tgi.py
similarity index 100%
rename from llama_stack/providers/remote/inference/tgi/tgi.py
rename to src/llama_stack/providers/remote/inference/tgi/tgi.py
diff --git a/llama_stack/providers/remote/inference/together/__init__.py b/src/llama_stack/providers/remote/inference/together/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/together/__init__.py
rename to src/llama_stack/providers/remote/inference/together/__init__.py
diff --git a/llama_stack/providers/remote/inference/together/config.py b/src/llama_stack/providers/remote/inference/together/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/together/config.py
rename to src/llama_stack/providers/remote/inference/together/config.py
diff --git a/llama_stack/providers/remote/inference/together/together.py b/src/llama_stack/providers/remote/inference/together/together.py
similarity index 100%
rename from llama_stack/providers/remote/inference/together/together.py
rename to src/llama_stack/providers/remote/inference/together/together.py
diff --git a/llama_stack/providers/remote/inference/vertexai/__init__.py b/src/llama_stack/providers/remote/inference/vertexai/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/vertexai/__init__.py
rename to src/llama_stack/providers/remote/inference/vertexai/__init__.py
diff --git a/llama_stack/providers/remote/inference/vertexai/config.py b/src/llama_stack/providers/remote/inference/vertexai/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/vertexai/config.py
rename to src/llama_stack/providers/remote/inference/vertexai/config.py
diff --git a/llama_stack/providers/remote/inference/vertexai/vertexai.py b/src/llama_stack/providers/remote/inference/vertexai/vertexai.py
similarity index 100%
rename from llama_stack/providers/remote/inference/vertexai/vertexai.py
rename to src/llama_stack/providers/remote/inference/vertexai/vertexai.py
diff --git a/llama_stack/providers/remote/inference/vllm/__init__.py b/src/llama_stack/providers/remote/inference/vllm/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/vllm/__init__.py
rename to src/llama_stack/providers/remote/inference/vllm/__init__.py
diff --git a/llama_stack/providers/remote/inference/vllm/config.py b/src/llama_stack/providers/remote/inference/vllm/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/vllm/config.py
rename to src/llama_stack/providers/remote/inference/vllm/config.py
diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/src/llama_stack/providers/remote/inference/vllm/vllm.py
similarity index 100%
rename from llama_stack/providers/remote/inference/vllm/vllm.py
rename to src/llama_stack/providers/remote/inference/vllm/vllm.py
diff --git a/llama_stack/providers/remote/inference/watsonx/__init__.py b/src/llama_stack/providers/remote/inference/watsonx/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/inference/watsonx/__init__.py
rename to src/llama_stack/providers/remote/inference/watsonx/__init__.py
diff --git a/llama_stack/providers/remote/inference/watsonx/config.py b/src/llama_stack/providers/remote/inference/watsonx/config.py
similarity index 100%
rename from llama_stack/providers/remote/inference/watsonx/config.py
rename to src/llama_stack/providers/remote/inference/watsonx/config.py
diff --git a/llama_stack/providers/remote/inference/watsonx/watsonx.py b/src/llama_stack/providers/remote/inference/watsonx/watsonx.py
similarity index 100%
rename from llama_stack/providers/remote/inference/watsonx/watsonx.py
rename to src/llama_stack/providers/remote/inference/watsonx/watsonx.py
diff --git a/llama_stack/providers/remote/post_training/__init__.py b/src/llama_stack/providers/remote/post_training/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/post_training/__init__.py
rename to src/llama_stack/providers/remote/post_training/__init__.py
diff --git a/llama_stack/providers/remote/post_training/nvidia/README.md b/src/llama_stack/providers/remote/post_training/nvidia/README.md
similarity index 100%
rename from llama_stack/providers/remote/post_training/nvidia/README.md
rename to src/llama_stack/providers/remote/post_training/nvidia/README.md
diff --git a/llama_stack/providers/remote/post_training/nvidia/__init__.py b/src/llama_stack/providers/remote/post_training/nvidia/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/post_training/nvidia/__init__.py
rename to src/llama_stack/providers/remote/post_training/nvidia/__init__.py
diff --git a/llama_stack/providers/remote/post_training/nvidia/config.py b/src/llama_stack/providers/remote/post_training/nvidia/config.py
similarity index 100%
rename from llama_stack/providers/remote/post_training/nvidia/config.py
rename to src/llama_stack/providers/remote/post_training/nvidia/config.py
diff --git a/llama_stack/providers/remote/post_training/nvidia/models.py b/src/llama_stack/providers/remote/post_training/nvidia/models.py
similarity index 100%
rename from llama_stack/providers/remote/post_training/nvidia/models.py
rename to src/llama_stack/providers/remote/post_training/nvidia/models.py
diff --git a/llama_stack/providers/remote/post_training/nvidia/post_training.py b/src/llama_stack/providers/remote/post_training/nvidia/post_training.py
similarity index 100%
rename from llama_stack/providers/remote/post_training/nvidia/post_training.py
rename to src/llama_stack/providers/remote/post_training/nvidia/post_training.py
diff --git a/llama_stack/providers/remote/post_training/nvidia/utils.py b/src/llama_stack/providers/remote/post_training/nvidia/utils.py
similarity index 100%
rename from llama_stack/providers/remote/post_training/nvidia/utils.py
rename to src/llama_stack/providers/remote/post_training/nvidia/utils.py
diff --git a/llama_stack/providers/remote/safety/__init__.py b/src/llama_stack/providers/remote/safety/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/safety/__init__.py
rename to src/llama_stack/providers/remote/safety/__init__.py
diff --git a/llama_stack/providers/remote/safety/bedrock/__init__.py b/src/llama_stack/providers/remote/safety/bedrock/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/safety/bedrock/__init__.py
rename to src/llama_stack/providers/remote/safety/bedrock/__init__.py
diff --git a/llama_stack/providers/remote/safety/bedrock/bedrock.py b/src/llama_stack/providers/remote/safety/bedrock/bedrock.py
similarity index 100%
rename from llama_stack/providers/remote/safety/bedrock/bedrock.py
rename to src/llama_stack/providers/remote/safety/bedrock/bedrock.py
diff --git a/llama_stack/providers/remote/safety/bedrock/config.py b/src/llama_stack/providers/remote/safety/bedrock/config.py
similarity index 100%
rename from llama_stack/providers/remote/safety/bedrock/config.py
rename to src/llama_stack/providers/remote/safety/bedrock/config.py
diff --git a/llama_stack/providers/remote/safety/nvidia/README.md b/src/llama_stack/providers/remote/safety/nvidia/README.md
similarity index 100%
rename from llama_stack/providers/remote/safety/nvidia/README.md
rename to src/llama_stack/providers/remote/safety/nvidia/README.md
diff --git a/llama_stack/providers/remote/safety/nvidia/__init__.py b/src/llama_stack/providers/remote/safety/nvidia/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/safety/nvidia/__init__.py
rename to src/llama_stack/providers/remote/safety/nvidia/__init__.py
diff --git a/llama_stack/providers/remote/safety/nvidia/config.py b/src/llama_stack/providers/remote/safety/nvidia/config.py
similarity index 100%
rename from llama_stack/providers/remote/safety/nvidia/config.py
rename to src/llama_stack/providers/remote/safety/nvidia/config.py
diff --git a/llama_stack/providers/remote/safety/nvidia/nvidia.py b/src/llama_stack/providers/remote/safety/nvidia/nvidia.py
similarity index 100%
rename from llama_stack/providers/remote/safety/nvidia/nvidia.py
rename to src/llama_stack/providers/remote/safety/nvidia/nvidia.py
diff --git a/llama_stack/providers/remote/safety/sambanova/__init__.py b/src/llama_stack/providers/remote/safety/sambanova/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/safety/sambanova/__init__.py
rename to src/llama_stack/providers/remote/safety/sambanova/__init__.py
diff --git a/llama_stack/providers/remote/safety/sambanova/config.py b/src/llama_stack/providers/remote/safety/sambanova/config.py
similarity index 100%
rename from llama_stack/providers/remote/safety/sambanova/config.py
rename to src/llama_stack/providers/remote/safety/sambanova/config.py
diff --git a/llama_stack/providers/remote/safety/sambanova/sambanova.py b/src/llama_stack/providers/remote/safety/sambanova/sambanova.py
similarity index 100%
rename from llama_stack/providers/remote/safety/sambanova/sambanova.py
rename to src/llama_stack/providers/remote/safety/sambanova/sambanova.py
diff --git a/llama_stack/providers/remote/tool_runtime/__init__.py b/src/llama_stack/providers/remote/tool_runtime/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/__init__.py
rename to src/llama_stack/providers/remote/tool_runtime/__init__.py
diff --git a/llama_stack/providers/remote/tool_runtime/bing_search/__init__.py b/src/llama_stack/providers/remote/tool_runtime/bing_search/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/bing_search/__init__.py
rename to src/llama_stack/providers/remote/tool_runtime/bing_search/__init__.py
diff --git a/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py b/src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py
rename to src/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py
diff --git a/llama_stack/providers/remote/tool_runtime/bing_search/config.py b/src/llama_stack/providers/remote/tool_runtime/bing_search/config.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/bing_search/config.py
rename to src/llama_stack/providers/remote/tool_runtime/bing_search/config.py
diff --git a/llama_stack/providers/remote/tool_runtime/brave_search/__init__.py b/src/llama_stack/providers/remote/tool_runtime/brave_search/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/brave_search/__init__.py
rename to src/llama_stack/providers/remote/tool_runtime/brave_search/__init__.py
diff --git a/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py b/src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
rename to src/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
diff --git a/llama_stack/providers/remote/tool_runtime/brave_search/config.py b/src/llama_stack/providers/remote/tool_runtime/brave_search/config.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/brave_search/config.py
rename to src/llama_stack/providers/remote/tool_runtime/brave_search/config.py
diff --git a/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py
rename to src/llama_stack/providers/remote/tool_runtime/model_context_protocol/__init__.py
diff --git a/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py
rename to src/llama_stack/providers/remote/tool_runtime/model_context_protocol/config.py
diff --git a/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py b/src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py
rename to src/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py
diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py b/src/llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py
rename to src/llama_stack/providers/remote/tool_runtime/tavily_search/__init__.py
diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/src/llama_stack/providers/remote/tool_runtime/tavily_search/config.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/tavily_search/config.py
rename to src/llama_stack/providers/remote/tool_runtime/tavily_search/config.py
diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py
rename to src/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py
diff --git a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py
rename to src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/__init__.py
diff --git a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py
rename to src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/config.py
diff --git a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py b/src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
similarity index 100%
rename from llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
rename to src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
diff --git a/llama_stack/providers/remote/vector_io/__init__.py b/src/llama_stack/providers/remote/vector_io/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/__init__.py
rename to src/llama_stack/providers/remote/vector_io/__init__.py
diff --git a/llama_stack/providers/remote/vector_io/chroma/__init__.py b/src/llama_stack/providers/remote/vector_io/chroma/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/chroma/__init__.py
rename to src/llama_stack/providers/remote/vector_io/chroma/__init__.py
diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/src/llama_stack/providers/remote/vector_io/chroma/chroma.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/chroma/chroma.py
rename to src/llama_stack/providers/remote/vector_io/chroma/chroma.py
diff --git a/llama_stack/providers/remote/vector_io/chroma/config.py b/src/llama_stack/providers/remote/vector_io/chroma/config.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/chroma/config.py
rename to src/llama_stack/providers/remote/vector_io/chroma/config.py
diff --git a/llama_stack/providers/remote/vector_io/milvus/__init__.py b/src/llama_stack/providers/remote/vector_io/milvus/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/milvus/__init__.py
rename to src/llama_stack/providers/remote/vector_io/milvus/__init__.py
diff --git a/llama_stack/providers/remote/vector_io/milvus/config.py b/src/llama_stack/providers/remote/vector_io/milvus/config.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/milvus/config.py
rename to src/llama_stack/providers/remote/vector_io/milvus/config.py
diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/src/llama_stack/providers/remote/vector_io/milvus/milvus.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/milvus/milvus.py
rename to src/llama_stack/providers/remote/vector_io/milvus/milvus.py
diff --git a/llama_stack/providers/remote/vector_io/pgvector/__init__.py b/src/llama_stack/providers/remote/vector_io/pgvector/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/pgvector/__init__.py
rename to src/llama_stack/providers/remote/vector_io/pgvector/__init__.py
diff --git a/llama_stack/providers/remote/vector_io/pgvector/config.py b/src/llama_stack/providers/remote/vector_io/pgvector/config.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/pgvector/config.py
rename to src/llama_stack/providers/remote/vector_io/pgvector/config.py
diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/pgvector/pgvector.py
rename to src/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
diff --git a/llama_stack/providers/remote/vector_io/qdrant/__init__.py b/src/llama_stack/providers/remote/vector_io/qdrant/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/qdrant/__init__.py
rename to src/llama_stack/providers/remote/vector_io/qdrant/__init__.py
diff --git a/llama_stack/providers/remote/vector_io/qdrant/config.py b/src/llama_stack/providers/remote/vector_io/qdrant/config.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/qdrant/config.py
rename to src/llama_stack/providers/remote/vector_io/qdrant/config.py
diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/qdrant/qdrant.py
rename to src/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
diff --git a/llama_stack/providers/remote/vector_io/weaviate/__init__.py b/src/llama_stack/providers/remote/vector_io/weaviate/__init__.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/weaviate/__init__.py
rename to src/llama_stack/providers/remote/vector_io/weaviate/__init__.py
diff --git a/llama_stack/providers/remote/vector_io/weaviate/config.py b/src/llama_stack/providers/remote/vector_io/weaviate/config.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/weaviate/config.py
rename to src/llama_stack/providers/remote/vector_io/weaviate/config.py
diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
similarity index 100%
rename from llama_stack/providers/remote/vector_io/weaviate/weaviate.py
rename to src/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
diff --git a/llama_stack/providers/utils/__init__.py b/src/llama_stack/providers/utils/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/__init__.py
rename to src/llama_stack/providers/utils/__init__.py
diff --git a/llama_stack/providers/utils/bedrock/__init__.py b/src/llama_stack/providers/utils/bedrock/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/bedrock/__init__.py
rename to src/llama_stack/providers/utils/bedrock/__init__.py
diff --git a/llama_stack/providers/utils/bedrock/client.py b/src/llama_stack/providers/utils/bedrock/client.py
similarity index 100%
rename from llama_stack/providers/utils/bedrock/client.py
rename to src/llama_stack/providers/utils/bedrock/client.py
diff --git a/llama_stack/providers/utils/bedrock/config.py b/src/llama_stack/providers/utils/bedrock/config.py
similarity index 100%
rename from llama_stack/providers/utils/bedrock/config.py
rename to src/llama_stack/providers/utils/bedrock/config.py
diff --git a/llama_stack/providers/utils/bedrock/refreshable_boto_session.py b/src/llama_stack/providers/utils/bedrock/refreshable_boto_session.py
similarity index 100%
rename from llama_stack/providers/utils/bedrock/refreshable_boto_session.py
rename to src/llama_stack/providers/utils/bedrock/refreshable_boto_session.py
diff --git a/llama_stack/providers/utils/common/__init__.py b/src/llama_stack/providers/utils/common/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/common/__init__.py
rename to src/llama_stack/providers/utils/common/__init__.py
diff --git a/llama_stack/providers/utils/common/data_schema_validator.py b/src/llama_stack/providers/utils/common/data_schema_validator.py
similarity index 100%
rename from llama_stack/providers/utils/common/data_schema_validator.py
rename to src/llama_stack/providers/utils/common/data_schema_validator.py
diff --git a/llama_stack/providers/utils/datasetio/__init__.py b/src/llama_stack/providers/utils/datasetio/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/datasetio/__init__.py
rename to src/llama_stack/providers/utils/datasetio/__init__.py
diff --git a/llama_stack/providers/utils/datasetio/url_utils.py b/src/llama_stack/providers/utils/datasetio/url_utils.py
similarity index 100%
rename from llama_stack/providers/utils/datasetio/url_utils.py
rename to src/llama_stack/providers/utils/datasetio/url_utils.py
diff --git a/llama_stack/providers/utils/files/__init__.py b/src/llama_stack/providers/utils/files/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/files/__init__.py
rename to src/llama_stack/providers/utils/files/__init__.py
diff --git a/llama_stack/providers/utils/files/form_data.py b/src/llama_stack/providers/utils/files/form_data.py
similarity index 100%
rename from llama_stack/providers/utils/files/form_data.py
rename to src/llama_stack/providers/utils/files/form_data.py
diff --git a/llama_stack/providers/utils/inference/__init__.py b/src/llama_stack/providers/utils/inference/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/inference/__init__.py
rename to src/llama_stack/providers/utils/inference/__init__.py
diff --git a/llama_stack/providers/utils/inference/embedding_mixin.py b/src/llama_stack/providers/utils/inference/embedding_mixin.py
similarity index 100%
rename from llama_stack/providers/utils/inference/embedding_mixin.py
rename to src/llama_stack/providers/utils/inference/embedding_mixin.py
diff --git a/llama_stack/providers/utils/inference/inference_store.py b/src/llama_stack/providers/utils/inference/inference_store.py
similarity index 100%
rename from llama_stack/providers/utils/inference/inference_store.py
rename to src/llama_stack/providers/utils/inference/inference_store.py
diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/src/llama_stack/providers/utils/inference/litellm_openai_mixin.py
similarity index 100%
rename from llama_stack/providers/utils/inference/litellm_openai_mixin.py
rename to src/llama_stack/providers/utils/inference/litellm_openai_mixin.py
diff --git a/llama_stack/providers/utils/inference/model_registry.py b/src/llama_stack/providers/utils/inference/model_registry.py
similarity index 100%
rename from llama_stack/providers/utils/inference/model_registry.py
rename to src/llama_stack/providers/utils/inference/model_registry.py
diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/src/llama_stack/providers/utils/inference/openai_compat.py
similarity index 100%
rename from llama_stack/providers/utils/inference/openai_compat.py
rename to src/llama_stack/providers/utils/inference/openai_compat.py
diff --git a/llama_stack/providers/utils/inference/openai_mixin.py b/src/llama_stack/providers/utils/inference/openai_mixin.py
similarity index 100%
rename from llama_stack/providers/utils/inference/openai_mixin.py
rename to src/llama_stack/providers/utils/inference/openai_mixin.py
diff --git a/llama_stack/providers/utils/inference/prompt_adapter.py b/src/llama_stack/providers/utils/inference/prompt_adapter.py
similarity index 100%
rename from llama_stack/providers/utils/inference/prompt_adapter.py
rename to src/llama_stack/providers/utils/inference/prompt_adapter.py
diff --git a/llama_stack/providers/utils/kvstore/__init__.py b/src/llama_stack/providers/utils/kvstore/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/__init__.py
rename to src/llama_stack/providers/utils/kvstore/__init__.py
diff --git a/llama_stack/providers/utils/kvstore/api.py b/src/llama_stack/providers/utils/kvstore/api.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/api.py
rename to src/llama_stack/providers/utils/kvstore/api.py
diff --git a/llama_stack/providers/utils/kvstore/config.py b/src/llama_stack/providers/utils/kvstore/config.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/config.py
rename to src/llama_stack/providers/utils/kvstore/config.py
diff --git a/llama_stack/providers/utils/kvstore/kvstore.py b/src/llama_stack/providers/utils/kvstore/kvstore.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/kvstore.py
rename to src/llama_stack/providers/utils/kvstore/kvstore.py
diff --git a/llama_stack/providers/utils/kvstore/mongodb/__init__.py b/src/llama_stack/providers/utils/kvstore/mongodb/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/mongodb/__init__.py
rename to src/llama_stack/providers/utils/kvstore/mongodb/__init__.py
diff --git a/llama_stack/providers/utils/kvstore/mongodb/mongodb.py b/src/llama_stack/providers/utils/kvstore/mongodb/mongodb.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/mongodb/mongodb.py
rename to src/llama_stack/providers/utils/kvstore/mongodb/mongodb.py
diff --git a/llama_stack/providers/utils/kvstore/postgres/__init__.py b/src/llama_stack/providers/utils/kvstore/postgres/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/postgres/__init__.py
rename to src/llama_stack/providers/utils/kvstore/postgres/__init__.py
diff --git a/llama_stack/providers/utils/kvstore/postgres/postgres.py b/src/llama_stack/providers/utils/kvstore/postgres/postgres.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/postgres/postgres.py
rename to src/llama_stack/providers/utils/kvstore/postgres/postgres.py
diff --git a/llama_stack/providers/utils/kvstore/redis/__init__.py b/src/llama_stack/providers/utils/kvstore/redis/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/redis/__init__.py
rename to src/llama_stack/providers/utils/kvstore/redis/__init__.py
diff --git a/llama_stack/providers/utils/kvstore/redis/redis.py b/src/llama_stack/providers/utils/kvstore/redis/redis.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/redis/redis.py
rename to src/llama_stack/providers/utils/kvstore/redis/redis.py
diff --git a/llama_stack/providers/utils/kvstore/sqlite/__init__.py b/src/llama_stack/providers/utils/kvstore/sqlite/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/sqlite/__init__.py
rename to src/llama_stack/providers/utils/kvstore/sqlite/__init__.py
diff --git a/llama_stack/providers/utils/kvstore/sqlite/config.py b/src/llama_stack/providers/utils/kvstore/sqlite/config.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/sqlite/config.py
rename to src/llama_stack/providers/utils/kvstore/sqlite/config.py
diff --git a/llama_stack/providers/utils/kvstore/sqlite/sqlite.py b/src/llama_stack/providers/utils/kvstore/sqlite/sqlite.py
similarity index 100%
rename from llama_stack/providers/utils/kvstore/sqlite/sqlite.py
rename to src/llama_stack/providers/utils/kvstore/sqlite/sqlite.py
diff --git a/llama_stack/providers/utils/memory/__init__.py b/src/llama_stack/providers/utils/memory/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/memory/__init__.py
rename to src/llama_stack/providers/utils/memory/__init__.py
diff --git a/llama_stack/providers/utils/memory/file_utils.py b/src/llama_stack/providers/utils/memory/file_utils.py
similarity index 100%
rename from llama_stack/providers/utils/memory/file_utils.py
rename to src/llama_stack/providers/utils/memory/file_utils.py
diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
similarity index 100%
rename from llama_stack/providers/utils/memory/openai_vector_store_mixin.py
rename to src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
diff --git a/llama_stack/providers/utils/memory/vector_store.py b/src/llama_stack/providers/utils/memory/vector_store.py
similarity index 100%
rename from llama_stack/providers/utils/memory/vector_store.py
rename to src/llama_stack/providers/utils/memory/vector_store.py
diff --git a/llama_stack/providers/utils/pagination.py b/src/llama_stack/providers/utils/pagination.py
similarity index 100%
rename from llama_stack/providers/utils/pagination.py
rename to src/llama_stack/providers/utils/pagination.py
diff --git a/llama_stack/providers/utils/responses/__init__.py b/src/llama_stack/providers/utils/responses/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/responses/__init__.py
rename to src/llama_stack/providers/utils/responses/__init__.py
diff --git a/llama_stack/providers/utils/responses/responses_store.py b/src/llama_stack/providers/utils/responses/responses_store.py
similarity index 100%
rename from llama_stack/providers/utils/responses/responses_store.py
rename to src/llama_stack/providers/utils/responses/responses_store.py
diff --git a/llama_stack/providers/utils/scheduler.py b/src/llama_stack/providers/utils/scheduler.py
similarity index 100%
rename from llama_stack/providers/utils/scheduler.py
rename to src/llama_stack/providers/utils/scheduler.py
diff --git a/llama_stack/providers/utils/scoring/__init__.py b/src/llama_stack/providers/utils/scoring/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/scoring/__init__.py
rename to src/llama_stack/providers/utils/scoring/__init__.py
diff --git a/llama_stack/providers/utils/scoring/aggregation_utils.py b/src/llama_stack/providers/utils/scoring/aggregation_utils.py
similarity index 100%
rename from llama_stack/providers/utils/scoring/aggregation_utils.py
rename to src/llama_stack/providers/utils/scoring/aggregation_utils.py
diff --git a/llama_stack/providers/utils/scoring/base_scoring_fn.py b/src/llama_stack/providers/utils/scoring/base_scoring_fn.py
similarity index 100%
rename from llama_stack/providers/utils/scoring/base_scoring_fn.py
rename to src/llama_stack/providers/utils/scoring/base_scoring_fn.py
diff --git a/llama_stack/providers/utils/scoring/basic_scoring_utils.py b/src/llama_stack/providers/utils/scoring/basic_scoring_utils.py
similarity index 100%
rename from llama_stack/providers/utils/scoring/basic_scoring_utils.py
rename to src/llama_stack/providers/utils/scoring/basic_scoring_utils.py
diff --git a/llama_stack/providers/utils/sqlstore/__init__.py b/src/llama_stack/providers/utils/sqlstore/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/sqlstore/__init__.py
rename to src/llama_stack/providers/utils/sqlstore/__init__.py
diff --git a/llama_stack/providers/utils/sqlstore/api.py b/src/llama_stack/providers/utils/sqlstore/api.py
similarity index 100%
rename from llama_stack/providers/utils/sqlstore/api.py
rename to src/llama_stack/providers/utils/sqlstore/api.py
diff --git a/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py
similarity index 100%
rename from llama_stack/providers/utils/sqlstore/authorized_sqlstore.py
rename to src/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py
diff --git a/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py b/src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py
similarity index 100%
rename from llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py
rename to src/llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py
diff --git a/llama_stack/providers/utils/sqlstore/sqlstore.py b/src/llama_stack/providers/utils/sqlstore/sqlstore.py
similarity index 100%
rename from llama_stack/providers/utils/sqlstore/sqlstore.py
rename to src/llama_stack/providers/utils/sqlstore/sqlstore.py
diff --git a/llama_stack/providers/utils/tools/__init__.py b/src/llama_stack/providers/utils/tools/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/tools/__init__.py
rename to src/llama_stack/providers/utils/tools/__init__.py
diff --git a/llama_stack/providers/utils/tools/mcp.py b/src/llama_stack/providers/utils/tools/mcp.py
similarity index 100%
rename from llama_stack/providers/utils/tools/mcp.py
rename to src/llama_stack/providers/utils/tools/mcp.py
diff --git a/llama_stack/providers/utils/tools/ttl_dict.py b/src/llama_stack/providers/utils/tools/ttl_dict.py
similarity index 100%
rename from llama_stack/providers/utils/tools/ttl_dict.py
rename to src/llama_stack/providers/utils/tools/ttl_dict.py
diff --git a/llama_stack/providers/utils/vector_io/__init__.py b/src/llama_stack/providers/utils/vector_io/__init__.py
similarity index 100%
rename from llama_stack/providers/utils/vector_io/__init__.py
rename to src/llama_stack/providers/utils/vector_io/__init__.py
diff --git a/llama_stack/providers/utils/vector_io/vector_utils.py b/src/llama_stack/providers/utils/vector_io/vector_utils.py
similarity index 100%
rename from llama_stack/providers/utils/vector_io/vector_utils.py
rename to src/llama_stack/providers/utils/vector_io/vector_utils.py
diff --git a/llama_stack/schema_utils.py b/src/llama_stack/schema_utils.py
similarity index 100%
rename from llama_stack/schema_utils.py
rename to src/llama_stack/schema_utils.py
diff --git a/llama_stack/strong_typing/__init__.py b/src/llama_stack/strong_typing/__init__.py
similarity index 100%
rename from llama_stack/strong_typing/__init__.py
rename to src/llama_stack/strong_typing/__init__.py
diff --git a/llama_stack/strong_typing/auxiliary.py b/src/llama_stack/strong_typing/auxiliary.py
similarity index 89%
rename from llama_stack/strong_typing/auxiliary.py
rename to src/llama_stack/strong_typing/auxiliary.py
index 965ffa079..eb067b38b 100644
--- a/llama_stack/strong_typing/auxiliary.py
+++ b/src/llama_stack/strong_typing/auxiliary.py
@@ -12,23 +12,24 @@ Type-safe data interchange for Python data classes.
 
 import dataclasses
 import sys
+from collections.abc import Callable
 from dataclasses import is_dataclass
-from typing import Callable, Dict, Optional, Type, TypeVar, Union, overload
+from typing import TypeVar, overload
 
 if sys.version_info >= (3, 9):
     from typing import Annotated as Annotated
 else:
-    from typing_extensions import Annotated as Annotated
+    from typing import Annotated as Annotated
 
 if sys.version_info >= (3, 10):
     from typing import TypeAlias as TypeAlias
 else:
-    from typing_extensions import TypeAlias as TypeAlias
+    from typing import TypeAlias as TypeAlias
 
 if sys.version_info >= (3, 11):
     from typing import dataclass_transform as dataclass_transform
 else:
-    from typing_extensions import dataclass_transform as dataclass_transform
+    from typing import dataclass_transform as dataclass_transform
 
 T = TypeVar("T")
 
@@ -56,17 +57,17 @@ class CompactDataClass:
 
 
 @overload
-def typeannotation(cls: Type[T], /) -> Type[T]: ...
+def typeannotation(cls: type[T], /) -> type[T]: ...
 
 
 @overload
-def typeannotation(cls: None, *, eq: bool = True, order: bool = False) -> Callable[[Type[T]], Type[T]]: ...
+def typeannotation(cls: None, *, eq: bool = True, order: bool = False) -> Callable[[type[T]], type[T]]: ...
 
 
 @dataclass_transform(eq_default=True, order_default=False)
 def typeannotation(
-    cls: Optional[Type[T]] = None, *, eq: bool = True, order: bool = False
-) -> Union[Type[T], Callable[[Type[T]], Type[T]]]:
+    cls: type[T] | None = None, *, eq: bool = True, order: bool = False
+) -> type[T] | Callable[[type[T]], type[T]]:
     """
     Returns the same class as was passed in, with dunder methods added based on the fields defined in the class.
 
@@ -76,7 +77,7 @@ def typeannotation(
     :returns: A data-class type, or a wrapper for data-class types.
     """
 
-    def wrap(cls: Type[T]) -> Type[T]:
+    def wrap(cls: type[T]) -> type[T]:
         # mypy fails to equate bound-y functions (first argument interpreted as
         # the bound object) with class methods, hence the `ignore` directive.
         cls.__repr__ = _compact_dataclass_repr  # type: ignore[method-assign]
@@ -213,7 +214,7 @@ float32: TypeAlias = Annotated[float, Storage(4)]
 float64: TypeAlias = Annotated[float, Storage(8)]
 
 # maps globals of type Annotated[T, ...] defined in this module to their string names
-_auxiliary_types: Dict[object, str] = {}
+_auxiliary_types: dict[object, str] = {}
 module = sys.modules[__name__]
 for var in dir(module):
     typ = getattr(module, var)
@@ -222,7 +223,7 @@ for var in dir(module):
         _auxiliary_types[typ] = var
 
 
-def get_auxiliary_format(data_type: object) -> Optional[str]:
+def get_auxiliary_format(data_type: object) -> str | None:
     "Returns the JSON format string corresponding to an auxiliary type."
 
     return _auxiliary_types.get(data_type)
diff --git a/llama_stack/strong_typing/classdef.py b/src/llama_stack/strong_typing/classdef.py
similarity index 86%
rename from llama_stack/strong_typing/classdef.py
rename to src/llama_stack/strong_typing/classdef.py
index 5ead886d4..e54e3a9d6 100644
--- a/llama_stack/strong_typing/classdef.py
+++ b/src/llama_stack/strong_typing/classdef.py
@@ -17,7 +17,7 @@ import types
 import typing
 import uuid
 from dataclasses import dataclass
-from typing import Any, Dict, List, Literal, Optional, Tuple, Type, TypeVar, Union
+from typing import Any, Literal, TypeVar, Union
 
 from .auxiliary import (
     Alias,
@@ -40,57 +40,57 @@ T = TypeVar("T")
 
 @dataclass
 class JsonSchemaNode:
-    title: Optional[str]
-    description: Optional[str]
+    title: str | None
+    description: str | None
 
 
 @dataclass
 class JsonSchemaType(JsonSchemaNode):
     type: str
-    format: Optional[str]
+    format: str | None
 
 
 @dataclass
 class JsonSchemaBoolean(JsonSchemaType):
     type: Literal["boolean"]
-    const: Optional[bool]
-    default: Optional[bool]
-    examples: Optional[List[bool]]
+    const: bool | None
+    default: bool | None
+    examples: list[bool] | None
 
 
 @dataclass
 class JsonSchemaInteger(JsonSchemaType):
     type: Literal["integer"]
-    const: Optional[int]
-    default: Optional[int]
-    examples: Optional[List[int]]
-    enum: Optional[List[int]]
-    minimum: Optional[int]
-    maximum: Optional[int]
+    const: int | None
+    default: int | None
+    examples: list[int] | None
+    enum: list[int] | None
+    minimum: int | None
+    maximum: int | None
 
 
 @dataclass
 class JsonSchemaNumber(JsonSchemaType):
     type: Literal["number"]
-    const: Optional[float]
-    default: Optional[float]
-    examples: Optional[List[float]]
-    minimum: Optional[float]
-    maximum: Optional[float]
-    exclusiveMinimum: Optional[float]
-    exclusiveMaximum: Optional[float]
-    multipleOf: Optional[float]
+    const: float | None
+    default: float | None
+    examples: list[float] | None
+    minimum: float | None
+    maximum: float | None
+    exclusiveMinimum: float | None
+    exclusiveMaximum: float | None
+    multipleOf: float | None
 
 
 @dataclass
 class JsonSchemaString(JsonSchemaType):
     type: Literal["string"]
-    const: Optional[str]
-    default: Optional[str]
-    examples: Optional[List[str]]
-    enum: Optional[List[str]]
-    minLength: Optional[int]
-    maxLength: Optional[int]
+    const: str | None
+    default: str | None
+    examples: list[str] | None
+    enum: list[str] | None
+    minLength: int | None
+    maxLength: int | None
 
 
 @dataclass
@@ -102,9 +102,9 @@ class JsonSchemaArray(JsonSchemaType):
 @dataclass
 class JsonSchemaObject(JsonSchemaType):
     type: Literal["object"]
-    properties: Optional[Dict[str, "JsonSchemaAny"]]
-    additionalProperties: Optional[bool]
-    required: Optional[List[str]]
+    properties: dict[str, "JsonSchemaAny"] | None
+    additionalProperties: bool | None
+    required: list[str] | None
 
 
 @dataclass
@@ -114,24 +114,24 @@ class JsonSchemaRef(JsonSchemaNode):
 
 @dataclass
 class JsonSchemaAllOf(JsonSchemaNode):
-    allOf: List["JsonSchemaAny"]
+    allOf: list["JsonSchemaAny"]
 
 
 @dataclass
 class JsonSchemaAnyOf(JsonSchemaNode):
-    anyOf: List["JsonSchemaAny"]
+    anyOf: list["JsonSchemaAny"]
 
 
 @dataclass
 class Discriminator:
     propertyName: str
-    mapping: Dict[str, str]
+    mapping: dict[str, str]
 
 
 @dataclass
 class JsonSchemaOneOf(JsonSchemaNode):
-    oneOf: List["JsonSchemaAny"]
-    discriminator: Optional[Discriminator]
+    oneOf: list["JsonSchemaAny"]
+    discriminator: Discriminator | None
 
 
 JsonSchemaAny = Union[
@@ -149,7 +149,7 @@ JsonSchemaAny = Union[
 @dataclass
 class JsonSchemaTopLevelObject(JsonSchemaObject):
     schema: Annotated[str, Alias("$schema")]
-    definitions: Optional[Dict[str, JsonSchemaAny]]
+    definitions: dict[str, JsonSchemaAny] | None
 
 
 def integer_range_to_type(min_value: float, max_value: float) -> type:
@@ -173,11 +173,11 @@ def enum_safe_name(name: str) -> str:
 def enum_values_to_type(
     module: types.ModuleType,
     name: str,
-    values: Dict[str, Any],
-    title: Optional[str] = None,
-    description: Optional[str] = None,
-) -> Type[enum.Enum]:
-    enum_class: Type[enum.Enum] = enum.Enum(name, values)  # type: ignore
+    values: dict[str, Any],
+    title: str | None = None,
+    description: str | None = None,
+) -> type[enum.Enum]:
+    enum_class: type[enum.Enum] = enum.Enum(name, values)  # type: ignore
 
     # assign the newly created type to the same module where the defining class is
     enum_class.__module__ = module.__name__
@@ -330,7 +330,7 @@ def node_to_typedef(module: types.ModuleType, context: str, node: JsonSchemaNode
         type_def = node_to_typedef(module, context, node.items)
         if type_def.default is not dataclasses.MISSING:
             raise TypeError("disallowed: `default` for array element type")
-        list_type = List[(type_def.type,)]  # type: ignore
+        list_type = list[(type_def.type,)]  # type: ignore
         return TypeDef(list_type, dataclasses.MISSING)
 
     elif isinstance(node, JsonSchemaObject):
@@ -344,8 +344,8 @@ def node_to_typedef(module: types.ModuleType, context: str, node: JsonSchemaNode
 
         class_name = context
 
-        fields: List[Tuple[str, Any, dataclasses.Field]] = []
-        params: Dict[str, DocstringParam] = {}
+        fields: list[tuple[str, Any, dataclasses.Field]] = []
+        params: dict[str, DocstringParam] = {}
         for prop_name, prop_node in node.properties.items():
             type_def = node_to_typedef(module, f"{class_name}__{prop_name}", prop_node)
             if prop_name in required:
@@ -388,7 +388,7 @@ class SchemaFlatteningOptions:
     recursive: bool = False
 
 
-def flatten_schema(schema: Schema, *, options: Optional[SchemaFlatteningOptions] = None) -> Schema:
+def flatten_schema(schema: Schema, *, options: SchemaFlatteningOptions | None = None) -> Schema:
     top_node = typing.cast(JsonSchemaTopLevelObject, json_to_object(JsonSchemaTopLevelObject, schema))
     flattener = SchemaFlattener(options)
     obj = flattener.flatten(top_node)
@@ -398,7 +398,7 @@ def flatten_schema(schema: Schema, *, options: Optional[SchemaFlatteningOptions]
 class SchemaFlattener:
     options: SchemaFlatteningOptions
 
-    def __init__(self, options: Optional[SchemaFlatteningOptions] = None) -> None:
+    def __init__(self, options: SchemaFlatteningOptions | None = None) -> None:
         self.options = options or SchemaFlatteningOptions()
 
     def flatten(self, source_node: JsonSchemaObject) -> JsonSchemaObject:
@@ -406,10 +406,10 @@ class SchemaFlattener:
             return source_node
 
         source_props = source_node.properties or {}
-        target_props: Dict[str, JsonSchemaAny] = {}
+        target_props: dict[str, JsonSchemaAny] = {}
 
         source_reqs = source_node.required or []
-        target_reqs: List[str] = []
+        target_reqs: list[str] = []
 
         for name, prop in source_props.items():
             if not isinstance(prop, JsonSchemaObject):
diff --git a/llama_stack/strong_typing/core.py b/src/llama_stack/strong_typing/core.py
similarity index 80%
rename from llama_stack/strong_typing/core.py
rename to src/llama_stack/strong_typing/core.py
index 501b6a5db..5f3764aeb 100644
--- a/llama_stack/strong_typing/core.py
+++ b/src/llama_stack/strong_typing/core.py
@@ -10,7 +10,7 @@ Type-safe data interchange for Python data classes.
 :see: https://github.com/hunyadi/strong_typing
 """
 
-from typing import Dict, List, Union
+from typing import Union
 
 
 class JsonObject:
@@ -28,8 +28,8 @@ JsonType = Union[
     int,
     float,
     str,
-    Dict[str, "JsonType"],
-    List["JsonType"],
+    dict[str, "JsonType"],
+    list["JsonType"],
 ]
 
 # a JSON type that cannot contain `null` values
@@ -38,9 +38,9 @@ StrictJsonType = Union[
     int,
     float,
     str,
-    Dict[str, "StrictJsonType"],
-    List["StrictJsonType"],
+    dict[str, "StrictJsonType"],
+    list["StrictJsonType"],
 ]
 
 # a meta-type that captures the object type in a JSON schema
-Schema = Dict[str, JsonType]
+Schema = dict[str, JsonType]
diff --git a/llama_stack/strong_typing/deserializer.py b/src/llama_stack/strong_typing/deserializer.py
similarity index 89%
rename from llama_stack/strong_typing/deserializer.py
rename to src/llama_stack/strong_typing/deserializer.py
index 883590862..58dfe53a4 100644
--- a/llama_stack/strong_typing/deserializer.py
+++ b/src/llama_stack/strong_typing/deserializer.py
@@ -20,19 +20,14 @@ import ipaddress
 import sys
 import typing
 import uuid
+from collections.abc import Callable
 from types import ModuleType
 from typing import (
     Any,
-    Callable,
-    Dict,
     Generic,
-    List,
     Literal,
     NamedTuple,
     Optional,
-    Set,
-    Tuple,
-    Type,
     TypeVar,
     Union,
 )
@@ -70,7 +65,7 @@ V = TypeVar("V")
 class Deserializer(abc.ABC, Generic[T]):
     "Parses a JSON value into a Python type."
 
-    def build(self, context: Optional[ModuleType]) -> None:
+    def build(self, context: ModuleType | None) -> None:
         """
         Creates auxiliary parsers that this parser is depending on.
 
@@ -203,19 +198,19 @@ class IPv6Deserializer(Deserializer[ipaddress.IPv6Address]):
         return ipaddress.IPv6Address(data)
 
 
-class ListDeserializer(Deserializer[List[T]]):
+class ListDeserializer(Deserializer[list[T]]):
     "Recursively de-serializes a JSON array into a Python `list`."
 
-    item_type: Type[T]
+    item_type: type[T]
     item_parser: Deserializer
 
-    def __init__(self, item_type: Type[T]) -> None:
+    def __init__(self, item_type: type[T]) -> None:
         self.item_type = item_type
 
-    def build(self, context: Optional[ModuleType]) -> None:
+    def build(self, context: ModuleType | None) -> None:
         self.item_parser = _get_deserializer(self.item_type, context)
 
-    def parse(self, data: JsonType) -> List[T]:
+    def parse(self, data: JsonType) -> list[T]:
         if not isinstance(data, list):
             type_name = python_type_to_str(self.item_type)
             raise JsonTypeError(f"type `List[{type_name}]` expects JSON `array` data but instead received: {data}")
@@ -223,19 +218,19 @@ class ListDeserializer(Deserializer[List[T]]):
         return [self.item_parser.parse(item) for item in data]
 
 
-class DictDeserializer(Deserializer[Dict[K, V]]):
+class DictDeserializer(Deserializer[dict[K, V]]):
     "Recursively de-serializes a JSON object into a Python `dict`."
 
-    key_type: Type[K]
-    value_type: Type[V]
+    key_type: type[K]
+    value_type: type[V]
     value_parser: Deserializer[V]
 
-    def __init__(self, key_type: Type[K], value_type: Type[V]) -> None:
+    def __init__(self, key_type: type[K], value_type: type[V]) -> None:
         self.key_type = key_type
         self.value_type = value_type
         self._check_key_type()
 
-    def build(self, context: Optional[ModuleType]) -> None:
+    def build(self, context: ModuleType | None) -> None:
         self.value_parser = _get_deserializer(self.value_type, context)
 
     def _check_key_type(self) -> None:
@@ -264,7 +259,7 @@ class DictDeserializer(Deserializer[Dict[K, V]]):
         value_type_name = python_type_to_str(self.value_type)
         return f"Dict[{key_type_name}, {value_type_name}]"
 
-    def parse(self, data: JsonType) -> Dict[K, V]:
+    def parse(self, data: JsonType) -> dict[K, V]:
         if not isinstance(data, dict):
             raise JsonTypeError(
                 f"`type `{self.container_type}` expects JSON `object` data but instead received: {data}"
@@ -276,19 +271,19 @@ class DictDeserializer(Deserializer[Dict[K, V]]):
         )
 
 
-class SetDeserializer(Deserializer[Set[T]]):
+class SetDeserializer(Deserializer[set[T]]):
     "Recursively de-serializes a JSON list into a Python `set`."
 
-    member_type: Type[T]
+    member_type: type[T]
     member_parser: Deserializer
 
-    def __init__(self, member_type: Type[T]) -> None:
+    def __init__(self, member_type: type[T]) -> None:
         self.member_type = member_type
 
-    def build(self, context: Optional[ModuleType]) -> None:
+    def build(self, context: ModuleType | None) -> None:
         self.member_parser = _get_deserializer(self.member_type, context)
 
-    def parse(self, data: JsonType) -> Set[T]:
+    def parse(self, data: JsonType) -> set[T]:
         if not isinstance(data, list):
             type_name = python_type_to_str(self.member_type)
             raise JsonTypeError(f"type `Set[{type_name}]` expects JSON `array` data but instead received: {data}")
@@ -296,16 +291,16 @@ class SetDeserializer(Deserializer[Set[T]]):
         return set(self.member_parser.parse(item) for item in data)
 
 
-class TupleDeserializer(Deserializer[Tuple[Any, ...]]):
+class TupleDeserializer(Deserializer[tuple[Any, ...]]):
     "Recursively de-serializes a JSON list into a Python `tuple`."
 
-    item_types: Tuple[Type[Any], ...]
-    item_parsers: Tuple[Deserializer[Any], ...]
+    item_types: tuple[type[Any], ...]
+    item_parsers: tuple[Deserializer[Any], ...]
 
-    def __init__(self, item_types: Tuple[Type[Any], ...]) -> None:
+    def __init__(self, item_types: tuple[type[Any], ...]) -> None:
         self.item_types = item_types
 
-    def build(self, context: Optional[ModuleType]) -> None:
+    def build(self, context: ModuleType | None) -> None:
         self.item_parsers = tuple(_get_deserializer(item_type, context) for item_type in self.item_types)
 
     @property
@@ -313,7 +308,7 @@ class TupleDeserializer(Deserializer[Tuple[Any, ...]]):
         type_names = ", ".join(python_type_to_str(item_type) for item_type in self.item_types)
         return f"Tuple[{type_names}]"
 
-    def parse(self, data: JsonType) -> Tuple[Any, ...]:
+    def parse(self, data: JsonType) -> tuple[Any, ...]:
         if not isinstance(data, list) or len(data) != len(self.item_parsers):
             if not isinstance(data, list):
                 raise JsonTypeError(
@@ -331,13 +326,13 @@ class TupleDeserializer(Deserializer[Tuple[Any, ...]]):
 class UnionDeserializer(Deserializer):
     "De-serializes a JSON value (of any type) into a Python union type."
 
-    member_types: Tuple[type, ...]
-    member_parsers: Tuple[Deserializer, ...]
+    member_types: tuple[type, ...]
+    member_parsers: tuple[Deserializer, ...]
 
-    def __init__(self, member_types: Tuple[type, ...]) -> None:
+    def __init__(self, member_types: tuple[type, ...]) -> None:
         self.member_types = member_types
 
-    def build(self, context: Optional[ModuleType]) -> None:
+    def build(self, context: ModuleType | None) -> None:
         self.member_parsers = tuple(_get_deserializer(member_type, context) for member_type in self.member_types)
 
     def parse(self, data: JsonType) -> Any:
@@ -354,7 +349,7 @@ class UnionDeserializer(Deserializer):
         raise JsonKeyError(f"type `Union[{type_names}]` could not be instantiated from: {data}")
 
 
-def get_literal_properties(typ: type) -> Set[str]:
+def get_literal_properties(typ: type) -> set[str]:
     "Returns the names of all properties in a class that are of a literal type."
 
     return set(
@@ -362,7 +357,7 @@ def get_literal_properties(typ: type) -> Set[str]:
     )
 
 
-def get_discriminating_properties(types: Tuple[type, ...]) -> Set[str]:
+def get_discriminating_properties(types: tuple[type, ...]) -> set[str]:
     "Returns a set of properties with literal type that are common across all specified classes."
 
     if not types or not all(isinstance(typ, type) for typ in types):
@@ -378,15 +373,15 @@ def get_discriminating_properties(types: Tuple[type, ...]) -> Set[str]:
 class TaggedUnionDeserializer(Deserializer):
     "De-serializes a JSON value with one or more disambiguating properties into a Python union type."
 
-    member_types: Tuple[type, ...]
-    disambiguating_properties: Set[str]
-    member_parsers: Dict[Tuple[str, Any], Deserializer]
+    member_types: tuple[type, ...]
+    disambiguating_properties: set[str]
+    member_parsers: dict[tuple[str, Any], Deserializer]
 
-    def __init__(self, member_types: Tuple[type, ...]) -> None:
+    def __init__(self, member_types: tuple[type, ...]) -> None:
         self.member_types = member_types
         self.disambiguating_properties = get_discriminating_properties(member_types)
 
-    def build(self, context: Optional[ModuleType]) -> None:
+    def build(self, context: ModuleType | None) -> None:
         self.member_parsers = {}
         for member_type in self.member_types:
             for property_name in self.disambiguating_properties:
@@ -435,13 +430,13 @@ class TaggedUnionDeserializer(Deserializer):
 class LiteralDeserializer(Deserializer):
     "De-serializes a JSON value into a Python literal type."
 
-    values: Tuple[Any, ...]
+    values: tuple[Any, ...]
     parser: Deserializer
 
-    def __init__(self, values: Tuple[Any, ...]) -> None:
+    def __init__(self, values: tuple[Any, ...]) -> None:
         self.values = values
 
-    def build(self, context: Optional[ModuleType]) -> None:
+    def build(self, context: ModuleType | None) -> None:
         literal_type_tuple = tuple(type(value) for value in self.values)
         literal_type_set = set(literal_type_tuple)
         if len(literal_type_set) != 1:
@@ -464,9 +459,9 @@ class LiteralDeserializer(Deserializer):
 class EnumDeserializer(Deserializer[E]):
     "Returns an enumeration instance based on the enumeration value read from a JSON value."
 
-    enum_type: Type[E]
+    enum_type: type[E]
 
-    def __init__(self, enum_type: Type[E]) -> None:
+    def __init__(self, enum_type: type[E]) -> None:
         self.enum_type = enum_type
 
     def parse(self, data: JsonType) -> E:
@@ -504,13 +499,13 @@ class FieldDeserializer(abc.ABC, Generic[T, R]):
         self.parser = parser
 
     @abc.abstractmethod
-    def parse_field(self, data: Dict[str, JsonType]) -> R: ...
+    def parse_field(self, data: dict[str, JsonType]) -> R: ...
 
 
 class RequiredFieldDeserializer(FieldDeserializer[T, T]):
     "Deserializes a JSON property into a mandatory Python object field."
 
-    def parse_field(self, data: Dict[str, JsonType]) -> T:
+    def parse_field(self, data: dict[str, JsonType]) -> T:
         if self.property_name not in data:
             raise JsonKeyError(f"missing required property `{self.property_name}` from JSON object: {data}")
 
@@ -520,7 +515,7 @@ class RequiredFieldDeserializer(FieldDeserializer[T, T]):
 class OptionalFieldDeserializer(FieldDeserializer[T, Optional[T]]):
     "Deserializes a JSON property into an optional Python object field with a default value of `None`."
 
-    def parse_field(self, data: Dict[str, JsonType]) -> Optional[T]:
+    def parse_field(self, data: dict[str, JsonType]) -> T | None:
         value = data.get(self.property_name)
         if value is not None:
             return self.parser.parse(value)
@@ -543,7 +538,7 @@ class DefaultFieldDeserializer(FieldDeserializer[T, T]):
         super().__init__(property_name, field_name, parser)
         self.default_value = default_value
 
-    def parse_field(self, data: Dict[str, JsonType]) -> T:
+    def parse_field(self, data: dict[str, JsonType]) -> T:
         value = data.get(self.property_name)
         if value is not None:
             return self.parser.parse(value)
@@ -566,7 +561,7 @@ class DefaultFactoryFieldDeserializer(FieldDeserializer[T, T]):
         super().__init__(property_name, field_name, parser)
         self.default_factory = default_factory
 
-    def parse_field(self, data: Dict[str, JsonType]) -> T:
+    def parse_field(self, data: dict[str, JsonType]) -> T:
         value = data.get(self.property_name)
         if value is not None:
             return self.parser.parse(value)
@@ -578,13 +573,13 @@ class ClassDeserializer(Deserializer[T]):
     "Base class for de-serializing class-like types such as data classes, named tuples and regular classes."
 
     class_type: type
-    property_parsers: List[FieldDeserializer]
-    property_fields: Set[str]
+    property_parsers: list[FieldDeserializer]
+    property_fields: set[str]
 
-    def __init__(self, class_type: Type[T]) -> None:
+    def __init__(self, class_type: type[T]) -> None:
         self.class_type = class_type
 
-    def assign(self, property_parsers: List[FieldDeserializer]) -> None:
+    def assign(self, property_parsers: list[FieldDeserializer]) -> None:
         self.property_parsers = property_parsers
         self.property_fields = set(property_parser.property_name for property_parser in property_parsers)
 
@@ -593,7 +588,7 @@ class ClassDeserializer(Deserializer[T]):
             type_name = python_type_to_str(self.class_type)
             raise JsonTypeError(f"`type `{type_name}` expects JSON `object` data but instead received: {data}")
 
-        object_data: Dict[str, JsonType] = typing.cast(Dict[str, JsonType], data)
+        object_data: dict[str, JsonType] = typing.cast(dict[str, JsonType], data)
 
         field_values = {}
         for property_parser in self.property_parsers:
@@ -619,8 +614,8 @@ class ClassDeserializer(Deserializer[T]):
 class NamedTupleDeserializer(ClassDeserializer[NamedTuple]):
     "De-serializes a named tuple from a JSON `object`."
 
-    def build(self, context: Optional[ModuleType]) -> None:
-        property_parsers: List[FieldDeserializer] = [
+    def build(self, context: ModuleType | None) -> None:
+        property_parsers: list[FieldDeserializer] = [
             RequiredFieldDeserializer(field_name, field_name, _get_deserializer(field_type, context))
             for field_name, field_type in get_resolved_hints(self.class_type).items()
         ]
@@ -634,13 +629,13 @@ class NamedTupleDeserializer(ClassDeserializer[NamedTuple]):
 class DataclassDeserializer(ClassDeserializer[T]):
     "De-serializes a data class from a JSON `object`."
 
-    def __init__(self, class_type: Type[T]) -> None:
+    def __init__(self, class_type: type[T]) -> None:
         if not dataclasses.is_dataclass(class_type):
             raise TypeError("expected: data-class type")
         super().__init__(class_type)  # type: ignore[arg-type]
 
-    def build(self, context: Optional[ModuleType]) -> None:
-        property_parsers: List[FieldDeserializer] = []
+    def build(self, context: ModuleType | None) -> None:
+        property_parsers: list[FieldDeserializer] = []
         resolved_hints = get_resolved_hints(self.class_type)
         for field in dataclasses.fields(self.class_type):
             field_type = resolved_hints[field.name]
@@ -651,7 +646,7 @@ class DataclassDeserializer(ClassDeserializer[T]):
             has_default_factory = field.default_factory is not dataclasses.MISSING
 
             if is_optional:
-                required_type: Type[T] = unwrap_optional_type(field_type)
+                required_type: type[T] = unwrap_optional_type(field_type)
             else:
                 required_type = field_type
 
@@ -691,15 +686,15 @@ class FrozenDataclassDeserializer(DataclassDeserializer[T]):
 class TypedClassDeserializer(ClassDeserializer[T]):
     "De-serializes a class with type annotations from a JSON `object` by iterating over class properties."
 
-    def build(self, context: Optional[ModuleType]) -> None:
-        property_parsers: List[FieldDeserializer] = []
+    def build(self, context: ModuleType | None) -> None:
+        property_parsers: list[FieldDeserializer] = []
         for field_name, field_type in get_resolved_hints(self.class_type).items():
             property_name = python_field_to_json_property(field_name, field_type)
 
             is_optional = is_type_optional(field_type)
 
             if is_optional:
-                required_type: Type[T] = unwrap_optional_type(field_type)
+                required_type: type[T] = unwrap_optional_type(field_type)
             else:
                 required_type = field_type
 
@@ -715,7 +710,7 @@ class TypedClassDeserializer(ClassDeserializer[T]):
         super().assign(property_parsers)
 
 
-def create_deserializer(typ: TypeLike, context: Optional[ModuleType] = None) -> Deserializer:
+def create_deserializer(typ: TypeLike, context: ModuleType | None = None) -> Deserializer:
     """
     Creates a de-serializer engine to produce a Python object from an object obtained from a JSON string.
 
@@ -741,10 +736,10 @@ def create_deserializer(typ: TypeLike, context: Optional[ModuleType] = None) ->
     return _get_deserializer(typ, context)
 
 
-_CACHE: Dict[Tuple[str, str], Deserializer] = {}
+_CACHE: dict[tuple[str, str], Deserializer] = {}
 
 
-def _get_deserializer(typ: TypeLike, context: Optional[ModuleType]) -> Deserializer:
+def _get_deserializer(typ: TypeLike, context: ModuleType | None) -> Deserializer:
     "Creates or re-uses a de-serializer engine to parse an object obtained from a JSON string."
 
     cache_key = None
diff --git a/llama_stack/strong_typing/docstring.py b/src/llama_stack/strong_typing/docstring.py
similarity index 93%
rename from llama_stack/strong_typing/docstring.py
rename to src/llama_stack/strong_typing/docstring.py
index 497c9ea82..4c9ea49e5 100644
--- a/llama_stack/strong_typing/docstring.py
+++ b/src/llama_stack/strong_typing/docstring.py
@@ -18,14 +18,15 @@ import re
 import sys
 import types
 import typing
+from collections.abc import Callable
 from dataclasses import dataclass
 from io import StringIO
-from typing import Any, Callable, Dict, Optional, Protocol, Type, TypeVar
+from typing import Any, Protocol, TypeVar
 
 if sys.version_info >= (3, 10):
     from typing import TypeGuard
 else:
-    from typing_extensions import TypeGuard
+    from typing import TypeGuard
 
 from .inspection import (
     DataclassInstance,
@@ -110,14 +111,14 @@ class Docstring:
     :param returns: The returns declaration extracted from a docstring.
     """
 
-    short_description: Optional[str] = None
-    long_description: Optional[str] = None
-    params: Dict[str, DocstringParam] = dataclasses.field(default_factory=dict)
-    returns: Optional[DocstringReturns] = None
-    raises: Dict[str, DocstringRaises] = dataclasses.field(default_factory=dict)
+    short_description: str | None = None
+    long_description: str | None = None
+    params: dict[str, DocstringParam] = dataclasses.field(default_factory=dict)
+    returns: DocstringReturns | None = None
+    raises: dict[str, DocstringRaises] = dataclasses.field(default_factory=dict)
 
     @property
-    def full_description(self) -> Optional[str]:
+    def full_description(self) -> str | None:
         if self.short_description and self.long_description:
             return f"{self.short_description}\n\n{self.long_description}"
         elif self.short_description:
@@ -158,18 +159,18 @@ class Docstring:
         return s
 
 
-def is_exception(member: object) -> TypeGuard[Type[BaseException]]:
+def is_exception(member: object) -> TypeGuard[type[BaseException]]:
     return isinstance(member, type) and issubclass(member, BaseException)
 
 
-def get_exceptions(module: types.ModuleType) -> Dict[str, Type[BaseException]]:
+def get_exceptions(module: types.ModuleType) -> dict[str, type[BaseException]]:
     "Returns all exception classes declared in a module."
 
     return {name: class_type for name, class_type in inspect.getmembers(module, is_exception)}
 
 
 class SupportsDoc(Protocol):
-    __doc__: Optional[str]
+    __doc__: str | None
 
 
 def _maybe_unwrap_async_iterator(t):
@@ -213,7 +214,7 @@ def parse_type(typ: SupportsDoc) -> Docstring:
     # assign exception types
     defining_module = inspect.getmodule(typ)
     if defining_module:
-        context: Dict[str, type] = {}
+        context: dict[str, type] = {}
         context.update(get_exceptions(builtins))
         context.update(get_exceptions(defining_module))
         for exc_name, exc in docstring.raises.items():
@@ -262,8 +263,8 @@ def parse_text(text: str) -> Docstring:
     else:
         long_description = None
 
-    params: Dict[str, DocstringParam] = {}
-    raises: Dict[str, DocstringRaises] = {}
+    params: dict[str, DocstringParam] = {}
+    raises: dict[str, DocstringRaises] = {}
     returns = None
     for match in re.finditer(r"(^:.*?)(?=^:|\Z)", meta_chunk, flags=re.DOTALL | re.MULTILINE):
         chunk = match.group(0)
@@ -325,7 +326,7 @@ def has_docstring(typ: SupportsDoc) -> bool:
     return bool(typ.__doc__)
 
 
-def get_docstring(typ: SupportsDoc) -> Optional[str]:
+def get_docstring(typ: SupportsDoc) -> str | None:
     if typ.__doc__ is None:
         return None
 
@@ -348,7 +349,7 @@ def check_docstring(typ: SupportsDoc, docstring: Docstring, strict: bool = False
         check_function_docstring(typ, docstring, strict)
 
 
-def check_dataclass_docstring(typ: Type[DataclassInstance], docstring: Docstring, strict: bool = False) -> None:
+def check_dataclass_docstring(typ: type[DataclassInstance], docstring: Docstring, strict: bool = False) -> None:
     """
     Verifies the doc-string of a data-class type.
 
diff --git a/llama_stack/strong_typing/exception.py b/src/llama_stack/strong_typing/exception.py
similarity index 100%
rename from llama_stack/strong_typing/exception.py
rename to src/llama_stack/strong_typing/exception.py
diff --git a/llama_stack/strong_typing/inspection.py b/src/llama_stack/strong_typing/inspection.py
similarity index 91%
rename from llama_stack/strong_typing/inspection.py
rename to src/llama_stack/strong_typing/inspection.py
index f3a4bef90..d3ebc7585 100644
--- a/llama_stack/strong_typing/inspection.py
+++ b/src/llama_stack/strong_typing/inspection.py
@@ -22,19 +22,12 @@ import sys
 import types
 import typing
 import uuid
+from collections.abc import Callable, Iterable
 from typing import (
     Any,
-    Callable,
-    Dict,
-    Iterable,
-    List,
     Literal,
     NamedTuple,
-    Optional,
     Protocol,
-    Set,
-    Tuple,
-    Type,
     TypeVar,
     Union,
     runtime_checkable,
@@ -43,12 +36,12 @@ from typing import (
 if sys.version_info >= (3, 9):
     from typing import Annotated
 else:
-    from typing_extensions import Annotated
+    from typing import Annotated
 
 if sys.version_info >= (3, 10):
     from typing import TypeGuard
 else:
-    from typing_extensions import TypeGuard
+    from typing import TypeGuard
 
 
 from pydantic import BaseModel
@@ -143,10 +136,10 @@ def evaluate_type(typ: Any, module: types.ModuleType) -> Any:
 
 @runtime_checkable
 class DataclassInstance(Protocol):
-    __dataclass_fields__: typing.ClassVar[Dict[str, dataclasses.Field]]
+    __dataclass_fields__: typing.ClassVar[dict[str, dataclasses.Field]]
 
 
-def is_dataclass_type(typ: Any) -> TypeGuard[Type[DataclassInstance]]:
+def is_dataclass_type(typ: Any) -> TypeGuard[type[DataclassInstance]]:
     "True if the argument corresponds to a data class type (but not an instance)."
 
     typ = unwrap_annotated_type(typ)
@@ -171,14 +164,14 @@ class DataclassField:
         self.default = default
 
 
-def dataclass_fields(cls: Type[DataclassInstance]) -> Iterable[DataclassField]:
+def dataclass_fields(cls: type[DataclassInstance]) -> Iterable[DataclassField]:
     "Generates the fields of a data-class resolving forward references."
 
     for field in dataclasses.fields(cls):
         yield DataclassField(field.name, evaluate_member_type(field.type, cls), field.default)
 
 
-def dataclass_field_by_name(cls: Type[DataclassInstance], name: str) -> DataclassField:
+def dataclass_field_by_name(cls: type[DataclassInstance], name: str) -> DataclassField:
     "Looks up a field in a data-class by its field name."
 
     for field in dataclasses.fields(cls):
@@ -194,7 +187,7 @@ def is_named_tuple_instance(obj: Any) -> TypeGuard[NamedTuple]:
     return is_named_tuple_type(type(obj))
 
 
-def is_named_tuple_type(typ: Any) -> TypeGuard[Type[NamedTuple]]:
+def is_named_tuple_type(typ: Any) -> TypeGuard[type[NamedTuple]]:
     """
     True if the argument corresponds to a named tuple type.
 
@@ -223,7 +216,7 @@ def is_named_tuple_type(typ: Any) -> TypeGuard[Type[NamedTuple]]:
 
 if sys.version_info >= (3, 11):
 
-    def is_type_enum(typ: object) -> TypeGuard[Type[enum.Enum]]:
+    def is_type_enum(typ: object) -> TypeGuard[type[enum.Enum]]:
         "True if the specified type is an enumeration type."
 
         typ = unwrap_annotated_type(typ)
@@ -231,7 +224,7 @@ if sys.version_info >= (3, 11):
 
 else:
 
-    def is_type_enum(typ: object) -> TypeGuard[Type[enum.Enum]]:
+    def is_type_enum(typ: object) -> TypeGuard[type[enum.Enum]]:
         "True if the specified type is an enumeration type."
 
         typ = unwrap_annotated_type(typ)
@@ -240,7 +233,7 @@ else:
         return isinstance(typ, type) and issubclass(typ, enum.Enum)
 
 
-def enum_value_types(enum_type: Type[enum.Enum]) -> List[type]:
+def enum_value_types(enum_type: type[enum.Enum]) -> list[type]:
     """
     Returns all unique value types of the `enum.Enum` type in definition order.
     """
@@ -250,8 +243,8 @@ def enum_value_types(enum_type: Type[enum.Enum]) -> List[type]:
 
 
 def extend_enum(
-    source: Type[enum.Enum],
-) -> Callable[[Type[enum.Enum]], Type[enum.Enum]]:
+    source: type[enum.Enum],
+) -> Callable[[type[enum.Enum]], type[enum.Enum]]:
     """
     Creates a new enumeration type extending the set of values in an existing type.
 
@@ -259,13 +252,13 @@ def extend_enum(
     :returns: A new enumeration type with the extended set of values.
     """
 
-    def wrap(extend: Type[enum.Enum]) -> Type[enum.Enum]:
+    def wrap(extend: type[enum.Enum]) -> type[enum.Enum]:
         # create new enumeration type combining the values from both types
-        values: Dict[str, Any] = {}
+        values: dict[str, Any] = {}
         values.update((e.name, e.value) for e in source)
         values.update((e.name, e.value) for e in extend)
         # mypy fails to determine that __name__ is always a string; hence the `ignore` directive.
-        enum_class: Type[enum.Enum] = enum.Enum(extend.__name__, values)  # type: ignore[misc]
+        enum_class: type[enum.Enum] = enum.Enum(extend.__name__, values)  # type: ignore[misc]
 
         # assign the newly created type to the same module where the extending class is defined
         enum_class.__module__ = extend.__module__
@@ -292,7 +285,7 @@ else:
         return typing.get_origin(typ) is Union
 
 
-def is_type_optional(typ: object, strict: bool = False) -> TypeGuard[Type[Optional[Any]]]:
+def is_type_optional(typ: object, strict: bool = False) -> TypeGuard[type[Any | None]]:
     """
     True if the type annotation corresponds to an optional type (e.g. `Optional[T]` or `Union[T1,T2,None]`).
 
@@ -313,7 +306,7 @@ def is_type_optional(typ: object, strict: bool = False) -> TypeGuard[Type[Option
     return False
 
 
-def unwrap_optional_type(typ: Type[Optional[T]]) -> Type[T]:
+def unwrap_optional_type(typ: type[T | None]) -> type[T]:
     """
     Extracts the inner type of an optional type.
 
@@ -324,7 +317,7 @@ def unwrap_optional_type(typ: Type[Optional[T]]) -> Type[T]:
     return rewrap_annotated_type(_unwrap_optional_type, typ)
 
 
-def _unwrap_optional_type(typ: Type[Optional[T]]) -> Type[T]:
+def _unwrap_optional_type(typ: type[T | None]) -> type[T]:
     "Extracts the type qualified as optional (e.g. returns `T` for `Optional[T]`)."
 
     # Optional[T] is represented internally as Union[T, None]
@@ -346,7 +339,7 @@ def is_type_union(typ: object) -> bool:
     return False
 
 
-def unwrap_union_types(typ: object) -> Tuple[object, ...]:
+def unwrap_union_types(typ: object) -> tuple[object, ...]:
     """
     Extracts the inner types of a union type.
 
@@ -358,7 +351,7 @@ def unwrap_union_types(typ: object) -> Tuple[object, ...]:
     return _unwrap_union_types(typ)
 
 
-def _unwrap_union_types(typ: object) -> Tuple[object, ...]:
+def _unwrap_union_types(typ: object) -> tuple[object, ...]:
     "Extracts the types in a union (e.g. returns a tuple of types `T1` and `T2` for `Union[T1, T2]`)."
 
     if not _is_union_like(typ):
@@ -389,7 +382,7 @@ def unwrap_literal_value(typ: object) -> Any:
     return args[0]
 
 
-def unwrap_literal_values(typ: object) -> Tuple[Any, ...]:
+def unwrap_literal_values(typ: object) -> tuple[Any, ...]:
     """
     Extracts the constant values captured by a literal type.
 
@@ -401,7 +394,7 @@ def unwrap_literal_values(typ: object) -> Tuple[Any, ...]:
     return typing.get_args(typ)
 
 
-def unwrap_literal_types(typ: object) -> Tuple[type, ...]:
+def unwrap_literal_types(typ: object) -> tuple[type, ...]:
     """
     Extracts the types of the constant values captured by a literal type.
 
@@ -412,14 +405,14 @@ def unwrap_literal_types(typ: object) -> Tuple[type, ...]:
     return tuple(type(t) for t in unwrap_literal_values(typ))
 
 
-def is_generic_list(typ: object) -> TypeGuard[Type[list]]:
+def is_generic_list(typ: object) -> TypeGuard[type[list]]:
     "True if the specified type is a generic list, i.e. `List[T]`."
 
     typ = unwrap_annotated_type(typ)
     return typing.get_origin(typ) is list
 
 
-def unwrap_generic_list(typ: Type[List[T]]) -> Type[T]:
+def unwrap_generic_list(typ: type[list[T]]) -> type[T]:
     """
     Extracts the item type of a list type.
 
@@ -430,21 +423,21 @@ def unwrap_generic_list(typ: Type[List[T]]) -> Type[T]:
     return rewrap_annotated_type(_unwrap_generic_list, typ)
 
 
-def _unwrap_generic_list(typ: Type[List[T]]) -> Type[T]:
+def _unwrap_generic_list(typ: type[list[T]]) -> type[T]:
     "Extracts the item type of a list type (e.g. returns `T` for `List[T]`)."
 
     (list_type,) = typing.get_args(typ)  # unpack single tuple element
     return list_type  # type: ignore[no-any-return]
 
 
-def is_generic_set(typ: object) -> TypeGuard[Type[set]]:
+def is_generic_set(typ: object) -> TypeGuard[type[set]]:
     "True if the specified type is a generic set, i.e. `Set[T]`."
 
     typ = unwrap_annotated_type(typ)
     return typing.get_origin(typ) is set
 
 
-def unwrap_generic_set(typ: Type[Set[T]]) -> Type[T]:
+def unwrap_generic_set(typ: type[set[T]]) -> type[T]:
     """
     Extracts the item type of a set type.
 
@@ -455,21 +448,21 @@ def unwrap_generic_set(typ: Type[Set[T]]) -> Type[T]:
     return rewrap_annotated_type(_unwrap_generic_set, typ)
 
 
-def _unwrap_generic_set(typ: Type[Set[T]]) -> Type[T]:
+def _unwrap_generic_set(typ: type[set[T]]) -> type[T]:
     "Extracts the item type of a set type (e.g. returns `T` for `Set[T]`)."
 
     (set_type,) = typing.get_args(typ)  # unpack single tuple element
     return set_type  # type: ignore[no-any-return]
 
 
-def is_generic_dict(typ: object) -> TypeGuard[Type[dict]]:
+def is_generic_dict(typ: object) -> TypeGuard[type[dict]]:
     "True if the specified type is a generic dictionary, i.e. `Dict[KeyType, ValueType]`."
 
     typ = unwrap_annotated_type(typ)
     return typing.get_origin(typ) is dict
 
 
-def unwrap_generic_dict(typ: Type[Dict[K, V]]) -> Tuple[Type[K], Type[V]]:
+def unwrap_generic_dict(typ: type[dict[K, V]]) -> tuple[type[K], type[V]]:
     """
     Extracts the key and value types of a dictionary type as a tuple.
 
@@ -480,7 +473,7 @@ def unwrap_generic_dict(typ: Type[Dict[K, V]]) -> Tuple[Type[K], Type[V]]:
     return _unwrap_generic_dict(unwrap_annotated_type(typ))
 
 
-def _unwrap_generic_dict(typ: Type[Dict[K, V]]) -> Tuple[Type[K], Type[V]]:
+def _unwrap_generic_dict(typ: type[dict[K, V]]) -> tuple[type[K], type[V]]:
     "Extracts the key and value types of a dict type (e.g. returns (`K`, `V`) for `Dict[K, V]`)."
 
     key_type, value_type = typing.get_args(typ)
@@ -493,7 +486,7 @@ def is_type_annotated(typ: TypeLike) -> bool:
     return getattr(typ, "__metadata__", None) is not None
 
 
-def get_annotation(data_type: TypeLike, annotation_type: Type[T]) -> Optional[T]:
+def get_annotation(data_type: TypeLike, annotation_type: type[T]) -> T | None:
     """
     Returns the first annotation on a data type that matches the expected annotation type.
 
@@ -522,7 +515,7 @@ def unwrap_annotated_type(typ: T) -> T:
         return typ
 
 
-def rewrap_annotated_type(transform: Callable[[Type[S]], Type[T]], typ: Type[S]) -> Type[T]:
+def rewrap_annotated_type(transform: Callable[[type[S]], type[T]], typ: type[S]) -> type[T]:
     """
     Un-boxes, transforms and re-boxes an optionally annotated type.
 
@@ -546,7 +539,7 @@ def rewrap_annotated_type(transform: Callable[[Type[S]], Type[T]], typ: Type[S])
         return transformed_type
 
 
-def get_module_classes(module: types.ModuleType) -> List[type]:
+def get_module_classes(module: types.ModuleType) -> list[type]:
     "Returns all classes declared directly in a module."
 
     def is_class_member(member: object) -> TypeGuard[type]:
@@ -557,16 +550,16 @@ def get_module_classes(module: types.ModuleType) -> List[type]:
 
 if sys.version_info >= (3, 9):
 
-    def get_resolved_hints(typ: type) -> Dict[str, type]:
+    def get_resolved_hints(typ: type) -> dict[str, type]:
         return typing.get_type_hints(typ, include_extras=True)
 
 else:
 
-    def get_resolved_hints(typ: type) -> Dict[str, type]:
+    def get_resolved_hints(typ: type) -> dict[str, type]:
         return typing.get_type_hints(typ)
 
 
-def get_class_properties(typ: type) -> Iterable[Tuple[str, type | str]]:
+def get_class_properties(typ: type) -> Iterable[tuple[str, type | str]]:
     "Returns all properties of a class."
 
     if is_dataclass_type(typ):
@@ -593,7 +586,7 @@ def get_class_properties(typ: type) -> Iterable[Tuple[str, type | str]]:
         return resolved_hints.items()
 
 
-def get_class_property(typ: type, name: str) -> Optional[type | str]:
+def get_class_property(typ: type, name: str) -> type | str | None:
     "Looks up the annotated type of a property in a class by its property name."
 
     for property_name, property_type in get_class_properties(typ):
@@ -607,7 +600,7 @@ class _ROOT:
     pass
 
 
-def get_referenced_types(typ: TypeLike, module: Optional[types.ModuleType] = None) -> Set[type]:
+def get_referenced_types(typ: TypeLike, module: types.ModuleType | None = None) -> set[type]:
     """
     Extracts types directly or indirectly referenced by this type.
 
@@ -631,10 +624,10 @@ class TypeCollector:
     :param graph: The type dependency graph, linking types to types they depend on.
     """
 
-    graph: Dict[type, Set[type]]
+    graph: dict[type, set[type]]
 
     @property
-    def references(self) -> Set[type]:
+    def references(self) -> set[type]:
         "Types collected by the type collector."
 
         dependencies = set()
@@ -659,8 +652,8 @@ class TypeCollector:
     def run(
         self,
         typ: TypeLike,
-        cls: Type[DataclassInstance],
-        module: Optional[types.ModuleType],
+        cls: type[DataclassInstance],
+        module: types.ModuleType | None,
     ) -> None:
         """
         Extracts types indirectly referenced by this type.
@@ -779,7 +772,7 @@ def create_module(name: str) -> types.ModuleType:
 
 if sys.version_info >= (3, 10):
 
-    def create_data_type(class_name: str, fields: List[Tuple[str, type]]) -> type:
+    def create_data_type(class_name: str, fields: list[tuple[str, type]]) -> type:
         """
         Creates a new data-class type dynamically.
 
@@ -793,7 +786,7 @@ if sys.version_info >= (3, 10):
 
 else:
 
-    def create_data_type(class_name: str, fields: List[Tuple[str, type]]) -> type:
+    def create_data_type(class_name: str, fields: list[tuple[str, type]]) -> type:
         """
         Creates a new data-class type dynamically.
 
@@ -821,7 +814,7 @@ else:
         return cls
 
 
-def create_object(typ: Type[T]) -> T:
+def create_object(typ: type[T]) -> T:
     "Creates an instance of a type."
 
     if issubclass(typ, Exception):
@@ -906,7 +899,7 @@ def is_generic_instance(obj: Any, typ: TypeLike) -> bool:
 
 
 class RecursiveChecker:
-    _pred: Optional[Callable[[type, Any], bool]]
+    _pred: Callable[[type, Any], bool] | None
 
     def __init__(self, pred: Callable[[type, Any], bool]) -> None:
         """
@@ -1018,9 +1011,9 @@ def check_recursive(
     obj: object,
     /,
     *,
-    pred: Optional[Callable[[type, Any], bool]] = None,
-    type_pred: Optional[Callable[[type], bool]] = None,
-    value_pred: Optional[Callable[[Any], bool]] = None,
+    pred: Callable[[type, Any], bool] | None = None,
+    type_pred: Callable[[type], bool] | None = None,
+    value_pred: Callable[[Any], bool] | None = None,
 ) -> bool:
     """
     Checks if a predicate applies to all nested member properties of an object recursively.
@@ -1036,7 +1029,7 @@ def check_recursive(
         if pred is not None:
             raise TypeError("filter predicate not permitted when type and value predicates are present")
 
-        type_p: Callable[[Type[T]], bool] = type_pred
+        type_p: Callable[[type[T]], bool] = type_pred
         value_p: Callable[[T], bool] = value_pred
         pred = lambda typ, obj: not type_p(typ) or value_p(obj)  # noqa: E731
 
diff --git a/llama_stack/strong_typing/mapping.py b/src/llama_stack/strong_typing/mapping.py
similarity index 89%
rename from llama_stack/strong_typing/mapping.py
rename to src/llama_stack/strong_typing/mapping.py
index 408375a9f..d6c1a3172 100644
--- a/llama_stack/strong_typing/mapping.py
+++ b/src/llama_stack/strong_typing/mapping.py
@@ -11,13 +11,12 @@ Type-safe data interchange for Python data classes.
 """
 
 import keyword
-from typing import Optional
 
 from .auxiliary import Alias
 from .inspection import get_annotation
 
 
-def python_field_to_json_property(python_id: str, python_type: Optional[object] = None) -> str:
+def python_field_to_json_property(python_id: str, python_type: object | None = None) -> str:
     """
     Map a Python field identifier to a JSON property name.
 
diff --git a/llama_stack/strong_typing/name.py b/src/llama_stack/strong_typing/name.py
similarity index 95%
rename from llama_stack/strong_typing/name.py
rename to src/llama_stack/strong_typing/name.py
index a1a2ae5f1..00cdc2ae2 100644
--- a/llama_stack/strong_typing/name.py
+++ b/src/llama_stack/strong_typing/name.py
@@ -11,7 +11,7 @@ Type-safe data interchange for Python data classes.
 """
 
 import typing
-from typing import Any, Literal, Optional, Tuple, Union
+from typing import Any, Literal, Union
 
 from .auxiliary import _auxiliary_types
 from .inspection import (
@@ -39,7 +39,7 @@ class TypeFormatter:
     def __init__(self, use_union_operator: bool = False) -> None:
         self.use_union_operator = use_union_operator
 
-    def union_to_str(self, data_type_args: Tuple[TypeLike, ...]) -> str:
+    def union_to_str(self, data_type_args: tuple[TypeLike, ...]) -> str:
         if self.use_union_operator:
             return " | ".join(self.python_type_to_str(t) for t in data_type_args)
         else:
@@ -100,7 +100,7 @@ class TypeFormatter:
         metadata = getattr(data_type, "__metadata__", None)
         if metadata is not None:
             # type is Annotated[T, ...]
-            metatuple: Tuple[Any, ...] = metadata
+            metatuple: tuple[Any, ...] = metadata
             arg = typing.get_args(data_type)[0]
 
             # check for auxiliary types with user-defined annotations
@@ -110,7 +110,7 @@ class TypeFormatter:
                 if arg is not auxiliary_arg:
                     continue
 
-                auxiliary_metatuple: Optional[Tuple[Any, ...]] = getattr(auxiliary_type, "__metadata__", None)
+                auxiliary_metatuple: tuple[Any, ...] | None = getattr(auxiliary_type, "__metadata__", None)
                 if auxiliary_metatuple is None:
                     continue
 
diff --git a/llama_stack/strong_typing/py.typed b/src/llama_stack/strong_typing/py.typed
similarity index 100%
rename from llama_stack/strong_typing/py.typed
rename to src/llama_stack/strong_typing/py.typed
diff --git a/llama_stack/strong_typing/schema.py b/src/llama_stack/strong_typing/schema.py
similarity index 92%
rename from llama_stack/strong_typing/schema.py
rename to src/llama_stack/strong_typing/schema.py
index f911fc41f..15a3bbbfc 100644
--- a/llama_stack/strong_typing/schema.py
+++ b/src/llama_stack/strong_typing/schema.py
@@ -21,24 +21,19 @@ import json
 import types
 import typing
 import uuid
+from collections.abc import Callable
 from copy import deepcopy
 from typing import (
+    Annotated,
     Any,
-    Callable,
     ClassVar,
-    Dict,
-    List,
     Literal,
-    Optional,
-    Tuple,
-    Type,
     TypeVar,
     Union,
     overload,
 )
 
 import jsonschema
-from typing_extensions import Annotated
 
 from . import docstring
 from .auxiliary import (
@@ -71,7 +66,7 @@ OBJECT_ENUM_EXPANSION_LIMIT = 4
 T = TypeVar("T")
 
 
-def get_class_docstrings(data_type: type) -> Tuple[Optional[str], Optional[str]]:
+def get_class_docstrings(data_type: type) -> tuple[str | None, str | None]:
     docstr = docstring.parse_type(data_type)
 
     # check if class has a doc-string other than the auto-generated string assigned by @dataclass
@@ -82,8 +77,8 @@ def get_class_docstrings(data_type: type) -> Tuple[Optional[str], Optional[str]]
 
 
 def get_class_property_docstrings(
-    data_type: type, transform_fun: Optional[Callable[[type, str, str], str]] = None
-) -> Dict[str, str]:
+    data_type: type, transform_fun: Callable[[type, str, str], str] | None = None
+) -> dict[str, str]:
     """
     Extracts the documentation strings associated with the properties of a composite type.
 
@@ -92,7 +87,7 @@ def get_class_property_docstrings(
     :returns: A dictionary mapping property names to descriptions.
     """
 
-    result: Dict[str, str] = {}
+    result: dict[str, str] = {}
     # Only try to get MRO if data_type is actually a class
     # Special types like Literal, Union, etc. don't have MRO
     if not inspect.isclass(data_type):
@@ -125,7 +120,7 @@ def docstring_to_schema(data_type: type) -> Schema:
     return schema
 
 
-def id_from_ref(data_type: Union[typing.ForwardRef, str, type]) -> str:
+def id_from_ref(data_type: typing.ForwardRef | str | type) -> str:
     "Extracts the name of a possibly forward-referenced type."
 
     if isinstance(data_type, typing.ForwardRef):
@@ -137,7 +132,7 @@ def id_from_ref(data_type: Union[typing.ForwardRef, str, type]) -> str:
         return data_type.__name__
 
 
-def type_from_ref(data_type: Union[typing.ForwardRef, str, type]) -> Tuple[str, type]:
+def type_from_ref(data_type: typing.ForwardRef | str | type) -> tuple[str, type]:
     "Creates a type from a forward reference."
 
     if isinstance(data_type, typing.ForwardRef):
@@ -153,16 +148,16 @@ def type_from_ref(data_type: Union[typing.ForwardRef, str, type]) -> Tuple[str,
 
 @dataclasses.dataclass
 class TypeCatalogEntry:
-    schema: Optional[Schema]
+    schema: Schema | None
     identifier: str
-    examples: Optional[JsonType] = None
+    examples: JsonType | None = None
 
 
 class TypeCatalog:
     "Maintains an association of well-known Python types to their JSON schema."
 
-    _by_type: Dict[TypeLike, TypeCatalogEntry]
-    _by_name: Dict[str, TypeCatalogEntry]
+    _by_type: dict[TypeLike, TypeCatalogEntry]
+    _by_name: dict[str, TypeCatalogEntry]
 
     def __init__(self) -> None:
         self._by_type = {}
@@ -179,9 +174,9 @@ class TypeCatalog:
     def add(
         self,
         data_type: TypeLike,
-        schema: Optional[Schema],
+        schema: Schema | None,
         identifier: str,
-        examples: Optional[List[JsonType]] = None,
+        examples: list[JsonType] | None = None,
     ) -> None:
         if isinstance(data_type, typing.ForwardRef):
             raise TypeError("forward references cannot be used to register a type")
@@ -207,17 +202,17 @@ class SchemaOptions:
     definitions_path: str = "#/definitions/"
     use_descriptions: bool = True
     use_examples: bool = True
-    property_description_fun: Optional[Callable[[type, str, str], str]] = None
+    property_description_fun: Callable[[type, str, str], str] | None = None
 
 
 class JsonSchemaGenerator:
     "Creates a JSON schema with user-defined type definitions."
 
     type_catalog: ClassVar[TypeCatalog] = TypeCatalog()
-    types_used: Dict[str, TypeLike]
+    types_used: dict[str, TypeLike]
     options: SchemaOptions
 
-    def __init__(self, options: Optional[SchemaOptions] = None):
+    def __init__(self, options: SchemaOptions | None = None):
         if options is None:
             self.options = SchemaOptions()
         else:
@@ -249,13 +244,13 @@ class JsonSchemaGenerator:
     def _(self, arg: MaxLength) -> Schema:
         return {"maxLength": arg.value}
 
-    def _with_metadata(self, type_schema: Schema, metadata: Optional[Tuple[Any, ...]]) -> Schema:
+    def _with_metadata(self, type_schema: Schema, metadata: tuple[Any, ...] | None) -> Schema:
         if metadata:
             for m in metadata:
                 type_schema.update(self._metadata_to_schema(m))
         return type_schema
 
-    def _simple_type_to_schema(self, typ: TypeLike, json_schema_extra: Optional[dict] = None) -> Optional[Schema]:
+    def _simple_type_to_schema(self, typ: TypeLike, json_schema_extra: dict | None = None) -> Schema | None:
         """
         Returns the JSON schema associated with a simple, unrestricted type.
 
@@ -319,7 +314,7 @@ class JsonSchemaGenerator:
         self,
         data_type: TypeLike,
         force_expand: bool = False,
-        json_schema_extra: Optional[dict] = None,
+        json_schema_extra: dict | None = None,
     ) -> Schema:
         common_info = {}
         if json_schema_extra and "deprecated" in json_schema_extra:
@@ -330,7 +325,7 @@ class JsonSchemaGenerator:
         self,
         data_type: TypeLike,
         force_expand: bool = False,
-        json_schema_extra: Optional[dict] = None,
+        json_schema_extra: dict | None = None,
     ) -> Schema:
         """
         Returns the JSON schema associated with a type.
@@ -386,7 +381,7 @@ class JsonSchemaGenerator:
                 return {"$ref": f"{self.options.definitions_path}{identifier}"}
 
         if is_type_enum(typ):
-            enum_type: Type[enum.Enum] = typ
+            enum_type: type[enum.Enum] = typ
             value_types = enum_value_types(enum_type)
             if len(value_types) != 1:
                 raise ValueError(
@@ -438,7 +433,7 @@ class JsonSchemaGenerator:
                     }
                 else:
                     dict_schema = {
-                        "properties": {value: value_schema for value in enum_values},
+                        "properties": dict.fromkeys(enum_values, value_schema),
                         "additionalProperties": False,
                     }
             else:
@@ -508,8 +503,8 @@ class JsonSchemaGenerator:
         members = dict(inspect.getmembers(typ, lambda a: not inspect.isroutine(a)))
 
         property_docstrings = get_class_property_docstrings(typ, self.options.property_description_fun)
-        properties: Dict[str, Schema] = {}
-        required: List[str] = []
+        properties: dict[str, Schema] = {}
+        required: list[str] = []
         for property_name, property_type in get_class_properties(typ):
             # rename property if an alias name is specified
             alias = get_annotation(property_type, Alias)
@@ -599,7 +594,7 @@ class JsonSchemaGenerator:
 
         return type_schema
 
-    def classdef_to_schema(self, data_type: TypeLike, force_expand: bool = False) -> Tuple[Schema, Dict[str, Schema]]:
+    def classdef_to_schema(self, data_type: TypeLike, force_expand: bool = False) -> tuple[Schema, dict[str, Schema]]:
         """
         Returns the JSON schema associated with a type and any nested types.
 
@@ -616,7 +611,7 @@ class JsonSchemaGenerator:
         try:
             type_schema = self.type_to_schema(data_type, force_expand=force_expand)
 
-            types_defined: Dict[str, Schema] = {}
+            types_defined: dict[str, Schema] = {}
             while len(self.types_used) > len(types_defined):
                 # make a snapshot copy; original collection is going to be modified
                 types_undefined = {
@@ -647,7 +642,7 @@ class Validator(enum.Enum):
 
 def classdef_to_schema(
     data_type: TypeLike,
-    options: Optional[SchemaOptions] = None,
+    options: SchemaOptions | None = None,
     validator: Validator = Validator.Latest,
 ) -> Schema:
     """
@@ -701,7 +696,7 @@ def print_schema(data_type: type) -> None:
     print(json.dumps(s, indent=4))
 
 
-def get_schema_identifier(data_type: type) -> Optional[str]:
+def get_schema_identifier(data_type: type) -> str | None:
     if data_type in JsonSchemaGenerator.type_catalog:
         return JsonSchemaGenerator.type_catalog.get(data_type).identifier
     else:
@@ -710,9 +705,9 @@ def get_schema_identifier(data_type: type) -> Optional[str]:
 
 def register_schema(
     data_type: T,
-    schema: Optional[Schema] = None,
-    name: Optional[str] = None,
-    examples: Optional[List[JsonType]] = None,
+    schema: Schema | None = None,
+    name: str | None = None,
+    examples: list[JsonType] | None = None,
 ) -> T:
     """
     Associates a type with a JSON schema definition.
@@ -733,22 +728,22 @@ def register_schema(
 
 
 @overload
-def json_schema_type(cls: Type[T], /) -> Type[T]: ...
+def json_schema_type(cls: type[T], /) -> type[T]: ...
 
 
 @overload
-def json_schema_type(cls: None, *, schema: Optional[Schema] = None) -> Callable[[Type[T]], Type[T]]: ...
+def json_schema_type(cls: None, *, schema: Schema | None = None) -> Callable[[type[T]], type[T]]: ...
 
 
 def json_schema_type(
-    cls: Optional[Type[T]] = None,
+    cls: type[T] | None = None,
     *,
-    schema: Optional[Schema] = None,
-    examples: Optional[List[JsonType]] = None,
-) -> Union[Type[T], Callable[[Type[T]], Type[T]]]:
+    schema: Schema | None = None,
+    examples: list[JsonType] | None = None,
+) -> type[T] | Callable[[type[T]], type[T]]:
     """Decorator to add user-defined schema definition to a class."""
 
-    def wrap(cls: Type[T]) -> Type[T]:
+    def wrap(cls: type[T]) -> type[T]:
         return register_schema(cls, schema, examples=examples)
 
     # see if decorator is used as @json_schema_type or @json_schema_type()
diff --git a/llama_stack/strong_typing/serialization.py b/src/llama_stack/strong_typing/serialization.py
similarity index 95%
rename from llama_stack/strong_typing/serialization.py
rename to src/llama_stack/strong_typing/serialization.py
index c00a0aad5..3e34945ad 100644
--- a/llama_stack/strong_typing/serialization.py
+++ b/src/llama_stack/strong_typing/serialization.py
@@ -14,7 +14,7 @@ import inspect
 import json
 import sys
 from types import ModuleType
-from typing import Any, Optional, TextIO, TypeVar
+from typing import Any, TextIO, TypeVar
 
 from .core import JsonType
 from .deserializer import create_deserializer
@@ -42,7 +42,7 @@ def object_to_json(obj: Any) -> JsonType:
     return generator.generate(obj)
 
 
-def json_to_object(typ: TypeLike, data: JsonType, *, context: Optional[ModuleType] = None) -> object:
+def json_to_object(typ: TypeLike, data: JsonType, *, context: ModuleType | None = None) -> object:
     """
     Creates an object from a representation that has been de-serialized from JSON.
 
diff --git a/llama_stack/strong_typing/serializer.py b/src/llama_stack/strong_typing/serializer.py
similarity index 87%
rename from llama_stack/strong_typing/serializer.py
rename to src/llama_stack/strong_typing/serializer.py
index 17848c14b..4a12a1f4b 100644
--- a/llama_stack/strong_typing/serializer.py
+++ b/src/llama_stack/strong_typing/serializer.py
@@ -20,19 +20,13 @@ import ipaddress
 import sys
 import typing
 import uuid
+from collections.abc import Callable
 from types import FunctionType, MethodType, ModuleType
 from typing import (
     Any,
-    Callable,
-    Dict,
     Generic,
-    List,
     Literal,
     NamedTuple,
-    Optional,
-    Set,
-    Tuple,
-    Type,
     TypeVar,
     Union,
 )
@@ -133,7 +127,7 @@ class IPv6Serializer(Serializer[ipaddress.IPv6Address]):
 
 
 class EnumSerializer(Serializer[enum.Enum]):
-    def generate(self, obj: enum.Enum) -> Union[int, str]:
+    def generate(self, obj: enum.Enum) -> int | str:
         value = obj.value
         if isinstance(value, int):
             return value
@@ -141,12 +135,12 @@ class EnumSerializer(Serializer[enum.Enum]):
 
 
 class UntypedListSerializer(Serializer[list]):
-    def generate(self, obj: list) -> List[JsonType]:
+    def generate(self, obj: list) -> list[JsonType]:
         return [object_to_json(item) for item in obj]
 
 
 class UntypedDictSerializer(Serializer[dict]):
-    def generate(self, obj: dict) -> Dict[str, JsonType]:
+    def generate(self, obj: dict) -> dict[str, JsonType]:
         if obj and isinstance(next(iter(obj.keys())), enum.Enum):
             iterator = ((key.value, object_to_json(value)) for key, value in obj.items())
         else:
@@ -155,41 +149,41 @@ class UntypedDictSerializer(Serializer[dict]):
 
 
 class UntypedSetSerializer(Serializer[set]):
-    def generate(self, obj: set) -> List[JsonType]:
+    def generate(self, obj: set) -> list[JsonType]:
         return [object_to_json(item) for item in obj]
 
 
 class UntypedTupleSerializer(Serializer[tuple]):
-    def generate(self, obj: tuple) -> List[JsonType]:
+    def generate(self, obj: tuple) -> list[JsonType]:
         return [object_to_json(item) for item in obj]
 
 
 class TypedCollectionSerializer(Serializer, Generic[T]):
     generator: Serializer[T]
 
-    def __init__(self, item_type: Type[T], context: Optional[ModuleType]) -> None:
+    def __init__(self, item_type: type[T], context: ModuleType | None) -> None:
         self.generator = _get_serializer(item_type, context)
 
 
 class TypedListSerializer(TypedCollectionSerializer[T]):
-    def generate(self, obj: List[T]) -> List[JsonType]:
+    def generate(self, obj: list[T]) -> list[JsonType]:
         return [self.generator.generate(item) for item in obj]
 
 
 class TypedStringDictSerializer(TypedCollectionSerializer[T]):
-    def __init__(self, value_type: Type[T], context: Optional[ModuleType]) -> None:
+    def __init__(self, value_type: type[T], context: ModuleType | None) -> None:
         super().__init__(value_type, context)
 
-    def generate(self, obj: Dict[str, T]) -> Dict[str, JsonType]:
+    def generate(self, obj: dict[str, T]) -> dict[str, JsonType]:
         return {key: self.generator.generate(value) for key, value in obj.items()}
 
 
 class TypedEnumDictSerializer(TypedCollectionSerializer[T]):
     def __init__(
         self,
-        key_type: Type[enum.Enum],
-        value_type: Type[T],
-        context: Optional[ModuleType],
+        key_type: type[enum.Enum],
+        value_type: type[T],
+        context: ModuleType | None,
     ) -> None:
         super().__init__(value_type, context)
 
@@ -203,22 +197,22 @@ class TypedEnumDictSerializer(TypedCollectionSerializer[T]):
         if value_type is not str:
             raise JsonTypeError("invalid enumeration key type, expected `enum.Enum` with string values")
 
-    def generate(self, obj: Dict[enum.Enum, T]) -> Dict[str, JsonType]:
+    def generate(self, obj: dict[enum.Enum, T]) -> dict[str, JsonType]:
         return {key.value: self.generator.generate(value) for key, value in obj.items()}
 
 
 class TypedSetSerializer(TypedCollectionSerializer[T]):
-    def generate(self, obj: Set[T]) -> JsonType:
+    def generate(self, obj: set[T]) -> JsonType:
         return [self.generator.generate(item) for item in obj]
 
 
 class TypedTupleSerializer(Serializer[tuple]):
-    item_generators: Tuple[Serializer, ...]
+    item_generators: tuple[Serializer, ...]
 
-    def __init__(self, item_types: Tuple[type, ...], context: Optional[ModuleType]) -> None:
+    def __init__(self, item_types: tuple[type, ...], context: ModuleType | None) -> None:
         self.item_generators = tuple(_get_serializer(item_type, context) for item_type in item_types)
 
-    def generate(self, obj: tuple) -> List[JsonType]:
+    def generate(self, obj: tuple) -> list[JsonType]:
         return [item_generator.generate(item) for item_generator, item in zip(self.item_generators, obj, strict=False)]
 
 
@@ -250,16 +244,16 @@ class FieldSerializer(Generic[T]):
         self.property_name = property_name
         self.generator = generator
 
-    def generate_field(self, obj: object, object_dict: Dict[str, JsonType]) -> None:
+    def generate_field(self, obj: object, object_dict: dict[str, JsonType]) -> None:
         value = getattr(obj, self.field_name)
         if value is not None:
             object_dict[self.property_name] = self.generator.generate(value)
 
 
 class TypedClassSerializer(Serializer[T]):
-    property_generators: List[FieldSerializer]
+    property_generators: list[FieldSerializer]
 
-    def __init__(self, class_type: Type[T], context: Optional[ModuleType]) -> None:
+    def __init__(self, class_type: type[T], context: ModuleType | None) -> None:
         self.property_generators = [
             FieldSerializer(
                 field_name,
@@ -269,8 +263,8 @@ class TypedClassSerializer(Serializer[T]):
             for field_name, field_type in get_class_properties(class_type)
         ]
 
-    def generate(self, obj: T) -> Dict[str, JsonType]:
-        object_dict: Dict[str, JsonType] = {}
+    def generate(self, obj: T) -> dict[str, JsonType]:
+        object_dict: dict[str, JsonType] = {}
         for property_generator in self.property_generators:
             property_generator.generate_field(obj, object_dict)
 
@@ -278,12 +272,12 @@ class TypedClassSerializer(Serializer[T]):
 
 
 class TypedNamedTupleSerializer(TypedClassSerializer[NamedTuple]):
-    def __init__(self, class_type: Type[NamedTuple], context: Optional[ModuleType]) -> None:
+    def __init__(self, class_type: type[NamedTuple], context: ModuleType | None) -> None:
         super().__init__(class_type, context)
 
 
 class DataclassSerializer(TypedClassSerializer[T]):
-    def __init__(self, class_type: Type[T], context: Optional[ModuleType]) -> None:
+    def __init__(self, class_type: type[T], context: ModuleType | None) -> None:
         super().__init__(class_type, context)
 
 
@@ -295,7 +289,7 @@ class UnionSerializer(Serializer):
 class LiteralSerializer(Serializer):
     generator: Serializer
 
-    def __init__(self, values: Tuple[Any, ...], context: Optional[ModuleType]) -> None:
+    def __init__(self, values: tuple[Any, ...], context: ModuleType | None) -> None:
         literal_type_tuple = tuple(type(value) for value in values)
         literal_type_set = set(literal_type_tuple)
         if len(literal_type_set) != 1:
@@ -312,12 +306,12 @@ class LiteralSerializer(Serializer):
 
 
 class UntypedNamedTupleSerializer(Serializer):
-    fields: Dict[str, str]
+    fields: dict[str, str]
 
-    def __init__(self, class_type: Type[NamedTuple]) -> None:
+    def __init__(self, class_type: type[NamedTuple]) -> None:
         # named tuples are also instances of tuple
         self.fields = {}
-        field_names: Tuple[str, ...] = class_type._fields
+        field_names: tuple[str, ...] = class_type._fields
         for field_name in field_names:
             self.fields[field_name] = python_field_to_json_property(field_name)
 
@@ -351,7 +345,7 @@ class UntypedClassSerializer(Serializer):
         return object_dict
 
 
-def create_serializer(typ: TypeLike, context: Optional[ModuleType] = None) -> Serializer:
+def create_serializer(typ: TypeLike, context: ModuleType | None = None) -> Serializer:
     """
     Creates a serializer engine to produce an object that can be directly converted into a JSON string.
 
@@ -376,7 +370,7 @@ def create_serializer(typ: TypeLike, context: Optional[ModuleType] = None) -> Se
     return _get_serializer(typ, context)
 
 
-def _get_serializer(typ: TypeLike, context: Optional[ModuleType]) -> Serializer:
+def _get_serializer(typ: TypeLike, context: ModuleType | None) -> Serializer:
     if isinstance(typ, (str, typing.ForwardRef)):
         if context is None:
             raise TypeError(f"missing context for evaluating type: {typ}")
@@ -390,13 +384,13 @@ def _get_serializer(typ: TypeLike, context: Optional[ModuleType]) -> Serializer:
         return _create_serializer(typ, context)
 
 
-@functools.lru_cache(maxsize=None)
+@functools.cache
 def _fetch_serializer(typ: type) -> Serializer:
     context = sys.modules[typ.__module__]
     return _create_serializer(typ, context)
 
 
-def _create_serializer(typ: TypeLike, context: Optional[ModuleType]) -> Serializer:
+def _create_serializer(typ: TypeLike, context: ModuleType | None) -> Serializer:
     # check for well-known types
     if typ is type(None):
         return NoneSerializer()
diff --git a/llama_stack/strong_typing/slots.py b/src/llama_stack/strong_typing/slots.py
similarity index 71%
rename from llama_stack/strong_typing/slots.py
rename to src/llama_stack/strong_typing/slots.py
index c1a3293d8..772834140 100644
--- a/llama_stack/strong_typing/slots.py
+++ b/src/llama_stack/strong_typing/slots.py
@@ -4,18 +4,18 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from typing import Any, Dict, Tuple, Type, TypeVar
+from typing import Any, TypeVar
 
 T = TypeVar("T")
 
 
 class SlotsMeta(type):
-    def __new__(cls: Type[T], name: str, bases: Tuple[type, ...], ns: Dict[str, Any]) -> T:
+    def __new__(cls: type[T], name: str, bases: tuple[type, ...], ns: dict[str, Any]) -> T:
         # caller may have already provided slots, in which case just retain them and keep going
-        slots: Tuple[str, ...] = ns.get("__slots__", ())
+        slots: tuple[str, ...] = ns.get("__slots__", ())
 
         # add fields with type annotations to slots
-        annotations: Dict[str, Any] = ns.get("__annotations__", {})
+        annotations: dict[str, Any] = ns.get("__annotations__", {})
         members = tuple(member for member in annotations.keys() if member not in slots)
 
         # assign slots
diff --git a/llama_stack/strong_typing/topological.py b/src/llama_stack/strong_typing/topological.py
similarity index 88%
rename from llama_stack/strong_typing/topological.py
rename to src/llama_stack/strong_typing/topological.py
index 28bf4bd0f..9502a5887 100644
--- a/llama_stack/strong_typing/topological.py
+++ b/src/llama_stack/strong_typing/topological.py
@@ -10,14 +10,15 @@ Type-safe data interchange for Python data classes.
 :see: https://github.com/hunyadi/strong_typing
 """
 
-from typing import Callable, Dict, Iterable, List, Optional, Set, TypeVar
+from collections.abc import Callable, Iterable
+from typing import TypeVar
 
 from .inspection import TypeCollector
 
 T = TypeVar("T")
 
 
-def topological_sort(graph: Dict[T, Set[T]]) -> List[T]:
+def topological_sort(graph: dict[T, set[T]]) -> list[T]:
     """
     Performs a topological sort of a graph.
 
@@ -29,9 +30,9 @@ def topological_sort(graph: Dict[T, Set[T]]) -> List[T]:
     """
 
     # empty list that will contain the sorted nodes (in reverse order)
-    ordered: List[T] = []
+    ordered: list[T] = []
 
-    seen: Dict[T, bool] = {}
+    seen: dict[T, bool] = {}
 
     def _visit(n: T) -> None:
         status = seen.get(n)
@@ -57,8 +58,8 @@ def topological_sort(graph: Dict[T, Set[T]]) -> List[T]:
 
 def type_topological_sort(
     types: Iterable[type],
-    dependency_fn: Optional[Callable[[type], Iterable[type]]] = None,
-) -> List[type]:
+    dependency_fn: Callable[[type], Iterable[type]] | None = None,
+) -> list[type]:
     """
     Performs a topological sort of a list of types.
 
@@ -78,7 +79,7 @@ def type_topological_sort(
     graph = collector.graph
 
     if dependency_fn:
-        new_types: Set[type] = set()
+        new_types: set[type] = set()
         for source_type, references in graph.items():
             dependent_types = dependency_fn(source_type)
             references.update(dependent_types)
diff --git a/llama_stack/testing/__init__.py b/src/llama_stack/testing/__init__.py
similarity index 100%
rename from llama_stack/testing/__init__.py
rename to src/llama_stack/testing/__init__.py
diff --git a/llama_stack/testing/api_recorder.py b/src/llama_stack/testing/api_recorder.py
similarity index 99%
rename from llama_stack/testing/api_recorder.py
rename to src/llama_stack/testing/api_recorder.py
index 9e272ca3a..84407223c 100644
--- a/llama_stack/testing/api_recorder.py
+++ b/src/llama_stack/testing/api_recorder.py
@@ -43,7 +43,7 @@ from llama_stack.core.testing_context import get_test_context, is_debug_mode
 CompletionChoice.model_fields["finish_reason"].annotation = Literal["stop", "length", "content_filter"] | None
 CompletionChoice.model_rebuild()
 
-REPO_ROOT = Path(__file__).parent.parent.parent
+REPO_ROOT = Path(__file__).parent.parent.parent.parent
 DEFAULT_STORAGE_DIR = REPO_ROOT / "tests/integration/common"
 
 
diff --git a/llama_stack/ui/.gitignore b/src/llama_stack/ui/.gitignore
similarity index 100%
rename from llama_stack/ui/.gitignore
rename to src/llama_stack/ui/.gitignore
diff --git a/llama_stack/ui/.nvmrc b/src/llama_stack/ui/.nvmrc
similarity index 100%
rename from llama_stack/ui/.nvmrc
rename to src/llama_stack/ui/.nvmrc
diff --git a/llama_stack/ui/.prettierignore b/src/llama_stack/ui/.prettierignore
similarity index 100%
rename from llama_stack/ui/.prettierignore
rename to src/llama_stack/ui/.prettierignore
diff --git a/llama_stack/ui/.prettierrc b/src/llama_stack/ui/.prettierrc
similarity index 100%
rename from llama_stack/ui/.prettierrc
rename to src/llama_stack/ui/.prettierrc
diff --git a/llama_stack/ui/README.md b/src/llama_stack/ui/README.md
similarity index 100%
rename from llama_stack/ui/README.md
rename to src/llama_stack/ui/README.md
diff --git a/llama_stack/ui/app/api/auth/[...nextauth]/route.ts b/src/llama_stack/ui/app/api/auth/[...nextauth]/route.ts
similarity index 100%
rename from llama_stack/ui/app/api/auth/[...nextauth]/route.ts
rename to src/llama_stack/ui/app/api/auth/[...nextauth]/route.ts
diff --git a/llama_stack/ui/app/api/v1/[...path]/route.ts b/src/llama_stack/ui/app/api/v1/[...path]/route.ts
similarity index 100%
rename from llama_stack/ui/app/api/v1/[...path]/route.ts
rename to src/llama_stack/ui/app/api/v1/[...path]/route.ts
diff --git a/llama_stack/ui/app/auth/signin/page.tsx b/src/llama_stack/ui/app/auth/signin/page.tsx
similarity index 100%
rename from llama_stack/ui/app/auth/signin/page.tsx
rename to src/llama_stack/ui/app/auth/signin/page.tsx
diff --git a/llama_stack/ui/app/chat-playground/chunk-processor.test.tsx b/src/llama_stack/ui/app/chat-playground/chunk-processor.test.tsx
similarity index 100%
rename from llama_stack/ui/app/chat-playground/chunk-processor.test.tsx
rename to src/llama_stack/ui/app/chat-playground/chunk-processor.test.tsx
diff --git a/llama_stack/ui/app/chat-playground/page.test.tsx b/src/llama_stack/ui/app/chat-playground/page.test.tsx
similarity index 100%
rename from llama_stack/ui/app/chat-playground/page.test.tsx
rename to src/llama_stack/ui/app/chat-playground/page.test.tsx
diff --git a/llama_stack/ui/app/chat-playground/page.tsx b/src/llama_stack/ui/app/chat-playground/page.tsx
similarity index 100%
rename from llama_stack/ui/app/chat-playground/page.tsx
rename to src/llama_stack/ui/app/chat-playground/page.tsx
diff --git a/llama_stack/ui/app/globals.css b/src/llama_stack/ui/app/globals.css
similarity index 100%
rename from llama_stack/ui/app/globals.css
rename to src/llama_stack/ui/app/globals.css
diff --git a/llama_stack/ui/app/layout.tsx b/src/llama_stack/ui/app/layout.tsx
similarity index 100%
rename from llama_stack/ui/app/layout.tsx
rename to src/llama_stack/ui/app/layout.tsx
diff --git a/llama_stack/ui/app/logs/chat-completions/[id]/page.tsx b/src/llama_stack/ui/app/logs/chat-completions/[id]/page.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/chat-completions/[id]/page.tsx
rename to src/llama_stack/ui/app/logs/chat-completions/[id]/page.tsx
diff --git a/llama_stack/ui/app/logs/chat-completions/layout.tsx b/src/llama_stack/ui/app/logs/chat-completions/layout.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/chat-completions/layout.tsx
rename to src/llama_stack/ui/app/logs/chat-completions/layout.tsx
diff --git a/llama_stack/ui/app/logs/chat-completions/page.tsx b/src/llama_stack/ui/app/logs/chat-completions/page.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/chat-completions/page.tsx
rename to src/llama_stack/ui/app/logs/chat-completions/page.tsx
diff --git a/llama_stack/ui/app/logs/responses/[id]/page.tsx b/src/llama_stack/ui/app/logs/responses/[id]/page.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/responses/[id]/page.tsx
rename to src/llama_stack/ui/app/logs/responses/[id]/page.tsx
diff --git a/llama_stack/ui/app/logs/responses/layout.tsx b/src/llama_stack/ui/app/logs/responses/layout.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/responses/layout.tsx
rename to src/llama_stack/ui/app/logs/responses/layout.tsx
diff --git a/llama_stack/ui/app/logs/responses/page.tsx b/src/llama_stack/ui/app/logs/responses/page.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/responses/page.tsx
rename to src/llama_stack/ui/app/logs/responses/page.tsx
diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.test.tsx b/src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.test.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.test.tsx
rename to src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.test.tsx
diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx b/src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx
rename to src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/[contentId]/page.tsx
diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.test.tsx b/src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.test.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.test.tsx
rename to src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.test.tsx
diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.tsx b/src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.tsx
rename to src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/contents/page.tsx
diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.test.tsx b/src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.test.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.test.tsx
rename to src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.test.tsx
diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.tsx b/src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.tsx
rename to src/llama_stack/ui/app/logs/vector-stores/[id]/files/[fileId]/page.tsx
diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/page.tsx b/src/llama_stack/ui/app/logs/vector-stores/[id]/page.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/vector-stores/[id]/page.tsx
rename to src/llama_stack/ui/app/logs/vector-stores/[id]/page.tsx
diff --git a/llama_stack/ui/app/logs/vector-stores/layout.tsx b/src/llama_stack/ui/app/logs/vector-stores/layout.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/vector-stores/layout.tsx
rename to src/llama_stack/ui/app/logs/vector-stores/layout.tsx
diff --git a/llama_stack/ui/app/logs/vector-stores/page.tsx b/src/llama_stack/ui/app/logs/vector-stores/page.tsx
similarity index 100%
rename from llama_stack/ui/app/logs/vector-stores/page.tsx
rename to src/llama_stack/ui/app/logs/vector-stores/page.tsx
diff --git a/llama_stack/ui/app/page.tsx b/src/llama_stack/ui/app/page.tsx
similarity index 100%
rename from llama_stack/ui/app/page.tsx
rename to src/llama_stack/ui/app/page.tsx
diff --git a/llama_stack/ui/components.json b/src/llama_stack/ui/components.json
similarity index 100%
rename from llama_stack/ui/components.json
rename to src/llama_stack/ui/components.json
diff --git a/llama_stack/ui/components/chat-completions/chat-completion-detail.test.tsx b/src/llama_stack/ui/components/chat-completions/chat-completion-detail.test.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-completions/chat-completion-detail.test.tsx
rename to src/llama_stack/ui/components/chat-completions/chat-completion-detail.test.tsx
diff --git a/llama_stack/ui/components/chat-completions/chat-completion-detail.tsx b/src/llama_stack/ui/components/chat-completions/chat-completion-detail.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-completions/chat-completion-detail.tsx
rename to src/llama_stack/ui/components/chat-completions/chat-completion-detail.tsx
diff --git a/llama_stack/ui/components/chat-completions/chat-completion-table.test.tsx b/src/llama_stack/ui/components/chat-completions/chat-completion-table.test.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-completions/chat-completion-table.test.tsx
rename to src/llama_stack/ui/components/chat-completions/chat-completion-table.test.tsx
diff --git a/llama_stack/ui/components/chat-completions/chat-completions-table.tsx b/src/llama_stack/ui/components/chat-completions/chat-completions-table.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-completions/chat-completions-table.tsx
rename to src/llama_stack/ui/components/chat-completions/chat-completions-table.tsx
diff --git a/llama_stack/ui/components/chat-completions/chat-messasge-item.tsx b/src/llama_stack/ui/components/chat-completions/chat-messasge-item.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-completions/chat-messasge-item.tsx
rename to src/llama_stack/ui/components/chat-completions/chat-messasge-item.tsx
diff --git a/llama_stack/ui/components/chat-playground/chat-message.tsx b/src/llama_stack/ui/components/chat-playground/chat-message.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/chat-message.tsx
rename to src/llama_stack/ui/components/chat-playground/chat-message.tsx
diff --git a/llama_stack/ui/components/chat-playground/chat.tsx b/src/llama_stack/ui/components/chat-playground/chat.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/chat.tsx
rename to src/llama_stack/ui/components/chat-playground/chat.tsx
diff --git a/llama_stack/ui/components/chat-playground/conversations.test.tsx b/src/llama_stack/ui/components/chat-playground/conversations.test.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/conversations.test.tsx
rename to src/llama_stack/ui/components/chat-playground/conversations.test.tsx
diff --git a/llama_stack/ui/components/chat-playground/conversations.tsx b/src/llama_stack/ui/components/chat-playground/conversations.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/conversations.tsx
rename to src/llama_stack/ui/components/chat-playground/conversations.tsx
diff --git a/llama_stack/ui/components/chat-playground/interrupt-prompt.tsx b/src/llama_stack/ui/components/chat-playground/interrupt-prompt.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/interrupt-prompt.tsx
rename to src/llama_stack/ui/components/chat-playground/interrupt-prompt.tsx
diff --git a/llama_stack/ui/components/chat-playground/markdown-renderer.tsx b/src/llama_stack/ui/components/chat-playground/markdown-renderer.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/markdown-renderer.tsx
rename to src/llama_stack/ui/components/chat-playground/markdown-renderer.tsx
diff --git a/llama_stack/ui/components/chat-playground/message-components.tsx b/src/llama_stack/ui/components/chat-playground/message-components.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/message-components.tsx
rename to src/llama_stack/ui/components/chat-playground/message-components.tsx
diff --git a/llama_stack/ui/components/chat-playground/message-input.tsx b/src/llama_stack/ui/components/chat-playground/message-input.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/message-input.tsx
rename to src/llama_stack/ui/components/chat-playground/message-input.tsx
diff --git a/llama_stack/ui/components/chat-playground/message-list.tsx b/src/llama_stack/ui/components/chat-playground/message-list.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/message-list.tsx
rename to src/llama_stack/ui/components/chat-playground/message-list.tsx
diff --git a/llama_stack/ui/components/chat-playground/prompt-suggestions.tsx b/src/llama_stack/ui/components/chat-playground/prompt-suggestions.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/prompt-suggestions.tsx
rename to src/llama_stack/ui/components/chat-playground/prompt-suggestions.tsx
diff --git a/llama_stack/ui/components/chat-playground/typing-indicator.tsx b/src/llama_stack/ui/components/chat-playground/typing-indicator.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/typing-indicator.tsx
rename to src/llama_stack/ui/components/chat-playground/typing-indicator.tsx
diff --git a/llama_stack/ui/components/chat-playground/vector-db-creator.tsx b/src/llama_stack/ui/components/chat-playground/vector-db-creator.tsx
similarity index 100%
rename from llama_stack/ui/components/chat-playground/vector-db-creator.tsx
rename to src/llama_stack/ui/components/chat-playground/vector-db-creator.tsx
diff --git a/llama_stack/ui/components/layout/app-sidebar.tsx b/src/llama_stack/ui/components/layout/app-sidebar.tsx
similarity index 100%
rename from llama_stack/ui/components/layout/app-sidebar.tsx
rename to src/llama_stack/ui/components/layout/app-sidebar.tsx
diff --git a/llama_stack/ui/components/layout/detail-layout.tsx b/src/llama_stack/ui/components/layout/detail-layout.tsx
similarity index 100%
rename from llama_stack/ui/components/layout/detail-layout.tsx
rename to src/llama_stack/ui/components/layout/detail-layout.tsx
diff --git a/llama_stack/ui/components/layout/logs-layout.tsx b/src/llama_stack/ui/components/layout/logs-layout.tsx
similarity index 100%
rename from llama_stack/ui/components/layout/logs-layout.tsx
rename to src/llama_stack/ui/components/layout/logs-layout.tsx
diff --git a/llama_stack/ui/components/layout/page-breadcrumb.tsx b/src/llama_stack/ui/components/layout/page-breadcrumb.tsx
similarity index 100%
rename from llama_stack/ui/components/layout/page-breadcrumb.tsx
rename to src/llama_stack/ui/components/layout/page-breadcrumb.tsx
diff --git a/llama_stack/ui/components/logs/logs-table-scroll.test.tsx b/src/llama_stack/ui/components/logs/logs-table-scroll.test.tsx
similarity index 100%
rename from llama_stack/ui/components/logs/logs-table-scroll.test.tsx
rename to src/llama_stack/ui/components/logs/logs-table-scroll.test.tsx
diff --git a/llama_stack/ui/components/logs/logs-table.test.tsx b/src/llama_stack/ui/components/logs/logs-table.test.tsx
similarity index 100%
rename from llama_stack/ui/components/logs/logs-table.test.tsx
rename to src/llama_stack/ui/components/logs/logs-table.test.tsx
diff --git a/llama_stack/ui/components/logs/logs-table.tsx b/src/llama_stack/ui/components/logs/logs-table.tsx
similarity index 100%
rename from llama_stack/ui/components/logs/logs-table.tsx
rename to src/llama_stack/ui/components/logs/logs-table.tsx
diff --git a/llama_stack/ui/components/providers/session-provider.tsx b/src/llama_stack/ui/components/providers/session-provider.tsx
similarity index 100%
rename from llama_stack/ui/components/providers/session-provider.tsx
rename to src/llama_stack/ui/components/providers/session-provider.tsx
diff --git a/llama_stack/ui/components/responses/grouping/grouped-items-display.tsx b/src/llama_stack/ui/components/responses/grouping/grouped-items-display.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/grouping/grouped-items-display.tsx
rename to src/llama_stack/ui/components/responses/grouping/grouped-items-display.tsx
diff --git a/llama_stack/ui/components/responses/hooks/function-call-grouping.ts b/src/llama_stack/ui/components/responses/hooks/function-call-grouping.ts
similarity index 100%
rename from llama_stack/ui/components/responses/hooks/function-call-grouping.ts
rename to src/llama_stack/ui/components/responses/hooks/function-call-grouping.ts
diff --git a/llama_stack/ui/components/responses/items/function-call-item.tsx b/src/llama_stack/ui/components/responses/items/function-call-item.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/items/function-call-item.tsx
rename to src/llama_stack/ui/components/responses/items/function-call-item.tsx
diff --git a/llama_stack/ui/components/responses/items/generic-item.tsx b/src/llama_stack/ui/components/responses/items/generic-item.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/items/generic-item.tsx
rename to src/llama_stack/ui/components/responses/items/generic-item.tsx
diff --git a/llama_stack/ui/components/responses/items/grouped-function-call-item.tsx b/src/llama_stack/ui/components/responses/items/grouped-function-call-item.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/items/grouped-function-call-item.tsx
rename to src/llama_stack/ui/components/responses/items/grouped-function-call-item.tsx
diff --git a/llama_stack/ui/components/responses/items/index.ts b/src/llama_stack/ui/components/responses/items/index.ts
similarity index 100%
rename from llama_stack/ui/components/responses/items/index.ts
rename to src/llama_stack/ui/components/responses/items/index.ts
diff --git a/llama_stack/ui/components/responses/items/item-renderer.tsx b/src/llama_stack/ui/components/responses/items/item-renderer.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/items/item-renderer.tsx
rename to src/llama_stack/ui/components/responses/items/item-renderer.tsx
diff --git a/llama_stack/ui/components/responses/items/message-item.tsx b/src/llama_stack/ui/components/responses/items/message-item.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/items/message-item.tsx
rename to src/llama_stack/ui/components/responses/items/message-item.tsx
diff --git a/llama_stack/ui/components/responses/items/web-search-item.tsx b/src/llama_stack/ui/components/responses/items/web-search-item.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/items/web-search-item.tsx
rename to src/llama_stack/ui/components/responses/items/web-search-item.tsx
diff --git a/llama_stack/ui/components/responses/responses-detail.test.tsx b/src/llama_stack/ui/components/responses/responses-detail.test.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/responses-detail.test.tsx
rename to src/llama_stack/ui/components/responses/responses-detail.test.tsx
diff --git a/llama_stack/ui/components/responses/responses-detail.tsx b/src/llama_stack/ui/components/responses/responses-detail.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/responses-detail.tsx
rename to src/llama_stack/ui/components/responses/responses-detail.tsx
diff --git a/llama_stack/ui/components/responses/responses-table.test.tsx b/src/llama_stack/ui/components/responses/responses-table.test.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/responses-table.test.tsx
rename to src/llama_stack/ui/components/responses/responses-table.test.tsx
diff --git a/llama_stack/ui/components/responses/responses-table.tsx b/src/llama_stack/ui/components/responses/responses-table.tsx
similarity index 100%
rename from llama_stack/ui/components/responses/responses-table.tsx
rename to src/llama_stack/ui/components/responses/responses-table.tsx
diff --git a/llama_stack/ui/components/responses/utils/item-types.ts b/src/llama_stack/ui/components/responses/utils/item-types.ts
similarity index 100%
rename from llama_stack/ui/components/responses/utils/item-types.ts
rename to src/llama_stack/ui/components/responses/utils/item-types.ts
diff --git a/llama_stack/ui/components/ui/audio-visualizer.tsx b/src/llama_stack/ui/components/ui/audio-visualizer.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/audio-visualizer.tsx
rename to src/llama_stack/ui/components/ui/audio-visualizer.tsx
diff --git a/llama_stack/ui/components/ui/breadcrumb.tsx b/src/llama_stack/ui/components/ui/breadcrumb.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/breadcrumb.tsx
rename to src/llama_stack/ui/components/ui/breadcrumb.tsx
diff --git a/llama_stack/ui/components/ui/button.tsx b/src/llama_stack/ui/components/ui/button.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/button.tsx
rename to src/llama_stack/ui/components/ui/button.tsx
diff --git a/llama_stack/ui/components/ui/card.tsx b/src/llama_stack/ui/components/ui/card.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/card.tsx
rename to src/llama_stack/ui/components/ui/card.tsx
diff --git a/llama_stack/ui/components/ui/collapsible.tsx b/src/llama_stack/ui/components/ui/collapsible.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/collapsible.tsx
rename to src/llama_stack/ui/components/ui/collapsible.tsx
diff --git a/llama_stack/ui/components/ui/copy-button.tsx b/src/llama_stack/ui/components/ui/copy-button.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/copy-button.tsx
rename to src/llama_stack/ui/components/ui/copy-button.tsx
diff --git a/llama_stack/ui/components/ui/dropdown-menu.tsx b/src/llama_stack/ui/components/ui/dropdown-menu.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/dropdown-menu.tsx
rename to src/llama_stack/ui/components/ui/dropdown-menu.tsx
diff --git a/llama_stack/ui/components/ui/file-preview.tsx b/src/llama_stack/ui/components/ui/file-preview.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/file-preview.tsx
rename to src/llama_stack/ui/components/ui/file-preview.tsx
diff --git a/llama_stack/ui/components/ui/input.tsx b/src/llama_stack/ui/components/ui/input.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/input.tsx
rename to src/llama_stack/ui/components/ui/input.tsx
diff --git a/llama_stack/ui/components/ui/mode-toggle.tsx b/src/llama_stack/ui/components/ui/mode-toggle.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/mode-toggle.tsx
rename to src/llama_stack/ui/components/ui/mode-toggle.tsx
diff --git a/llama_stack/ui/components/ui/select.tsx b/src/llama_stack/ui/components/ui/select.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/select.tsx
rename to src/llama_stack/ui/components/ui/select.tsx
diff --git a/llama_stack/ui/components/ui/separator.tsx b/src/llama_stack/ui/components/ui/separator.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/separator.tsx
rename to src/llama_stack/ui/components/ui/separator.tsx
diff --git a/llama_stack/ui/components/ui/sheet.tsx b/src/llama_stack/ui/components/ui/sheet.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/sheet.tsx
rename to src/llama_stack/ui/components/ui/sheet.tsx
diff --git a/llama_stack/ui/components/ui/sidebar.tsx b/src/llama_stack/ui/components/ui/sidebar.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/sidebar.tsx
rename to src/llama_stack/ui/components/ui/sidebar.tsx
diff --git a/llama_stack/ui/components/ui/sign-in-button.tsx b/src/llama_stack/ui/components/ui/sign-in-button.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/sign-in-button.tsx
rename to src/llama_stack/ui/components/ui/sign-in-button.tsx
diff --git a/llama_stack/ui/components/ui/skeleton.tsx b/src/llama_stack/ui/components/ui/skeleton.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/skeleton.tsx
rename to src/llama_stack/ui/components/ui/skeleton.tsx
diff --git a/llama_stack/ui/components/ui/sonner.tsx b/src/llama_stack/ui/components/ui/sonner.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/sonner.tsx
rename to src/llama_stack/ui/components/ui/sonner.tsx
diff --git a/llama_stack/ui/components/ui/table.tsx b/src/llama_stack/ui/components/ui/table.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/table.tsx
rename to src/llama_stack/ui/components/ui/table.tsx
diff --git a/llama_stack/ui/components/ui/theme-provider.tsx b/src/llama_stack/ui/components/ui/theme-provider.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/theme-provider.tsx
rename to src/llama_stack/ui/components/ui/theme-provider.tsx
diff --git a/llama_stack/ui/components/ui/tooltip.tsx b/src/llama_stack/ui/components/ui/tooltip.tsx
similarity index 100%
rename from llama_stack/ui/components/ui/tooltip.tsx
rename to src/llama_stack/ui/components/ui/tooltip.tsx
diff --git a/llama_stack/ui/components/vector-stores/vector-store-detail.test.tsx b/src/llama_stack/ui/components/vector-stores/vector-store-detail.test.tsx
similarity index 100%
rename from llama_stack/ui/components/vector-stores/vector-store-detail.test.tsx
rename to src/llama_stack/ui/components/vector-stores/vector-store-detail.test.tsx
diff --git a/llama_stack/ui/components/vector-stores/vector-store-detail.tsx b/src/llama_stack/ui/components/vector-stores/vector-store-detail.tsx
similarity index 100%
rename from llama_stack/ui/components/vector-stores/vector-store-detail.tsx
rename to src/llama_stack/ui/components/vector-stores/vector-store-detail.tsx
diff --git a/llama_stack/ui/e2e/logs-table-scroll.spec.ts b/src/llama_stack/ui/e2e/logs-table-scroll.spec.ts
similarity index 100%
rename from llama_stack/ui/e2e/logs-table-scroll.spec.ts
rename to src/llama_stack/ui/e2e/logs-table-scroll.spec.ts
diff --git a/llama_stack/ui/eslint.config.mjs b/src/llama_stack/ui/eslint.config.mjs
similarity index 100%
rename from llama_stack/ui/eslint.config.mjs
rename to src/llama_stack/ui/eslint.config.mjs
diff --git a/llama_stack/ui/hooks/use-audio-recording.ts b/src/llama_stack/ui/hooks/use-audio-recording.ts
similarity index 100%
rename from llama_stack/ui/hooks/use-audio-recording.ts
rename to src/llama_stack/ui/hooks/use-audio-recording.ts
diff --git a/llama_stack/ui/hooks/use-auth-client.ts b/src/llama_stack/ui/hooks/use-auth-client.ts
similarity index 100%
rename from llama_stack/ui/hooks/use-auth-client.ts
rename to src/llama_stack/ui/hooks/use-auth-client.ts
diff --git a/llama_stack/ui/hooks/use-auto-scroll.ts b/src/llama_stack/ui/hooks/use-auto-scroll.ts
similarity index 100%
rename from llama_stack/ui/hooks/use-auto-scroll.ts
rename to src/llama_stack/ui/hooks/use-auto-scroll.ts
diff --git a/llama_stack/ui/hooks/use-autosize-textarea.ts b/src/llama_stack/ui/hooks/use-autosize-textarea.ts
similarity index 100%
rename from llama_stack/ui/hooks/use-autosize-textarea.ts
rename to src/llama_stack/ui/hooks/use-autosize-textarea.ts
diff --git a/llama_stack/ui/hooks/use-copy-to-clipboard.ts b/src/llama_stack/ui/hooks/use-copy-to-clipboard.ts
similarity index 100%
rename from llama_stack/ui/hooks/use-copy-to-clipboard.ts
rename to src/llama_stack/ui/hooks/use-copy-to-clipboard.ts
diff --git a/llama_stack/ui/hooks/use-infinite-scroll.ts b/src/llama_stack/ui/hooks/use-infinite-scroll.ts
similarity index 100%
rename from llama_stack/ui/hooks/use-infinite-scroll.ts
rename to src/llama_stack/ui/hooks/use-infinite-scroll.ts
diff --git a/llama_stack/ui/hooks/use-mobile.ts b/src/llama_stack/ui/hooks/use-mobile.ts
similarity index 100%
rename from llama_stack/ui/hooks/use-mobile.ts
rename to src/llama_stack/ui/hooks/use-mobile.ts
diff --git a/llama_stack/ui/hooks/use-pagination.ts b/src/llama_stack/ui/hooks/use-pagination.ts
similarity index 100%
rename from llama_stack/ui/hooks/use-pagination.ts
rename to src/llama_stack/ui/hooks/use-pagination.ts
diff --git a/llama_stack/ui/instrumentation.ts b/src/llama_stack/ui/instrumentation.ts
similarity index 100%
rename from llama_stack/ui/instrumentation.ts
rename to src/llama_stack/ui/instrumentation.ts
diff --git a/llama_stack/ui/jest.config.ts b/src/llama_stack/ui/jest.config.ts
similarity index 100%
rename from llama_stack/ui/jest.config.ts
rename to src/llama_stack/ui/jest.config.ts
diff --git a/llama_stack/ui/jest.setup.ts b/src/llama_stack/ui/jest.setup.ts
similarity index 100%
rename from llama_stack/ui/jest.setup.ts
rename to src/llama_stack/ui/jest.setup.ts
diff --git a/llama_stack/ui/lib/audio-utils.ts b/src/llama_stack/ui/lib/audio-utils.ts
similarity index 100%
rename from llama_stack/ui/lib/audio-utils.ts
rename to src/llama_stack/ui/lib/audio-utils.ts
diff --git a/llama_stack/ui/lib/auth.ts b/src/llama_stack/ui/lib/auth.ts
similarity index 100%
rename from llama_stack/ui/lib/auth.ts
rename to src/llama_stack/ui/lib/auth.ts
diff --git a/llama_stack/ui/lib/config-validator.ts b/src/llama_stack/ui/lib/config-validator.ts
similarity index 100%
rename from llama_stack/ui/lib/config-validator.ts
rename to src/llama_stack/ui/lib/config-validator.ts
diff --git a/llama_stack/ui/lib/contents-api.ts b/src/llama_stack/ui/lib/contents-api.ts
similarity index 100%
rename from llama_stack/ui/lib/contents-api.ts
rename to src/llama_stack/ui/lib/contents-api.ts
diff --git a/llama_stack/ui/lib/format-message-content.test.ts b/src/llama_stack/ui/lib/format-message-content.test.ts
similarity index 100%
rename from llama_stack/ui/lib/format-message-content.test.ts
rename to src/llama_stack/ui/lib/format-message-content.test.ts
diff --git a/llama_stack/ui/lib/format-message-content.ts b/src/llama_stack/ui/lib/format-message-content.ts
similarity index 100%
rename from llama_stack/ui/lib/format-message-content.ts
rename to src/llama_stack/ui/lib/format-message-content.ts
diff --git a/llama_stack/ui/lib/format-tool-call.tsx b/src/llama_stack/ui/lib/format-tool-call.tsx
similarity index 100%
rename from llama_stack/ui/lib/format-tool-call.tsx
rename to src/llama_stack/ui/lib/format-tool-call.tsx
diff --git a/llama_stack/ui/lib/message-content-utils.ts b/src/llama_stack/ui/lib/message-content-utils.ts
similarity index 100%
rename from llama_stack/ui/lib/message-content-utils.ts
rename to src/llama_stack/ui/lib/message-content-utils.ts
diff --git a/llama_stack/ui/lib/truncate-text.ts b/src/llama_stack/ui/lib/truncate-text.ts
similarity index 100%
rename from llama_stack/ui/lib/truncate-text.ts
rename to src/llama_stack/ui/lib/truncate-text.ts
diff --git a/llama_stack/ui/lib/types.ts b/src/llama_stack/ui/lib/types.ts
similarity index 100%
rename from llama_stack/ui/lib/types.ts
rename to src/llama_stack/ui/lib/types.ts
diff --git a/llama_stack/ui/lib/utils.tsx b/src/llama_stack/ui/lib/utils.tsx
similarity index 100%
rename from llama_stack/ui/lib/utils.tsx
rename to src/llama_stack/ui/lib/utils.tsx
diff --git a/llama_stack/ui/next.config.ts b/src/llama_stack/ui/next.config.ts
similarity index 100%
rename from llama_stack/ui/next.config.ts
rename to src/llama_stack/ui/next.config.ts
diff --git a/llama_stack/ui/package-lock.json b/src/llama_stack/ui/package-lock.json
similarity index 100%
rename from llama_stack/ui/package-lock.json
rename to src/llama_stack/ui/package-lock.json
diff --git a/llama_stack/ui/package.json b/src/llama_stack/ui/package.json
similarity index 100%
rename from llama_stack/ui/package.json
rename to src/llama_stack/ui/package.json
diff --git a/llama_stack/ui/playwright.config.ts b/src/llama_stack/ui/playwright.config.ts
similarity index 100%
rename from llama_stack/ui/playwright.config.ts
rename to src/llama_stack/ui/playwright.config.ts
diff --git a/llama_stack/ui/postcss.config.mjs b/src/llama_stack/ui/postcss.config.mjs
similarity index 100%
rename from llama_stack/ui/postcss.config.mjs
rename to src/llama_stack/ui/postcss.config.mjs
diff --git a/llama_stack/ui/public/favicon.ico b/src/llama_stack/ui/public/favicon.ico
similarity index 100%
rename from llama_stack/ui/public/favicon.ico
rename to src/llama_stack/ui/public/favicon.ico
diff --git a/llama_stack/ui/public/file.svg b/src/llama_stack/ui/public/file.svg
similarity index 100%
rename from llama_stack/ui/public/file.svg
rename to src/llama_stack/ui/public/file.svg
diff --git a/llama_stack/ui/public/globe.svg b/src/llama_stack/ui/public/globe.svg
similarity index 100%
rename from llama_stack/ui/public/globe.svg
rename to src/llama_stack/ui/public/globe.svg
diff --git a/llama_stack/ui/public/logo.webp b/src/llama_stack/ui/public/logo.webp
similarity index 100%
rename from llama_stack/ui/public/logo.webp
rename to src/llama_stack/ui/public/logo.webp
diff --git a/llama_stack/ui/public/next.svg b/src/llama_stack/ui/public/next.svg
similarity index 100%
rename from llama_stack/ui/public/next.svg
rename to src/llama_stack/ui/public/next.svg
diff --git a/llama_stack/ui/public/vercel.svg b/src/llama_stack/ui/public/vercel.svg
similarity index 100%
rename from llama_stack/ui/public/vercel.svg
rename to src/llama_stack/ui/public/vercel.svg
diff --git a/llama_stack/ui/public/window.svg b/src/llama_stack/ui/public/window.svg
similarity index 100%
rename from llama_stack/ui/public/window.svg
rename to src/llama_stack/ui/public/window.svg
diff --git a/llama_stack/ui/tsconfig.json b/src/llama_stack/ui/tsconfig.json
similarity index 100%
rename from llama_stack/ui/tsconfig.json
rename to src/llama_stack/ui/tsconfig.json
diff --git a/llama_stack/ui/types/next-auth.d.ts b/src/llama_stack/ui/types/next-auth.d.ts
similarity index 100%
rename from llama_stack/ui/types/next-auth.d.ts
rename to src/llama_stack/ui/types/next-auth.d.ts