From 2665f0010299b5877fc9e268f755d268aa5d0758 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Wed, 30 Jul 2025 23:30:53 -0700
Subject: [PATCH 1/2] chore(rename): move llama_stack.distribution to
 llama_stack.core (#2975)

We would like to rename the term `template` to `distribution`. As a
precursor, this change moves the existing `llama_stack.distribution`
package to `llama_stack.core`, freeing up the `distribution` name.

cc @leseb
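For downstream code this is a mechanical import rename; no APIs change. A
minimal sketch of the migration (assuming your code only references the old
module path; `LlamaStackAsLibraryClient` is one representative import):

```python
# Before this change:
#   from llama_stack.distribution.library_client import LlamaStackAsLibraryClient

# After this change -- same symbol, new module path:
from llama_stack.core.library_client import LlamaStackAsLibraryClient

client = LlamaStackAsLibraryClient("starter")  # usage is unchanged
_ = client.initialize()
```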
---
 .github/workflows/providers-build.yml | 12 ++++----
 .../test-external-provider-module.yml | 5 +++-
 .github/workflows/test-external.yml | 4 +--
 MANIFEST.in | 2 +-
 docs/getting_started.ipynb | 2 +-
 docs/getting_started_llama4.ipynb | 2 +-
 docs/getting_started_llama_api.ipynb | 2 +-
 .../Alpha_Llama_Stack_Post_Training.ipynb | 28 +++++++++----------
 .../Llama_Stack_Agent_Workflows.ipynb | 2 +-
 .../Llama_Stack_Benchmark_Evals.ipynb | 2 +-
 .../notebooks/Llama_Stack_RAG_Lifecycle.ipynb | 2 +-
 .../Llama_Stack_NVIDIA_E2E_Flow.ipynb | 2 +-
 .../2_finetuning_and_inference.ipynb | 2 +-
 .../tool_calling/3_model_evaluation.ipynb | 2 +-
 .../4_adding_safety_guardrails.ipynb | 2 +-
 docs/openapi_generator/README.md | 2 +-
 docs/openapi_generator/generate.py | 2 +-
 docs/openapi_generator/pyopenapi/utility.py | 2 +-
 docs/original_rfc.md | 2 +-
 docs/quick_start.ipynb | 2 +-
 docs/source/apis/external.md | 2 +-
 .../building_applications/playground/index.md | 2 +-
 .../source/deploying/kubernetes_deployment.md | 2 +-
 docs/source/distributions/building_distro.md | 2 +-
 .../distributions/importing_as_library.md | 2 +-
 .../distributions/k8s/stack-k8s.yaml.template | 2 +-
 docs/zero_to_hero_guide/06_Safety101.ipynb | 2 +-
 llama_stack/__init__.py | 2 +-
 llama_stack/cli/download.py | 6 ++--
 llama_stack/cli/model/list.py | 2 +-
 llama_stack/cli/model/remove.py | 2 +-
 llama_stack/cli/stack/_build.py | 22 +++++++--------
 llama_stack/cli/stack/build.py | 2 +-
 llama_stack/cli/stack/list_apis.py | 2 +-
 llama_stack/cli/stack/list_providers.py | 4 +--
 llama_stack/cli/stack/run.py | 8 +++---
 llama_stack/cli/verify_download.py | 2 +-
 .../{distribution => core}/__init__.py | 0
 .../access_control/__init__.py | 0
 .../access_control/access_control.py | 2 +-
 .../access_control/conditions.py | 0
 .../access_control/datatypes.py | 0
 llama_stack/{distribution => core}/build.py | 16 +++++------
 .../{distribution => core}/build_conda_env.sh | 0
 .../{distribution => core}/build_container.sh | 4 +--
 .../{distribution => core}/build_venv.sh | 0
 llama_stack/{distribution => core}/client.py | 0
 llama_stack/{distribution => core}/common.sh | 0
 .../{distribution => core}/configure.py | 12 ++++----
 .../{distribution => core}/datatypes.py | 2 +-
 .../{distribution => core}/distribution.py | 4 +--
 .../{distribution => core}/external.py | 2 +-
 llama_stack/{distribution => core}/inspect.py | 6 ++--
 .../{distribution => core}/library_client.py | 20 ++++++------
 .../{distribution => core}/providers.py | 0
 .../{distribution => core}/request_headers.py | 2 +-
 .../{distribution => core}/resolver.py | 16 +++++------
 .../routers/__init__.py | 6 ++--
 .../routers/datasets.py | 0
 .../routers/eval_scoring.py | 0
 .../routers/inference.py | 0
 .../{distribution => core}/routers/safety.py | 0
 .../routers/tool_runtime.py | 0
 .../routers/vector_io.py | 0
 .../routing_tables/__init__.py | 0
 .../routing_tables/benchmarks.py | 2 +-
 .../routing_tables/common.py | 10 +++---
 .../routing_tables/datasets.py | 2 +-
 .../routing_tables/models.py | 2 +-
 .../routing_tables/scoring_functions.py | 2 +-
 .../routing_tables/shields.py | 2 +-
 .../routing_tables/toolgroups.py | 2 +-
 .../routing_tables/vector_dbs.py | 2 +-
 .../{distribution => core}/server/__init__.py | 0
 .../{distribution => core}/server/auth.py | 8 +++---
 .../server/auth_providers.py | 2 +-
 .../{distribution => core}/server/quota.py | 0
 .../{distribution => core}/server/routes.py | 2 +-
 .../{distribution => core}/server/server.py | 22 +++++++--------
 llama_stack/{distribution => core}/stack.py | 16 +++++------
 .../{distribution => core}/start_stack.sh | 2 +-
 .../{distribution => core}/store/__init__.py | 0
 .../{distribution => core}/store/registry.py | 4 +--
 .../{distribution => core}/ui/Containerfile | 0
 .../{distribution => core}/ui/README.md | 2 +-
 .../{distribution => core}/ui/__init__.py | 0
 llama_stack/{distribution => core}/ui/app.py | 0
 .../ui/modules/__init__.py | 0
 .../{distribution => core}/ui/modules/api.py | 0
 .../ui/modules/utils.py | 0
 .../ui/page/__init__.py | 0
 .../ui/page/distribution/__init__.py | 0
 .../ui/page/distribution/datasets.py | 2 +-
 .../ui/page/distribution/eval_tasks.py | 2 +-
 .../ui/page/distribution/models.py | 2 +-
 .../ui/page/distribution/providers.py | 2 +-
 .../ui/page/distribution/resources.py | 12 ++++----
 .../ui/page/distribution/scoring_functions.py | 2 +-
 .../ui/page/distribution/shields.py | 2 +-
 .../ui/page/distribution/vector_dbs.py | 2 +-
 .../ui/page/evaluations/__init__.py | 0
 .../ui/page/evaluations/app_eval.py | 4 +--
 .../ui/page/evaluations/native_eval.py | 2 +-
 .../ui/page/playground/__init__.py | 0
 .../ui/page/playground/chat.py | 2 +-
 .../ui/page/playground/rag.py | 4 +--
 .../ui/page/playground/tools.py | 2 +-
 .../ui/requirements.txt | 0
 .../{distribution => core}/utils/__init__.py | 0
 .../{distribution => core}/utils/config.py | 0
 .../utils/config_dirs.py | 0
 .../utils/config_resolution.py | 4 +--
 .../{distribution => core}/utils/context.py | 0
 .../{distribution => core}/utils/dynamic.py | 0
 .../{distribution => core}/utils/exec.py | 4 +--
 .../utils/image_types.py | 0
 .../utils/model_utils.py | 0
 .../utils/prompt_for_config.py | 0
 .../{distribution => core}/utils/serialize.py | 0
 llama_stack/log.py | 2 +-
 .../inline/agents/meta_reference/__init__.py | 2 +-
 .../agents/meta_reference/agent_instance.py | 2 +-
 .../inline/agents/meta_reference/agents.py | 2 +-
 .../agents/meta_reference/persistence.py | 8 +++---
 .../inline/eval/meta_reference/__init__.py | 2 +-
 .../inline/files/localfs/__init__.py | 2 +-
 .../providers/inline/files/localfs/files.py | 2 +-
 .../inline/inference/meta_reference/common.py | 2 +-
 .../post_training/huggingface/__init__.py | 2 +-
 .../post_training/torchtune/__init__.py | 2 +-
 .../recipes/lora_finetuning_single_device.py | 4 +--
 .../inline/safety/llama_guard/llama_guard.py | 2 +-
 .../safety/prompt_guard/prompt_guard.py | 2 +-
 .../inline/scoring/basic/__init__.py | 2 +-
 .../providers/inline/scoring/basic/scoring.py | 2 +-
 .../inline/scoring/braintrust/__init__.py | 2 +-
 .../inline/scoring/braintrust/braintrust.py | 4 +--
 .../inline/scoring/llm_as_judge/__init__.py | 2 +-
 .../inline/scoring/llm_as_judge/scoring.py | 2 +-
 .../telemetry/meta_reference/__init__.py | 2 +-
 .../inline/telemetry/meta_reference/config.py | 2 +-
 .../telemetry/meta_reference/telemetry.py | 2 +-
 .../remote/datasetio/nvidia/README.md | 2 +-
 .../providers/remote/eval/nvidia/__init__.py | 2 +-
 .../remote/inference/fireworks/fireworks.py | 2 +-
 .../remote/inference/nvidia/NVIDIA.md | 2 +-
 .../inference/passthrough/passthrough.py | 2 +-
 .../remote/inference/together/together.py | 2 +-
 .../remote/post_training/nvidia/README.md | 2 +-
 .../providers/remote/safety/nvidia/README.md | 2 +-
 .../remote/safety/sambanova/sambanova.py | 2 +-
 .../tool_runtime/bing_search/bing_search.py | 2 +-
 .../tool_runtime/brave_search/brave_search.py | 2 +-
 .../model_context_protocol.py | 2 +-
 .../tavily_search/tavily_search.py | 2 +-
 .../wolfram_alpha/wolfram_alpha.py | 2 +-
 .../remote/vector_io/weaviate/weaviate.py | 2 +-
 .../utils/common/data_schema_validator.py | 2 +-
 .../utils/inference/inference_store.py | 4 +--
 .../utils/inference/litellm_openai_mixin.py | 2 +-
 llama_stack/providers/utils/kvstore/config.py | 2 +-
 .../utils/responses/responses_store.py | 4 +--
 .../utils/sqlstore/authorized_sqlstore.py | 10 +++---
 .../providers/utils/sqlstore/sqlstore.py | 2 +-
 llama_stack/providers/utils/tools/mcp.py | 2 +-
 llama_stack/templates/dell/dell.py | 2 +-
 .../meta-reference-gpu/meta_reference.py | 2 +-
 llama_stack/templates/nvidia/nvidia.py | 2 +-
 .../open-benchmark/open_benchmark.py | 2 +-
 .../templates/postgres-demo/postgres_demo.py | 2 +-
 llama_stack/templates/starter/starter.py | 4 +--
 llama_stack/templates/template.py | 6 ++--
 llama_stack/templates/watsonx/watsonx.py | 2 +-
 pyproject.toml | 22 +++++++--------
 scripts/provider_codegen.py | 2 +-
 .../agents/test_openai_responses.py | 2 +-
 tests/integration/files/test_files.py | 4 +--
 tests/integration/fixtures/common.py | 2 +-
 .../inference/test_openai_completion.py | 2 +-
 .../inference/test_openai_embeddings.py | 2 +-
 .../sqlstore/test_authorized_sqlstore.py | 6 ++--
 tests/integration/tool_runtime/test_mcp.py | 2 +-
 .../vector_io/test_openai_vector_stores.py | 2 +-
 tests/unit/cli/test_stack_config.py | 2 +-
 .../routers/test_routing_tables.py | 16 +++++------
 .../routing_tables/test_vector_dbs.py | 8 +++---
 tests/unit/distribution/test_build_path.py | 4 +--
 tests/unit/distribution/test_context.py | 2 +-
 tests/unit/distribution/test_distribution.py | 12 ++++----
 .../test_library_client_initialization.py | 2 +-
 tests/unit/files/test_files.py | 2 +-
 tests/unit/fixtures.py | 2 +-
 .../meta_reference/test_openai_responses.py | 2 +-
 .../agents/test_persistence_access_control.py | 2 +-
 .../test_inference_client_caching.py | 2 +-
 .../inference/test_openai_base_url_config.py | 2 +-
 .../unit/providers/nvidia/test_parameters.py | 2 +-
 .../nvidia/test_supervised_fine_tuning.py | 2 +-
 tests/unit/providers/test_configs.py | 4 +--
 tests/unit/registry/test_registry.py | 2 +-
 tests/unit/registry/test_registry_acl.py | 4 +--
 tests/unit/server/test_access_control.py | 20 ++++++-------
 tests/unit/server/test_auth.py | 14 +++++-----
 tests/unit/server/test_auth_github.py | 10 +++----
 tests/unit/server/test_quota.py | 4 +--
 tests/unit/server/test_replace_env_vars.py | 2 +-
 tests/unit/server/test_resolver.py | 8 +++---
 tests/unit/server/test_server.py | 8 +++---
 tests/unit/server/test_sse.py | 2 +-
 tests/unit/utils/test_authorized_sqlstore.py | 6 ++--
 .../openai_api/test_responses.py | 2 +-
 211 files changed, 351 insertions(+), 348 deletions(-)
 rename llama_stack/{distribution => core}/__init__.py (100%)
 rename llama_stack/{distribution => core}/access_control/__init__.py (100%)
 rename llama_stack/{distribution => core}/access_control/access_control.py (98%)
 rename llama_stack/{distribution => core}/access_control/conditions.py (100%)
 rename llama_stack/{distribution => core}/access_control/datatypes.py (100%)
 rename llama_stack/{distribution => core}/build.py (93%)
 rename llama_stack/{distribution => core}/build_conda_env.sh (100%)
 rename llama_stack/{distribution => core}/build_container.sh (98%)
 rename llama_stack/{distribution => core}/build_venv.sh (100%)
 rename llama_stack/{distribution => core}/client.py (100%)
 rename llama_stack/{distribution => core}/common.sh (100%)
 rename llama_stack/{distribution => core}/configure.py (93%)
 rename llama_stack/{distribution => core}/datatypes.py (99%)
 rename llama_stack/{distribution => core}/distribution.py (98%)
 rename llama_stack/{distribution => core}/external.py (96%)
 rename llama_stack/{distribution => core}/inspect.py (93%)
 rename llama_stack/{distribution => core}/library_client.py (96%)
 rename llama_stack/{distribution => core}/providers.py (100%)
 rename llama_stack/{distribution => core}/request_headers.py (98%)
 rename llama_stack/{distribution => core}/resolver.py (97%)
 rename llama_stack/{distribution => core}/routers/__init__.py (94%)
 rename llama_stack/{distribution => core}/routers/datasets.py (100%)
 rename llama_stack/{distribution => core}/routers/eval_scoring.py (100%)
 rename llama_stack/{distribution => core}/routers/inference.py (100%)
 rename llama_stack/{distribution => core}/routers/safety.py (100%)
 rename llama_stack/{distribution => core}/routers/tool_runtime.py (100%)
 rename llama_stack/{distribution => core}/routers/vector_io.py (100%)
 rename llama_stack/{distribution => core}/routing_tables/__init__.py (100%)
 rename llama_stack/{distribution => core}/routing_tables/benchmarks.py (97%)
 rename llama_stack/{distribution => core}/routing_tables/common.py (96%)
 rename llama_stack/{distribution => core}/routing_tables/datasets.py (98%)
 rename llama_stack/{distribution => core}/routing_tables/models.py (99%)
 rename llama_stack/{distribution => core}/routing_tables/scoring_functions.py (97%)
 rename llama_stack/{distribution => core}/routing_tables/shields.py (97%)
 rename llama_stack/{distribution => core}/routing_tables/toolgroups.py (98%)
 rename llama_stack/{distribution => core}/routing_tables/vector_dbs.py (99%)
 rename llama_stack/{distribution => core}/server/__init__.py (100%)
 rename llama_stack/{distribution => core}/server/auth.py (95%)
 rename llama_stack/{distribution => core}/server/auth_providers.py (99%)
 rename llama_stack/{distribution => core}/server/quota.py (100%)
 rename llama_stack/{distribution => core}/server/routes.py (98%)
 rename llama_stack/{distribution => core}/server/server.py (96%)
 rename llama_stack/{distribution => core}/stack.py (96%)
 rename llama_stack/{distribution => core}/start_stack.sh (98%)
 rename llama_stack/{distribution => core}/store/__init__.py (100%)
 rename llama_stack/{distribution => core}/store/registry.py (98%)
 rename llama_stack/{distribution => core}/ui/Containerfile (100%)
 rename llama_stack/{distribution => core}/ui/README.md (96%)
 rename llama_stack/{distribution => core}/ui/__init__.py (100%)
 rename llama_stack/{distribution => core}/ui/app.py (100%)
 rename llama_stack/{distribution => core}/ui/modules/__init__.py (100%)
 rename llama_stack/{distribution => core}/ui/modules/api.py (100%)
 rename llama_stack/{distribution => core}/ui/modules/utils.py (100%)
 rename llama_stack/{distribution => core}/ui/page/__init__.py (100%)
 rename llama_stack/{distribution => core}/ui/page/distribution/__init__.py (100%)
 rename llama_stack/{distribution => core}/ui/page/distribution/datasets.py (88%)
 rename llama_stack/{distribution => core}/ui/page/distribution/eval_tasks.py (90%)
 rename llama_stack/{distribution => core}/ui/page/distribution/models.py (87%)
 rename llama_stack/{distribution => core}/ui/page/distribution/providers.py (91%)
 rename llama_stack/{distribution => core}/ui/page/distribution/resources.py (70%)
 rename llama_stack/{distribution => core}/ui/page/distribution/scoring_functions.py (89%)
 rename llama_stack/{distribution => core}/ui/page/distribution/shields.py (88%)
 rename llama_stack/{distribution => core}/ui/page/distribution/vector_dbs.py (90%)
 rename llama_stack/{distribution => core}/ui/page/evaluations/__init__.py (100%)
 rename llama_stack/{distribution => core}/ui/page/evaluations/app_eval.py (97%)
 rename llama_stack/{distribution => core}/ui/page/evaluations/native_eval.py (99%)
 rename llama_stack/{distribution => core}/ui/page/playground/__init__.py (100%)
 rename llama_stack/{distribution => core}/ui/page/playground/chat.py (98%)
 rename llama_stack/{distribution => core}/ui/page/playground/rag.py (98%)
 rename llama_stack/{distribution => core}/ui/page/playground/tools.py (99%)
 rename llama_stack/{distribution => core}/ui/requirements.txt (100%)
 rename llama_stack/{distribution => core}/utils/__init__.py (100%)
 rename llama_stack/{distribution => core}/utils/config.py (100%)
 rename llama_stack/{distribution => core}/utils/config_dirs.py (100%)
 rename llama_stack/{distribution => core}/utils/config_resolution.py (96%)
 rename llama_stack/{distribution => core}/utils/context.py (100%)
 rename llama_stack/{distribution => core}/utils/dynamic.py (100%)
 rename llama_stack/{distribution => core}/utils/exec.py (96%)
 rename llama_stack/{distribution => core}/utils/image_types.py (100%)
 rename llama_stack/{distribution => core}/utils/model_utils.py (100%)
 rename llama_stack/{distribution => core}/utils/prompt_for_config.py (100%)
 rename llama_stack/{distribution => core}/utils/serialize.py (100%)

diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml
index 284076d50..e0edb5486 100644
--- a/.github/workflows/providers-build.yml
+++ b/.github/workflows/providers-build.yml
@@ -9,8 +9,8 @@ on:
     paths:
       - 'llama_stack/cli/stack/build.py'
       - 'llama_stack/cli/stack/_build.py'
-      - 'llama_stack/distribution/build.*'
-      - 'llama_stack/distribution/*.sh'
+      - 'llama_stack/core/build.*'
+      - 'llama_stack/core/*.sh'
       - '.github/workflows/providers-build.yml'
       - 'llama_stack/templates/**'
       - 'pyproject.toml'
@@ -19,8 +19,8 @@
     paths:
       - 'llama_stack/cli/stack/build.py'
      - 'llama_stack/cli/stack/_build.py'
-      - 'llama_stack/distribution/build.*'
-      - 'llama_stack/distribution/*.sh'
+      - 'llama_stack/core/build.*'
+      - 'llama_stack/core/*.sh'
       - '.github/workflows/providers-build.yml'
       - 'llama_stack/templates/**'
       - 'pyproject.toml'
@@ -108,7 +108,7 @@ jobs:
           IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
           entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
           echo "Entrypoint: $entrypoint"
-          if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then
+          if [ "$entrypoint" != "[python -m llama_stack.core.server.server --config /app/run.yaml]" ]; then
             echo "Entrypoint is not correct"
             exit 1
           fi
@@ -142,7 +142,7 @@ jobs:
           IMAGE_ID=$(docker images --format "{{.Repository}}:{{.Tag}}" | head -n 1)
           entrypoint=$(docker inspect --format '{{ .Config.Entrypoint }}' $IMAGE_ID)
           echo "Entrypoint: $entrypoint"
-          if [ "$entrypoint" != "[python -m llama_stack.distribution.server.server --config /app/run.yaml]" ]; then
+          if [ "$entrypoint" != "[python -m llama_stack.core.server.server --config /app/run.yaml]" ]; then
             echo "Entrypoint is not correct"
             exit 1
           fi
diff --git a/.github/workflows/test-external-provider-module.yml b/.github/workflows/test-external-provider-module.yml
index 8567a9446..c3d11f28a 100644
--- a/.github/workflows/test-external-provider-module.yml
+++ b/.github/workflows/test-external-provider-module.yml
@@ -16,6 +16,9 @@ on:
     - 'tests/external/*'
     - '.github/workflows/test-external-provider-module.yml' # This workflow
 
+# This workflow is disabled. See https://github.com/meta-llama/llama-stack/pull/2975#issuecomment-3138702984 for details
+if: false
+
 jobs:
   test-external-providers-from-module:
     runs-on: ubuntu-latest
@@ -47,7 +50,7 @@ jobs:
 
     - name: Build distro from config file
       run: |
-        USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/ramalama-stack/build.yaml
+        USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/ramalama-stack/build.yaml
 
     - name: Start Llama Stack server in background
       if: ${{ matrix.image-type }} == 'venv'
diff --git a/.github/workflows/test-external.yml b/.github/workflows/test-external.yml
index 053b38fab..27181a236 100644
--- a/.github/workflows/test-external.yml
+++ b/.github/workflows/test-external.yml
@@ -43,11 +43,11 @@ jobs:
 
     - name: Print distro dependencies
       run: |
-        USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/build.yaml --print-deps-only
+        USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/build.yaml --print-deps-only
 
     - name: Build distro from config file
       run: |
-        USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. llama stack build --config tests/external/build.yaml
+        USE_COPY_NOT_MOUNT=true LLAMA_STACK_DIR=. uv run llama stack build --config tests/external/build.yaml
 
     - name: Start Llama Stack server in background
       if: ${{ matrix.image-type }} == 'venv'
diff --git a/MANIFEST.in b/MANIFEST.in
index 88bd11767..fde033d96 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,7 +1,7 @@
 include pyproject.toml
 include llama_stack/models/llama/llama3/tokenizer.model
 include llama_stack/models/llama/llama4/tokenizer.model
-include llama_stack/distribution/*.sh
+include llama_stack/core/*.sh
 include llama_stack/cli/scripts/*.sh
 include llama_stack/templates/*/*.yaml
 include llama_stack/providers/tests/test_cases/inference/*.json
diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb
index 88878c9be..c87ce4152 100644
--- a/docs/getting_started.ipynb
+++ b/docs/getting_started.ipynb
@@ -165,7 +165,7 @@
     "# use this helper if needed to kill the server \n",
     "def kill_llama_stack_server():\n",
     "    # Kill any existing llama stack server processes\n",
-    "    os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
+    "    os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
    ]
   },
   {
diff --git a/docs/getting_started_llama4.ipynb b/docs/getting_started_llama4.ipynb
index 82aef6039..7c274de8c 100644
--- a/docs/getting_started_llama4.ipynb
+++ b/docs/getting_started_llama4.ipynb
@@ -275,7 +275,7 @@
     "# use this helper if needed to kill the server \n",
     "def kill_llama_stack_server():\n",
     "    # Kill any existing llama stack server processes\n",
-    "    os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
+    "    os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
    ]
   },
   {
diff --git a/docs/getting_started_llama_api.ipynb b/docs/getting_started_llama_api.ipynb
index e6c74986b..7950cbd12 100644
--- a/docs/getting_started_llama_api.ipynb
+++ b/docs/getting_started_llama_api.ipynb
@@ -265,7 +265,7 @@
     "# use this helper if needed to kill the server \n",
     "def kill_llama_stack_server():\n",
     "    # Kill any existing llama stack server processes\n",
-    "    os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
+    "    os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
    ]
   },
   {
diff --git a/docs/notebooks/Alpha_Llama_Stack_Post_Training.ipynb b/docs/notebooks/Alpha_Llama_Stack_Post_Training.ipynb
index b7d769b51..59e516fd0 100644
--- a/docs/notebooks/Alpha_Llama_Stack_Post_Training.ipynb
+++ b/docs/notebooks/Alpha_Llama_Stack_Post_Training.ipynb
@@ -3216,19 +3216,19 @@
     "INFO:datasets:Duckdb version 1.1.3 available.\n",
     "INFO:datasets:TensorFlow version 2.18.0 available.\n",
     "INFO:datasets:JAX version 0.4.33 available.\n",
-    "INFO:llama_stack.distribution.stack:Scoring_fns: basic::equality served by basic\n",
-    "INFO:llama_stack.distribution.stack:Scoring_fns: basic::subset_of served by basic\n",
-    "INFO:llama_stack.distribution.stack:Scoring_fns: basic::regex_parser_multiple_choice_answer served by basic\n",
-    "INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::factuality served by braintrust\n",
-    "INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-correctness served by braintrust\n",
-    "INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-relevancy served by braintrust\n",
-    "INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-similarity served by braintrust\n",
-    "INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::faithfulness served by braintrust\n",
-    "INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-entity-recall served by braintrust\n",
-    "INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-precision served by braintrust\n",
-    "INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-recall served by braintrust\n",
-    "INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-relevancy served by braintrust\n",
-    "INFO:llama_stack.distribution.stack:\n"
+    "INFO:llama_stack.core.stack:Scoring_fns: basic::equality served by basic\n",
+    "INFO:llama_stack.core.stack:Scoring_fns: basic::subset_of served by basic\n",
+    "INFO:llama_stack.core.stack:Scoring_fns: basic::regex_parser_multiple_choice_answer served by basic\n",
+    "INFO:llama_stack.core.stack:Scoring_fns: braintrust::factuality served by braintrust\n",
+    "INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-correctness served by braintrust\n",
+    "INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-relevancy served by braintrust\n",
+    "INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-similarity served by braintrust\n",
+    "INFO:llama_stack.core.stack:Scoring_fns: braintrust::faithfulness served by braintrust\n",
+    "INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-entity-recall served by braintrust\n",
+    "INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-precision served by braintrust\n",
+    "INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-recall served by braintrust\n",
+    "INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-relevancy served by braintrust\n",
+    "INFO:llama_stack.core.stack:\n"
    ]
   },
   {
@@ -3448,7 +3448,7 @@
     "\n",
     "os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')\n",
     "\n",
-    "from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
+    "from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
     "client = LlamaStackAsLibraryClient(\"experimental-post-training\")\n",
     "_ = client.initialize()"
    ]
diff --git a/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb b/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb
index cad28ab82..0edef4204 100644
--- a/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb
+++ b/docs/notebooks/Llama_Stack_Agent_Workflows.ipynb
@@ -48,7 +48,7 @@
    "outputs": [],
    "source": [
     "from llama_stack_client import LlamaStackClient, Agent\n",
-    "from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
+    "from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
     "from rich.pretty import pprint\n",
     "import json\n",
     "import uuid\n",
diff --git a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
index 93f78d268..72840ceaf 100644
--- a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
+++ b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
@@ -661,7 +661,7 @@
     "except ImportError:\n",
     "    print(\"Not in Google Colab environment\")\n",
     "\n",
-    "from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
+    "from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
     "\n",
     "client = LlamaStackAsLibraryClient(\"together\")\n",
     "_ = client.initialize()"
diff --git a/docs/notebooks/Llama_Stack_RAG_Lifecycle.ipynb b/docs/notebooks/Llama_Stack_RAG_Lifecycle.ipynb
index e70cc3bbe..769c91dfd 100644
--- a/docs/notebooks/Llama_Stack_RAG_Lifecycle.ipynb
+++ b/docs/notebooks/Llama_Stack_RAG_Lifecycle.ipynb
@@ -35,7 +35,7 @@
    ],
    "source": [
     "from llama_stack_client import LlamaStackClient, Agent\n",
-    "from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
+    "from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
     "from rich.pretty import pprint\n",
     "import json\n",
     "import uuid\n",
diff --git a/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb b/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb
index 583870404..0db0b26b6 100644
--- a/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb
+++ b/docs/notebooks/nvidia/beginner_e2e/Llama_Stack_NVIDIA_E2E_Flow.ipynb
@@ -194,7 +194,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
+    "from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
     "\n",
     "client = LlamaStackAsLibraryClient(\"nvidia\")\n",
     "client.initialize()"
diff --git a/docs/notebooks/nvidia/tool_calling/2_finetuning_and_inference.ipynb b/docs/notebooks/nvidia/tool_calling/2_finetuning_and_inference.ipynb
index 647a16b6d..a80720a5f 100644
--- a/docs/notebooks/nvidia/tool_calling/2_finetuning_and_inference.ipynb
+++ b/docs/notebooks/nvidia/tool_calling/2_finetuning_and_inference.ipynb
@@ -56,7 +56,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
+    "from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
     "\n",
     "client = LlamaStackAsLibraryClient(\"nvidia\")\n",
     "client.initialize()"
diff --git a/docs/notebooks/nvidia/tool_calling/3_model_evaluation.ipynb b/docs/notebooks/nvidia/tool_calling/3_model_evaluation.ipynb
index 5a1316adb..91d1db88f 100644
--- a/docs/notebooks/nvidia/tool_calling/3_model_evaluation.ipynb
+++ b/docs/notebooks/nvidia/tool_calling/3_model_evaluation.ipynb
@@ -56,7 +56,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
+    "from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
     "\n",
     "client = LlamaStackAsLibraryClient(\"nvidia\")\n",
     "client.initialize()"
diff --git a/docs/notebooks/nvidia/tool_calling/4_adding_safety_guardrails.ipynb b/docs/notebooks/nvidia/tool_calling/4_adding_safety_guardrails.ipynb
index 699a561f9..25bcd0b69 100644
--- a/docs/notebooks/nvidia/tool_calling/4_adding_safety_guardrails.ipynb
+++ b/docs/notebooks/nvidia/tool_calling/4_adding_safety_guardrails.ipynb
@@ -56,7 +56,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
+    "from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
     "\n",
     "client = LlamaStackAsLibraryClient(\"nvidia\")\n",
     "client.initialize()"
diff --git a/docs/openapi_generator/README.md b/docs/openapi_generator/README.md
index 7888e7828..85021d911 100644
--- a/docs/openapi_generator/README.md
+++ b/docs/openapi_generator/README.md
@@ -1 +1 @@
-The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack/distribution/server/endpoints.py` using the `generate.py` utility.
+The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack/core/server/endpoints.py` using the `generate.py` utility.
diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py
index 9fc375175..c27bc6440 100644
--- a/docs/openapi_generator/generate.py
+++ b/docs/openapi_generator/generate.py
@@ -17,7 +17,7 @@ import fire
 import ruamel.yaml as yaml
 
 from llama_stack.apis.version import LLAMA_STACK_API_VERSION  # noqa: E402
-from llama_stack.distribution.stack import LlamaStack  # noqa: E402
+from llama_stack.core.stack import LlamaStack  # noqa: E402
 
 from .pyopenapi.options import Options  # noqa: E402
 from .pyopenapi.specification import Info, Server  # noqa: E402
diff --git a/docs/openapi_generator/pyopenapi/utility.py b/docs/openapi_generator/pyopenapi/utility.py
index 57f92403d..d302b114f 100644
--- a/docs/openapi_generator/pyopenapi/utility.py
+++ b/docs/openapi_generator/pyopenapi/utility.py
@@ -12,7 +12,7 @@ from typing import TextIO
 from typing import Any, List, Optional, Union, get_type_hints, get_origin, get_args
 
 from llama_stack.strong_typing.schema import object_to_json, StrictJsonType
-from llama_stack.distribution.resolver import api_protocol_map
+from llama_stack.core.resolver import api_protocol_map
 
 from .generator import Generator
 from .options import Options
diff --git a/docs/original_rfc.md b/docs/original_rfc.md
index dc95a04cb..e9191cb6d 100644
--- a/docs/original_rfc.md
+++ b/docs/original_rfc.md
@@ -73,7 +73,7 @@ The API is defined in the [YAML](_static/llama-stack-spec.yaml) and [HTML](_stat
 
 To prove out the API, we implemented a handful of use cases to make things more concrete. The [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps) repository contains [6 different examples](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) ranging from very basic to a multi turn agent.
 
-There is also a sample inference endpoint implementation in the [llama-stack](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/distribution/server/server.py) repository.
+There is also a sample inference endpoint implementation in the [llama-stack](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/core/server/server.py) repository.
 
 ## Limitations
diff --git a/docs/quick_start.ipynb b/docs/quick_start.ipynb
index c3049a70f..e99276427 100644
--- a/docs/quick_start.ipynb
+++ b/docs/quick_start.ipynb
@@ -187,7 +187,7 @@
     "# use this helper if needed to kill the server \n",
     "def kill_llama_stack_server():\n",
     "    # Kill any existing llama stack server processes\n",
-    "    os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
+    "    os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
    ]
   },
   {
diff --git a/docs/source/apis/external.md b/docs/source/apis/external.md
index 025267c33..cc13deb9b 100644
--- a/docs/source/apis/external.md
+++ b/docs/source/apis/external.md
@@ -355,7 +355,7 @@ server:
 
 8. Run the server:
 
 ```bash
-python -m llama_stack.distribution.server.server --yaml-config ~/.llama/run-byoa.yaml
+python -m llama_stack.core.server.server --yaml-config ~/.llama/run-byoa.yaml
 ```
 
 9. Test the API:
diff --git a/docs/source/building_applications/playground/index.md b/docs/source/building_applications/playground/index.md
index 85895f6a5..399b41203 100644
--- a/docs/source/building_applications/playground/index.md
+++ b/docs/source/building_applications/playground/index.md
@@ -103,5 +103,5 @@ llama stack run together
 ```
 
 2. Start Streamlit UI
 ```bash
-uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py
+uv run --with ".[ui]" streamlit run llama_stack/core/ui/app.py
 ```
diff --git a/docs/source/deploying/kubernetes_deployment.md b/docs/source/deploying/kubernetes_deployment.md
index 7e9791d8d..4bdd87b24 100644
--- a/docs/source/deploying/kubernetes_deployment.md
+++ b/docs/source/deploying/kubernetes_deployment.md
@@ -174,7 +174,7 @@ spec:
       - name: llama-stack
         image: localhost/llama-stack-run-k8s:latest
         imagePullPolicy: IfNotPresent
-        command: ["python", "-m", "llama_stack.distribution.server.server", "--config", "/app/config.yaml"]
+        command: ["python", "-m", "llama_stack.core.server.server", "--config", "/app/config.yaml"]
         ports:
           - containerPort: 5000
         volumeMounts:
diff --git a/docs/source/distributions/building_distro.md b/docs/source/distributions/building_distro.md
index cd2c6b6a8..fecfbac38 100644
--- a/docs/source/distributions/building_distro.md
+++ b/docs/source/distributions/building_distro.md
@@ -59,7 +59,7 @@ Build a Llama stack container
 
 options:
   -h, --help            show this help message and exit
-  --config CONFIG       Path to a config file to use for the build. You can find example configs in llama_stack/distributions/**/build.yaml. If this argument is not provided, you will
+  --config CONFIG       Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will
                         be prompted to enter information interactively (default: None)
   --template TEMPLATE   Name of the example template config to use for build. You may use `llama stack build --list-templates` to check out the available templates (default: None)
   --list-templates      Show the available templates for building a Llama Stack distribution (default: False)
diff --git a/docs/source/distributions/importing_as_library.md b/docs/source/distributions/importing_as_library.md
index 3427356a7..0f2acc974 100644
--- a/docs/source/distributions/importing_as_library.md
+++ b/docs/source/distributions/importing_as_library.md
@@ -10,7 +10,7 @@ llama stack build --template starter --image-type venv
 ```
 
 ```python
-from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
+from llama_stack.core.library_client import LlamaStackAsLibraryClient
 
 client = LlamaStackAsLibraryClient(
     "starter",
diff --git a/docs/source/distributions/k8s/stack-k8s.yaml.template b/docs/source/distributions/k8s/stack-k8s.yaml.template
index 912445f68..ad5d2c716 100644
--- a/docs/source/distributions/k8s/stack-k8s.yaml.template
+++ b/docs/source/distributions/k8s/stack-k8s.yaml.template
@@ -52,7 +52,7 @@ spec:
             value: "${SAFETY_MODEL}"
           - name: TAVILY_SEARCH_API_KEY
             value: "${TAVILY_SEARCH_API_KEY}"
-        command: ["python", "-m", "llama_stack.distribution.server.server", "--config", "/etc/config/stack_run_config.yaml", "--port", "8321"]
+        command: ["python", "-m", "llama_stack.core.server.server", "--config", "/etc/config/stack_run_config.yaml", "--port", "8321"]
         ports:
           - containerPort: 8321
         volumeMounts:
diff --git a/docs/zero_to_hero_guide/06_Safety101.ipynb b/docs/zero_to_hero_guide/06_Safety101.ipynb
index 5d7763924..91b809621 100644
--- a/docs/zero_to_hero_guide/06_Safety101.ipynb
+++ b/docs/zero_to_hero_guide/06_Safety101.ipynb
@@ -66,7 +66,7 @@
     "from pydantic import BaseModel\n",
     "from termcolor import cprint\n",
     "\n",
-    "from llama_stack.distribution.datatypes import RemoteProviderConfig\n",
+    "from llama_stack.core.datatypes import RemoteProviderConfig\n",
     "from llama_stack.apis.safety import Safety\n",
     "from llama_stack_client import LlamaStackClient\n",
     "\n",
diff --git a/llama_stack/__init__.py b/llama_stack/__init__.py
index 98f2441c0..1c2ce7123 100644
--- a/llama_stack/__init__.py
+++ b/llama_stack/__init__.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.distribution.library_client import (  # noqa: F401
+from llama_stack.core.library_client import (  # noqa: F401
     AsyncLlamaStackAsLibraryClient,
     LlamaStackAsLibraryClient,
 )
diff --git a/llama_stack/cli/download.py b/llama_stack/cli/download.py
index 30b6e11e9..70cb9f4db 100644
--- a/llama_stack/cli/download.py
+++ b/llama_stack/cli/download.py
@@ -323,7 +323,7 @@ def _hf_download(
     from huggingface_hub import snapshot_download
     from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
 
-    from llama_stack.distribution.utils.model_utils import model_local_dir
+    from llama_stack.core.utils.model_utils import model_local_dir
 
     repo_id = model.huggingface_repo
     if repo_id is None:
@@ -361,7 +361,7 @@ def _meta_download(
     info: "LlamaDownloadInfo",
     max_concurrent_downloads: int,
 ):
-    from llama_stack.distribution.utils.model_utils import model_local_dir
+    from llama_stack.core.utils.model_utils import model_local_dir
 
     output_dir = Path(model_local_dir(model.descriptor()))
     os.makedirs(output_dir, exist_ok=True)
@@ -403,7 +403,7 @@ class Manifest(BaseModel):
 
 
 def _download_from_manifest(manifest_file: str, max_concurrent_downloads: int):
-    from llama_stack.distribution.utils.model_utils import model_local_dir
+    from llama_stack.core.utils.model_utils import model_local_dir
 
     with open(manifest_file) as f:
         d = json.load(f)
diff --git a/llama_stack/cli/model/list.py b/llama_stack/cli/model/list.py
index cf84dd526..f46a8c88d 100644
--- a/llama_stack/cli/model/list.py
+++ b/llama_stack/cli/model/list.py
@@ -11,7 +11,7 @@ from pathlib import Path
 
 from llama_stack.cli.subcommand import Subcommand
 from llama_stack.cli.table import print_table
-from llama_stack.distribution.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
+from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
 from llama_stack.models.llama.sku_list import all_registered_models
 
 
diff --git a/llama_stack/cli/model/remove.py b/llama_stack/cli/model/remove.py
index 98710d82b..138e06a2a 100644
--- a/llama_stack/cli/model/remove.py
+++ b/llama_stack/cli/model/remove.py
@@ -9,7 +9,7 @@ import os
 import shutil
 
 from llama_stack.cli.subcommand import Subcommand
-from llama_stack.distribution.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
+from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR
 from llama_stack.models.llama.sku_list import resolve_model
 
 
diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py
index 3718e456c..ab3662f01 100644
--- a/llama_stack/cli/stack/_build.py
+++ b/llama_stack/cli/stack/_build.py
@@ -23,27 +23,27 @@ from termcolor import colored, cprint
 
 from llama_stack.cli.stack.utils import ImageType
 from llama_stack.cli.table import print_table
-from llama_stack.distribution.build import (
+from llama_stack.core.build import (
     SERVER_DEPENDENCIES,
     build_image,
     get_provider_dependencies,
 )
-from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.configure import parse_and_maybe_upgrade_config
+from llama_stack.core.datatypes import (
     BuildConfig,
     BuildProvider,
     DistributionSpec,
     Provider,
     StackRunConfig,
 )
-from llama_stack.distribution.distribution import get_provider_registry
-from llama_stack.distribution.external import load_external_apis
-from llama_stack.distribution.resolver import InvalidProviderError
-from llama_stack.distribution.stack import replace_env_vars
-from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
-from llama_stack.distribution.utils.dynamic import instantiate_class_type
-from llama_stack.distribution.utils.exec import formulate_run_args, run_command
-from llama_stack.distribution.utils.image_types import LlamaStackImageType
+from llama_stack.core.distribution import get_provider_registry
+from llama_stack.core.external import load_external_apis
+from llama_stack.core.resolver import InvalidProviderError
+from llama_stack.core.stack import replace_env_vars
+from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR, EXTERNAL_PROVIDERS_DIR
+from llama_stack.core.utils.dynamic import instantiate_class_type
+from llama_stack.core.utils.exec import formulate_run_args, run_command
+from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.providers.datatypes import Api
 
 TEMPLATES_PATH = Path(__file__).parent.parent.parent / "templates"
diff --git a/llama_stack/cli/stack/build.py b/llama_stack/cli/stack/build.py
index 2c402beeb..279806bdf 100644
--- a/llama_stack/cli/stack/build.py
+++ b/llama_stack/cli/stack/build.py
@@ -27,7 +27,7 @@ class StackBuild(Subcommand):
             "--config",
             type=str,
             default=None,
-            help="Path to a config file to use for the build. You can find example configs in llama_stack/distributions/**/build.yaml. If this argument is not provided, you will be prompted to enter information interactively",
+            help="Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will be prompted to enter information interactively",
         )
 
         self.parser.add_argument(
diff --git a/llama_stack/cli/stack/list_apis.py b/llama_stack/cli/stack/list_apis.py
index cac803f92..6eed5ca51 100644
--- a/llama_stack/cli/stack/list_apis.py
+++ b/llama_stack/cli/stack/list_apis.py
@@ -26,7 +26,7 @@ class StackListApis(Subcommand):
     def _run_apis_list_cmd(self, args: argparse.Namespace) -> None:
         from llama_stack.cli.table import print_table
-        from llama_stack.distribution.distribution import stack_apis
+        from llama_stack.core.distribution import stack_apis
 
         # eventually, this should query a registry at llama.meta.com/llamastack/distributions
         headers = [
diff --git a/llama_stack/cli/stack/list_providers.py b/llama_stack/cli/stack/list_providers.py
index deebd937b..b78b3c31f 100644
--- a/llama_stack/cli/stack/list_providers.py
+++ b/llama_stack/cli/stack/list_providers.py
@@ -23,7 +23,7 @@ class StackListProviders(Subcommand):
     @property
     def providable_apis(self):
-        from llama_stack.distribution.distribution import providable_apis
+        from llama_stack.core.distribution import providable_apis
 
         return [api.value for api in providable_apis()]
 
@@ -38,7 +38,7 @@ class StackListProviders(Subcommand):
     def _run_providers_list_cmd(self, args: argparse.Namespace) -> None:
         from llama_stack.cli.table import print_table
-        from llama_stack.distribution.distribution import Api, get_provider_registry
+        from llama_stack.core.distribution import Api, get_provider_registry
 
         all_providers = get_provider_registry()
         if args.api:
diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py
index 3cb2e213c..9a366a8b2 100644
--- a/llama_stack/cli/stack/run.py
+++ b/llama_stack/cli/stack/run.py
@@ -85,8 +85,8 @@ class StackRun(Subcommand):
     def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
         import yaml
 
-        from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
-        from llama_stack.distribution.utils.exec import formulate_run_args, run_command
+        from llama_stack.core.configure import parse_and_maybe_upgrade_config
+        from llama_stack.core.utils.exec import formulate_run_args, run_command
 
         if args.enable_ui:
             self._start_ui_development_server(args.port)
@@ -94,7 +94,7 @@ class StackRun(Subcommand):
 
         if args.config:
             try:
-                from llama_stack.distribution.utils.config_resolution import Mode, resolve_config_or_template
+                from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_template
 
                 config_file = resolve_config_or_template(args.config, Mode.RUN)
             except ValueError as e:
@@ -127,7 +127,7 @@ class StackRun(Subcommand):
         # using the current environment packages.
         if not image_type and not image_name:
             logger.info("No image type or image name provided. Assuming environment packages.")
-            from llama_stack.distribution.server.server import main as server_main
+            from llama_stack.core.server.server import main as server_main
 
             # Build the server args from the current args passed to the CLI
             server_args = argparse.Namespace()
diff --git a/llama_stack/cli/verify_download.py b/llama_stack/cli/verify_download.py
index 3a1af3cbc..b7f4cfdb5 100644
--- a/llama_stack/cli/verify_download.py
+++ b/llama_stack/cli/verify_download.py
@@ -107,7 +107,7 @@ def verify_files(model_dir: Path, checksums: dict[str, str], console: Console) -
 
 
 def run_verify_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser):
-    from llama_stack.distribution.utils.model_utils import model_local_dir
+    from llama_stack.core.utils.model_utils import model_local_dir
 
     console = Console()
     model_dir = Path(model_local_dir(args.model_id))
diff --git a/llama_stack/distribution/__init__.py b/llama_stack/core/__init__.py
similarity index 100%
rename from llama_stack/distribution/__init__.py
rename to llama_stack/core/__init__.py
diff --git a/llama_stack/distribution/access_control/__init__.py b/llama_stack/core/access_control/__init__.py
similarity index 100%
rename from llama_stack/distribution/access_control/__init__.py
rename to llama_stack/core/access_control/__init__.py
diff --git a/llama_stack/distribution/access_control/access_control.py b/llama_stack/core/access_control/access_control.py
similarity index 98%
rename from llama_stack/distribution/access_control/access_control.py
rename to llama_stack/core/access_control/access_control.py
index 64c0122c1..bde5cfd76 100644
--- a/llama_stack/distribution/access_control/access_control.py
+++ b/llama_stack/core/access_control/access_control.py
@@ -6,7 +6,7 @@
 
 from typing import Any
 
-from llama_stack.distribution.datatypes import User
+from llama_stack.core.datatypes import User
 
 from .conditions import (
     Condition,
diff --git a/llama_stack/distribution/access_control/conditions.py b/llama_stack/core/access_control/conditions.py
similarity index 100%
rename from llama_stack/distribution/access_control/conditions.py
rename to llama_stack/core/access_control/conditions.py
diff --git a/llama_stack/distribution/access_control/datatypes.py b/llama_stack/core/access_control/datatypes.py
similarity index 100%
rename from llama_stack/distribution/access_control/datatypes.py
rename to llama_stack/core/access_control/datatypes.py
diff --git a/llama_stack/distribution/build.py b/llama_stack/core/build.py
similarity index 93%
rename from llama_stack/distribution/build.py
rename to llama_stack/core/build.py
index b4eaac1c7..a6544435f 100644
--- a/llama_stack/distribution/build.py
+++ b/llama_stack/core/build.py
@@ -12,11 +12,11 @@ from pathlib import Path
 from pydantic import BaseModel
 from termcolor import cprint
 
-from llama_stack.distribution.datatypes import BuildConfig
-from llama_stack.distribution.distribution import get_provider_registry
-from llama_stack.distribution.external import load_external_apis
-from llama_stack.distribution.utils.exec import run_command
-from llama_stack.distribution.utils.image_types import LlamaStackImageType
+from llama_stack.core.datatypes import BuildConfig
+from llama_stack.core.distribution import get_provider_registry
+from llama_stack.core.external import load_external_apis
+from llama_stack.core.utils.exec import run_command
+from llama_stack.core.utils.image_types import LlamaStackImageType
 from llama_stack.providers.datatypes import Api
 from llama_stack.templates.template import DistributionTemplate
 
@@ -122,7 +122,7 @@ def build_image(
             normal_deps.extend(api_spec.pip_packages)
 
     if build_config.image_type == LlamaStackImageType.CONTAINER.value:
-        script = str(importlib.resources.files("llama_stack") / "distribution/build_container.sh")
+        script = str(importlib.resources.files("llama_stack") / "core/build_container.sh")
         args = [
             script,
             "--template-or-config",
@@ -139,7 +139,7 @@ def build_image(
         if run_config is not None:
             args.extend(["--run-config", run_config])
     elif build_config.image_type == LlamaStackImageType.CONDA.value:
-        script = str(importlib.resources.files("llama_stack") / "distribution/build_conda_env.sh")
+        script = str(importlib.resources.files("llama_stack") / "core/build_conda_env.sh")
         args = [
             script,
             "--env-name",
@@ -150,7 +150,7 @@ def build_image(
             " ".join(normal_deps),
         ]
     elif build_config.image_type == LlamaStackImageType.VENV.value:
-        script = str(importlib.resources.files("llama_stack") / "distribution/build_venv.sh")
+        script = str(importlib.resources.files("llama_stack") / "core/build_venv.sh")
         args = [
             script,
             "--env-name",
diff --git a/llama_stack/distribution/build_conda_env.sh b/llama_stack/core/build_conda_env.sh
similarity index 100%
rename from llama_stack/distribution/build_conda_env.sh
rename to llama_stack/core/build_conda_env.sh
diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/core/build_container.sh
similarity index 98%
rename from llama_stack/distribution/build_container.sh
rename to llama_stack/core/build_container.sh
index 50d8e4925..1376aaa28 100755
--- a/llama_stack/distribution/build_container.sh
+++ b/llama_stack/core/build_container.sh
@@ -327,12 +327,12 @@ EOF
 # If a run config is provided, we use the --config flag
 if [[ -n "$run_config" ]]; then
   add_to_container << EOF
-ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--config", "$RUN_CONFIG_PATH"]
+ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "--config", "$RUN_CONFIG_PATH"]
 EOF
 # If a template is provided (not a yaml file), we use the --template flag
 elif [[ "$template_or_config" != *.yaml ]]; then
   add_to_container << EOF
-ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--template", "$template_or_config"]
+ENTRYPOINT ["python", "-m", "llama_stack.core.server.server", "--template", "$template_or_config"]
 EOF
 fi
 
diff --git a/llama_stack/distribution/build_venv.sh b/llama_stack/core/build_venv.sh
similarity index 100%
rename from llama_stack/distribution/build_venv.sh
rename to llama_stack/core/build_venv.sh
diff --git a/llama_stack/distribution/client.py b/llama_stack/core/client.py
similarity index 100%
rename from llama_stack/distribution/client.py
rename to llama_stack/core/client.py
diff --git a/llama_stack/distribution/common.sh b/llama_stack/core/common.sh
similarity index 100%
rename from llama_stack/distribution/common.sh
rename to llama_stack/core/common.sh
diff --git a/llama_stack/distribution/configure.py b/llama_stack/core/configure.py
similarity index 93%
rename from llama_stack/distribution/configure.py
rename to llama_stack/core/configure.py
index 20be040a0..9e18b438c 100644
--- a/llama_stack/distribution/configure.py
+++ b/llama_stack/core/configure.py
@@ -7,20 +7,20 @@ import logging
 import textwrap
 from typing import Any
 
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
     LLAMA_STACK_RUN_CONFIG_VERSION,
     DistributionSpec,
     Provider,
     StackRunConfig,
 )
-from llama_stack.distribution.distribution import (
+from llama_stack.core.distribution import (
     builtin_automatically_routed_apis,
     get_provider_registry,
 )
-from llama_stack.distribution.stack import cast_image_name_to_string, replace_env_vars
-from llama_stack.distribution.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
-from llama_stack.distribution.utils.dynamic import instantiate_class_type
-from llama_stack.distribution.utils.prompt_for_config import prompt_for_config
+from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
+from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
+from llama_stack.core.utils.dynamic import instantiate_class_type
+from llama_stack.core.utils.prompt_for_config import prompt_for_config
 from llama_stack.providers.datatypes import Api, ProviderSpec
 
 logger = logging.getLogger(__name__)
diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/core/datatypes.py
similarity index 99%
rename from llama_stack/distribution/datatypes.py
rename to llama_stack/core/datatypes.py
index 60c317337..c66d510cc 100644
--- a/llama_stack/distribution/datatypes.py
+++ b/llama_stack/core/datatypes.py
@@ -24,7 +24,7 @@ from llama_stack.apis.shields import Shield, ShieldInput
 from llama_stack.apis.tools import Tool, ToolGroup, ToolGroupInput, ToolRuntime
 from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
 from llama_stack.apis.vector_io import VectorIO
-from llama_stack.distribution.access_control.datatypes import AccessRule
+from llama_stack.core.access_control.datatypes import AccessRule
 from llama_stack.providers.datatypes import Api, ProviderSpec
 from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
 from llama_stack.providers.utils.sqlstore.sqlstore import SqlStoreConfig
diff --git a/llama_stack/distribution/distribution.py b/llama_stack/core/distribution.py
similarity index 98%
rename from llama_stack/distribution/distribution.py
rename to llama_stack/core/distribution.py
index 6e7297e32..977eb5393 100644
--- a/llama_stack/distribution/distribution.py
+++ b/llama_stack/core/distribution.py
@@ -12,8 +12,8 @@ from typing import Any
 import yaml
 from pydantic import BaseModel
 
-from llama_stack.distribution.datatypes import BuildConfig, DistributionSpec
-from llama_stack.distribution.external import load_external_apis
+from llama_stack.core.datatypes import BuildConfig, DistributionSpec
+from llama_stack.core.external import load_external_apis
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (
     AdapterSpec,
diff --git a/llama_stack/distribution/external.py b/llama_stack/core/external.py
similarity index 96%
rename from llama_stack/distribution/external.py
rename to llama_stack/core/external.py
index 0a7da16b1..12e9824ad 100644
--- a/llama_stack/distribution/external.py
+++ b/llama_stack/core/external.py
@@ -8,7 +8,7 @@
 import yaml
 
 from llama_stack.apis.datatypes import Api, ExternalApiSpec
-from llama_stack.distribution.datatypes import BuildConfig, StackRunConfig
+from llama_stack.core.datatypes import BuildConfig, StackRunConfig
 from llama_stack.log import get_logger
 
 logger = get_logger(name=__name__, category="core")
diff --git a/llama_stack/distribution/inspect.py b/llama_stack/core/inspect.py
similarity index 93%
rename from llama_stack/distribution/inspect.py
rename to llama_stack/core/inspect.py
index f62de4f6b..37dab4199 100644
--- a/llama_stack/distribution/inspect.py
+++ b/llama_stack/core/inspect.py
@@ -15,9 +15,9 @@ from llama_stack.apis.inspect import (
     RouteInfo,
     VersionInfo,
 )
-from llama_stack.distribution.datatypes import StackRunConfig
-from llama_stack.distribution.external import load_external_apis
-from llama_stack.distribution.server.routes import get_all_api_routes
+from llama_stack.core.datatypes import StackRunConfig
+from llama_stack.core.external import load_external_apis
+from llama_stack.core.server.routes import get_all_api_routes
 from llama_stack.providers.datatypes import HealthStatus
 
 
diff --git a/llama_stack/distribution/library_client.py b/llama_stack/core/library_client.py
similarity index 96%
rename from llama_stack/distribution/library_client.py
rename to llama_stack/core/library_client.py
index 43ee7f417..d5020d16b 100644
--- a/llama_stack/distribution/library_client.py
+++ b/llama_stack/core/library_client.py
@@ -31,23 +31,23 @@ from pydantic import BaseModel, TypeAdapter
 from rich.console import Console
 from termcolor import cprint
 
-from llama_stack.distribution.build import print_pip_install_help
-from llama_stack.distribution.configure import parse_and_maybe_upgrade_config
-from llama_stack.distribution.datatypes import Api, BuildConfig, BuildProvider, DistributionSpec
-from llama_stack.distribution.request_headers import (
+from llama_stack.core.build import print_pip_install_help
+from llama_stack.core.configure import parse_and_maybe_upgrade_config
+from llama_stack.core.datatypes import Api, BuildConfig, BuildProvider, DistributionSpec
+from llama_stack.core.request_headers import (
     PROVIDER_DATA_VAR,
     request_provider_data_context,
 )
-from llama_stack.distribution.resolver import ProviderRegistry
-from llama_stack.distribution.server.routes import RouteImpls, find_matching_route, initialize_route_impls
-from llama_stack.distribution.stack import (
+from llama_stack.core.resolver import ProviderRegistry
+from llama_stack.core.server.routes import RouteImpls, find_matching_route, initialize_route_impls
+from llama_stack.core.stack import (
     construct_stack,
     get_stack_run_config_from_template,
     replace_env_vars,
 )
-from llama_stack.distribution.utils.config import redact_sensitive_fields
-from llama_stack.distribution.utils.context import preserve_contexts_async_generator
-from llama_stack.distribution.utils.exec import in_notebook
+from llama_stack.core.utils.config import redact_sensitive_fields
+from llama_stack.core.utils.context import preserve_contexts_async_generator
+from llama_stack.core.utils.exec import in_notebook
 from llama_stack.providers.utils.telemetry.tracing import (
     CURRENT_TRACE_CONTEXT,
     end_trace,
diff --git a/llama_stack/distribution/providers.py b/llama_stack/core/providers.py
similarity index 100%
rename from llama_stack/distribution/providers.py
rename to llama_stack/core/providers.py
diff --git a/llama_stack/distribution/request_headers.py b/llama_stack/core/request_headers.py
similarity index 98%
rename from llama_stack/distribution/request_headers.py
rename to llama_stack/core/request_headers.py
index 509c2be44..35ac72775 100644
--- a/llama_stack/distribution/request_headers.py
+++ b/llama_stack/core/request_headers.py
@@ -10,7 +10,7 @@ import logging
 from contextlib import AbstractContextManager
 from typing import Any
 
-from llama_stack.distribution.datatypes import User
+from llama_stack.core.datatypes import User
 
 from .utils.dynamic import instantiate_class_type
 
diff --git a/llama_stack/distribution/resolver.py b/llama_stack/core/resolver.py
similarity index 97%
rename from llama_stack/distribution/resolver.py
rename to llama_stack/core/resolver.py
index db6856ed2..70c78fb01 100644
--- a/llama_stack/distribution/resolver.py
+++ b/llama_stack/core/resolver.py
@@ -27,18 +27,18 @@ from llama_stack.apis.telemetry import Telemetry
 from llama_stack.apis.tools import ToolGroups, ToolRuntime
 from llama_stack.apis.vector_dbs import VectorDBs
 from llama_stack.apis.vector_io import VectorIO
-from llama_stack.distribution.client import get_client_impl
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.client import get_client_impl
+from llama_stack.core.datatypes import (
     AccessRule,
     AutoRoutedProviderSpec,
     Provider,
     RoutingTableProviderSpec,
     StackRunConfig,
 )
-from llama_stack.distribution.distribution import builtin_automatically_routed_apis
-from llama_stack.distribution.external import load_external_apis
-from llama_stack.distribution.store import DistributionRegistry
-from llama_stack.distribution.utils.dynamic import instantiate_class_type
+from llama_stack.core.distribution import builtin_automatically_routed_apis
+from llama_stack.core.external import load_external_apis
+from llama_stack.core.store import DistributionRegistry
+from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (
     Api,
@@ -183,7 +183,7 @@ def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str,
             spec=RoutingTableProviderSpec(
                 api=info.routing_table_api,
                 router_api=info.router_api,
-                module="llama_stack.distribution.routers",
+                module="llama_stack.core.routers",
                 api_dependencies=[],
                 deps__=[f"inner-{info.router_api.value}"],
             ),
@@ -197,7 +197,7 @@ def specs_for_autorouted_apis(apis_to_serve: list[str] | set[str]) -> dict[str,
             config={},
             spec=AutoRoutedProviderSpec(
                 api=info.router_api,
-                module="llama_stack.distribution.routers",
+                module="llama_stack.core.routers",
                 routing_table_api=info.routing_table_api,
                 api_dependencies=[info.routing_table_api],
                 # Add telemetry as an optional dependency to all auto-routed providers
diff --git a/llama_stack/distribution/routers/__init__.py b/llama_stack/core/routers/__init__.py
similarity index 94%
rename from llama_stack/distribution/routers/__init__.py
rename to llama_stack/core/routers/__init__.py
index 8671a62e1..1faace34a 100644
--- a/llama_stack/distribution/routers/__init__.py
+++ b/llama_stack/core/routers/__init__.py
@@ -6,9 +6,9 @@
 
 from typing import Any
 
-from llama_stack.distribution.datatypes import AccessRule, RoutedProtocol
-from llama_stack.distribution.stack import StackRunConfig
-from llama_stack.distribution.store import DistributionRegistry
+from llama_stack.core.datatypes import AccessRule, RoutedProtocol
+from llama_stack.core.stack import StackRunConfig
+from llama_stack.core.store import DistributionRegistry
 from llama_stack.providers.datatypes import Api, RoutingTable
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
diff --git a/llama_stack/distribution/routers/datasets.py b/llama_stack/core/routers/datasets.py
similarity index 100%
rename from llama_stack/distribution/routers/datasets.py
rename to llama_stack/core/routers/datasets.py
diff --git a/llama_stack/distribution/routers/eval_scoring.py b/llama_stack/core/routers/eval_scoring.py
similarity index 100%
rename from llama_stack/distribution/routers/eval_scoring.py
rename to llama_stack/core/routers/eval_scoring.py
diff --git a/llama_stack/distribution/routers/inference.py b/llama_stack/core/routers/inference.py
similarity index 100%
rename from llama_stack/distribution/routers/inference.py
rename to llama_stack/core/routers/inference.py
diff --git a/llama_stack/distribution/routers/safety.py b/llama_stack/core/routers/safety.py
similarity index 100%
rename from llama_stack/distribution/routers/safety.py
rename to llama_stack/core/routers/safety.py
diff --git a/llama_stack/distribution/routers/tool_runtime.py b/llama_stack/core/routers/tool_runtime.py
similarity index 100%
rename from llama_stack/distribution/routers/tool_runtime.py
rename to llama_stack/core/routers/tool_runtime.py
diff --git a/llama_stack/distribution/routers/vector_io.py b/llama_stack/core/routers/vector_io.py
similarity index 100%
rename from llama_stack/distribution/routers/vector_io.py
rename to llama_stack/core/routers/vector_io.py
diff --git a/llama_stack/distribution/routing_tables/__init__.py b/llama_stack/core/routing_tables/__init__.py
similarity index 100%
rename from llama_stack/distribution/routing_tables/__init__.py
rename to llama_stack/core/routing_tables/__init__.py
diff --git a/llama_stack/distribution/routing_tables/benchmarks.py b/llama_stack/core/routing_tables/benchmarks.py
similarity index 97%
rename from llama_stack/distribution/routing_tables/benchmarks.py
rename to llama_stack/core/routing_tables/benchmarks.py
index 815483494..74bee8040 100644
--- a/llama_stack/distribution/routing_tables/benchmarks.py
+++ b/llama_stack/core/routing_tables/benchmarks.py
@@ -7,7 +7,7 @@
 from typing import Any
 
 from llama_stack.apis.benchmarks import Benchmark, Benchmarks, ListBenchmarksResponse
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.datatypes import (
     BenchmarkWithOwner,
 )
 from llama_stack.log import get_logger
diff --git a/llama_stack/distribution/routing_tables/common.py b/llama_stack/core/routing_tables/common.py
similarity index 96%
rename from llama_stack/distribution/routing_tables/common.py
rename to llama_stack/core/routing_tables/common.py
index a759ea8dd..4be3de42d 100644
--- a/llama_stack/distribution/routing_tables/common.py
+++ b/llama_stack/core/routing_tables/common.py
@@ -10,16 +10,16 @@ from llama_stack.apis.common.errors import ModelNotFoundError
 from llama_stack.apis.models import Model
 from llama_stack.apis.resource import ResourceType
 from llama_stack.apis.scoring_functions import ScoringFn
-from llama_stack.distribution.access_control.access_control import AccessDeniedError, is_action_allowed
-from llama_stack.distribution.access_control.datatypes import Action
-from llama_stack.distribution.datatypes import (
+from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed
+from llama_stack.core.access_control.datatypes import Action
+from llama_stack.core.datatypes import (
     AccessRule,
     RoutableObject,
     RoutableObjectWithProvider,
     RoutedProtocol,
 )
-from llama_stack.distribution.request_headers import get_authenticated_user
-from llama_stack.distribution.store
import DistributionRegistry +from llama_stack.core.request_headers import get_authenticated_user +from llama_stack.core.store import DistributionRegistry from llama_stack.log import get_logger from llama_stack.providers.datatypes import Api, RoutingTable diff --git a/llama_stack/distribution/routing_tables/datasets.py b/llama_stack/core/routing_tables/datasets.py similarity index 98% rename from llama_stack/distribution/routing_tables/datasets.py rename to llama_stack/core/routing_tables/datasets.py index 89da7d081..fc6a75df4 100644 --- a/llama_stack/distribution/routing_tables/datasets.py +++ b/llama_stack/core/routing_tables/datasets.py @@ -19,7 +19,7 @@ from llama_stack.apis.datasets import ( URIDataSource, ) from llama_stack.apis.resource import ResourceType -from llama_stack.distribution.datatypes import ( +from llama_stack.core.datatypes import ( DatasetWithOwner, ) from llama_stack.log import get_logger diff --git a/llama_stack/distribution/routing_tables/models.py b/llama_stack/core/routing_tables/models.py similarity index 99% rename from llama_stack/distribution/routing_tables/models.py rename to llama_stack/core/routing_tables/models.py index ae1fe2882..c76619271 100644 --- a/llama_stack/distribution/routing_tables/models.py +++ b/llama_stack/core/routing_tables/models.py @@ -9,7 +9,7 @@ from typing import Any from llama_stack.apis.common.errors import ModelNotFoundError from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType, OpenAIListModelsResponse, OpenAIModel -from llama_stack.distribution.datatypes import ( +from llama_stack.core.datatypes import ( ModelWithOwner, RegistryEntrySource, ) diff --git a/llama_stack/distribution/routing_tables/scoring_functions.py b/llama_stack/core/routing_tables/scoring_functions.py similarity index 97% rename from llama_stack/distribution/routing_tables/scoring_functions.py rename to llama_stack/core/routing_tables/scoring_functions.py index 742cc3ca6..5874ba941 100644 --- a/llama_stack/distribution/routing_tables/scoring_functions.py +++ b/llama_stack/core/routing_tables/scoring_functions.py @@ -12,7 +12,7 @@ from llama_stack.apis.scoring_functions import ( ScoringFnParams, ScoringFunctions, ) -from llama_stack.distribution.datatypes import ( +from llama_stack.core.datatypes import ( ScoringFnWithOwner, ) from llama_stack.log import get_logger diff --git a/llama_stack/distribution/routing_tables/shields.py b/llama_stack/core/routing_tables/shields.py similarity index 97% rename from llama_stack/distribution/routing_tables/shields.py rename to llama_stack/core/routing_tables/shields.py index 5215981b9..0c592601a 100644 --- a/llama_stack/distribution/routing_tables/shields.py +++ b/llama_stack/core/routing_tables/shields.py @@ -8,7 +8,7 @@ from typing import Any from llama_stack.apis.resource import ResourceType from llama_stack.apis.shields import ListShieldsResponse, Shield, Shields -from llama_stack.distribution.datatypes import ( +from llama_stack.core.datatypes import ( ShieldWithOwner, ) from llama_stack.log import get_logger diff --git a/llama_stack/distribution/routing_tables/toolgroups.py b/llama_stack/core/routing_tables/toolgroups.py similarity index 98% rename from llama_stack/distribution/routing_tables/toolgroups.py rename to llama_stack/core/routing_tables/toolgroups.py index 22c4e109a..a6d15796a 100644 --- a/llama_stack/distribution/routing_tables/toolgroups.py +++ b/llama_stack/core/routing_tables/toolgroups.py @@ -8,7 +8,7 @@ from typing import Any from llama_stack.apis.common.content_types import 
URL from llama_stack.apis.tools import ListToolGroupsResponse, ListToolsResponse, Tool, ToolGroup, ToolGroups -from llama_stack.distribution.datatypes import ToolGroupWithOwner +from llama_stack.core.datatypes import ToolGroupWithOwner from llama_stack.log import get_logger from .common import CommonRoutingTableImpl diff --git a/llama_stack/distribution/routing_tables/vector_dbs.py b/llama_stack/core/routing_tables/vector_dbs.py similarity index 99% rename from llama_stack/distribution/routing_tables/vector_dbs.py rename to llama_stack/core/routing_tables/vector_dbs.py index aa61ea2fd..5ea4e9854 100644 --- a/llama_stack/distribution/routing_tables/vector_dbs.py +++ b/llama_stack/core/routing_tables/vector_dbs.py @@ -23,7 +23,7 @@ from llama_stack.apis.vector_io.vector_io import ( VectorStoreObject, VectorStoreSearchResponsePage, ) -from llama_stack.distribution.datatypes import ( +from llama_stack.core.datatypes import ( VectorDBWithOwner, ) from llama_stack.log import get_logger diff --git a/llama_stack/distribution/server/__init__.py b/llama_stack/core/server/__init__.py similarity index 100% rename from llama_stack/distribution/server/__init__.py rename to llama_stack/core/server/__init__.py diff --git a/llama_stack/distribution/server/auth.py b/llama_stack/core/server/auth.py similarity index 95% rename from llama_stack/distribution/server/auth.py rename to llama_stack/core/server/auth.py index 87c1a2ab6..e4fb4ff2b 100644 --- a/llama_stack/distribution/server/auth.py +++ b/llama_stack/core/server/auth.py @@ -9,10 +9,10 @@ import json import httpx from aiohttp import hdrs -from llama_stack.distribution.datatypes import AuthenticationConfig, User -from llama_stack.distribution.request_headers import user_from_scope -from llama_stack.distribution.server.auth_providers import create_auth_provider -from llama_stack.distribution.server.routes import find_matching_route, initialize_route_impls +from llama_stack.core.datatypes import AuthenticationConfig, User +from llama_stack.core.request_headers import user_from_scope +from llama_stack.core.server.auth_providers import create_auth_provider +from llama_stack.core.server.routes import find_matching_route, initialize_route_impls from llama_stack.log import get_logger logger = get_logger(name=__name__, category="auth") diff --git a/llama_stack/distribution/server/auth_providers.py b/llama_stack/core/server/auth_providers.py similarity index 99% rename from llama_stack/distribution/server/auth_providers.py rename to llama_stack/core/server/auth_providers.py index 9b0e182f5..73d5581c2 100644 --- a/llama_stack/distribution/server/auth_providers.py +++ b/llama_stack/core/server/auth_providers.py @@ -14,7 +14,7 @@ import httpx from jose import jwt from pydantic import BaseModel, Field -from llama_stack.distribution.datatypes import ( +from llama_stack.core.datatypes import ( AuthenticationConfig, CustomAuthConfig, GitHubTokenAuthConfig, diff --git a/llama_stack/distribution/server/quota.py b/llama_stack/core/server/quota.py similarity index 100% rename from llama_stack/distribution/server/quota.py rename to llama_stack/core/server/quota.py diff --git a/llama_stack/distribution/server/routes.py b/llama_stack/core/server/routes.py similarity index 98% rename from llama_stack/distribution/server/routes.py rename to llama_stack/core/server/routes.py index ca6f629af..7baf20da5 100644 --- a/llama_stack/distribution/server/routes.py +++ b/llama_stack/core/server/routes.py @@ -15,7 +15,7 @@ from starlette.routing import Route from llama_stack.apis.datatypes 
import Api, ExternalApiSpec from llama_stack.apis.tools import RAGToolRuntime, SpecialToolGroup from llama_stack.apis.version import LLAMA_STACK_API_VERSION -from llama_stack.distribution.resolver import api_protocol_map +from llama_stack.core.resolver import api_protocol_map from llama_stack.schema_utils import WebMethod EndpointFunc = Callable[..., Any] diff --git a/llama_stack/distribution/server/server.py b/llama_stack/core/server/server.py similarity index 96% rename from llama_stack/distribution/server/server.py rename to llama_stack/core/server/server.py index 96a0d60e7..481c6c321 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/core/server/server.py @@ -33,35 +33,35 @@ from pydantic import BaseModel, ValidationError from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.cli.utils import add_config_template_args, get_config_from_args -from llama_stack.distribution.access_control.access_control import AccessDeniedError -from llama_stack.distribution.datatypes import ( +from llama_stack.core.access_control.access_control import AccessDeniedError +from llama_stack.core.datatypes import ( AuthenticationRequiredError, LoggingConfig, StackRunConfig, ) -from llama_stack.distribution.distribution import builtin_automatically_routed_apis -from llama_stack.distribution.external import ExternalApiSpec, load_external_apis -from llama_stack.distribution.request_headers import ( +from llama_stack.core.distribution import builtin_automatically_routed_apis +from llama_stack.core.external import ExternalApiSpec, load_external_apis +from llama_stack.core.request_headers import ( PROVIDER_DATA_VAR, request_provider_data_context, user_from_scope, ) -from llama_stack.distribution.resolver import InvalidProviderError -from llama_stack.distribution.server.routes import ( +from llama_stack.core.resolver import InvalidProviderError +from llama_stack.core.server.routes import ( find_matching_route, get_all_api_routes, initialize_route_impls, ) -from llama_stack.distribution.stack import ( +from llama_stack.core.stack import ( cast_image_name_to_string, construct_stack, replace_env_vars, shutdown_stack, validate_env_pair, ) -from llama_stack.distribution.utils.config import redact_sensitive_fields -from llama_stack.distribution.utils.config_resolution import Mode, resolve_config_or_template -from llama_stack.distribution.utils.context import preserve_contexts_async_generator +from llama_stack.core.utils.config import redact_sensitive_fields +from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_template +from llama_stack.core.utils.context import preserve_contexts_async_generator from llama_stack.log import get_logger from llama_stack.providers.datatypes import Api from llama_stack.providers.inline.telemetry.meta_reference.config import TelemetryConfig diff --git a/llama_stack/distribution/stack.py b/llama_stack/core/stack.py similarity index 96% rename from llama_stack/distribution/stack.py rename to llama_stack/core/stack.py index 1dbcbb7fa..c7fe67503 100644 --- a/llama_stack/distribution/stack.py +++ b/llama_stack/core/stack.py @@ -34,14 +34,14 @@ from llama_stack.apis.telemetry import Telemetry from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime from llama_stack.apis.vector_dbs import VectorDBs from llama_stack.apis.vector_io import VectorIO -from llama_stack.distribution.datatypes import Provider, StackRunConfig -from llama_stack.distribution.distribution import get_provider_registry -from 
llama_stack.distribution.inspect import DistributionInspectConfig, DistributionInspectImpl -from llama_stack.distribution.providers import ProviderImpl, ProviderImplConfig -from llama_stack.distribution.resolver import ProviderRegistry, resolve_impls -from llama_stack.distribution.routing_tables.common import CommonRoutingTableImpl -from llama_stack.distribution.store.registry import create_dist_registry -from llama_stack.distribution.utils.dynamic import instantiate_class_type +from llama_stack.core.datatypes import Provider, StackRunConfig +from llama_stack.core.distribution import get_provider_registry +from llama_stack.core.inspect import DistributionInspectConfig, DistributionInspectImpl +from llama_stack.core.providers import ProviderImpl, ProviderImplConfig +from llama_stack.core.resolver import ProviderRegistry, resolve_impls +from llama_stack.core.routing_tables.common import CommonRoutingTableImpl +from llama_stack.core.store.registry import create_dist_registry +from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.log import get_logger from llama_stack.providers.datatypes import Api diff --git a/llama_stack/distribution/start_stack.sh b/llama_stack/core/start_stack.sh similarity index 98% rename from llama_stack/distribution/start_stack.sh rename to llama_stack/core/start_stack.sh index 77a7dc92e..ff8275aeb 100755 --- a/llama_stack/distribution/start_stack.sh +++ b/llama_stack/core/start_stack.sh @@ -122,7 +122,7 @@ if [[ "$env_type" == "venv" || "$env_type" == "conda" ]]; then yaml_config_arg="" fi - $PYTHON_BINARY -m llama_stack.distribution.server.server \ + $PYTHON_BINARY -m llama_stack.core.server.server \ $yaml_config_arg \ --port "$port" \ $env_vars \ diff --git a/llama_stack/distribution/store/__init__.py b/llama_stack/core/store/__init__.py similarity index 100% rename from llama_stack/distribution/store/__init__.py rename to llama_stack/core/store/__init__.py diff --git a/llama_stack/distribution/store/registry.py b/llama_stack/core/store/registry.py similarity index 98% rename from llama_stack/distribution/store/registry.py rename to llama_stack/core/store/registry.py index cd7cd9f00..4b60e1001 100644 --- a/llama_stack/distribution/store/registry.py +++ b/llama_stack/core/store/registry.py @@ -10,8 +10,8 @@ from typing import Protocol import pydantic -from llama_stack.distribution.datatypes import RoutableObjectWithProvider -from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR +from llama_stack.core.datatypes import RoutableObjectWithProvider +from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.log import get_logger from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig diff --git a/llama_stack/distribution/ui/Containerfile b/llama_stack/core/ui/Containerfile similarity index 100% rename from llama_stack/distribution/ui/Containerfile rename to llama_stack/core/ui/Containerfile diff --git a/llama_stack/distribution/ui/README.md b/llama_stack/core/ui/README.md similarity index 96% rename from llama_stack/distribution/ui/README.md rename to llama_stack/core/ui/README.md index 51c2d2bc2..5f4a9e250 100644 --- a/llama_stack/distribution/ui/README.md +++ b/llama_stack/core/ui/README.md @@ -36,7 +36,7 @@ llama-stack-client benchmarks register \ 3. 
Start Streamlit UI ```bash -uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py +uv run --with ".[ui]" streamlit run llama_stack/core/ui/app.py ``` ## Environment Variables diff --git a/llama_stack/distribution/ui/__init__.py b/llama_stack/core/ui/__init__.py similarity index 100% rename from llama_stack/distribution/ui/__init__.py rename to llama_stack/core/ui/__init__.py diff --git a/llama_stack/distribution/ui/app.py b/llama_stack/core/ui/app.py similarity index 100% rename from llama_stack/distribution/ui/app.py rename to llama_stack/core/ui/app.py diff --git a/llama_stack/distribution/ui/modules/__init__.py b/llama_stack/core/ui/modules/__init__.py similarity index 100% rename from llama_stack/distribution/ui/modules/__init__.py rename to llama_stack/core/ui/modules/__init__.py diff --git a/llama_stack/distribution/ui/modules/api.py b/llama_stack/core/ui/modules/api.py similarity index 100% rename from llama_stack/distribution/ui/modules/api.py rename to llama_stack/core/ui/modules/api.py diff --git a/llama_stack/distribution/ui/modules/utils.py b/llama_stack/core/ui/modules/utils.py similarity index 100% rename from llama_stack/distribution/ui/modules/utils.py rename to llama_stack/core/ui/modules/utils.py diff --git a/llama_stack/distribution/ui/page/__init__.py b/llama_stack/core/ui/page/__init__.py similarity index 100% rename from llama_stack/distribution/ui/page/__init__.py rename to llama_stack/core/ui/page/__init__.py diff --git a/llama_stack/distribution/ui/page/distribution/__init__.py b/llama_stack/core/ui/page/distribution/__init__.py similarity index 100% rename from llama_stack/distribution/ui/page/distribution/__init__.py rename to llama_stack/core/ui/page/distribution/__init__.py diff --git a/llama_stack/distribution/ui/page/distribution/datasets.py b/llama_stack/core/ui/page/distribution/datasets.py similarity index 88% rename from llama_stack/distribution/ui/page/distribution/datasets.py rename to llama_stack/core/ui/page/distribution/datasets.py index 6842b29a7..aab0901ac 100644 --- a/llama_stack/distribution/ui/page/distribution/datasets.py +++ b/llama_stack/core/ui/page/distribution/datasets.py @@ -6,7 +6,7 @@ import streamlit as st -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.core.ui.modules.api import llama_stack_api def datasets(): diff --git a/llama_stack/distribution/ui/page/distribution/eval_tasks.py b/llama_stack/core/ui/page/distribution/eval_tasks.py similarity index 90% rename from llama_stack/distribution/ui/page/distribution/eval_tasks.py rename to llama_stack/core/ui/page/distribution/eval_tasks.py index 492be4700..1a0ce502b 100644 --- a/llama_stack/distribution/ui/page/distribution/eval_tasks.py +++ b/llama_stack/core/ui/page/distribution/eval_tasks.py @@ -6,7 +6,7 @@ import streamlit as st -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.core.ui.modules.api import llama_stack_api def benchmarks(): diff --git a/llama_stack/distribution/ui/page/distribution/models.py b/llama_stack/core/ui/page/distribution/models.py similarity index 87% rename from llama_stack/distribution/ui/page/distribution/models.py rename to llama_stack/core/ui/page/distribution/models.py index f29459098..f84508746 100644 --- a/llama_stack/distribution/ui/page/distribution/models.py +++ b/llama_stack/core/ui/page/distribution/models.py @@ -6,7 +6,7 @@ import streamlit as st -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.core.ui.modules.api
import llama_stack_api def models(): diff --git a/llama_stack/distribution/ui/page/distribution/providers.py b/llama_stack/core/ui/page/distribution/providers.py similarity index 91% rename from llama_stack/distribution/ui/page/distribution/providers.py rename to llama_stack/core/ui/page/distribution/providers.py index c660cb986..3ec6026d1 100644 --- a/llama_stack/distribution/ui/page/distribution/providers.py +++ b/llama_stack/core/ui/page/distribution/providers.py @@ -6,7 +6,7 @@ import streamlit as st -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.core.ui.modules.api import llama_stack_api def providers(): diff --git a/llama_stack/distribution/ui/page/distribution/resources.py b/llama_stack/core/ui/page/distribution/resources.py similarity index 70% rename from llama_stack/distribution/ui/page/distribution/resources.py rename to llama_stack/core/ui/page/distribution/resources.py index 5e10e6e80..c56fcfff3 100644 --- a/llama_stack/distribution/ui/page/distribution/resources.py +++ b/llama_stack/core/ui/page/distribution/resources.py @@ -6,12 +6,12 @@ from streamlit_option_menu import option_menu -from llama_stack.distribution.ui.page.distribution.datasets import datasets -from llama_stack.distribution.ui.page.distribution.eval_tasks import benchmarks -from llama_stack.distribution.ui.page.distribution.models import models -from llama_stack.distribution.ui.page.distribution.scoring_functions import scoring_functions -from llama_stack.distribution.ui.page.distribution.shields import shields -from llama_stack.distribution.ui.page.distribution.vector_dbs import vector_dbs +from llama_stack.core.ui.page.distribution.datasets import datasets +from llama_stack.core.ui.page.distribution.eval_tasks import benchmarks +from llama_stack.core.ui.page.distribution.models import models +from llama_stack.core.ui.page.distribution.scoring_functions import scoring_functions +from llama_stack.core.ui.page.distribution.shields import shields +from llama_stack.core.ui.page.distribution.vector_dbs import vector_dbs def resources_page(): diff --git a/llama_stack/distribution/ui/page/distribution/scoring_functions.py b/llama_stack/core/ui/page/distribution/scoring_functions.py similarity index 89% rename from llama_stack/distribution/ui/page/distribution/scoring_functions.py rename to llama_stack/core/ui/page/distribution/scoring_functions.py index 193146356..2a5196fa9 100644 --- a/llama_stack/distribution/ui/page/distribution/scoring_functions.py +++ b/llama_stack/core/ui/page/distribution/scoring_functions.py @@ -6,7 +6,7 @@ import streamlit as st -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.core.ui.modules.api import llama_stack_api def scoring_functions(): diff --git a/llama_stack/distribution/ui/page/distribution/shields.py b/llama_stack/core/ui/page/distribution/shields.py similarity index 88% rename from llama_stack/distribution/ui/page/distribution/shields.py rename to llama_stack/core/ui/page/distribution/shields.py index 67d66d64f..ecce2f12b 100644 --- a/llama_stack/distribution/ui/page/distribution/shields.py +++ b/llama_stack/core/ui/page/distribution/shields.py @@ -6,7 +6,7 @@ import streamlit as st -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.core.ui.modules.api import llama_stack_api def shields(): diff --git a/llama_stack/distribution/ui/page/distribution/vector_dbs.py b/llama_stack/core/ui/page/distribution/vector_dbs.py similarity index 90% rename from 
llama_stack/distribution/ui/page/distribution/vector_dbs.py rename to llama_stack/core/ui/page/distribution/vector_dbs.py index 49a4f25bb..e81077d2a 100644 --- a/llama_stack/distribution/ui/page/distribution/vector_dbs.py +++ b/llama_stack/core/ui/page/distribution/vector_dbs.py @@ -6,7 +6,7 @@ import streamlit as st -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.core.ui.modules.api import llama_stack_api def vector_dbs(): diff --git a/llama_stack/distribution/ui/page/evaluations/__init__.py b/llama_stack/core/ui/page/evaluations/__init__.py similarity index 100% rename from llama_stack/distribution/ui/page/evaluations/__init__.py rename to llama_stack/core/ui/page/evaluations/__init__.py diff --git a/llama_stack/distribution/ui/page/evaluations/app_eval.py b/llama_stack/core/ui/page/evaluations/app_eval.py similarity index 97% rename from llama_stack/distribution/ui/page/evaluations/app_eval.py rename to llama_stack/core/ui/page/evaluations/app_eval.py index d7bc6388c..07e6349c9 100644 --- a/llama_stack/distribution/ui/page/evaluations/app_eval.py +++ b/llama_stack/core/ui/page/evaluations/app_eval.py @@ -9,8 +9,8 @@ import json import pandas as pd import streamlit as st -from llama_stack.distribution.ui.modules.api import llama_stack_api -from llama_stack.distribution.ui.modules.utils import process_dataset +from llama_stack.core.ui.modules.api import llama_stack_api +from llama_stack.core.ui.modules.utils import process_dataset def application_evaluation_page(): diff --git a/llama_stack/distribution/ui/page/evaluations/native_eval.py b/llama_stack/core/ui/page/evaluations/native_eval.py similarity index 99% rename from llama_stack/distribution/ui/page/evaluations/native_eval.py rename to llama_stack/core/ui/page/evaluations/native_eval.py index 97f875e17..2bef63b2f 100644 --- a/llama_stack/distribution/ui/page/evaluations/native_eval.py +++ b/llama_stack/core/ui/page/evaluations/native_eval.py @@ -9,7 +9,7 @@ import json import pandas as pd import streamlit as st -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.core.ui.modules.api import llama_stack_api def select_benchmark_1(): diff --git a/llama_stack/distribution/ui/page/playground/__init__.py b/llama_stack/core/ui/page/playground/__init__.py similarity index 100% rename from llama_stack/distribution/ui/page/playground/__init__.py rename to llama_stack/core/ui/page/playground/__init__.py diff --git a/llama_stack/distribution/ui/page/playground/chat.py b/llama_stack/core/ui/page/playground/chat.py similarity index 98% rename from llama_stack/distribution/ui/page/playground/chat.py rename to llama_stack/core/ui/page/playground/chat.py index fcaf08795..d391d0fb7 100644 --- a/llama_stack/distribution/ui/page/playground/chat.py +++ b/llama_stack/core/ui/page/playground/chat.py @@ -6,7 +6,7 @@ import streamlit as st -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.core.ui.modules.api import llama_stack_api # Sidebar configurations with st.sidebar: diff --git a/llama_stack/distribution/ui/page/playground/rag.py b/llama_stack/core/ui/page/playground/rag.py similarity index 98% rename from llama_stack/distribution/ui/page/playground/rag.py rename to llama_stack/core/ui/page/playground/rag.py index 696d89bc2..2ffae1c33 100644 --- a/llama_stack/distribution/ui/page/playground/rag.py +++ b/llama_stack/core/ui/page/playground/rag.py @@ -10,8 +10,8 @@ import streamlit as st from llama_stack_client import Agent, AgentEventLogger, 
RAGDocument from llama_stack.apis.common.content_types import ToolCallDelta -from llama_stack.distribution.ui.modules.api import llama_stack_api -from llama_stack.distribution.ui.modules.utils import data_url_from_file +from llama_stack.core.ui.modules.api import llama_stack_api +from llama_stack.core.ui.modules.utils import data_url_from_file def rag_chat_page(): diff --git a/llama_stack/distribution/ui/page/playground/tools.py b/llama_stack/core/ui/page/playground/tools.py similarity index 99% rename from llama_stack/distribution/ui/page/playground/tools.py rename to llama_stack/core/ui/page/playground/tools.py index 149d8cce9..602c9eea1 100644 --- a/llama_stack/distribution/ui/page/playground/tools.py +++ b/llama_stack/core/ui/page/playground/tools.py @@ -13,7 +13,7 @@ from llama_stack_client import Agent from llama_stack_client.lib.agents.react.agent import ReActAgent from llama_stack_client.lib.agents.react.tool_parser import ReActOutput -from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.core.ui.modules.api import llama_stack_api class AgentType(enum.Enum): diff --git a/llama_stack/distribution/ui/requirements.txt b/llama_stack/core/ui/requirements.txt similarity index 100% rename from llama_stack/distribution/ui/requirements.txt rename to llama_stack/core/ui/requirements.txt diff --git a/llama_stack/distribution/utils/__init__.py b/llama_stack/core/utils/__init__.py similarity index 100% rename from llama_stack/distribution/utils/__init__.py rename to llama_stack/core/utils/__init__.py diff --git a/llama_stack/distribution/utils/config.py b/llama_stack/core/utils/config.py similarity index 100% rename from llama_stack/distribution/utils/config.py rename to llama_stack/core/utils/config.py diff --git a/llama_stack/distribution/utils/config_dirs.py b/llama_stack/core/utils/config_dirs.py similarity index 100% rename from llama_stack/distribution/utils/config_dirs.py rename to llama_stack/core/utils/config_dirs.py diff --git a/llama_stack/distribution/utils/config_resolution.py b/llama_stack/core/utils/config_resolution.py similarity index 96% rename from llama_stack/distribution/utils/config_resolution.py rename to llama_stack/core/utils/config_resolution.py index 7e8de1242..445050944 100644 --- a/llama_stack/distribution/utils/config_resolution.py +++ b/llama_stack/core/utils/config_resolution.py @@ -7,7 +7,7 @@ from enum import StrEnum from pathlib import Path -from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR +from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.log import get_logger logger = get_logger(name=__name__, category="config_resolution") @@ -74,7 +74,7 @@ def _get_template_config_path(template_name: str, mode: Mode) -> Path: def _format_resolution_error(config_or_template: str, mode: Mode) -> str: """Format a helpful error message for resolution failures.""" - from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR + from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR template_path = _get_template_config_path(config_or_template, mode) distrib_path = DISTRIBS_BASE_DIR / f"llamastack-{config_or_template}" / f"{config_or_template}-{mode}.yaml" diff --git a/llama_stack/distribution/utils/context.py b/llama_stack/core/utils/context.py similarity index 100% rename from llama_stack/distribution/utils/context.py rename to llama_stack/core/utils/context.py diff --git a/llama_stack/distribution/utils/dynamic.py b/llama_stack/core/utils/dynamic.py similarity index 
100% rename from llama_stack/distribution/utils/dynamic.py rename to llama_stack/core/utils/dynamic.py diff --git a/llama_stack/distribution/utils/exec.py b/llama_stack/core/utils/exec.py similarity index 96% rename from llama_stack/distribution/utils/exec.py rename to llama_stack/core/utils/exec.py index c646ae821..4272db3b8 100644 --- a/llama_stack/distribution/utils/exec.py +++ b/llama_stack/core/utils/exec.py @@ -18,7 +18,7 @@ import importlib import json from pathlib import Path -from llama_stack.distribution.utils.image_types import LlamaStackImageType +from llama_stack.core.utils.image_types import LlamaStackImageType def formulate_run_args(image_type: str, image_name: str) -> list[str]: @@ -79,7 +79,7 @@ def formulate_run_args(image_type: str, image_name: str) -> list[str]: return cprint(f"Using virtual environment: {env_name}", file=sys.stderr) - script = importlib.resources.files("llama_stack") / "distribution/start_stack.sh" + script = importlib.resources.files("llama_stack") / "core/start_stack.sh" run_args = [ script, image_type, diff --git a/llama_stack/distribution/utils/image_types.py b/llama_stack/core/utils/image_types.py similarity index 100% rename from llama_stack/distribution/utils/image_types.py rename to llama_stack/core/utils/image_types.py diff --git a/llama_stack/distribution/utils/model_utils.py b/llama_stack/core/utils/model_utils.py similarity index 100% rename from llama_stack/distribution/utils/model_utils.py rename to llama_stack/core/utils/model_utils.py diff --git a/llama_stack/distribution/utils/prompt_for_config.py b/llama_stack/core/utils/prompt_for_config.py similarity index 100% rename from llama_stack/distribution/utils/prompt_for_config.py rename to llama_stack/core/utils/prompt_for_config.py diff --git a/llama_stack/distribution/utils/serialize.py b/llama_stack/core/utils/serialize.py similarity index 100% rename from llama_stack/distribution/utils/serialize.py rename to llama_stack/core/utils/serialize.py diff --git a/llama_stack/log.py b/llama_stack/log.py index fb6fa85f9..ab53e08c0 100644 --- a/llama_stack/log.py +++ b/llama_stack/log.py @@ -15,7 +15,7 @@ from rich.errors import MarkupError from rich.logging import RichHandler from termcolor import cprint -from .distribution.datatypes import LoggingConfig +from llama_stack.core.datatypes import LoggingConfig # Default log level DEFAULT_LOG_LEVEL = logging.INFO diff --git a/llama_stack/providers/inline/agents/meta_reference/__init__.py b/llama_stack/providers/inline/agents/meta_reference/__init__.py index 4a77e65b9..334c32e15 100644 --- a/llama_stack/providers/inline/agents/meta_reference/__init__.py +++ b/llama_stack/providers/inline/agents/meta_reference/__init__.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack.distribution.datatypes import AccessRule, Api +from llama_stack.core.datatypes import AccessRule, Api from .config import MetaReferenceAgentsImplConfig diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index 3c34c71fb..fb22e10cc 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -61,7 +61,7 @@ from llama_stack.apis.inference import ( from llama_stack.apis.safety import Safety from llama_stack.apis.tools import ToolGroups, ToolInvocationResult, ToolRuntime from llama_stack.apis.vector_io import VectorIO -from llama_stack.distribution.datatypes import AccessRule +from 
llama_stack.core.datatypes import AccessRule from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import ( BuiltinTool, diff --git a/llama_stack/providers/inline/agents/meta_reference/agents.py b/llama_stack/providers/inline/agents/meta_reference/agents.py index 4d0c429bd..b326a95b4 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agents.py +++ b/llama_stack/providers/inline/agents/meta_reference/agents.py @@ -41,7 +41,7 @@ from llama_stack.apis.inference import ( from llama_stack.apis.safety import Safety from llama_stack.apis.tools import ToolGroups, ToolRuntime from llama_stack.apis.vector_io import VectorIO -from llama_stack.distribution.datatypes import AccessRule +from llama_stack.core.datatypes import AccessRule from llama_stack.providers.utils.kvstore import InmemoryKVStoreImpl, kvstore_impl from llama_stack.providers.utils.pagination import paginate_records from llama_stack.providers.utils.responses.responses_store import ResponsesStore diff --git a/llama_stack/providers/inline/agents/meta_reference/persistence.py b/llama_stack/providers/inline/agents/meta_reference/persistence.py index 437d617ad..8d4713cd2 100644 --- a/llama_stack/providers/inline/agents/meta_reference/persistence.py +++ b/llama_stack/providers/inline/agents/meta_reference/persistence.py @@ -10,10 +10,10 @@ import uuid from datetime import UTC, datetime from llama_stack.apis.agents import AgentConfig, Session, ToolExecutionStep, Turn -from llama_stack.distribution.access_control.access_control import AccessDeniedError, is_action_allowed -from llama_stack.distribution.access_control.datatypes import AccessRule -from llama_stack.distribution.datatypes import User -from llama_stack.distribution.request_headers import get_authenticated_user +from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed +from llama_stack.core.access_control.datatypes import AccessRule +from llama_stack.core.datatypes import User +from llama_stack.core.request_headers import get_authenticated_user from llama_stack.providers.utils.kvstore import KVStore log = logging.getLogger(__name__) diff --git a/llama_stack/providers/inline/eval/meta_reference/__init__.py b/llama_stack/providers/inline/eval/meta_reference/__init__.py index 7afe7f33b..cf2578a72 100644 --- a/llama_stack/providers/inline/eval/meta_reference/__init__.py +++ b/llama_stack/providers/inline/eval/meta_reference/__init__.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
from typing import Any -from llama_stack.distribution.datatypes import Api +from llama_stack.core.datatypes import Api from .config import MetaReferenceEvalConfig diff --git a/llama_stack/providers/inline/files/localfs/__init__.py b/llama_stack/providers/inline/files/localfs/__init__.py index 71664efad..363b6f04c 100644 --- a/llama_stack/providers/inline/files/localfs/__init__.py +++ b/llama_stack/providers/inline/files/localfs/__init__.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack.distribution.datatypes import AccessRule, Api +from llama_stack.core.datatypes import AccessRule, Api from .config import LocalfsFilesImplConfig from .files import LocalfsFilesImpl diff --git a/llama_stack/providers/inline/files/localfs/files.py b/llama_stack/providers/inline/files/localfs/files.py index 433762c5a..1e9dca3b5 100644 --- a/llama_stack/providers/inline/files/localfs/files.py +++ b/llama_stack/providers/inline/files/localfs/files.py @@ -19,7 +19,7 @@ from llama_stack.apis.files import ( OpenAIFileObject, OpenAIFilePurpose, ) -from llama_stack.distribution.datatypes import AccessRule +from llama_stack.core.datatypes import AccessRule from llama_stack.providers.utils.sqlstore.api import ColumnDefinition, ColumnType from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore from llama_stack.providers.utils.sqlstore.sqlstore import sqlstore_impl diff --git a/llama_stack/providers/inline/inference/meta_reference/common.py b/llama_stack/providers/inline/inference/meta_reference/common.py index beb0d39d4..1e164430d 100644 --- a/llama_stack/providers/inline/inference/meta_reference/common.py +++ b/llama_stack/providers/inline/inference/meta_reference/common.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.distribution.utils.model_utils import model_local_dir +from llama_stack.core.utils.model_utils import model_local_dir def model_checkpoint_dir(model_id) -> str: diff --git a/llama_stack/providers/inline/post_training/huggingface/__init__.py b/llama_stack/providers/inline/post_training/huggingface/__init__.py index cc1a671c1..96c45cc4f 100644 --- a/llama_stack/providers/inline/post_training/huggingface/__init__.py +++ b/llama_stack/providers/inline/post_training/huggingface/__init__.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack.distribution.datatypes import Api +from llama_stack.core.datatypes import Api from .config import HuggingFacePostTrainingConfig diff --git a/llama_stack/providers/inline/post_training/torchtune/__init__.py b/llama_stack/providers/inline/post_training/torchtune/__init__.py index 7a2f9eba2..af4ebd92a 100644 --- a/llama_stack/providers/inline/post_training/torchtune/__init__.py +++ b/llama_stack/providers/inline/post_training/torchtune/__init__.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack.distribution.datatypes import Api +from llama_stack.core.datatypes import Api from .config import TorchtunePostTrainingConfig diff --git a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py index fed19428c..49e1c95b8 100644 --- a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +++ b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py @@ -43,8 +43,8 @@ from llama_stack.apis.post_training import ( QATFinetuningConfig, TrainingConfig, ) -from llama_stack.distribution.utils.config_dirs import 
DEFAULT_CHECKPOINT_DIR -from llama_stack.distribution.utils.model_utils import model_local_dir +from llama_stack.core.utils.config_dirs import DEFAULT_CHECKPOINT_DIR +from llama_stack.core.utils.model_utils import model_local_dir from llama_stack.models.llama.sku_list import resolve_model from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device from llama_stack.providers.inline.post_training.torchtune.common import utils diff --git a/llama_stack/providers/inline/safety/llama_guard/llama_guard.py b/llama_stack/providers/inline/safety/llama_guard/llama_guard.py index 0d1c4ffe1..c580adfad 100644 --- a/llama_stack/providers/inline/safety/llama_guard/llama_guard.py +++ b/llama_stack/providers/inline/safety/llama_guard/llama_guard.py @@ -21,7 +21,7 @@ from llama_stack.apis.safety import ( ViolationLevel, ) from llama_stack.apis.shields import Shield -from llama_stack.distribution.datatypes import Api +from llama_stack.core.datatypes import Api from llama_stack.models.llama.datatypes import Role from llama_stack.models.llama.sku_types import CoreModelId from llama_stack.providers.datatypes import ShieldsProtocolPrivate diff --git a/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py b/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py index ff87889ea..ee645a41d 100644 --- a/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +++ b/llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py @@ -18,7 +18,7 @@ from llama_stack.apis.safety import ( ViolationLevel, ) from llama_stack.apis.shields import Shield -from llama_stack.distribution.utils.model_utils import model_local_dir +from llama_stack.core.utils.model_utils import model_local_dir from llama_stack.providers.datatypes import ShieldsProtocolPrivate from llama_stack.providers.utils.inference.prompt_adapter import ( interleaved_content_as_str, diff --git a/llama_stack/providers/inline/scoring/basic/__init__.py b/llama_stack/providers/inline/scoring/basic/__init__.py index d9d150b1a..c996b9c2d 100644 --- a/llama_stack/providers/inline/scoring/basic/__init__.py +++ b/llama_stack/providers/inline/scoring/basic/__init__.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
from typing import Any -from llama_stack.distribution.datatypes import Api +from llama_stack.core.datatypes import Api from .config import BasicScoringConfig diff --git a/llama_stack/providers/inline/scoring/basic/scoring.py b/llama_stack/providers/inline/scoring/basic/scoring.py index 09f89be5e..91b10daae 100644 --- a/llama_stack/providers/inline/scoring/basic/scoring.py +++ b/llama_stack/providers/inline/scoring/basic/scoring.py @@ -14,7 +14,7 @@ from llama_stack.apis.scoring import ( ScoringResult, ) from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams -from llama_stack.distribution.datatypes import Api +from llama_stack.core.datatypes import Api from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate from llama_stack.providers.utils.common.data_schema_validator import ( get_valid_schemas, diff --git a/llama_stack/providers/inline/scoring/braintrust/__init__.py b/llama_stack/providers/inline/scoring/braintrust/__init__.py index 8ea6e9b96..3b492ae3f 100644 --- a/llama_stack/providers/inline/scoring/braintrust/__init__.py +++ b/llama_stack/providers/inline/scoring/braintrust/__init__.py @@ -7,7 +7,7 @@ from typing import Any from pydantic import BaseModel -from llama_stack.distribution.datatypes import Api +from llama_stack.core.datatypes import Api from .config import BraintrustScoringConfig diff --git a/llama_stack/providers/inline/scoring/braintrust/braintrust.py b/llama_stack/providers/inline/scoring/braintrust/braintrust.py index d6655d657..14810f706 100644 --- a/llama_stack/providers/inline/scoring/braintrust/braintrust.py +++ b/llama_stack/providers/inline/scoring/braintrust/braintrust.py @@ -29,8 +29,8 @@ from llama_stack.apis.scoring import ( ScoringResultRow, ) from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams -from llama_stack.distribution.datatypes import Api -from llama_stack.distribution.request_headers import NeedsRequestProviderData +from llama_stack.core.datatypes import Api +from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate from llama_stack.providers.utils.common.data_schema_validator import ( get_valid_schemas, diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py b/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py index 88bf10737..76735fcb3 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/__init__.py @@ -5,7 +5,7 @@ # the root directory of this source tree. 
from typing import Any -from llama_stack.distribution.datatypes import Api +from llama_stack.core.datatypes import Api from .config import LlmAsJudgeScoringConfig diff --git a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py b/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py index 2bd113a94..fd651877c 100644 --- a/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +++ b/llama_stack/providers/inline/scoring/llm_as_judge/scoring.py @@ -15,7 +15,7 @@ from llama_stack.apis.scoring import ( ScoringResult, ) from llama_stack.apis.scoring_functions import ScoringFn, ScoringFnParams -from llama_stack.distribution.datatypes import Api +from llama_stack.core.datatypes import Api from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate from llama_stack.providers.utils.common.data_schema_validator import ( get_valid_schemas, diff --git a/llama_stack/providers/inline/telemetry/meta_reference/__init__.py b/llama_stack/providers/inline/telemetry/meta_reference/__init__.py index 09e97136a..21743b653 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/__init__.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/__init__.py @@ -6,7 +6,7 @@ from typing import Any -from llama_stack.distribution.datatypes import Api +from llama_stack.core.datatypes import Api from .config import TelemetryConfig, TelemetrySink diff --git a/llama_stack/providers/inline/telemetry/meta_reference/config.py b/llama_stack/providers/inline/telemetry/meta_reference/config.py index f2a7c2a6e..31ae80050 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/config.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/config.py @@ -9,7 +9,7 @@ from typing import Any from pydantic import BaseModel, Field, field_validator -from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR class TelemetrySink(StrEnum): diff --git a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py index c63fc23c2..623267172 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/telemetry.py @@ -36,7 +36,7 @@ from llama_stack.apis.telemetry import ( Trace, UnstructuredLogEvent, ) -from llama_stack.distribution.datatypes import Api +from llama_stack.core.datatypes import Api from llama_stack.providers.inline.telemetry.meta_reference.console_span_processor import ( ConsoleSpanProcessor, ) diff --git a/llama_stack/providers/remote/datasetio/nvidia/README.md b/llama_stack/providers/remote/datasetio/nvidia/README.md index 8b1e2e6ee..40f2922f2 100644 --- a/llama_stack/providers/remote/datasetio/nvidia/README.md +++ b/llama_stack/providers/remote/datasetio/nvidia/README.md @@ -34,7 +34,7 @@ os.environ["NVIDIA_API_KEY"] = "your-api-key" os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test" os.environ["NVIDIA_DATASET_NAMESPACE"] = "default" os.environ["NVIDIA_PROJECT_ID"] = "test-project" -from llama_stack.distribution.library_client import LlamaStackAsLibraryClient +from llama_stack.core.library_client import LlamaStackAsLibraryClient client = LlamaStackAsLibraryClient("nvidia") client.initialize() diff --git a/llama_stack/providers/remote/eval/nvidia/__init__.py b/llama_stack/providers/remote/eval/nvidia/__init__.py index 55e3754f3..1314fdb83 100644 --- a/llama_stack/providers/remote/eval/nvidia/__init__.py +++ 
b/llama_stack/providers/remote/eval/nvidia/__init__.py @@ -5,7 +5,7 @@ # the root directory of this source tree. from typing import Any -from llama_stack.distribution.datatypes import Api +from llama_stack.core.datatypes import Api from .config import NVIDIAEvalConfig diff --git a/llama_stack/providers/remote/inference/fireworks/fireworks.py b/llama_stack/providers/remote/inference/fireworks/fireworks.py index c76aa39f3..ca4c7b578 100644 --- a/llama_stack/providers/remote/inference/fireworks/fireworks.py +++ b/llama_stack/providers/remote/inference/fireworks/fireworks.py @@ -39,7 +39,7 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.distribution.request_headers import NeedsRequestProviderData +from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ( ModelRegistryHelper, diff --git a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md index a353c67f5..0d1da0365 100644 --- a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md +++ b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md @@ -33,7 +33,7 @@ os.environ["NVIDIA_API_KEY"] = ( ) os.environ["NVIDIA_BASE_URL"] = "http://nim.test" # NIM URL -from llama_stack.distribution.library_client import LlamaStackAsLibraryClient +from llama_stack.core.library_client import LlamaStackAsLibraryClient client = LlamaStackAsLibraryClient("nvidia") client.initialize() diff --git a/llama_stack/providers/remote/inference/passthrough/passthrough.py b/llama_stack/providers/remote/inference/passthrough/passthrough.py index d5b3a5973..2f1cd40f2 100644 --- a/llama_stack/providers/remote/inference/passthrough/passthrough.py +++ b/llama_stack/providers/remote/inference/passthrough/passthrough.py @@ -34,7 +34,7 @@ from llama_stack.apis.inference import ( ToolPromptFormat, ) from llama_stack.apis.models import Model -from llama_stack.distribution.library_client import convert_pydantic_to_json_value, convert_to_pydantic +from llama_stack.core.library_client import convert_pydantic_to_json_value, convert_to_pydantic from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from llama_stack.providers.utils.inference.openai_compat import prepare_openai_completion_params diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py index 46094c146..a06e4173b 100644 --- a/llama_stack/providers/remote/inference/together/together.py +++ b/llama_stack/providers/remote/inference/together/together.py @@ -38,7 +38,7 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.distribution.request_headers import NeedsRequestProviderData +from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from llama_stack.providers.utils.inference.openai_compat import ( diff --git a/llama_stack/providers/remote/post_training/nvidia/README.md b/llama_stack/providers/remote/post_training/nvidia/README.md index 3ef538d29..1cce78c89 100644 --- a/llama_stack/providers/remote/post_training/nvidia/README.md +++ b/llama_stack/providers/remote/post_training/nvidia/README.md @@ -40,7 +40,7 @@ os.environ["NVIDIA_DATASET_NAMESPACE"] = "default" os.environ["NVIDIA_PROJECT_ID"] = "test-project" 
os.environ["NVIDIA_OUTPUT_MODEL_DIR"] = "test-example-model@v1" -from llama_stack.distribution.library_client import LlamaStackAsLibraryClient +from llama_stack.core.library_client import LlamaStackAsLibraryClient client = LlamaStackAsLibraryClient("nvidia") client.initialize() diff --git a/llama_stack/providers/remote/safety/nvidia/README.md b/llama_stack/providers/remote/safety/nvidia/README.md index 434db32fb..218963e47 100644 --- a/llama_stack/providers/remote/safety/nvidia/README.md +++ b/llama_stack/providers/remote/safety/nvidia/README.md @@ -32,7 +32,7 @@ import os os.environ["NVIDIA_API_KEY"] = "your-api-key" os.environ["NVIDIA_GUARDRAILS_URL"] = "http://guardrails.test" -from llama_stack.distribution.library_client import LlamaStackAsLibraryClient +from llama_stack.core.library_client import LlamaStackAsLibraryClient client = LlamaStackAsLibraryClient("nvidia") client.initialize() diff --git a/llama_stack/providers/remote/safety/sambanova/sambanova.py b/llama_stack/providers/remote/safety/sambanova/sambanova.py index 1a65f6aa1..3e0d03956 100644 --- a/llama_stack/providers/remote/safety/sambanova/sambanova.py +++ b/llama_stack/providers/remote/safety/sambanova/sambanova.py @@ -19,7 +19,7 @@ from llama_stack.apis.safety import ( ViolationLevel, ) from llama_stack.apis.shields import Shield -from llama_stack.distribution.request_headers import NeedsRequestProviderData +from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.providers.datatypes import ShieldsProtocolPrivate from llama_stack.providers.utils.inference.openai_compat import convert_message_to_openai_dict_new diff --git a/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py b/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py index 7e82cb6d4..e40903969 100644 --- a/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py +++ b/llama_stack/providers/remote/tool_runtime/bing_search/bing_search.py @@ -18,7 +18,7 @@ from llama_stack.apis.tools import ( ToolParameter, ToolRuntime, ) -from llama_stack.distribution.request_headers import NeedsRequestProviderData +from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import BingSearchToolConfig diff --git a/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py b/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py index b96b9e59c..ba3b910d5 100644 --- a/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py +++ b/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py @@ -17,7 +17,7 @@ from llama_stack.apis.tools import ( ToolParameter, ToolRuntime, ) -from llama_stack.distribution.request_headers import NeedsRequestProviderData +from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.models.llama.datatypes import BuiltinTool from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate diff --git a/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py b/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py index a9b252dfe..578bb6d34 100644 --- a/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py +++ b/llama_stack/providers/remote/tool_runtime/model_context_protocol/model_context_protocol.py @@ -15,7 +15,7 @@ from llama_stack.apis.tools import ( ToolInvocationResult, ToolRuntime, ) -from 
llama_stack.distribution.request_headers import NeedsRequestProviderData +from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool, list_mcp_tools diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index 1fe91fd7f..976ec9c57 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -18,7 +18,7 @@ from llama_stack.apis.tools import ( ToolParameter, ToolRuntime, ) -from llama_stack.distribution.request_headers import NeedsRequestProviderData +from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import TavilySearchToolConfig diff --git a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py b/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py index 6e1d0f61d..f12a44958 100644 --- a/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py +++ b/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py @@ -18,7 +18,7 @@ from llama_stack.apis.tools import ( ToolParameter, ToolRuntime, ) -from llama_stack.distribution.request_headers import NeedsRequestProviderData +from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate from .config import WolframAlphaToolConfig diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index 7ae2035db..228e5fa62 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -18,7 +18,7 @@ from llama_stack.apis.common.errors import VectorStoreNotFoundError from llama_stack.apis.files.files import Files from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO -from llama_stack.distribution.request_headers import NeedsRequestProviderData +from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore diff --git a/llama_stack/providers/utils/common/data_schema_validator.py b/llama_stack/providers/utils/common/data_schema_validator.py index 28a243863..b0305104f 100644 --- a/llama_stack/providers/utils/common/data_schema_validator.py +++ b/llama_stack/providers/utils/common/data_schema_validator.py @@ -12,7 +12,7 @@ from llama_stack.apis.common.type_system import ( CompletionInputType, StringType, ) -from llama_stack.distribution.datatypes import Api +from llama_stack.core.datatypes import Api class ColumnName(Enum): diff --git a/llama_stack/providers/utils/inference/inference_store.py b/llama_stack/providers/utils/inference/inference_store.py index 60a87494e..43006cfd5 100644 --- a/llama_stack/providers/utils/inference/inference_store.py +++ b/llama_stack/providers/utils/inference/inference_store.py @@ -10,8 +10,8 @@ from llama_stack.apis.inference import ( OpenAIMessageParam, Order, ) -from 
llama_stack.distribution.datatypes import AccessRule -from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py index 6ccf2a729..abba62f87 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -38,7 +38,7 @@ from llama_stack.apis.inference import ( ToolDefinition, ToolPromptFormat, ) -from llama_stack.distribution.request_headers import NeedsRequestProviderData +from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.log import get_logger from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from llama_stack.providers.utils.inference.openai_compat import ( diff --git a/llama_stack/providers/utils/kvstore/config.py b/llama_stack/providers/utils/kvstore/config.py index 0219bbebe..f00cb1f8b 100644 --- a/llama_stack/providers/utils/kvstore/config.py +++ b/llama_stack/providers/utils/kvstore/config.py @@ -10,7 +10,7 @@ from typing import Annotated, Literal from pydantic import BaseModel, Field, field_validator -from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR class KVStoreType(Enum): diff --git a/llama_stack/providers/utils/responses/responses_store.py b/llama_stack/providers/utils/responses/responses_store.py index ea6db7991..04778ed1c 100644 --- a/llama_stack/providers/utils/responses/responses_store.py +++ b/llama_stack/providers/utils/responses/responses_store.py @@ -14,8 +14,8 @@ from llama_stack.apis.agents.openai_responses import ( OpenAIResponseObject, OpenAIResponseObjectWithInput, ) -from llama_stack.distribution.datatypes import AccessRule -from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.core.datatypes import AccessRule +from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR from ..sqlstore.api import ColumnDefinition, ColumnType from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore diff --git a/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py b/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py index 864a7dbb6..ccc835768 100644 --- a/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py @@ -7,11 +7,11 @@ from collections.abc import Mapping from typing import Any, Literal -from llama_stack.distribution.access_control.access_control import default_policy, is_action_allowed -from llama_stack.distribution.access_control.conditions import ProtectedResource -from llama_stack.distribution.access_control.datatypes import AccessRule, Action, Scope -from llama_stack.distribution.datatypes import User -from llama_stack.distribution.request_headers import get_authenticated_user +from llama_stack.core.access_control.access_control import default_policy, is_action_allowed +from llama_stack.core.access_control.conditions import ProtectedResource +from llama_stack.core.access_control.datatypes import AccessRule, Action, Scope +from llama_stack.core.datatypes import User +from llama_stack.core.request_headers import 
get_authenticated_user from llama_stack.log import get_logger from .api import ColumnDefinition, ColumnType, PaginatedResponse, SqlStore diff --git a/llama_stack/providers/utils/sqlstore/sqlstore.py b/llama_stack/providers/utils/sqlstore/sqlstore.py index 9f7eefcf5..fc44402ae 100644 --- a/llama_stack/providers/utils/sqlstore/sqlstore.py +++ b/llama_stack/providers/utils/sqlstore/sqlstore.py @@ -11,7 +11,7 @@ from typing import Annotated, Literal from pydantic import BaseModel, Field -from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR +from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR from .api import SqlStore diff --git a/llama_stack/providers/utils/tools/mcp.py b/llama_stack/providers/utils/tools/mcp.py index 76593a4b8..02f7aaf8a 100644 --- a/llama_stack/providers/utils/tools/mcp.py +++ b/llama_stack/providers/utils/tools/mcp.py @@ -22,7 +22,7 @@ from llama_stack.apis.tools import ( ToolInvocationResult, ToolParameter, ) -from llama_stack.distribution.datatypes import AuthenticationRequiredError +from llama_stack.core.datatypes import AuthenticationRequiredError from llama_stack.log import get_logger from llama_stack.providers.utils.tools.ttl_dict import TTLDict diff --git a/llama_stack/templates/dell/dell.py b/llama_stack/templates/dell/dell.py index 64e01535c..743b4dc86 100644 --- a/llama_stack/templates/dell/dell.py +++ b/llama_stack/templates/dell/dell.py @@ -5,7 +5,7 @@ # the root directory of this source tree. from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ( +from llama_stack.core.datatypes import ( BuildProvider, ModelInput, Provider, diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py index 981c66bf5..24403a8ed 100644 --- a/llama_stack/templates/meta-reference-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py @@ -7,7 +7,7 @@ from pathlib import Path from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ( +from llama_stack.core.datatypes import ( BuildProvider, ModelInput, Provider, diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py index df82cf7c0..ccf28fce5 100644 --- a/llama_stack/templates/nvidia/nvidia.py +++ b/llama_stack/templates/nvidia/nvidia.py @@ -6,7 +6,7 @@ from pathlib import Path -from llama_stack.distribution.datatypes import BuildProvider, ModelInput, Provider, ShieldInput, ToolGroupInput +from llama_stack.core.datatypes import BuildProvider, ModelInput, Provider, ShieldInput, ToolGroupInput from llama_stack.providers.remote.datasetio.nvidia import NvidiaDatasetIOConfig from llama_stack.providers.remote.eval.nvidia import NVIDIAEvalConfig from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig diff --git a/llama_stack/templates/open-benchmark/open_benchmark.py b/llama_stack/templates/open-benchmark/open_benchmark.py index 0a0d9fb14..724ca0d4a 100644 --- a/llama_stack/templates/open-benchmark/open_benchmark.py +++ b/llama_stack/templates/open-benchmark/open_benchmark.py @@ -7,7 +7,7 @@ from llama_stack.apis.datasets import DatasetPurpose, URIDataSource from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ( +from llama_stack.core.datatypes import ( BenchmarkInput, BuildProvider, DatasetInput, diff --git a/llama_stack/templates/postgres-demo/postgres_demo.py b/llama_stack/templates/postgres-demo/postgres_demo.py index 
d9ded9a86..65345648a 100644 --- a/llama_stack/templates/postgres-demo/postgres_demo.py +++ b/llama_stack/templates/postgres-demo/postgres_demo.py @@ -6,7 +6,7 @@ from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ( +from llama_stack.core.datatypes import ( BuildProvider, ModelInput, Provider, diff --git a/llama_stack/templates/starter/starter.py b/llama_stack/templates/starter/starter.py index 3ea3c8f5e..54e200311 100644 --- a/llama_stack/templates/starter/starter.py +++ b/llama_stack/templates/starter/starter.py @@ -7,14 +7,14 @@ from typing import Any -from llama_stack.distribution.datatypes import ( +from llama_stack.core.datatypes import ( BuildProvider, Provider, ProviderSpec, ShieldInput, ToolGroupInput, ) -from llama_stack.distribution.utils.dynamic import instantiate_class_type +from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.providers.datatypes import RemoteProviderSpec from llama_stack.providers.inline.files.localfs.config import LocalfsFilesImplConfig from llama_stack.providers.inline.inference.sentence_transformers import ( diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index 084996cd4..4e5e6051b 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -14,7 +14,7 @@ from pydantic import BaseModel, Field from llama_stack.apis.datasets import DatasetPurpose from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ( +from llama_stack.core.datatypes import ( LLAMA_STACK_RUN_CONFIG_VERSION, Api, BenchmarkInput, @@ -27,8 +27,8 @@ from llama_stack.distribution.datatypes import ( ShieldInput, ToolGroupInput, ) -from llama_stack.distribution.distribution import get_provider_registry -from llama_stack.distribution.utils.dynamic import instantiate_class_type +from llama_stack.core.distribution import get_provider_registry +from llama_stack.core.utils.dynamic import instantiate_class_type from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig from llama_stack.providers.utils.kvstore.config import get_pip_packages as get_kv_pip_packages diff --git a/llama_stack/templates/watsonx/watsonx.py b/llama_stack/templates/watsonx/watsonx.py index 5d8332c4f..570389e03 100644 --- a/llama_stack/templates/watsonx/watsonx.py +++ b/llama_stack/templates/watsonx/watsonx.py @@ -7,7 +7,7 @@ from pathlib import Path from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import BuildProvider, ModelInput, Provider, ToolGroupInput +from llama_stack.core.datatypes import BuildProvider, ModelInput, Provider, ToolGroupInput from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) diff --git a/pyproject.toml b/pyproject.toml index be003bf92..f666000a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -235,17 +235,17 @@ follow_imports = "silent" exclude = [ # As we fix more and more of these, we should remove them from the list "^llama_stack/cli/download\\.py$", - "^llama_stack/distribution/build\\.py$", - "^llama_stack/distribution/client\\.py$", - "^llama_stack/distribution/request_headers\\.py$", - "^llama_stack/distribution/routers/", - "^llama_stack/distribution/routing_tables/", - "^llama_stack/distribution/server/endpoints\\.py$", - "^llama_stack/distribution/server/server\\.py$", - "^llama_stack/distribution/stack\\.py$", - 
"^llama_stack/distribution/store/registry\\.py$", - "^llama_stack/distribution/utils/exec\\.py$", - "^llama_stack/distribution/utils/prompt_for_config\\.py$", + "^llama_stack.core/build\\.py$", + "^llama_stack.core/client\\.py$", + "^llama_stack.core/request_headers\\.py$", + "^llama_stack.core/routers/", + "^llama_stack.core/routing_tables/", + "^llama_stack.core/server/endpoints\\.py$", + "^llama_stack.core/server/server\\.py$", + "^llama_stack.core/stack\\.py$", + "^llama_stack.core/store/registry\\.py$", + "^llama_stack.core/utils/exec\\.py$", + "^llama_stack.core/utils/prompt_for_config\\.py$", "^llama_stack/models/llama/llama3/interface\\.py$", "^llama_stack/models/llama/llama3/tokenizer\\.py$", "^llama_stack/models/llama/llama3/tool_utils\\.py$", diff --git a/scripts/provider_codegen.py b/scripts/provider_codegen.py index 6e316c539..80c5b7840 100755 --- a/scripts/provider_codegen.py +++ b/scripts/provider_codegen.py @@ -12,7 +12,7 @@ from typing import Any from rich.progress import Progress, SpinnerColumn, TextColumn -from llama_stack.distribution.distribution import get_provider_registry +from llama_stack.core.distribution import get_provider_registry REPO_ROOT = Path(__file__).parent.parent diff --git a/tests/integration/agents/test_openai_responses.py b/tests/integration/agents/test_openai_responses.py index 7ae48913b..784ab6893 100644 --- a/tests/integration/agents/test_openai_responses.py +++ b/tests/integration/agents/test_openai_responses.py @@ -6,7 +6,7 @@ import pytest from openai import BadRequestError, OpenAI -from llama_stack.distribution.library_client import LlamaStackAsLibraryClient +from llama_stack.core.library_client import LlamaStackAsLibraryClient @pytest.fixture diff --git a/tests/integration/files/test_files.py b/tests/integration/files/test_files.py index 118a751f0..b17c7db83 100644 --- a/tests/integration/files/test_files.py +++ b/tests/integration/files/test_files.py @@ -10,8 +10,8 @@ from unittest.mock import patch import pytest from openai import OpenAI -from llama_stack.distribution.datatypes import User -from llama_stack.distribution.library_client import LlamaStackAsLibraryClient +from llama_stack.core.datatypes import User +from llama_stack.core.library_client import LlamaStackAsLibraryClient def test_openai_client_basic_operations(compat_client, client_with_models): diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py index f6b5b3026..bdbe0d66f 100644 --- a/tests/integration/fixtures/common.py +++ b/tests/integration/fixtures/common.py @@ -20,7 +20,7 @@ from llama_stack_client import LlamaStackClient from openai import OpenAI from llama_stack import LlamaStackAsLibraryClient -from llama_stack.distribution.stack import run_config_from_adhoc_config_spec +from llama_stack.core.stack import run_config_from_adhoc_config_spec from llama_stack.env import get_env_or_fail DEFAULT_PORT = 8321 diff --git a/tests/integration/inference/test_openai_completion.py b/tests/integration/inference/test_openai_completion.py index 51a208b17..14254baa7 100644 --- a/tests/integration/inference/test_openai_completion.py +++ b/tests/integration/inference/test_openai_completion.py @@ -14,7 +14,7 @@ from openai import OpenAI from reportlab.lib.pagesizes import letter from reportlab.pdfgen import canvas -from llama_stack.distribution.library_client import LlamaStackAsLibraryClient +from llama_stack.core.library_client import LlamaStackAsLibraryClient from ..test_cases.test_case import TestCase diff --git 
a/tests/integration/inference/test_openai_embeddings.py b/tests/integration/inference/test_openai_embeddings.py index 1b8bd9038..2c545cc43 100644 --- a/tests/integration/inference/test_openai_embeddings.py +++ b/tests/integration/inference/test_openai_embeddings.py @@ -10,7 +10,7 @@ import struct import pytest from openai import OpenAI -from llama_stack.distribution.library_client import LlamaStackAsLibraryClient +from llama_stack.core.library_client import LlamaStackAsLibraryClient def decode_base64_to_floats(base64_string: str) -> list[float]: diff --git a/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py b/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py index c32d6cd17..4002f2e1f 100644 --- a/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py +++ b/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py @@ -10,8 +10,8 @@ from unittest.mock import patch import pytest -from llama_stack.distribution.access_control.access_control import default_policy -from llama_stack.distribution.datatypes import User +from llama_stack.core.access_control.access_control import default_policy +from llama_stack.core.datatypes import User from llama_stack.providers.utils.sqlstore.api import ColumnType from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig, SqliteSqlStoreConfig, sqlstore_impl @@ -186,7 +186,7 @@ async def test_authorized_store_attributes(mock_get_authenticated_user, authoriz @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") async def test_user_ownership_policy(mock_get_authenticated_user, authorized_store, request): """Test that 'user is owner' policies work correctly with record ownership""" - from llama_stack.distribution.access_control.datatypes import AccessRule, Action, Scope + from llama_stack.core.access_control.datatypes import AccessRule, Action, Scope backend_name = request.node.callspec.id diff --git a/tests/integration/tool_runtime/test_mcp.py b/tests/integration/tool_runtime/test_mcp.py index f208dcbea..91ed26684 100644 --- a/tests/integration/tool_runtime/test_mcp.py +++ b/tests/integration/tool_runtime/test_mcp.py @@ -10,7 +10,7 @@ import pytest from llama_stack_client import Agent from llama_stack import LlamaStackAsLibraryClient -from llama_stack.distribution.datatypes import AuthenticationRequiredError +from llama_stack.core.datatypes import AuthenticationRequiredError AUTH_TOKEN = "test-token" diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index a34c5b410..399f1b1cd 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -14,7 +14,7 @@ from openai import BadRequestError as OpenAIBadRequestError from openai import OpenAI from llama_stack.apis.vector_io import Chunk -from llama_stack.distribution.library_client import LlamaStackAsLibraryClient +from llama_stack.core.library_client import LlamaStackAsLibraryClient logger = logging.getLogger(__name__) diff --git a/tests/unit/cli/test_stack_config.py b/tests/unit/cli/test_stack_config.py index a41049006..daaf229e5 100644 --- a/tests/unit/cli/test_stack_config.py +++ b/tests/unit/cli/test_stack_config.py @@ -9,7 +9,7 @@ from datetime import datetime import pytest import yaml -from llama_stack.distribution.configure import ( +from 
llama_stack.core.configure import ( LLAMA_STACK_RUN_CONFIG_VERSION, parse_and_maybe_upgrade_config, ) diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py index 308b5c28f..155ad0142 100644 --- a/tests/unit/distribution/routers/test_routing_tables.py +++ b/tests/unit/distribution/routers/test_routing_tables.py @@ -15,14 +15,14 @@ from llama_stack.apis.models import Model, ModelType from llama_stack.apis.shields.shields import Shield from llama_stack.apis.tools import ListToolDefsResponse, ToolDef, ToolGroup, ToolParameter from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.distribution.datatypes import RegistryEntrySource -from llama_stack.distribution.routing_tables.benchmarks import BenchmarksRoutingTable -from llama_stack.distribution.routing_tables.datasets import DatasetsRoutingTable -from llama_stack.distribution.routing_tables.models import ModelsRoutingTable -from llama_stack.distribution.routing_tables.scoring_functions import ScoringFunctionsRoutingTable -from llama_stack.distribution.routing_tables.shields import ShieldsRoutingTable -from llama_stack.distribution.routing_tables.toolgroups import ToolGroupsRoutingTable -from llama_stack.distribution.routing_tables.vector_dbs import VectorDBsRoutingTable +from llama_stack.core.datatypes import RegistryEntrySource +from llama_stack.core.routing_tables.benchmarks import BenchmarksRoutingTable +from llama_stack.core.routing_tables.datasets import DatasetsRoutingTable +from llama_stack.core.routing_tables.models import ModelsRoutingTable +from llama_stack.core.routing_tables.scoring_functions import ScoringFunctionsRoutingTable +from llama_stack.core.routing_tables.shields import ShieldsRoutingTable +from llama_stack.core.routing_tables.toolgroups import ToolGroupsRoutingTable +from llama_stack.core.routing_tables.vector_dbs import VectorDBsRoutingTable class Impl: diff --git a/tests/unit/distribution/routing_tables/test_vector_dbs.py b/tests/unit/distribution/routing_tables/test_vector_dbs.py index 28887e1cf..789eda433 100644 --- a/tests/unit/distribution/routing_tables/test_vector_dbs.py +++ b/tests/unit/distribution/routing_tables/test_vector_dbs.py @@ -24,10 +24,10 @@ from llama_stack.apis.vector_io.vector_io import ( VectorStoreObject, VectorStoreSearchResponsePage, ) -from llama_stack.distribution.access_control.datatypes import AccessRule, Scope -from llama_stack.distribution.datatypes import User -from llama_stack.distribution.request_headers import request_provider_data_context -from llama_stack.distribution.routing_tables.vector_dbs import VectorDBsRoutingTable +from llama_stack.core.access_control.datatypes import AccessRule, Scope +from llama_stack.core.datatypes import User +from llama_stack.core.request_headers import request_provider_data_context +from llama_stack.core.routing_tables.vector_dbs import VectorDBsRoutingTable from tests.unit.distribution.routers.test_routing_tables import Impl, InferenceImpl, ModelsRoutingTable diff --git a/tests/unit/distribution/test_build_path.py b/tests/unit/distribution/test_build_path.py index 555cdda4a..f71dd77ec 100644 --- a/tests/unit/distribution/test_build_path.py +++ b/tests/unit/distribution/test_build_path.py @@ -9,8 +9,8 @@ from pathlib import Path from llama_stack.cli.stack._build import ( _run_stack_build_command_from_build_config, ) -from llama_stack.distribution.datatypes import BuildConfig, DistributionSpec -from llama_stack.distribution.utils.image_types import LlamaStackImageType 
+from llama_stack.core.datatypes import BuildConfig, DistributionSpec +from llama_stack.core.utils.image_types import LlamaStackImageType def test_container_build_passes_path(monkeypatch, tmp_path): diff --git a/tests/unit/distribution/test_context.py b/tests/unit/distribution/test_context.py index 7914be51d..43dd5a6e8 100644 --- a/tests/unit/distribution/test_context.py +++ b/tests/unit/distribution/test_context.py @@ -10,7 +10,7 @@ from contextvars import ContextVar import pytest -from llama_stack.distribution.utils.context import preserve_contexts_async_generator +from llama_stack.core.utils.context import preserve_contexts_async_generator async def test_preserve_contexts_with_exception(): diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py index 04d5cde67..c72106e46 100644 --- a/tests/unit/distribution/test_distribution.py +++ b/tests/unit/distribution/test_distribution.py @@ -11,8 +11,8 @@ import pytest import yaml from pydantic import BaseModel, Field, ValidationError -from llama_stack.distribution.datatypes import Api, Provider, StackRunConfig -from llama_stack.distribution.distribution import get_provider_registry +from llama_stack.core.datatypes import Api, Provider, StackRunConfig +from llama_stack.core.distribution import get_provider_registry from llama_stack.providers.datatypes import ProviderSpec @@ -260,7 +260,7 @@ pip_packages: """Test loading an external provider from a module (success path).""" from types import SimpleNamespace - from llama_stack.distribution.datatypes import Provider, StackRunConfig + from llama_stack.core.datatypes import Provider, StackRunConfig from llama_stack.providers.datatypes import Api, ProviderSpec # Simulate a provider module with get_provider_spec @@ -299,7 +299,7 @@ pip_packages: def test_external_provider_from_module_not_found(self, mock_providers): """Test handling ModuleNotFoundError for missing provider module.""" - from llama_stack.distribution.datatypes import Provider, StackRunConfig + from llama_stack.core.datatypes import Provider, StackRunConfig import_module_side_effect = make_import_module_side_effect(raise_for_external=True) @@ -323,7 +323,7 @@ pip_packages: def test_external_provider_from_module_missing_get_provider_spec(self, mock_providers): """Test handling missing get_provider_spec in provider module (should raise ValueError).""" - from llama_stack.distribution.datatypes import Provider, StackRunConfig + from llama_stack.core.datatypes import Provider, StackRunConfig import_module_side_effect = make_import_module_side_effect(missing_get_provider_spec=True) @@ -346,7 +346,7 @@ pip_packages: def test_external_provider_from_module_building(self, mock_providers): """Test loading an external provider from a module during build (building=True, partial spec).""" - from llama_stack.distribution.datatypes import BuildConfig, BuildProvider, DistributionSpec + from llama_stack.core.datatypes import BuildConfig, BuildProvider, DistributionSpec from llama_stack.providers.datatypes import Api # No importlib patch needed, should not import module when type of `config` is BuildConfig or DistributionSpec diff --git a/tests/unit/distribution/test_library_client_initialization.py b/tests/unit/distribution/test_library_client_initialization.py index 2c394fc0e..e510d513d 100644 --- a/tests/unit/distribution/test_library_client_initialization.py +++ b/tests/unit/distribution/test_library_client_initialization.py @@ -13,7 +13,7 @@ initialize() on the library client, preventing AttributeError 
regressions. import pytest -from llama_stack.distribution.library_client import ( +from llama_stack.core.library_client import ( AsyncLlamaStackAsLibraryClient, LlamaStackAsLibraryClient, ) diff --git a/tests/unit/files/test_files.py b/tests/unit/files/test_files.py index c3ec25116..04f33e97d 100644 --- a/tests/unit/files/test_files.py +++ b/tests/unit/files/test_files.py @@ -9,7 +9,7 @@ import pytest from llama_stack.apis.common.responses import Order from llama_stack.apis.files import OpenAIFilePurpose -from llama_stack.distribution.access_control.access_control import default_policy +from llama_stack.core.access_control.access_control import default_policy from llama_stack.providers.inline.files.localfs import ( LocalfsFilesImpl, LocalfsFilesImplConfig, diff --git a/tests/unit/fixtures.py b/tests/unit/fixtures.py index 7174d2e78..443a1d371 100644 --- a/tests/unit/fixtures.py +++ b/tests/unit/fixtures.py @@ -6,7 +6,7 @@ import pytest -from llama_stack.distribution.store.registry import CachedDiskDistributionRegistry, DiskDistributionRegistry +from llama_stack.core.store.registry import CachedDiskDistributionRegistry, DiskDistributionRegistry from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig from llama_stack.providers.utils.kvstore.sqlite import SqliteKVStoreImpl diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index 6485e3512..2ab5b557e 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -40,7 +40,7 @@ from llama_stack.apis.inference import ( OpenAIUserMessageParam, ) from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime -from llama_stack.distribution.access_control.access_control import default_policy +from llama_stack.core.access_control.access_control import default_policy from llama_stack.providers.inline.agents.meta_reference.openai_responses import ( OpenAIResponsesImpl, ) diff --git a/tests/unit/providers/agents/test_persistence_access_control.py b/tests/unit/providers/agents/test_persistence_access_control.py index 26001fcf1..93dd8ad95 100644 --- a/tests/unit/providers/agents/test_persistence_access_control.py +++ b/tests/unit/providers/agents/test_persistence_access_control.py @@ -12,7 +12,7 @@ import pytest from llama_stack.apis.agents import Turn from llama_stack.apis.inference import CompletionMessage, StopReason -from llama_stack.distribution.datatypes import User +from llama_stack.core.datatypes import User from llama_stack.providers.inline.agents.meta_reference.persistence import AgentPersistence, AgentSessionInfo diff --git a/tests/unit/providers/inference/test_inference_client_caching.py b/tests/unit/providers/inference/test_inference_client_caching.py index ba36a3e3d..b371cf907 100644 --- a/tests/unit/providers/inference/test_inference_client_caching.py +++ b/tests/unit/providers/inference/test_inference_client_caching.py @@ -7,7 +7,7 @@ import json from unittest.mock import MagicMock -from llama_stack.distribution.request_headers import request_provider_data_context +from llama_stack.core.request_headers import request_provider_data_context from llama_stack.providers.remote.inference.groq.config import GroqConfig from llama_stack.providers.remote.inference.groq.groq import GroqInferenceAdapter from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig 
diff --git a/tests/unit/providers/inference/test_openai_base_url_config.py b/tests/unit/providers/inference/test_openai_base_url_config.py index 453ac9089..150f6210b 100644 --- a/tests/unit/providers/inference/test_openai_base_url_config.py +++ b/tests/unit/providers/inference/test_openai_base_url_config.py @@ -7,7 +7,7 @@ import os from unittest.mock import AsyncMock, MagicMock, patch -from llama_stack.distribution.stack import replace_env_vars +from llama_stack.core.stack import replace_env_vars from llama_stack.providers.remote.inference.openai.config import OpenAIConfig from llama_stack.providers.remote.inference.openai.openai import OpenAIInferenceAdapter diff --git a/tests/unit/providers/nvidia/test_parameters.py b/tests/unit/providers/nvidia/test_parameters.py index 7e4323bd7..ad381da26 100644 --- a/tests/unit/providers/nvidia/test_parameters.py +++ b/tests/unit/providers/nvidia/test_parameters.py @@ -19,7 +19,7 @@ from llama_stack.apis.post_training.post_training import ( OptimizerType, TrainingConfig, ) -from llama_stack.distribution.library_client import convert_pydantic_to_json_value +from llama_stack.core.library_client import convert_pydantic_to_json_value from llama_stack.providers.remote.post_training.nvidia.post_training import ( NvidiaPostTrainingAdapter, NvidiaPostTrainingConfig, diff --git a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py index bc474f3bc..91148605d 100644 --- a/tests/unit/providers/nvidia/test_supervised_fine_tuning.py +++ b/tests/unit/providers/nvidia/test_supervised_fine_tuning.py @@ -19,7 +19,7 @@ from llama_stack.apis.post_training.post_training import ( QATFinetuningConfig, TrainingConfig, ) -from llama_stack.distribution.library_client import convert_pydantic_to_json_value +from llama_stack.core.library_client import convert_pydantic_to_json_value from llama_stack.providers.remote.post_training.nvidia.post_training import ( ListNvidiaPostTrainingJobs, NvidiaPostTrainingAdapter, diff --git a/tests/unit/providers/test_configs.py b/tests/unit/providers/test_configs.py index 99081c8b0..867cfffbc 100644 --- a/tests/unit/providers/test_configs.py +++ b/tests/unit/providers/test_configs.py @@ -7,8 +7,8 @@ import pytest from pydantic import BaseModel -from llama_stack.distribution.distribution import get_provider_registry, providable_apis -from llama_stack.distribution.utils.dynamic import instantiate_class_type +from llama_stack.core.distribution import get_provider_registry, providable_apis +from llama_stack.core.utils.dynamic import instantiate_class_type class TestProviderConfigurations: diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py index 87fe18d54..4ea4a20b9 100644 --- a/tests/unit/registry/test_registry.py +++ b/tests/unit/registry/test_registry.py @@ -9,7 +9,7 @@ import pytest from llama_stack.apis.inference import Model from llama_stack.apis.vector_dbs import VectorDB -from llama_stack.distribution.store.registry import ( +from llama_stack.core.store.registry import ( KEY_FORMAT, CachedDiskDistributionRegistry, DiskDistributionRegistry, diff --git a/tests/unit/registry/test_registry_acl.py b/tests/unit/registry/test_registry_acl.py index 6cfb20944..09b9a3cfb 100644 --- a/tests/unit/registry/test_registry_acl.py +++ b/tests/unit/registry/test_registry_acl.py @@ -6,8 +6,8 @@ from llama_stack.apis.models import ModelType -from llama_stack.distribution.datatypes import ModelWithOwner, User -from llama_stack.distribution.store.registry 
import CachedDiskDistributionRegistry +from llama_stack.core.datatypes import ModelWithOwner, User +from llama_stack.core.store.registry import CachedDiskDistributionRegistry async def test_registry_cache_with_acl(cached_disk_dist_registry): diff --git a/tests/unit/server/test_access_control.py b/tests/unit/server/test_access_control.py index d6a420c13..55449804a 100644 --- a/tests/unit/server/test_access_control.py +++ b/tests/unit/server/test_access_control.py @@ -12,9 +12,9 @@ from pydantic import TypeAdapter, ValidationError from llama_stack.apis.datatypes import Api from llama_stack.apis.models import ModelType -from llama_stack.distribution.access_control.access_control import AccessDeniedError, is_action_allowed -from llama_stack.distribution.datatypes import AccessRule, ModelWithOwner, User -from llama_stack.distribution.routing_tables.models import ModelsRoutingTable +from llama_stack.core.access_control.access_control import AccessDeniedError, is_action_allowed +from llama_stack.core.datatypes import AccessRule, ModelWithOwner, User +from llama_stack.core.routing_tables.models import ModelsRoutingTable class AsyncMock(MagicMock): @@ -40,7 +40,7 @@ async def test_setup(cached_disk_dist_registry): yield cached_disk_dist_registry, routing_table -@patch("llama_stack.distribution.routing_tables.common.get_authenticated_user") +@patch("llama_stack.core.routing_tables.common.get_authenticated_user") async def test_access_control_with_cache(mock_get_authenticated_user, test_setup): registry, routing_table = test_setup model_public = ModelWithOwner( @@ -104,7 +104,7 @@ async def test_access_control_with_cache(mock_get_authenticated_user, test_setup await routing_table.get_model("model-admin") -@patch("llama_stack.distribution.routing_tables.common.get_authenticated_user") +@patch("llama_stack.core.routing_tables.common.get_authenticated_user") async def test_access_control_and_updates(mock_get_authenticated_user, test_setup): registry, routing_table = test_setup model_public = ModelWithOwner( @@ -142,7 +142,7 @@ async def test_access_control_and_updates(mock_get_authenticated_user, test_setu assert model.identifier == "model-updates" -@patch("llama_stack.distribution.routing_tables.common.get_authenticated_user") +@patch("llama_stack.core.routing_tables.common.get_authenticated_user") async def test_access_control_empty_attributes(mock_get_authenticated_user, test_setup): registry, routing_table = test_setup model = ModelWithOwner( @@ -166,7 +166,7 @@ async def test_access_control_empty_attributes(mock_get_authenticated_user, test assert "model-empty-attrs" in model_ids -@patch("llama_stack.distribution.routing_tables.common.get_authenticated_user") +@patch("llama_stack.core.routing_tables.common.get_authenticated_user") async def test_no_user_attributes(mock_get_authenticated_user, test_setup): registry, routing_table = test_setup model_public = ModelWithOwner( @@ -196,7 +196,7 @@ async def test_no_user_attributes(mock_get_authenticated_user, test_setup): assert all_models.data[0].identifier == "model-public-2" -@patch("llama_stack.distribution.routing_tables.common.get_authenticated_user") +@patch("llama_stack.core.routing_tables.common.get_authenticated_user") async def test_automatic_access_attributes(mock_get_authenticated_user, test_setup): """Test that newly created resources inherit access attributes from their creator.""" registry, routing_table = test_setup @@ -275,7 +275,7 @@ async def test_setup_with_access_policy(cached_disk_dist_registry): yield routing_table 
-@patch("llama_stack.distribution.routing_tables.common.get_authenticated_user") +@patch("llama_stack.core.routing_tables.common.get_authenticated_user") async def test_access_policy(mock_get_authenticated_user, test_setup_with_access_policy): routing_table = test_setup_with_access_policy mock_get_authenticated_user.return_value = User( @@ -561,6 +561,6 @@ def test_invalid_condition(): ], ) def test_condition_reprs(condition): - from llama_stack.distribution.access_control.conditions import parse_condition + from llama_stack.core.access_control.conditions import parse_condition assert condition == str(parse_condition(condition)) diff --git a/tests/unit/server/test_auth.py b/tests/unit/server/test_auth.py index adf0140e2..37b543976 100644 --- a/tests/unit/server/test_auth.py +++ b/tests/unit/server/test_auth.py @@ -11,7 +11,7 @@ import pytest from fastapi import FastAPI from fastapi.testclient import TestClient -from llama_stack.distribution.datatypes import ( +from llama_stack.core.datatypes import ( AuthenticationConfig, AuthProviderType, CustomAuthConfig, @@ -19,9 +19,9 @@ from llama_stack.distribution.datatypes import ( OAuth2JWKSConfig, OAuth2TokenAuthConfig, ) -from llama_stack.distribution.request_headers import User -from llama_stack.distribution.server.auth import AuthenticationMiddleware, _has_required_scope -from llama_stack.distribution.server.auth_providers import ( +from llama_stack.core.request_headers import User +from llama_stack.core.server.auth import AuthenticationMiddleware, _has_required_scope +from llama_stack.core.server.auth_providers import ( get_attributes_from_claims, ) @@ -150,10 +150,10 @@ def scope_middleware_with_mocks(mock_auth_endpoint): else: raise ValueError("No matching route") - import llama_stack.distribution.server.auth + import llama_stack.core.server.auth - llama_stack.distribution.server.auth.find_matching_route = mock_find_matching_route - llama_stack.distribution.server.auth.initialize_route_impls = lambda impls: {} + llama_stack.core.server.auth.find_matching_route = mock_find_matching_route + llama_stack.core.server.auth.initialize_route_impls = lambda impls: {} return middleware, mock_app diff --git a/tests/unit/server/test_auth_github.py b/tests/unit/server/test_auth_github.py index 21d2f2c6a..d87643579 100644 --- a/tests/unit/server/test_auth_github.py +++ b/tests/unit/server/test_auth_github.py @@ -11,8 +11,8 @@ import pytest from fastapi import FastAPI from fastapi.testclient import TestClient -from llama_stack.distribution.datatypes import AuthenticationConfig, AuthProviderType, GitHubTokenAuthConfig -from llama_stack.distribution.server.auth import AuthenticationMiddleware +from llama_stack.core.datatypes import AuthenticationConfig, AuthProviderType, GitHubTokenAuthConfig +from llama_stack.core.server.auth import AuthenticationMiddleware class MockResponse: @@ -78,7 +78,7 @@ def test_authenticated_endpoint_with_invalid_bearer_format(github_token_client): assert "Invalid Authorization header format" in response.json()["error"]["message"] -@patch("llama_stack.distribution.server.auth_providers.httpx.AsyncClient") +@patch("llama_stack.core.server.auth_providers.httpx.AsyncClient") def test_authenticated_endpoint_with_valid_github_token(mock_client_class, github_token_client): """Test accessing protected endpoint with valid GitHub token""" # Mock the GitHub API responses @@ -118,7 +118,7 @@ def test_authenticated_endpoint_with_valid_github_token(mock_client_class, githu assert calls[0][1]["headers"]["Authorization"] == "Bearer 
github_token_123" -@patch("llama_stack.distribution.server.auth_providers.httpx.AsyncClient") +@patch("llama_stack.core.server.auth_providers.httpx.AsyncClient") def test_authenticated_endpoint_with_invalid_github_token(mock_client_class, github_token_client): """Test accessing protected endpoint with invalid GitHub token""" # Mock the GitHub API to return 401 Unauthorized @@ -135,7 +135,7 @@ def test_authenticated_endpoint_with_invalid_github_token(mock_client_class, git ) -@patch("llama_stack.distribution.server.auth_providers.httpx.AsyncClient") +@patch("llama_stack.core.server.auth_providers.httpx.AsyncClient") def test_github_enterprise_support(mock_client_class): """Test GitHub Enterprise support with custom API base URL""" app = FastAPI() diff --git a/tests/unit/server/test_quota.py b/tests/unit/server/test_quota.py index 763bf8e94..85acbc66a 100644 --- a/tests/unit/server/test_quota.py +++ b/tests/unit/server/test_quota.py @@ -9,8 +9,8 @@ from fastapi import FastAPI, Request from fastapi.testclient import TestClient from starlette.middleware.base import BaseHTTPMiddleware -from llama_stack.distribution.datatypes import QuotaConfig, QuotaPeriod -from llama_stack.distribution.server.quota import QuotaMiddleware +from llama_stack.core.datatypes import QuotaConfig, QuotaPeriod +from llama_stack.core.server.quota import QuotaMiddleware from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig diff --git a/tests/unit/server/test_replace_env_vars.py b/tests/unit/server/test_replace_env_vars.py index 55817044d..0dda682c0 100644 --- a/tests/unit/server/test_replace_env_vars.py +++ b/tests/unit/server/test_replace_env_vars.py @@ -8,7 +8,7 @@ import os import pytest -from llama_stack.distribution.stack import replace_env_vars +from llama_stack.core.stack import replace_env_vars @pytest.fixture diff --git a/tests/unit/server/test_resolver.py b/tests/unit/server/test_resolver.py index a348590b1..1ee1b2f47 100644 --- a/tests/unit/server/test_resolver.py +++ b/tests/unit/server/test_resolver.py @@ -12,14 +12,14 @@ from unittest.mock import AsyncMock, MagicMock from pydantic import BaseModel, Field from llama_stack.apis.inference import Inference -from llama_stack.distribution.datatypes import ( +from llama_stack.core.datatypes import ( Api, Provider, StackRunConfig, ) -from llama_stack.distribution.resolver import resolve_impls -from llama_stack.distribution.routers.inference import InferenceRouter -from llama_stack.distribution.routing_tables.models import ModelsRoutingTable +from llama_stack.core.resolver import resolve_impls +from llama_stack.core.routers.inference import InferenceRouter +from llama_stack.core.routing_tables.models import ModelsRoutingTable from llama_stack.providers.datatypes import InlineProviderSpec, ProviderSpec diff --git a/tests/unit/server/test_server.py b/tests/unit/server/test_server.py index d17d58b8a..803111fc7 100644 --- a/tests/unit/server/test_server.py +++ b/tests/unit/server/test_server.py @@ -10,9 +10,9 @@ from fastapi import HTTPException from openai import BadRequestError from pydantic import ValidationError -from llama_stack.distribution.access_control.access_control import AccessDeniedError -from llama_stack.distribution.datatypes import AuthenticationRequiredError -from llama_stack.distribution.server.server import translate_exception +from llama_stack.core.access_control.access_control import AccessDeniedError +from llama_stack.core.datatypes import AuthenticationRequiredError +from llama_stack.core.server.server import 
translate_exception
 
 
 class TestTranslateException:
@@ -29,7 +29,7 @@ class TestTranslateException:
     def test_translate_access_denied_error_with_context(self):
         """Test that AccessDeniedError with context includes detailed information."""
-        from llama_stack.distribution.datatypes import User
+        from llama_stack.core.datatypes import User
 
         # Create mock user and resource
         user = User("test-user", {"roles": ["user"], "teams": ["dev"]})
diff --git a/tests/unit/server/test_sse.py b/tests/unit/server/test_sse.py
index d42857186..54afe4ee4 100644
--- a/tests/unit/server/test_sse.py
+++ b/tests/unit/server/test_sse.py
@@ -8,7 +8,7 @@ import asyncio
 from unittest.mock import AsyncMock, MagicMock
 
 from llama_stack.apis.common.responses import PaginatedResponse
-from llama_stack.distribution.server.server import create_dynamic_typed_route, create_sse_event, sse_generator
+from llama_stack.core.server.server import create_dynamic_typed_route, create_sse_event, sse_generator
 
 
 async def test_sse_generator_basic():
diff --git a/tests/unit/utils/test_authorized_sqlstore.py b/tests/unit/utils/test_authorized_sqlstore.py
index 066f67a98..90eb706e4 100644
--- a/tests/unit/utils/test_authorized_sqlstore.py
+++ b/tests/unit/utils/test_authorized_sqlstore.py
@@ -7,9 +7,9 @@
 from tempfile import TemporaryDirectory
 from unittest.mock import patch
 
-from llama_stack.distribution.access_control.access_control import default_policy, is_action_allowed
-from llama_stack.distribution.access_control.datatypes import Action
-from llama_stack.distribution.datatypes import User
+from llama_stack.core.access_control.access_control import default_policy, is_action_allowed
+from llama_stack.core.access_control.datatypes import Action
+from llama_stack.core.datatypes import User
 from llama_stack.providers.utils.sqlstore.api import ColumnType
 from llama_stack.providers.utils.sqlstore.authorized_sqlstore import AuthorizedSqlStore, SqlRecord
 from llama_stack.providers.utils.sqlstore.sqlalchemy_sqlstore import SqlAlchemySqlStoreImpl
diff --git a/tests/verifications/openai_api/test_responses.py b/tests/verifications/openai_api/test_responses.py
index 08bbb2252..e312de6aa 100644
--- a/tests/verifications/openai_api/test_responses.py
+++ b/tests/verifications/openai_api/test_responses.py
@@ -13,7 +13,7 @@ import openai
 import pytest
 
 from llama_stack import LlamaStackAsLibraryClient
-from llama_stack.distribution.datatypes import AuthenticationRequiredError
+from llama_stack.core.datatypes import AuthenticationRequiredError
 from tests.common.mcp import dependency_tools, make_mcp_server
 from tests.verifications.openai_api.fixtures.fixtures import (
     case_id_generator,

From cf731461322584252ceacbee6cd629aab8431cc5 Mon Sep 17 00:00:00 2001
From: Nehanth Narendrula
Date: Thu, 31 Jul 2025 02:33:36 -0400
Subject: [PATCH 2/2] feat: Enable DPO training with HuggingFace inline provider (#2825)

What does this PR do?

This PR adds support for Direct Preference Optimization (DPO) training via
the existing HuggingFace inline provider. It introduces a new DPO training
recipe, config schema updates, dataset integration, and end-to-end testing
to support preference-based fine-tuning with TRL.
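For orientation, a minimal, hypothetical sketch of how a client might kick off
one of these DPO jobs through the post-training API; the distribution name,
model, dataset id, and config values are placeholders, not taken from this
patch, and the DPO-specific knobs (`dpo_beta`, `dpo_loss_type`, etc.) come from
the provider config added below rather than the request:

```python
# Hypothetical usage sketch; every id and value below is a placeholder.
import uuid

from llama_stack.core.library_client import LlamaStackAsLibraryClient

client = LlamaStackAsLibraryClient("starter")  # any distro running this provider
client.initialize()

# Schedules a "dpo-training" job, handled by HFDPOAlignmentSingleDevice below.
job = client.post_training.preference_optimize(
    job_uuid=str(uuid.uuid4()),
    finetuned_model="meta-llama/Llama-3.2-1B-Instruct",  # placeholder model
    algorithm_config={  # illustrative DPO alignment settings; fields may differ
        "reward_scale": 1.0,
        "reward_clip": 5.0,
        "epsilon": 0.1,
        "gamma": 0.1,
    },
    training_config={  # minimal config; needs a registered preference dataset
        "n_epochs": 1,
        "data_config": {
            "dataset_id": "my-preference-dataset",  # placeholder dataset
            "batch_size": 1,
            "shuffle": False,
            "data_format": "instruct",
        },
    },
    hyperparam_search_config={},
    logger_config={},
)
print(f"Scheduled DPO job: {job.job_uuid}")
```

Checkpoints would land under `{dpo_output_dir}/{job_uuid}`, mirroring how the
handler below constructs `output_dir`.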
Test Plan

Added integration test:
tests/integration/post_training/test_post_training.py::TestPostTraining::test_preference_optimize

Ran tests on both CPU and CUDA environments

---------

Co-authored-by: Ubuntu
Co-authored-by: Ashwin Bharambe
---
 .../post_training/inline_huggingface.md       |   4 +
 .../post_training/huggingface/config.py       |   6 +
 .../huggingface/post_training.py              |  45 +-
 .../recipes/finetune_single_device.py         | 236 +--------
 .../recipes/finetune_single_device_dpo.py     | 485 ++++++++++++++++++
 .../inline/post_training/huggingface/utils.py | 269 ++++++++++
 .../post_training/test_post_training.py       |  83 ++-
 7 files changed, 913 insertions(+), 215 deletions(-)
 create mode 100644 llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py
 create mode 100644 llama_stack/providers/inline/post_training/huggingface/utils.py

diff --git a/docs/source/providers/post_training/inline_huggingface.md b/docs/source/providers/post_training/inline_huggingface.md
index 82b08bf7a..0a8745e71 100644
--- a/docs/source/providers/post_training/inline_huggingface.md
+++ b/docs/source/providers/post_training/inline_huggingface.md
@@ -24,6 +24,10 @@ HuggingFace-based post-training provider for fine-tuning models using the Huggin
 | `weight_decay` | `` | No | 0.01 | |
 | `dataloader_num_workers` | `` | No | 4 | |
 | `dataloader_pin_memory` | `` | No | True | |
+| `dpo_beta` | `` | No | 0.1 | |
+| `use_reference_model` | `` | No | True | |
+| `dpo_loss_type` | `Literal['sigmoid', 'hinge', 'ipo', 'kto_pair']` | No | sigmoid | |
+| `dpo_output_dir` | `` | No | ./checkpoints/dpo | |
 
 ## Sample Configuration
diff --git a/llama_stack/providers/inline/post_training/huggingface/config.py b/llama_stack/providers/inline/post_training/huggingface/config.py
index 06c6d8073..dae8fcc04 100644
--- a/llama_stack/providers/inline/post_training/huggingface/config.py
+++ b/llama_stack/providers/inline/post_training/huggingface/config.py
@@ -67,6 +67,12 @@ class HuggingFacePostTrainingConfig(BaseModel):
     # Can improve data transfer speed to GPU but uses more memory
     dataloader_pin_memory: bool = True
 
+    # DPO-specific parameters
+    dpo_beta: float = 0.1
+    use_reference_model: bool = True
+    dpo_loss_type: Literal["sigmoid", "hinge", "ipo", "kto_pair"] = "sigmoid"
+    dpo_output_dir: str = "./checkpoints/dpo"
+
     @classmethod
     def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
         return {"checkpoint_format": "huggingface", "distributed_backend": None, "device": "cpu"}
diff --git a/llama_stack/providers/inline/post_training/huggingface/post_training.py b/llama_stack/providers/inline/post_training/huggingface/post_training.py
index 0b2760792..81622e2b7 100644
--- a/llama_stack/providers/inline/post_training/huggingface/post_training.py
+++ b/llama_stack/providers/inline/post_training/huggingface/post_training.py
@@ -25,6 +25,9 @@ from llama_stack.providers.inline.post_training.huggingface.config import (
 from llama_stack.providers.inline.post_training.huggingface.recipes.finetune_single_device import (
     HFFinetuningSingleDevice,
 )
+from llama_stack.providers.inline.post_training.huggingface.recipes.finetune_single_device_dpo import (
+    HFDPOAlignmentSingleDevice,
+)
 from llama_stack.providers.utils.scheduler import JobArtifact, Scheduler
 from llama_stack.providers.utils.scheduler import JobStatus as SchedulerJobStatus
 from llama_stack.schema_utils import webmethod
@@ -36,6 +39,7 @@ class TrainingArtifactType(Enum):
 
 
 _JOB_TYPE_SUPERVISED_FINE_TUNE = "supervised-fine-tune"
+_JOB_TYPE_DPO_TRAINING = "dpo-training"
 
 
 class HuggingFacePostTrainingImpl:
@@ -119,12 +123,37 @@ class HuggingFacePostTrainingImpl:
         hyperparam_search_config: dict[str, Any],
         logger_config: dict[str, Any],
     ) -> PostTrainingJob:
-        raise NotImplementedError("DPO alignment is not implemented yet")
+        async def handler(on_log_message_cb, on_status_change_cb, on_artifact_collected_cb):
+            on_log_message_cb("Starting HF DPO alignment")
 
-    async def get_training_jobs(self) -> ListPostTrainingJobsResponse:
-        return ListPostTrainingJobsResponse(
-            data=[PostTrainingJob(job_uuid=job.id) for job in self._scheduler.get_jobs()]
-        )
+            recipe = HFDPOAlignmentSingleDevice(
+                job_uuid=job_uuid,
+                datasetio_api=self.datasetio_api,
+                datasets_api=self.datasets_api,
+            )
+
+            resources_allocated, checkpoints = await recipe.train(
+                model=finetuned_model,
+                output_dir=f"{self.config.dpo_output_dir}/{job_uuid}",
+                job_uuid=job_uuid,
+                dpo_config=algorithm_config,
+                config=training_config,
+                provider_config=self.config,
+            )
+
+            on_artifact_collected_cb(self._resources_stats_to_artifact(resources_allocated))
+            if checkpoints:
+                for checkpoint in checkpoints:
+                    artifact = self._checkpoint_to_artifact(checkpoint)
+                    on_artifact_collected_cb(artifact)
+            else:
+                on_log_message_cb("Warning: No checkpoints were saved during DPO training")
+
+            on_status_change_cb(SchedulerJobStatus.completed)
+            on_log_message_cb("HF DPO alignment completed")
+
+        job_uuid = self._scheduler.schedule(_JOB_TYPE_DPO_TRAINING, job_uuid, handler)
+        return PostTrainingJob(job_uuid=job_uuid)
 
     @staticmethod
     def _get_artifacts_metadata_by_type(job, artifact_type):
@@ -174,3 +203,9 @@ class HuggingFacePostTrainingImpl:
     async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse | None:
         job = self._scheduler.get_job(job_uuid)
         return PostTrainingJobArtifactsResponse(job_uuid=job_uuid, checkpoints=self._get_checkpoints(job))
+
+    @webmethod(route="/post-training/jobs", method="GET")
+    async def get_training_jobs(self) -> ListPostTrainingJobsResponse:
+        return ListPostTrainingJobsResponse(
+            data=[PostTrainingJob(job_uuid=job.id) for job in self._scheduler.get_jobs()]
+        )
diff --git a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
index 2a024eb25..2574b995b 100644
--- a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
+++ b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device.py
@@ -8,30 +8,13 @@ import gc
 import json
 import logging
 import multiprocessing
-import os
-import signal
-import sys
-from datetime import UTC, datetime
 from pathlib import Path
 from typing import Any
 
-import psutil
-
-from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device
-
-# Set tokenizer parallelism environment variable
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
-# Force PyTorch to use OpenBLAS instead of MKL
-os.environ["MKL_THREADING_LAYER"] = "GNU"
-os.environ["MKL_SERVICE_FORCE_INTEL"] = "0"
-os.environ["MKL_NUM_THREADS"] = "1"
-
 import torch
 from datasets import Dataset
 from peft import LoraConfig
 from transformers import (
-    AutoConfig,
     AutoModelForCausalLM,
     AutoTokenizer,
 )
@@ -45,93 +28,25 @@ from llama_stack.apis.post_training import (
     LoraFinetuningConfig,
     TrainingConfig,
 )
+from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device
 
 from ..config import HuggingFacePostTrainingConfig
+from ..utils import (
+    calculate_training_steps,
+    create_checkpoints,
+    get_memory_stats,
+    get_save_strategy,
+    load_model,
+    load_rows_from_dataset,
+    setup_environment,
+    setup_signal_handlers,
+    setup_torch_device,
+    split_dataset,
+)
 
 logger = logging.getLogger(__name__)
 
 
-def get_gb(to_convert: int) -> str:
-    """Converts memory stats to GB and formats to 2 decimal places.
-    Args:
-        to_convert: Memory value in bytes
-    Returns:
-        str: Memory value in GB formatted to 2 decimal places
-    """
-    return f"{(to_convert / (1024**3)):.2f}"
-
-
-def get_memory_stats(device: torch.device) -> dict[str, Any]:
-    """Get memory statistics for the given device."""
-    stats = {
-        "system_memory": {
-            "total": get_gb(psutil.virtual_memory().total),
-            "available": get_gb(psutil.virtual_memory().available),
-            "used": get_gb(psutil.virtual_memory().used),
-            "percent": psutil.virtual_memory().percent,
-        }
-    }
-
-    if device.type == "cuda":
-        stats["device_memory"] = {
-            "allocated": get_gb(torch.cuda.memory_allocated(device)),
-            "reserved": get_gb(torch.cuda.memory_reserved(device)),
-            "max_allocated": get_gb(torch.cuda.max_memory_allocated(device)),
-        }
-    elif device.type == "mps":
-        # MPS doesn't provide direct memory stats, but we can track system memory
-        stats["device_memory"] = {
-            "note": "MPS memory stats not directly available",
-            "system_memory_used": get_gb(psutil.virtual_memory().used),
-        }
-    elif device.type == "cpu":
-        # For CPU, we track process memory usage
-        process = psutil.Process()
-        stats["device_memory"] = {
-            "process_rss": get_gb(process.memory_info().rss),
-            "process_vms": get_gb(process.memory_info().vms),
-            "process_percent": process.memory_percent(),
-        }
-
-    return stats
-
-
-def setup_torch_device(device_str: str) -> torch.device:
-    """Initialize and validate a PyTorch device.
-    This function handles device initialization and validation for different device types:
-    - CUDA: Validates CUDA availability and handles device selection
-    - MPS: Validates MPS availability for Apple Silicon
-    - CPU: Basic validation
-    - HPU: Raises error as it's not supported
-    Args:
-        device_str: String specifying the device ('cuda', 'cpu', 'mps')
-    Returns:
-        torch.device: The initialized and validated device
-    Raises:
-        RuntimeError: If device initialization fails or device is not supported
-    """
-    try:
-        device = torch.device(device_str)
-    except RuntimeError as e:
-        raise RuntimeError(f"Error getting Torch Device {str(e)}") from e
-
-    # Validate device capabilities
-    if device.type == "cuda":
-        if not torch.cuda.is_available():
-            raise RuntimeError(
-                f"{device.type}: Torch has no CUDA/ROCm support or could not detect a compatible device."
-            )
-        if device.index is None:
-            device = torch.device(device.type, torch.cuda.current_device())
-    elif device.type == "mps":
-        if not torch.backends.mps.is_available():
-            raise RuntimeError(f"{device.type}: Torch has no MPS support or could not detect a compatible device.")
-    elif device.type == "hpu":
-        raise RuntimeError(f"{device.type}: training does not support Intel Gaudi.")
-
-    return device
-
-
 class HFFinetuningSingleDevice:
     def __init__(
         self,
@@ -262,19 +177,6 @@ class HFFinetuningSingleDevice:
             remove_columns=ds.column_names,
         )
 
-    async def _setup_data(self, dataset_id: str) -> list[dict[str, Any]]:
-        """Load dataset from llama stack dataset provider"""
-        try:
-            all_rows = await self.datasetio_api.iterrows(
-                dataset_id=dataset_id,
-                limit=-1,
-            )
-            if not isinstance(all_rows.data, list):
-                raise RuntimeError("Expected dataset data to be a list")
-            return all_rows.data
-        except Exception as e:
-            raise RuntimeError(f"Failed to load dataset: {str(e)}") from e
-
     def _run_training_sync(
         self,
         model: str,
@@ -327,7 +229,7 @@
 
         # Load dataset
         logger.info(f"Loading dataset: {config.data_config.dataset_id}")
-        rows = await self._setup_data(config.data_config.dataset_id)
+        rows = await load_rows_from_dataset(self.datasetio_api, config.data_config.dataset_id)
         if not self.validate_dataset_format(rows):
             raise ValueError("Dataset is missing required fields: input_query, expected_answer, chat_completion_input")
         logger.info(f"Loaded {len(rows)} rows from dataset")
@@ -369,47 +271,10 @@
             raise ValueError(f"Failed to create dataset: {str(e)}") from e
 
         # Split dataset
-        logger.info("Splitting dataset into train and validation sets")
-        train_val_split = ds.train_test_split(test_size=0.1, seed=42)
-        train_dataset = train_val_split["train"]
-        eval_dataset = train_val_split["test"]
-        logger.info(f"Split dataset into {len(train_dataset)} training and {len(eval_dataset)} validation examples")
+        train_dataset, eval_dataset = split_dataset(ds)
 
         return train_dataset, eval_dataset, tokenizer
 
-    def load_model(
-        self,
-        model: str,
-        device: torch.device,
-        provider_config: HuggingFacePostTrainingConfig,
-    ) -> AutoModelForCausalLM:
-        """Load and initialize the model for training.
- Args: - model: The model identifier to load - device: The device to load the model onto - provider_config: Provider-specific configuration - Returns: - The loaded and initialized model - Raises: - RuntimeError: If model loading fails - """ - logger.info("Loading the base model") - try: - model_config = AutoConfig.from_pretrained(model, **provider_config.model_specific_config) - model_obj = AutoModelForCausalLM.from_pretrained( - model, - torch_dtype="auto" if device.type != "cpu" else "float32", - quantization_config=None, - config=model_config, - **provider_config.model_specific_config, - ) - # Always move model to specified device - model_obj = model_obj.to(device) - logger.info(f"Model loaded and moved to device: {model_obj.device}") - return model_obj - except Exception as e: - raise RuntimeError(f"Failed to load model: {str(e)}") from e - def setup_training_args( self, config: TrainingConfig, @@ -439,27 +304,12 @@ class HFFinetuningSingleDevice: raise ValueError("DataConfig is required for training") data_config = config.data_config - # Calculate steps - total_steps = steps_per_epoch * config.n_epochs - max_steps = min(config.max_steps_per_epoch, total_steps) - logging_steps = max(1, steps_per_epoch // 50) # Log 50 times per epoch - - logger.info("Training configuration:") - logger.info(f"- Steps per epoch: {steps_per_epoch}") - logger.info(f"- Total steps: {total_steps}") - logger.info(f"- Max steps: {max_steps}") - logger.info(f"- Logging steps: {logging_steps}") - - # Configure save strategy - save_strategy = "no" - eval_strategy = "no" - if output_dir_path: - save_strategy = "epoch" - eval_strategy = "epoch" - logger.info(f"Will save checkpoints to {output_dir_path}") + # Calculate steps and get save strategy + step_info = calculate_training_steps(steps_per_epoch, config) + save_strategy, eval_strategy = get_save_strategy(output_dir_path) return SFTConfig( - max_steps=max_steps, + max_steps=step_info["max_steps"], output_dir=str(output_dir_path) if output_dir_path is not None else None, num_train_epochs=config.n_epochs, per_device_train_batch_size=data_config.batch_size, @@ -483,7 +333,7 @@ class HFFinetuningSingleDevice: load_best_model_at_end=True if output_dir_path else False, metric_for_best_model="eval_loss", greater_is_better=False, - logging_steps=logging_steps, + logging_steps=step_info["logging_steps"], ) def save_model( @@ -523,13 +373,11 @@ class HFFinetuningSingleDevice: ) -> None: """Run the training process with signal handling.""" - def signal_handler(signum, frame): - """Handle termination signals gracefully.""" - logger.info(f"Received signal {signum}, initiating graceful shutdown") - sys.exit(0) + # Setup environment variables + setup_environment() - signal.signal(signal.SIGTERM, signal_handler) - signal.signal(signal.SIGINT, signal_handler) + # Setup signal handlers + setup_signal_handlers() # Convert config dicts back to objects logger.info("Initializing configuration objects") @@ -558,7 +406,7 @@ class HFFinetuningSingleDevice: ) # Load model - model_obj = self.load_model(model, device, provider_config_obj) + model_obj = load_model(model, device, provider_config_obj) # Initialize trainer logger.info("Initializing SFTTrainer") @@ -633,7 +481,7 @@ class HFFinetuningSingleDevice: # Train in a separate process logger.info("Starting training in separate process") try: - # Set multiprocessing start method to 'spawn' for CUDA/MPS compatibility + # Setup multiprocessing for device if device.type in ["cuda", "mps"]: multiprocessing.set_start_method("spawn", 
force=True) @@ -663,37 +511,7 @@ class HFFinetuningSingleDevice: checkpoints = [] if output_dir_path: - # Get all checkpoint directories and sort them numerically - checkpoint_dirs = sorted( - [d for d in output_dir_path.glob("checkpoint-*") if d.is_dir()], - key=lambda x: int(x.name.split("-")[1]), - ) - - # Add all checkpoint directories - for epoch_number, checkpoint_dir in enumerate(checkpoint_dirs, start=1): - # Get the creation time of the directory - created_time = datetime.fromtimestamp(os.path.getctime(checkpoint_dir), tz=UTC) - - checkpoint = Checkpoint( - identifier=checkpoint_dir.name, - created_at=created_time, - epoch=epoch_number, - post_training_job_id=job_uuid, - path=str(checkpoint_dir), - ) - checkpoints.append(checkpoint) - - # Add the merged model as a checkpoint - merged_model_path = output_dir_path / "merged_model" - if merged_model_path.exists(): - checkpoint = Checkpoint( - identifier=f"{model}-sft-{config.n_epochs}", - created_at=datetime.now(UTC), - epoch=config.n_epochs, - post_training_job_id=job_uuid, - path=str(merged_model_path), - ) - checkpoints.append(checkpoint) + checkpoints = create_checkpoints(output_dir_path, job_uuid, model, config, "merged_model") return memory_stats, checkpoints if checkpoints else None finally: diff --git a/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py new file mode 100644 index 000000000..a7c19faac --- /dev/null +++ b/llama_stack/providers/inline/post_training/huggingface/recipes/finetune_single_device_dpo.py @@ -0,0 +1,485 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
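+
+# Overview (informal): DPO trains the policy model against a frozen reference
+# model on (prompt, chosen, rejected) triples. For the default "sigmoid" loss
+# type the per-example objective is roughly
+#
+#   loss = -log(sigmoid(beta * ((log_p_policy(chosen) - log_p_ref(chosen))
+#                               - (log_p_policy(rejected) - log_p_ref(rejected)))))
+#
+# where beta is DPOAlignmentConfig.beta, passed through to DPOConfig below.
+# TRL's DPOTrainer computes this loss internally; this recipe only prepares
+# the data, the training arguments, and the policy/reference models.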
+ +import gc +import logging +import multiprocessing +from pathlib import Path +from typing import Any + +import torch +from datasets import Dataset +from transformers import ( + AutoTokenizer, +) +from trl import DPOConfig, DPOTrainer + +from llama_stack.apis.datasetio import DatasetIO +from llama_stack.apis.datasets import Datasets +from llama_stack.apis.post_training import ( + Checkpoint, + DPOAlignmentConfig, + TrainingConfig, +) +from llama_stack.providers.inline.post_training.common.utils import evacuate_model_from_device + +from ..config import HuggingFacePostTrainingConfig +from ..utils import ( + calculate_training_steps, + create_checkpoints, + get_memory_stats, + get_save_strategy, + load_model, + load_rows_from_dataset, + setup_environment, + setup_signal_handlers, + setup_torch_device, + split_dataset, +) + +logger = logging.getLogger(__name__) + + +class HFDPOAlignmentSingleDevice: + def __init__( + self, + job_uuid: str, + datasetio_api: DatasetIO, + datasets_api: Datasets, + ): + self.datasetio_api = datasetio_api + self.datasets_api = datasets_api + self.job_uuid = job_uuid + + def validate_dataset_format(self, rows: list[dict]) -> None: + """Validate that the dataset has the required fields for DPO training.""" + required_fields = ["prompt", "chosen", "rejected"] + + if not rows: + logger.warning("Dataset is empty") + raise ValueError("Dataset is empty") + + for i, row in enumerate(rows): + if not isinstance(row, dict): + logger.warning(f"Row {i} is not a dictionary") + raise ValueError(f"Row {i} is not a dictionary") + + for field in required_fields: + if field not in row: + logger.warning(f"Row {i} missing required DPO field: {field}") + raise ValueError(f"Row {i} missing required DPO field: {field}") + + # Handle both string and list formats + if field == "prompt": + # Prompt should be a string + if not isinstance(row[field], str): + logger.warning(f"Row {i} field '{field}' is not a string") + raise ValueError(f"Row {i} field '{field}' is not a string") + if not row[field].strip(): + logger.warning(f"Row {i} field '{field}' is empty") + raise ValueError(f"Row {i} field '{field}' is empty") + else: + # chosen/rejected can be either strings or lists of messages + if isinstance(row[field], str): + if not row[field].strip(): + logger.warning(f"Row {i} field '{field}' is empty") + raise ValueError(f"Row {i} field '{field}' is empty") + elif isinstance(row[field], list): + if not row[field]: + logger.warning(f"Row {i} field '{field}' is empty list") + raise ValueError(f"Row {i} field '{field}' is empty list") + else: + logger.warning(f"Row {i} field '{field}' is neither string nor list") + raise ValueError(f"Row {i} field '{field}' is neither string nor list") + + logger.info(f"DPO dataset validation passed: {len(rows)} preference examples") + + def _process_dpo_format(self, row: dict) -> tuple[str | None, str | None, str | None]: + """Process a row in DPO format, handling both string and conversation list formats.""" + if all(field in row for field in ["prompt", "chosen", "rejected"]): + prompt = row["prompt"] + + # Handle chosen field - convert list to string if needed + if isinstance(row["chosen"], list): + # For conversation format, concatenate messages + chosen = "\n".join( + [msg.get("content", "") if isinstance(msg, dict) else str(msg) for msg in row["chosen"]] + ) + else: + chosen = row["chosen"] + + # Handle rejected field - convert list to string if needed + if isinstance(row["rejected"], list): + # For conversation format, concatenate messages + rejected = 
"\n".join( + [msg.get("content", "") if isinstance(msg, dict) else str(msg) for msg in row["rejected"]] + ) + else: + rejected = row["rejected"] + + return prompt, chosen, rejected + return None, None, None + + def _format_text_for_dpo(self, prompt: str, response: str, provider_config: HuggingFacePostTrainingConfig) -> str: + """Format prompt and response text based on model requirements.""" + if hasattr(provider_config, "chat_template") and provider_config.chat_template: + # Use the chat template, supporting both {prompt}/{response} and {input}/{output} + template = provider_config.chat_template + # Try prompt/response first (DPO style) + if "{prompt}" in template and "{response}" in template: + return template.format(prompt=prompt, response=response) + # Fall back to input/output (SFT style) + elif "{input}" in template and "{output}" in template: + return template.format(input=prompt, output=response) + else: + # If template doesn't have expected placeholders, use default + return f"{prompt}\n{response}" + return f"{prompt}\n{response}" + + def _create_dataset( + self, rows: list[dict], config: TrainingConfig, provider_config: HuggingFacePostTrainingConfig + ) -> Dataset: + """Create and preprocess the dataset for DPO.""" + dpo_examples = [] + for row in rows: + prompt, chosen, rejected = self._process_dpo_format(row) + + if prompt and chosen and rejected: + # Format the texts + chosen_formatted = self._format_text_for_dpo(prompt, chosen, provider_config) + rejected_formatted = self._format_text_for_dpo(prompt, rejected, provider_config) + + dpo_examples.append( + { + "prompt": prompt, + "chosen": chosen_formatted, + "rejected": rejected_formatted, + } + ) + + if not dpo_examples: + raise ValueError("No valid preference examples found in dataset") + + logger.info(f"Created DPO dataset with {len(dpo_examples)} preference pairs") + return Dataset.from_list(dpo_examples) + + def _preprocess_dataset( + self, ds: Dataset, tokenizer: AutoTokenizer, provider_config: HuggingFacePostTrainingConfig + ) -> Dataset: + """Preprocess the dataset with tokenizer for DPO.""" + # DPOTrainer expects raw text, so we don't tokenize here + # Just return the dataset as is + return ds + + def _run_training_sync( + self, + model: str, + provider_config: dict[str, Any], + dpo_config: dict[str, Any], + config: dict[str, Any], + output_dir_path: Path | None, + ) -> None: + """Synchronous wrapper for running DPO training process.""" + import asyncio + + logger.info("Starting DPO training process with async wrapper") + asyncio.run( + self._run_training( + model=model, + provider_config=provider_config, + dpo_config=dpo_config, + config=config, + output_dir_path=output_dir_path, + ) + ) + + async def load_dataset( + self, + model: str, + config: TrainingConfig, + provider_config: HuggingFacePostTrainingConfig, + ) -> tuple[Dataset, Dataset, AutoTokenizer]: + """Load and prepare the dataset for DPO training.""" + # Validate data config + if not config.data_config: + raise ValueError("DataConfig is required for DPO training") + + # Load dataset + logger.info(f"Loading dataset: {config.data_config.dataset_id}") + rows = await load_rows_from_dataset(self.datasetio_api, config.data_config.dataset_id) + self.validate_dataset_format(rows) + logger.info(f"Loaded {len(rows)} rows from dataset") + + # Initialize tokenizer + logger.info(f"Initializing tokenizer for model: {model}") + try: + tokenizer = AutoTokenizer.from_pretrained(model, **provider_config.model_specific_config) + + # Set pad token to eos token if not present + 
if not tokenizer.pad_token: + tokenizer.pad_token = tokenizer.eos_token + + # Set padding side to left for DPO + tokenizer.padding_side = "left" + + # Set truncation side to right to keep the beginning of the sequence + tokenizer.truncation_side = "right" + + # Set model max length to match provider config + tokenizer.model_max_length = provider_config.max_seq_length + + logger.info("Tokenizer initialized successfully for DPO") + except Exception as e: + raise RuntimeError(f"Failed to initialize tokenizer: {str(e)}") from e + + # Create and preprocess dataset + logger.info("Creating and preprocessing dataset for DPO") + try: + ds = self._create_dataset(rows, config, provider_config) + ds = self._preprocess_dataset(ds, tokenizer, provider_config) + logger.info(f"Dataset created with {len(ds)} examples") + except Exception as e: + raise ValueError(f"Failed to create dataset: {str(e)}") from e + + # Split dataset + train_dataset, eval_dataset = split_dataset(ds) + + return train_dataset, eval_dataset, tokenizer + + def setup_training_args( + self, + config: TrainingConfig, + provider_config: HuggingFacePostTrainingConfig, + dpo_config: DPOAlignmentConfig, + device: torch.device, + output_dir_path: Path | None, + steps_per_epoch: int, + ) -> DPOConfig: + """Setup DPO training arguments.""" + logger.info("Configuring DPO training arguments") + lr = 5e-7 # Lower learning rate for DPO + if config.optimizer_config: + lr = config.optimizer_config.lr + logger.info(f"Using custom learning rate: {lr}") + + # Validate data config + if not config.data_config: + raise ValueError("DataConfig is required for training") + data_config = config.data_config + + # Calculate steps and get save strategy + step_info = calculate_training_steps(steps_per_epoch, config) + save_strategy, eval_strategy = get_save_strategy(output_dir_path) + + logger.info("DPO training configuration:") + logger.info(f"- DPO beta: {dpo_config.beta}") + logger.info(f"- DPO loss type: {provider_config.dpo_loss_type}") + + # Calculate max prompt length as half of max sequence length + max_prompt_length = provider_config.max_seq_length // 2 + + return DPOConfig( + max_steps=step_info["max_steps"], + output_dir=str(output_dir_path) if output_dir_path is not None else None, + num_train_epochs=config.n_epochs, + per_device_train_batch_size=data_config.batch_size, + fp16=device.type == "cuda", + bf16=False, # Causes CPU issues. 
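+            # Both strategies resolve to "epoch" when an output dir is set and
+            # to "no" otherwise (see get_save_strategy in ..utils)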
+ eval_strategy=eval_strategy, + use_cpu=True if device.type == "cpu" and not torch.backends.mps.is_available() else False, + save_strategy=save_strategy, + report_to="none", + max_length=provider_config.max_seq_length, + max_prompt_length=max_prompt_length, + gradient_accumulation_steps=config.gradient_accumulation_steps, + gradient_checkpointing=provider_config.gradient_checkpointing, + learning_rate=lr, + warmup_ratio=provider_config.warmup_ratio, + weight_decay=provider_config.weight_decay, + remove_unused_columns=False, + dataloader_pin_memory=provider_config.dataloader_pin_memory, + dataloader_num_workers=provider_config.dataloader_num_workers, + load_best_model_at_end=True if output_dir_path else False, + metric_for_best_model="eval_loss", + greater_is_better=False, + logging_steps=step_info["logging_steps"], + save_total_limit=provider_config.save_total_limit, + # DPO specific parameters + beta=dpo_config.beta, + loss_type=provider_config.dpo_loss_type, + ) + + def save_model( + self, + trainer: DPOTrainer, + output_dir_path: Path, + ) -> None: + """Save the trained DPO model.""" + logger.info("Saving final DPO model") + + save_path = output_dir_path / "dpo_model" + logger.info(f"Saving model to {save_path}") + + # Save model and tokenizer + trainer.save_model(str(save_path)) + + async def _run_training( + self, + model: str, + provider_config: dict[str, Any], + dpo_config: dict[str, Any], + config: dict[str, Any], + output_dir_path: Path | None, + ) -> None: + """Run the DPO training process with signal handling.""" + + # Setup environment variables + setup_environment() + + # Setup signal handlers + setup_signal_handlers() + + # Convert config dicts back to objects + logger.info("Initializing configuration objects") + provider_config_obj = HuggingFacePostTrainingConfig(**provider_config) + config_obj = TrainingConfig(**config) + dpo_config_obj = DPOAlignmentConfig(**dpo_config) + + # Initialize and validate device + device = setup_torch_device(provider_config_obj.device) + logger.info(f"Using device '{device}'") + + # Load dataset and tokenizer + train_dataset, eval_dataset, tokenizer = await self.load_dataset(model, config_obj, provider_config_obj) + + # Calculate steps per epoch + if not config_obj.data_config: + raise ValueError("DataConfig is required for training") + steps_per_epoch = len(train_dataset) // config_obj.data_config.batch_size + + # Setup training arguments + training_args = self.setup_training_args( + config_obj, + provider_config_obj, + dpo_config_obj, + device, + output_dir_path, + steps_per_epoch, + ) + + # Load model and reference model + model_obj = load_model(model, device, provider_config_obj) + ref_model = None + if provider_config_obj.use_reference_model: + logger.info("Loading separate reference model for DPO") + ref_model = load_model(model, device, provider_config_obj) + else: + logger.info("Using shared reference model for DPO") + + # Initialize DPO trainer + logger.info("Initializing DPOTrainer") + trainer = DPOTrainer( + model=model_obj, + ref_model=ref_model, + args=training_args, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + processing_class=tokenizer, + ) + + try: + # Train + logger.info("Starting DPO training") + trainer.train() + logger.info("DPO training completed successfully") + + # Save final model if output directory is provided + if output_dir_path: + logger.info(f"Saving model to output directory: {output_dir_path}") + self.save_model(trainer, output_dir_path) + logger.info("Model save completed") + + finally: + # 
Clean up resources + logger.info("Cleaning up resources") + if hasattr(trainer, "model"): + evacuate_model_from_device(trainer.model, device.type) + if ref_model: + evacuate_model_from_device(ref_model, device.type) + del trainer + del ref_model + gc.collect() + logger.info("Cleanup completed") + logger.info("DPO training process finishing successfully") + + async def train( + self, + model: str, + output_dir: str | None, + job_uuid: str, + dpo_config: DPOAlignmentConfig, + config: TrainingConfig, + provider_config: HuggingFacePostTrainingConfig, + ) -> tuple[dict[str, Any], list[Checkpoint] | None]: + """Train a model using HuggingFace's DPOTrainer""" + # Initialize and validate device + device = setup_torch_device(provider_config.device) + logger.info(f"Using device '{device}'") + + output_dir_path = None + if output_dir: + output_dir_path = Path(output_dir) + + # Track memory stats + memory_stats = { + "initial": get_memory_stats(device), + "after_training": None, + "final": None, + } + + # Validate data config + if not config.data_config: + raise ValueError("DataConfig is required for training") + + # Train in a separate process + logger.info("Starting DPO training in separate process") + try: + # Setup multiprocessing for device + if device.type in ["cuda", "mps"]: + multiprocessing.set_start_method("spawn", force=True) + + process = multiprocessing.Process( + target=self._run_training_sync, + kwargs={ + "model": model, + "provider_config": provider_config.model_dump(), + "dpo_config": dpo_config.model_dump(), + "config": config.model_dump(), + "output_dir_path": output_dir_path, + }, + ) + process.start() + + # Monitor the process + while process.is_alive(): + process.join(timeout=1) # Check every second + if not process.is_alive(): + break + + # Get the return code + if process.exitcode != 0: + raise RuntimeError(f"DPO training failed with exit code {process.exitcode}") + + memory_stats["after_training"] = get_memory_stats(device) + + checkpoints = [] + if output_dir_path: + checkpoints = create_checkpoints(output_dir_path, job_uuid, model, config, "dpo_model") + + return memory_stats, checkpoints if checkpoints else None + finally: + memory_stats["final"] = get_memory_stats(device) + gc.collect() diff --git a/llama_stack/providers/inline/post_training/huggingface/utils.py b/llama_stack/providers/inline/post_training/huggingface/utils.py new file mode 100644 index 000000000..3147c19ab --- /dev/null +++ b/llama_stack/providers/inline/post_training/huggingface/utils.py @@ -0,0 +1,269 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
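+
+# Shared training helpers factored out of the SFT recipe so the DPO recipe can
+# reuse them: environment and signal setup, device validation, memory stats,
+# dataset loading and splitting, model loading, step calculation, and
+# checkpoint listing.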
+ +import logging +import os +import signal +import sys +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +import psutil +import torch +from datasets import Dataset +from transformers import AutoConfig, AutoModelForCausalLM + +from llama_stack.apis.datasetio import DatasetIO +from llama_stack.apis.post_training import Checkpoint, TrainingConfig + +from .config import HuggingFacePostTrainingConfig + +logger = logging.getLogger(__name__) + + +def setup_environment(): + """Setup common environment variables for training.""" + os.environ["TOKENIZERS_PARALLELISM"] = "false" + os.environ["MKL_THREADING_LAYER"] = "GNU" + os.environ["MKL_SERVICE_FORCE_INTEL"] = "0" + os.environ["MKL_NUM_THREADS"] = "1" + + +def bytes_to_gb(to_convert: int) -> str: + """Converts memory stats to GB and formats to 2 decimal places. + Args: + to_convert: Memory value in bytes + Returns: + str: Memory value in GB formatted to 2 decimal places + """ + return f"{(to_convert / (1024**3)):.2f}" + + +def get_memory_stats(device: torch.device) -> dict[str, Any]: + """Get memory statistics for the given device.""" + stats = { + "system_memory": { + "total": bytes_to_gb(psutil.virtual_memory().total), + "available": bytes_to_gb(psutil.virtual_memory().available), + "used": bytes_to_gb(psutil.virtual_memory().used), + "percent": psutil.virtual_memory().percent, + } + } + + if device.type == "cuda": + stats["device_memory"] = { + "allocated": bytes_to_gb(torch.cuda.memory_allocated(device)), + "reserved": bytes_to_gb(torch.cuda.memory_reserved(device)), + "max_allocated": bytes_to_gb(torch.cuda.max_memory_allocated(device)), + } + elif device.type == "mps": + # MPS doesn't provide direct memory stats, but we can track system memory + stats["device_memory"] = { + "note": "MPS memory stats not directly available", + "system_memory_used": bytes_to_gb(psutil.virtual_memory().used), + } + elif device.type == "cpu": + # For CPU, we track process memory usage + process = psutil.Process() + stats["device_memory"] = { + "process_rss": bytes_to_gb(process.memory_info().rss), + "process_vms": bytes_to_gb(process.memory_info().vms), + "process_percent": process.memory_percent(), + } + + return stats + + +def setup_torch_device(device_str: str) -> torch.device: + """Initialize and validate a PyTorch device. + This function handles device initialization and validation for different device types: + - CUDA: Validates CUDA availability and handles device selection + - MPS: Validates MPS availability for Apple Silicon + - CPU: Basic validation + - HPU: Raises error as it's not supported + Args: + device_str: String specifying the device ('cuda', 'cpu', 'mps') + Returns: + torch.device: The initialized and validated device + Raises: + RuntimeError: If device initialization fails or device is not supported + """ + try: + device = torch.device(device_str) + except RuntimeError as e: + raise RuntimeError(f"Error getting Torch Device {str(e)}") from e + + # Validate device capabilities + if device.type == "cuda": + if not torch.cuda.is_available(): + raise RuntimeError( + f"{device.type}: Torch has no CUDA/ROCm support or could not detect a compatible device." 
+ ) + if device.index is None: + device = torch.device(device.type, torch.cuda.current_device()) + elif device.type == "mps": + if not torch.backends.mps.is_available(): + raise RuntimeError(f"{device.type}: Torch has no MPS support or could not detect a compatible device.") + elif device.type == "hpu": + raise RuntimeError(f"{device.type}: training does not support Intel Gaudi.") + + return device + + +async def load_rows_from_dataset(datasetio_api: DatasetIO, dataset_id: str) -> list[dict[str, Any]]: + """Load dataset from llama stack dataset provider""" + try: + all_rows = await datasetio_api.iterrows( + dataset_id=dataset_id, + limit=-1, + ) + if not isinstance(all_rows.data, list): + raise RuntimeError("Expected dataset data to be a list") + return all_rows.data + except Exception as e: + raise RuntimeError(f"Failed to load dataset: {str(e)}") from e + + +def load_model( + model: str, + device: torch.device, + provider_config: HuggingFacePostTrainingConfig, +) -> AutoModelForCausalLM: + """Load and initialize the model for training. + Args: + model: The model identifier to load + device: The device to load the model onto + provider_config: Provider-specific configuration + Returns: + The loaded and initialized model + Raises: + RuntimeError: If model loading fails + """ + logger.info("Loading the base model") + try: + model_config = AutoConfig.from_pretrained(model, **provider_config.model_specific_config) + model_obj = AutoModelForCausalLM.from_pretrained( + model, + torch_dtype="auto" if device.type != "cpu" else "float32", + quantization_config=None, + config=model_config, + **provider_config.model_specific_config, + ) + # Always move model to specified device + model_obj = model_obj.to(device) + logger.info(f"Model loaded and moved to device: {model_obj.device}") + return model_obj + except Exception as e: + raise RuntimeError(f"Failed to load model: {str(e)}") from e + + +def split_dataset(ds: Dataset) -> tuple[Dataset, Dataset]: + """Split dataset into train and validation sets. + Args: + ds: Dataset to split + Returns: + tuple: (train_dataset, eval_dataset) + """ + logger.info("Splitting dataset into train and validation sets") + train_val_split = ds.train_test_split(test_size=0.1, seed=42) + train_dataset = train_val_split["train"] + eval_dataset = train_val_split["test"] + logger.info(f"Split dataset into {len(train_dataset)} training and {len(eval_dataset)} validation examples") + return train_dataset, eval_dataset + + +def setup_signal_handlers(): + """Setup signal handlers for graceful shutdown.""" + + def signal_handler(signum, frame): + logger.info(f"Received signal {signum}, initiating graceful shutdown") + sys.exit(0) + + signal.signal(signal.SIGTERM, signal_handler) + signal.signal(signal.SIGINT, signal_handler) + + +def calculate_training_steps(steps_per_epoch: int, config: TrainingConfig) -> dict[str, int]: + """Calculate training steps and logging configuration. 
+ Args: + steps_per_epoch: Number of training steps per epoch + config: Training configuration + Returns: + dict: Dictionary with calculated step values + """ + total_steps = steps_per_epoch * config.n_epochs + max_steps = min(config.max_steps_per_epoch, total_steps) + logging_steps = max(1, steps_per_epoch // 50) # Log 50 times per epoch + + logger.info("Training configuration:") + logger.info(f"- Steps per epoch: {steps_per_epoch}") + logger.info(f"- Total steps: {total_steps}") + logger.info(f"- Max steps: {max_steps}") + logger.info(f"- Logging steps: {logging_steps}") + + return {"total_steps": total_steps, "max_steps": max_steps, "logging_steps": logging_steps} + + +def get_save_strategy(output_dir_path: Path | None) -> tuple[str, str]: + """Get save and evaluation strategy based on output directory. + Args: + output_dir_path: Optional path to save the model + Returns: + tuple: (save_strategy, eval_strategy) + """ + if output_dir_path: + logger.info(f"Will save checkpoints to {output_dir_path}") + return "epoch", "epoch" + return "no", "no" + + +def create_checkpoints( + output_dir_path: Path, job_uuid: str, model: str, config: TrainingConfig, final_model_name: str +) -> list[Checkpoint]: + """Create checkpoint objects from training output. + Args: + output_dir_path: Path to the training output directory + job_uuid: Unique identifier for the training job + model: Model identifier + config: Training configuration + final_model_name: Name of the final model directory ("merged_model" for SFT, "dpo_model" for DPO) + Returns: + List of Checkpoint objects + """ + checkpoints = [] + + # Add checkpoint directories + checkpoint_dirs = sorted( + [d for d in output_dir_path.glob("checkpoint-*") if d.is_dir()], + key=lambda x: int(x.name.split("-")[1]), + ) + + for epoch_number, checkpoint_dir in enumerate(checkpoint_dirs, start=1): + created_time = datetime.fromtimestamp(os.path.getctime(checkpoint_dir), tz=UTC) + checkpoint = Checkpoint( + identifier=checkpoint_dir.name, + created_at=created_time, + epoch=epoch_number, + post_training_job_id=job_uuid, + path=str(checkpoint_dir), + ) + checkpoints.append(checkpoint) + + # Add final model + final_model_path = output_dir_path / final_model_name + if final_model_path.exists(): + training_type = "sft" if final_model_name == "merged_model" else "dpo" + checkpoint = Checkpoint( + identifier=f"{model}-{training_type}-{config.n_epochs}", + created_at=datetime.now(UTC), + epoch=config.n_epochs, + post_training_job_id=job_uuid, + path=str(final_model_path), + ) + checkpoints.append(checkpoint) + + return checkpoints diff --git a/tests/integration/post_training/test_post_training.py b/tests/integration/post_training/test_post_training.py index 93ca4c32d..05f8717d6 100644 --- a/tests/integration/post_training/test_post_training.py +++ b/tests/integration/post_training/test_post_training.py @@ -13,6 +13,9 @@ import pytest from llama_stack.apis.post_training import ( DataConfig, + DatasetFormat, + DPOAlignmentConfig, + DPOLossType, LoraFinetuningConfig, TrainingConfig, ) @@ -51,6 +54,7 @@ sys.stdout.reconfigure(line_buffering=True) # +# SFT test class TestPostTraining: @pytest.mark.integration @pytest.mark.parametrize( @@ -90,7 +94,7 @@ class TestPostTraining: dataset_id=dataset.identifier, batch_size=1, shuffle=False, - data_format="instruct", + data_format=DatasetFormat.instruct, ) # setup training config with minimal settings @@ -132,6 +136,8 @@ class TestPostTraining: artifacts = llama_stack_client.post_training.job.artifacts(job_uuid=job_uuid) 
logger.info(f"Job artifacts: {artifacts}") + logger.info(f"Registered dataset with ID: {dataset.identifier}") + # TODO: Fix these tests to properly represent the Jobs API in training # # async def test_get_training_jobs(self, post_training_stack): @@ -159,3 +165,78 @@ class TestPostTraining: # assert job_artifacts.checkpoints[0].identifier == "instructlab/granite-7b-lab" # assert job_artifacts.checkpoints[0].epoch == 0 # assert "/.llama/checkpoints/Llama3.2-3B-Instruct-sft-0" in job_artifacts.checkpoints[0].path + + # DPO test + @pytest.mark.integration + @pytest.mark.parametrize( + "purpose, source", + [ + ( + "post-training/messages", + { + "type": "uri", + "uri": "huggingface://datasets/trl-internal-testing/hh-rlhf-helpful-base-trl-style?split=train[:20]", + }, + ), + ], + ) + @pytest.mark.timeout(360) + def test_preference_optimize(self, llama_stack_client, purpose, source): + logger.info("Starting DPO preference optimization test") + + # register preference dataset to train + dataset = llama_stack_client.datasets.register( + purpose=purpose, + source=source, + ) + logger.info(f"Registered preference dataset with ID: {dataset.identifier}") + + # DPO algorithm configuration + algorithm_config = DPOAlignmentConfig( + beta=0.1, + loss_type=DPOLossType.sigmoid, + ) + + data_config = DataConfig( + dataset_id=dataset.identifier, + batch_size=1, + shuffle=False, + data_format=DatasetFormat.dialog, # DPO datasets often use dialog format + ) + + # setup training config with minimal settings for DPO + training_config = TrainingConfig( + n_epochs=1, + data_config=data_config, + max_steps_per_epoch=1, # Just 2 steps for quick testing + gradient_accumulation_steps=1, + ) + + job_uuid = f"test-dpo-job-{uuid.uuid4()}" + logger.info(f"Starting DPO training job with UUID: {job_uuid}") + + # train with HuggingFace DPO implementation + _ = llama_stack_client.post_training.preference_optimize( + job_uuid=job_uuid, + finetuned_model="distilgpt2", # Much smaller model for faster CI testing + algorithm_config=algorithm_config, + training_config=training_config, + hyperparam_search_config={}, + logger_config={}, + ) + + while True: + status = llama_stack_client.post_training.job.status(job_uuid=job_uuid) + if not status: + logger.error("DPO job not found") + break + + logger.info(f"Current DPO status: {status}") + if status.status == "completed": + break + + logger.info("Waiting for DPO job to complete...") + time.sleep(10) # Increased sleep time to reduce polling frequency + + artifacts = llama_stack_client.post_training.job.artifacts(job_uuid=job_uuid) + logger.info(f"DPO job artifacts: {artifacts}")