From 2cf731faea3ea6250eda938010793e1bf9912a6f Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Mon, 16 Sep 2024 17:21:08 -0700
Subject: [PATCH] llama_toolchain -> llama_stack

---
 MANIFEST.in | 6 ++--
 docs/cli_reference.md | 20 +++++------
 docs/getting_started.md | 22 ++++++------
 {llama_toolchain => llama_stack}/__init__.py | 0
 .../agentic_system}/__init__.py | 0
 .../agentic_system/api/__init__.py | 0
 .../agentic_system/api/api.py | 8 ++---
 .../agentic_system/client.py | 4 +--
 .../agentic_system/event_logger.py | 5 +--
 .../execute_with_custom_tools.py | 10 +++---
 .../agentic_system/meta_reference/__init__.py | 2 +-
 .../meta_reference/agent_instance.py | 12 +++----
 .../meta_reference/agentic_system.py | 12 +++----
 .../agentic_system/meta_reference/config.py | 0
 .../meta_reference/rag/context_retriever.py | 4 +--
 .../agentic_system/meta_reference/safety.py | 2 +-
 .../agentic_system/providers.py | 6 ++--
 .../batch_inference}/__init__.py | 0
 .../batch_inference/api/__init__.py | 0
 .../batch_inference/api/api.py | 2 +-
 .../scripts => llama_stack/cli}/__init__.py | 0
 .../cli/download.py | 8 ++---
 {llama_toolchain => llama_stack}/cli/llama.py | 0
 .../cli/model/__init__.py | 0
 .../cli/model/describe.py | 8 ++---
 .../cli/model/download.py | 4 +--
 .../cli/model/list.py | 4 +--
 .../cli/model/model.py | 10 +++---
 .../cli/model/template.py | 4 +--
 .../cli/scripts}/__init__.py | 0
 .../scripts/install-wheel-from-presigned.sh | 0
 .../cli/scripts/run.py | 0
 .../cli/stack/__init__.py | 0
 .../cli/stack/build.py | 22 ++++++------
 .../cli/stack/configure.py | 16 ++++-----
 .../cli/stack/list_apis.py | 6 ++--
 .../cli/stack/list_providers.py | 8 ++---
 .../cli/stack/run.py | 10 +++---
 .../cli/stack/stack.py | 2 +-
 .../cli/subcommand.py | 0
 {llama_toolchain => llama_stack}/cli/table.py | 0
 .../core => llama_stack/common}/__init__.py | 0
 .../common/config_dirs.py | 0
 .../common/deployment_types.py | 0
 .../common/exec.py | 0
 .../common/model_utils.py | 0
 .../common/prompt_for_config.py | 0
 .../common/serialize.py | 0
 .../common/training_types.py | 0
 .../conda/local-conda-example-build.yaml | 2 +-
 .../local-fireworks-conda-example-build.yaml | 0
 .../local-ollama-conda-example-build.yaml | 0
 .../conda/local-tgi-conda-example-build.yaml | 0
 .../local-together-conda-example-build.yaml | 0
 .../docker/local-docker-example-build.yaml | 2 +-
 .../core}/__init__.py | 0
 .../core/build_conda_env.sh | 0
 .../core/build_container.sh | 2 +-
 .../core/common.sh | 0
 .../core/configure.py | 8 ++---
 .../core/configure_container.sh | 0
 .../core/datatypes.py | 4 +--
 .../core/distribution.py | 12 +++----
 .../core/dynamic.py | 2 +-
 .../core/package.py | 10 +++---
 .../core/server.py | 6 ++--
 .../core/start_conda_env.sh | 2 +-
 .../core/start_container.sh | 2 +-
 .../dataset/api/__init__.py | 0
 .../dataset/api/api.py | 0
 .../evaluations/api/__init__.py | 0
 .../evaluations/api/api.py | 4 +--
 .../inference}/__init__.py | 0
 .../inference/adapters}/__init__.py | 0
 .../inference/adapters/fireworks/__init__.py | 0
 .../inference/adapters/fireworks/config.py | 0
 .../inference/adapters/fireworks/fireworks.py | 4 +--
 .../inference/adapters/ollama/__init__.py | 2 +-
 .../inference/adapters/ollama/ollama.py | 4 +--
 .../inference/adapters/tgi/__init__.py | 0
 .../inference/adapters/tgi/config.py | 0
 .../inference/adapters/tgi/tgi.py | 4 +--
 .../inference/adapters/together/__init__.py | 0
 .../inference/adapters/together/config.py | 0
 .../inference/adapters/together/together.py | 4 +--
 .../inference/api/__init__.py | 0
 .../inference/api/api.py | 0
 .../inference/client.py | 4 +--
 .../inference/event_logger.py | 5 +--
 .../inference/meta_reference/__init__.py | 0
 .../inference/meta_reference/config.py | 2 +-
 .../inference/meta_reference/generation.py | 6 ++--
 .../inference/meta_reference/inference.py | 6 ++--
 .../meta_reference/model_parallel.py | 0
 .../meta_reference/parallel_utils.py | 0
 .../inference/prepare_messages.py | 2 +-
 .../inference/providers.py | 20 +++++------
 .../inference/quantization/fp8_impls.py | 0
 .../inference/quantization/loader.py | 4 +--
 .../quantization/scripts/build_conda.sh | 0
 .../scripts/quantize_checkpoint.py | 0
 .../scripts/run_quantize_checkpoint.sh | 0
 .../inference/quantization/test_fp8.py | 0
 .../common => llama_stack/memory}/__init__.py | 0
 .../memory/adapters/chroma/__init__.py | 2 +-
 .../memory/adapters/chroma/chroma.py | 4 +--
 .../memory/adapters/pgvector/__init__.py | 0
 .../memory/adapters/pgvector/config.py | 0
 .../memory/adapters/pgvector/pgvector.py | 4 +--
 .../memory/api/__init__.py | 0
 .../memory/api/api.py | 0
 .../memory/client.py | 2 +-
 .../memory/common}/__init__.py | 0
 .../memory/common/file_utils.py | 0
 .../memory/common/vector_store.py | 2 +-
 .../memory/meta_reference}/__init__.py | 0
 .../memory/meta_reference/faiss/__init__.py | 0
 .../memory/meta_reference/faiss/config.py | 0
 .../memory/meta_reference/faiss/faiss.py | 6 ++--
 .../memory/providers.py | 12 +++----
 .../memory/router/__init__.py | 2 +-
 .../memory/router/router.py | 4 +--
 .../models/api/endpoints.py | 0
 .../post_training/api/__init__.py | 0
 .../post_training/api/api.py | 4 +--
 .../reward_scoring/api/__init__.py | 0
 .../reward_scoring/api/api.py | 0
 .../safety}/__init__.py | 0
 .../safety/api/__init__.py | 0
 .../safety/api/api.py | 2 +-
 .../safety/client.py | 4 +--
 .../safety/meta_reference/__init__.py | 0
 .../safety/meta_reference/config.py | 0
 .../safety/meta_reference/safety.py | 4 +--
 .../safety/meta_reference/shields/__init__.py | 0
 .../safety/meta_reference/shields/base.py | 2 +-
 .../meta_reference/shields/code_scanner.py | 2 +-
 .../shields/contrib}/__init__.py | 0
 .../shields/contrib/third_party_shield.py | 2 +-
 .../meta_reference/shields/llama_guard.py | 2 +-
 .../meta_reference/shields/prompt_guard.py | 2 +-
 .../safety/providers.py | 6 ++--
 llama_stack/stack.py | 34 +++++++++++++++++++
 .../synthetic_data_generation/api/__init__.py | 0
 .../synthetic_data_generation/api/api.py | 2 +-
 .../telemetry}/__init__.py | 0
 .../telemetry/api/__init__.py | 0
 .../telemetry/api/api.py | 0
 .../telemetry/console/__init__.py | 0
 .../telemetry/console/config.py | 0
 .../telemetry/console/console.py | 2 +-
 .../telemetry/providers.py | 6 ++--
 .../telemetry/tracing.py | 2 +-
 .../custom => llama_stack/tools}/__init__.py | 0
 .../tools/base.py | 2 +-
 .../tools/builtin.py | 4 +--
 .../tools/custom}/__init__.py | 0
 .../tools/custom/datatypes.py | 2 +-
 llama_stack/tools/ipython_tool/__init__.py | 5 +++
 .../tools/ipython_tool/code_env_prefix.py | 0
 .../tools/ipython_tool/code_execution.py | 0
 .../ipython_tool/matplotlib_custom_backend.py | 0
 .../tools/ipython_tool/utils.py | 0
 .../tools/safety.py | 6 ++--
 llama_toolchain/agentic_system/__init__.py | 0
 llama_toolchain/stack.py | 34 -------------------
 rfcs/RFC-0001-llama-stack.md | 18 +++++-----
 rfcs/openapi_generator/README.md | 2 +-
 rfcs/openapi_generator/generate.py | 2 +-
 setup.py | 6 ++--
 tests/example_custom_tool.py | 2 +-
 tests/test_e2e.py | 8 ++---
 tests/test_inference.py | 12 +++++--
 tests/test_ollama_inference.py | 12 +++++--
 tests/test_prepare_messages.py | 10 ++++--
 175 files changed, 300 insertions(+), 279 deletions(-)
 rename {llama_toolchain => llama_stack}/__init__.py (100%)
 rename {llama_toolchain/batch_inference => llama_stack/agentic_system}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/agentic_system/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/agentic_system/api/api.py (98%)
 rename {llama_toolchain => llama_stack}/agentic_system/client.py (97%)
 rename {llama_toolchain => llama_stack}/agentic_system/event_logger.py (98%)
 rename {llama_toolchain => llama_stack}/agentic_system/execute_with_custom_tools.py (91%)
 rename {llama_toolchain => llama_stack}/agentic_system/meta_reference/__init__.py (92%)
 rename {llama_toolchain => llama_stack}/agentic_system/meta_reference/agent_instance.py (98%)
 rename {llama_toolchain => llama_stack}/agentic_system/meta_reference/agentic_system.py (93%)
 rename {llama_toolchain => llama_stack}/agentic_system/meta_reference/config.py (100%)
 rename {llama_toolchain => llama_stack}/agentic_system/meta_reference/rag/context_retriever.py (95%)
 rename {llama_toolchain => llama_stack}/agentic_system/meta_reference/safety.py (98%)
 rename {llama_toolchain => llama_stack}/agentic_system/providers.py (76%)
 rename {llama_toolchain/cli => llama_stack/batch_inference}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/batch_inference/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/batch_inference/api/api.py (97%)
 rename {llama_toolchain/cli/scripts => llama_stack/cli}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/cli/download.py (97%)
 rename {llama_toolchain => llama_stack}/cli/llama.py (100%)
 rename {llama_toolchain => llama_stack}/cli/model/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/cli/model/describe.py (93%)
 rename {llama_toolchain => llama_stack}/cli/model/download.py (83%)
 rename {llama_toolchain => llama_stack}/cli/model/list.py (94%)
 rename {llama_toolchain => llama_stack}/cli/model/model.py (73%)
 rename {llama_toolchain => llama_stack}/cli/model/template.py (97%)
 rename {llama_toolchain/common => llama_stack/cli/scripts}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/cli/scripts/install-wheel-from-presigned.sh (100%)
 rename {llama_toolchain => llama_stack}/cli/scripts/run.py (100%)
 rename {llama_toolchain => llama_stack}/cli/stack/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/cli/stack/build.py (78%)
 rename {llama_toolchain => llama_stack}/cli/stack/configure.py (90%)
 rename {llama_toolchain => llama_stack}/cli/stack/list_apis.py (87%)
 rename {llama_toolchain => llama_stack}/cli/stack/list_providers.py (87%)
 rename {llama_toolchain => llama_stack}/cli/stack/run.py (91%)
 rename {llama_toolchain => llama_stack}/cli/stack/stack.py (94%)
 rename {llama_toolchain => llama_stack}/cli/subcommand.py (100%)
 rename {llama_toolchain => llama_stack}/cli/table.py (100%)
 rename {llama_toolchain/core => llama_stack/common}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/common/config_dirs.py (100%)
 rename {llama_toolchain => llama_stack}/common/deployment_types.py (100%)
 rename {llama_toolchain => llama_stack}/common/exec.py (100%)
 rename {llama_toolchain => llama_stack}/common/model_utils.py (100%)
 rename {llama_toolchain => llama_stack}/common/prompt_for_config.py (100%)
 rename {llama_toolchain => llama_stack}/common/serialize.py (100%)
 rename {llama_toolchain => llama_stack}/common/training_types.py (100%)
 rename {llama_toolchain => llama_stack}/configs/distributions/conda/local-conda-example-build.yaml (72%)
 rename {llama_toolchain => llama_stack}/configs/distributions/conda/local-fireworks-conda-example-build.yaml (100%)
 rename {llama_toolchain => llama_stack}/configs/distributions/conda/local-ollama-conda-example-build.yaml (100%)
 rename {llama_toolchain => llama_stack}/configs/distributions/conda/local-tgi-conda-example-build.yaml (100%)
 rename {llama_toolchain => llama_stack}/configs/distributions/conda/local-together-conda-example-build.yaml (100%)
 rename {llama_toolchain => llama_stack}/configs/distributions/docker/local-docker-example-build.yaml (72%)
 rename {llama_toolchain/inference => llama_stack/core}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/core/build_conda_env.sh (100%)
 rename {llama_toolchain => llama_stack}/core/build_container.sh (97%)
 rename {llama_toolchain => llama_stack}/core/common.sh (100%)
 rename {llama_toolchain => llama_stack}/core/configure.py (92%)
 rename {llama_toolchain => llama_stack}/core/configure_container.sh (100%)
 rename {llama_toolchain => llama_stack}/core/datatypes.py (98%)
 rename {llama_toolchain => llama_stack}/core/distribution.py (84%)
 rename {llama_toolchain => llama_stack}/core/dynamic.py (97%)
 rename {llama_toolchain => llama_stack}/core/package.py (88%)
 rename {llama_toolchain => llama_stack}/core/server.py (98%)
 rename {llama_toolchain => llama_stack}/core/start_conda_env.sh (95%)
 rename {llama_toolchain => llama_stack}/core/start_container.sh (94%)
 rename {llama_toolchain => llama_stack}/dataset/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/dataset/api/api.py (100%)
 rename {llama_toolchain => llama_stack}/evaluations/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/evaluations/api/api.py (95%)
 rename {llama_toolchain/inference/adapters => llama_stack/inference}/__init__.py (100%)
 rename {llama_toolchain/memory => llama_stack/inference/adapters}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/inference/adapters/fireworks/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/inference/adapters/fireworks/config.py (100%)
 rename {llama_toolchain => llama_stack}/inference/adapters/fireworks/fireworks.py (98%)
 rename {llama_toolchain => llama_stack}/inference/adapters/ollama/__init__.py (86%)
 rename {llama_toolchain => llama_stack}/inference/adapters/ollama/ollama.py (98%)
 rename {llama_toolchain => llama_stack}/inference/adapters/tgi/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/inference/adapters/tgi/config.py (100%)
 rename {llama_toolchain => llama_stack}/inference/adapters/tgi/tgi.py (98%)
 rename {llama_toolchain => llama_stack}/inference/adapters/together/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/inference/adapters/together/config.py (100%)
 rename {llama_toolchain => llama_stack}/inference/adapters/together/together.py (98%)
 rename {llama_toolchain => llama_stack}/inference/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/inference/api/api.py (100%)
 rename {llama_toolchain => llama_stack}/inference/client.py (98%)
 rename {llama_toolchain => llama_stack}/inference/event_logger.py (97%)
 rename {llama_toolchain => llama_stack}/inference/meta_reference/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/inference/meta_reference/config.py (96%)
 rename {llama_toolchain => llama_stack}/inference/meta_reference/generation.py (98%)
 rename {llama_toolchain => llama_stack}/inference/meta_reference/inference.py (97%)
 rename {llama_toolchain => llama_stack}/inference/meta_reference/model_parallel.py (100%)
 rename {llama_toolchain => llama_stack}/inference/meta_reference/parallel_utils.py (100%)
 rename {llama_toolchain => llama_stack}/inference/prepare_messages.py (97%)
 rename {llama_toolchain => llama_stack}/inference/providers.py (67%)
 rename {llama_toolchain => llama_stack}/inference/quantization/fp8_impls.py (100%)
 rename {llama_toolchain => llama_stack}/inference/quantization/loader.py (97%)
 rename {llama_toolchain => llama_stack}/inference/quantization/scripts/build_conda.sh (100%)
 rename {llama_toolchain => llama_stack}/inference/quantization/scripts/quantize_checkpoint.py (100%)
 rename {llama_toolchain => llama_stack}/inference/quantization/scripts/run_quantize_checkpoint.sh (100%)
 rename {llama_toolchain => llama_stack}/inference/quantization/test_fp8.py (100%)
 rename {llama_toolchain/memory/common => llama_stack/memory}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/memory/adapters/chroma/__init__.py (86%)
 rename {llama_toolchain => llama_stack}/memory/adapters/chroma/chroma.py (97%)
 rename {llama_toolchain => llama_stack}/memory/adapters/pgvector/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/memory/adapters/pgvector/config.py (100%)
 rename {llama_toolchain => llama_stack}/memory/adapters/pgvector/pgvector.py (98%)
 rename {llama_toolchain => llama_stack}/memory/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/memory/api/api.py (100%)
 rename {llama_toolchain => llama_stack}/memory/client.py (98%)
 rename {llama_toolchain/memory/meta_reference => llama_stack/memory/common}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/memory/common/file_utils.py (100%)
 rename {llama_toolchain => llama_stack}/memory/common/vector_store.py (98%)
 rename {llama_toolchain/safety => llama_stack/memory/meta_reference}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/memory/meta_reference/faiss/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/memory/meta_reference/faiss/config.py (100%)
 rename {llama_toolchain => llama_stack}/memory/meta_reference/faiss/faiss.py (95%)
 rename {llama_toolchain => llama_stack}/memory/providers.py (69%)
 rename {llama_toolchain => llama_stack}/memory/router/__init__.py (90%)
 rename {llama_toolchain => llama_stack}/memory/router/router.py (96%)
 rename {llama_toolchain => llama_stack}/models/api/endpoints.py (100%)
 rename {llama_toolchain => llama_stack}/post_training/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/post_training/api/api.py (97%)
 rename {llama_toolchain => llama_stack}/reward_scoring/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/reward_scoring/api/api.py (100%)
 rename {llama_toolchain/safety/meta_reference/shields/contrib => llama_stack/safety}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/safety/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/safety/api/api.py (96%)
 rename {llama_toolchain => llama_stack}/safety/client.py (97%)
 rename {llama_toolchain => llama_stack}/safety/meta_reference/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/safety/meta_reference/config.py (100%)
 rename {llama_toolchain => llama_stack}/safety/meta_reference/safety.py (96%)
 rename {llama_toolchain => llama_stack}/safety/meta_reference/shields/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/safety/meta_reference/shields/base.py (97%)
 rename {llama_toolchain => llama_stack}/safety/meta_reference/shields/code_scanner.py (95%)
 rename {llama_toolchain/telemetry => llama_stack/safety/meta_reference/shields/contrib}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/safety/meta_reference/shields/contrib/third_party_shield.py (93%)
 rename {llama_toolchain => llama_stack}/safety/meta_reference/shields/llama_guard.py (99%)
 rename {llama_toolchain => llama_stack}/safety/meta_reference/shields/prompt_guard.py (99%)
 rename {llama_toolchain => llama_stack}/safety/providers.py (71%)
 create mode 100644 llama_stack/stack.py
 rename {llama_toolchain => llama_stack}/synthetic_data_generation/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/synthetic_data_generation/api/api.py (96%)
 rename {llama_toolchain/tools => llama_stack/telemetry}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/telemetry/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/telemetry/api/api.py (100%)
 rename {llama_toolchain => llama_stack}/telemetry/console/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/telemetry/console/config.py (100%)
 rename {llama_toolchain => llama_stack}/telemetry/console/console.py (97%)
 rename {llama_toolchain => llama_stack}/telemetry/providers.py (69%)
 rename {llama_toolchain => llama_stack}/telemetry/tracing.py (99%)
 rename {llama_toolchain/tools/custom => llama_stack/tools}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/tools/base.py (90%)
 rename {llama_toolchain => llama_stack}/tools/builtin.py (99%)
 rename {llama_toolchain/tools/ipython_tool => llama_stack/tools/custom}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/tools/custom/datatypes.py (97%)
 create mode 100644 llama_stack/tools/ipython_tool/__init__.py
 rename {llama_toolchain => llama_stack}/tools/ipython_tool/code_env_prefix.py (100%)
 rename {llama_toolchain => llama_stack}/tools/ipython_tool/code_execution.py (100%)
 rename {llama_toolchain => llama_stack}/tools/ipython_tool/matplotlib_custom_backend.py (100%)
 rename {llama_toolchain => llama_stack}/tools/ipython_tool/utils.py (100%)
 rename {llama_toolchain => llama_stack}/tools/safety.py (88%)
 delete mode 100644 llama_toolchain/agentic_system/__init__.py
 delete mode 100644 llama_toolchain/stack.py

diff --git a/MANIFEST.in b/MANIFEST.in
index 4b76f85fe..bacea3148 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,4 @@
 include requirements.txt
-include llama_toolchain/data/*.yaml
-include llama_toolchain/core/*.sh
-include llama_toolchain/cli/scripts/*.sh
+include llama_stack/data/*.yaml
+include llama_stack/core/*.sh
+include llama_stack/cli/scripts/*.sh
diff --git a/docs/cli_reference.md b/docs/cli_reference.md
index 8921fc941..e62d03117 100644
--- a/docs/cli_reference.md
+++ b/docs/cli_reference.md
@@ -276,16 +276,16 @@ The following command and specifications allows you to get started with building
 ```
 llama stack build
 ```
-- You will be required to pass in a file path to the build.config file (e.g. `./llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml`). We provide some example build config files for configuring different types of distributions in the `./llama_toolchain/configs/distributions/` folder.
+- You will be required to pass in a file path to the build.config file (e.g. `./llama_stack/configs/distributions/conda/local-conda-example-build.yaml`). We provide some example build config files for configuring different types of distributions in the `./llama_stack/configs/distributions/` folder.
 
 The file will be of the contents
 ```
-$ cat ./llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml
+$ cat ./llama_stack/configs/distributions/conda/local-conda-example-build.yaml
 
 name: 8b-instruct
 distribution_spec:
   distribution_type: local
-  description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
   docker_image: null
   providers:
     inference: meta-reference
@@ -311,7 +311,7 @@ After this step is complete, a file named `8b-instruct-build.yaml` will be gener
 To specify a different API provider, we can change the `distribution_spec` in our `-build.yaml` config. For example, the following build spec allows you to build a distribution using TGI as the inference API provider.
 
 ```
-$ cat ./llama_toolchain/configs/distributions/conda/local-tgi-conda-example-build.yaml
+$ cat ./llama_stack/configs/distributions/conda/local-tgi-conda-example-build.yaml
 
 name: local-tgi-conda-example
 distribution_spec:
@@ -328,7 +328,7 @@ image_type: conda
 The following command allows you to build a distribution with TGI as the inference API provider, with the name `tgi`.
 
 ```
-llama stack build --config ./llama_toolchain/configs/distributions/conda/local-tgi-conda-example-build.yaml --name tgi
+llama stack build --config ./llama_stack/configs/distributions/conda/local-tgi-conda-example-build.yaml --name tgi
 ```
 
 We provide some example build configs to help you get started with building with different API providers.
@@ -337,11 +337,11 @@ We provide some example build configs to help you get started with building with
 To build a docker image, simply change the `image_type` to `docker` in our `-build.yaml` file, and run `llama stack build --config -build.yaml`.
 
 ```
-$ cat ./llama_toolchain/configs/distributions/docker/local-docker-example-build.yaml
+$ cat ./llama_stack/configs/distributions/docker/local-docker-example-build.yaml
 
 name: local-docker-example
 distribution_spec:
-  description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
   docker_image: null
   providers:
     inference: meta-reference
@@ -354,7 +354,7 @@ image_type: docker
 The following command allows you to build a Docker image with the name `docker-local`
 
 ```
-llama stack build --config ./llama_toolchain/configs/distributions/docker/local-docker-example-build.yaml --name docker-local
+llama stack build --config ./llama_stack/configs/distributions/docker/local-docker-example-build.yaml --name docker-local
 Dockerfile created successfully in /tmp/tmp.I0ifS2c46A/Dockerfile
 FROM python:3.10-slim
 WORKDIR /app
@@ -482,7 +482,7 @@ Once the server is setup, we can test it with a client to see the example output
 cd /path/to/llama-stack
 conda activate  # any environment containing the llama-toolchain pip package will work
-python -m llama_toolchain.inference.client localhost 5000
+python -m llama_stack.inference.client localhost 5000
 ```
 
 This will run the chat completion client and query the distribution’s /inference/chat_completion API.
 
@@ -500,7 +500,7 @@ You know what's even more hilarious? People like you who think they can just Goo
 Similarly you can test safety (if you configured llama-guard and/or prompt-guard shields) by:
 
 ```
-python -m llama_toolchain.safety.client localhost 5000
+python -m llama_stack.safety.client localhost 5000
 ```
 
 You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/sdk_examples) repo.
diff --git a/docs/getting_started.md b/docs/getting_started.md
index a312b8f33..fe6b1c573 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -9,7 +9,7 @@ This guides allows you to quickly get started with building and running a Llama
 **`llama stack build`**
 
 ```
-llama stack build --config ./llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml --name my-local-llama-stack
+llama stack build --config ./llama_stack/configs/distributions/conda/local-conda-example-build.yaml --name my-local-llama-stack
 ...
 ...
 Build spec configuration saved at ~/.llama/distributions/conda/my-local-llama-stack-build.yaml
@@ -97,16 +97,16 @@ The following command and specifications allows you to get started with building
 ```
 llama stack build
 ```
-- You will be required to pass in a file path to the build.config file (e.g. `./llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml`). We provide some example build config files for configuring different types of distributions in the `./llama_toolchain/configs/distributions/` folder.
+- You will be required to pass in a file path to the build.config file (e.g. `./llama_stack/configs/distributions/conda/local-conda-example-build.yaml`). We provide some example build config files for configuring different types of distributions in the `./llama_stack/configs/distributions/` folder.
 
 The file will be of the contents
 ```
-$ cat ./llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml
+$ cat ./llama_stack/configs/distributions/conda/local-conda-example-build.yaml
 
 name: 8b-instruct
 distribution_spec:
   distribution_type: local
-  description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
   docker_image: null
   providers:
     inference: meta-reference
@@ -132,7 +132,7 @@ After this step is complete, a file named `8b-instruct-build.yaml` will be gener
 To specify a different API provider, we can change the `distribution_spec` in our `-build.yaml` config. For example, the following build spec allows you to build a distribution using TGI as the inference API provider.
 
 ```
-$ cat ./llama_toolchain/configs/distributions/conda/local-tgi-conda-example-build.yaml
+$ cat ./llama_stack/configs/distributions/conda/local-tgi-conda-example-build.yaml
 
 name: local-tgi-conda-example
 distribution_spec:
@@ -149,7 +149,7 @@ image_type: conda
 The following command allows you to build a distribution with TGI as the inference API provider, with the name `tgi`.
 
 ```
-llama stack build --config ./llama_toolchain/configs/distributions/conda/local-tgi-conda-example-build.yaml --name tgi
+llama stack build --config ./llama_stack/configs/distributions/conda/local-tgi-conda-example-build.yaml --name tgi
 ```
 
 We provide some example build configs to help you get started with building with different API providers.
@@ -158,11 +158,11 @@ We provide some example build configs to help you get started with building with
 To build a docker image, simply change the `image_type` to `docker` in our `-build.yaml` file, and run `llama stack build --config -build.yaml`.
 
 ```
-$ cat ./llama_toolchain/configs/distributions/docker/local-docker-example-build.yaml
+$ cat ./llama_stack/configs/distributions/docker/local-docker-example-build.yaml
 
 name: local-docker-example
 distribution_spec:
-  description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
   docker_image: null
   providers:
     inference: meta-reference
@@ -175,7 +175,7 @@ image_type: docker
 The following command allows you to build a Docker image with the name `docker-local`
 
 ```
-llama stack build --config ./llama_toolchain/configs/distributions/docker/local-docker-example-build.yaml --name docker-local
+llama stack build --config ./llama_stack/configs/distributions/docker/local-docker-example-build.yaml --name docker-local
 Dockerfile created successfully in /tmp/tmp.I0ifS2c46A/Dockerfile
 FROM python:3.10-slim
 WORKDIR /app
@@ -296,7 +296,7 @@ Once the server is setup, we can test it with a client to see the example output
 cd /path/to/llama-stack
 conda activate  # any environment containing the llama-toolchain pip package will work
-python -m llama_toolchain.inference.client localhost 5000
+python -m llama_stack.inference.client localhost 5000
 ```
 
 This will run the chat completion client and query the distribution’s /inference/chat_completion API.
 
@@ -314,7 +314,7 @@ You know what's even more hilarious? People like you who think they can just Goo
 Similarly you can test safety (if you configured llama-guard and/or prompt-guard shields) by:
 
 ```
-python -m llama_toolchain.safety.client localhost 5000
+python -m llama_stack.safety.client localhost 5000
 ```
 
 You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/sdk_examples) repo.
diff --git a/llama_toolchain/__init__.py b/llama_stack/__init__.py
similarity index 100%
rename from llama_toolchain/__init__.py
rename to llama_stack/__init__.py
diff --git a/llama_toolchain/batch_inference/__init__.py b/llama_stack/agentic_system/__init__.py
similarity index 100%
rename from llama_toolchain/batch_inference/__init__.py
rename to llama_stack/agentic_system/__init__.py
diff --git a/llama_toolchain/agentic_system/api/__init__.py b/llama_stack/agentic_system/api/__init__.py
similarity index 100%
rename from llama_toolchain/agentic_system/api/__init__.py
rename to llama_stack/agentic_system/api/__init__.py
diff --git a/llama_toolchain/agentic_system/api/api.py b/llama_stack/agentic_system/api/api.py
similarity index 98%
rename from llama_toolchain/agentic_system/api/api.py
rename to llama_stack/agentic_system/api/api.py
index 95af3727b..50920315f 100644
--- a/llama_toolchain/agentic_system/api/api.py
+++ b/llama_stack/agentic_system/api/api.py
@@ -14,10 +14,10 @@ from pydantic import BaseModel, ConfigDict, Field
 from typing_extensions import Annotated
 
 from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_toolchain.common.deployment_types import *  # noqa: F403
-from llama_toolchain.inference.api import *  # noqa: F403
-from llama_toolchain.safety.api import *  # noqa: F403
-from llama_toolchain.memory.api import *  # noqa: F403
+from llama_stack.common.deployment_types import *  # noqa: F403
+from llama_stack.inference.api import *  # noqa: F403
+from llama_stack.safety.api import *  # noqa: F403
+from llama_stack.memory.api import *  # noqa: F403
 
 
 @json_schema_type
diff --git a/llama_toolchain/agentic_system/client.py b/llama_stack/agentic_system/client.py
similarity index 97%
rename from llama_toolchain/agentic_system/client.py
rename to llama_stack/agentic_system/client.py
index 52cf0dee2..49d887439 100644
--- a/llama_toolchain/agentic_system/client.py
+++ b/llama_stack/agentic_system/client.py
@@ -16,7 +16,7 @@ from pydantic import BaseModel
 from termcolor import cprint
 
 from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_toolchain.core.datatypes import RemoteProviderConfig
+from llama_stack.core.datatypes import RemoteProviderConfig
 
 from .api import *  # noqa: F403
 from .event_logger import EventLogger
@@ -186,7 +186,7 @@ async def run_rag(host: str, port: int):
     ]
 
     # Alternatively, you can pre-populate the memory bank with documents for example,
-    # using `llama_toolchain.memory.client`. Then you can grab the bank_id
+    # using `llama_stack.memory.client`. Then you can grab the bank_id
     # from the output of that run.
     tool_definitions = [
         MemoryToolDefinition(
diff --git a/llama_toolchain/agentic_system/event_logger.py b/llama_stack/agentic_system/event_logger.py
similarity index 98%
rename from llama_toolchain/agentic_system/event_logger.py
rename to llama_stack/agentic_system/event_logger.py
index 3d15ee239..c0bd89ee2 100644
--- a/llama_toolchain/agentic_system/event_logger.py
+++ b/llama_stack/agentic_system/event_logger.py
@@ -11,10 +11,7 @@ from llama_models.llama3.api.tool_utils import ToolUtils
 
 from termcolor import cprint
 
-from llama_toolchain.agentic_system.api import (
-    AgenticSystemTurnResponseEventType,
-    StepType,
-)
+from llama_stack.agentic_system.api import AgenticSystemTurnResponseEventType, StepType
 
 
 class LogEvent:
diff --git a/llama_toolchain/agentic_system/execute_with_custom_tools.py b/llama_stack/agentic_system/execute_with_custom_tools.py
similarity index 91%
rename from llama_toolchain/agentic_system/execute_with_custom_tools.py
rename to llama_stack/agentic_system/execute_with_custom_tools.py
index e8038bc20..fe9ef17b3 100644
--- a/llama_toolchain/agentic_system/execute_with_custom_tools.py
+++ b/llama_stack/agentic_system/execute_with_custom_tools.py
@@ -7,14 +7,14 @@
 from typing import AsyncGenerator, List
 
 from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_toolchain.agentic_system.api import *  # noqa: F403
-from llama_toolchain.memory.api import *  # noqa: F403
-from llama_toolchain.safety.api import *  # noqa: F403
+from llama_stack.agentic_system.api import *  # noqa: F403
+from llama_stack.memory.api import *  # noqa: F403
+from llama_stack.safety.api import *  # noqa: F403
 
-from llama_toolchain.agentic_system.api import (
+from llama_stack.agentic_system.api import (
     AgenticSystemTurnResponseEventType as EventType,
 )
-from llama_toolchain.tools.custom.datatypes import CustomTool
+from llama_stack.tools.custom.datatypes import CustomTool
 
 
 class AgentWithCustomToolExecutor:
diff --git a/llama_toolchain/agentic_system/meta_reference/__init__.py b/llama_stack/agentic_system/meta_reference/__init__.py
similarity index 92%
rename from llama_toolchain/agentic_system/meta_reference/__init__.py
rename to llama_stack/agentic_system/meta_reference/__init__.py
index b49cc4c84..686708c0c 100644
--- a/llama_toolchain/agentic_system/meta_reference/__init__.py
+++ b/llama_stack/agentic_system/meta_reference/__init__.py
@@ -6,7 +6,7 @@
 
 from typing import Dict
 
-from llama_toolchain.core.datatypes import Api, ProviderSpec
+from llama_stack.core.datatypes import Api, ProviderSpec
 
 from .config import MetaReferenceImplConfig
 
diff --git a/llama_toolchain/agentic_system/meta_reference/agent_instance.py b/llama_stack/agentic_system/meta_reference/agent_instance.py
similarity index 98%
rename from llama_toolchain/agentic_system/meta_reference/agent_instance.py
rename to llama_stack/agentic_system/meta_reference/agent_instance.py
index 202f42a3c..141174cce 100644
--- a/llama_toolchain/agentic_system/meta_reference/agent_instance.py
+++ b/llama_stack/agentic_system/meta_reference/agent_instance.py
@@ -20,13 +20,13 @@ import httpx
 
 from termcolor import cprint
 
-from llama_toolchain.agentic_system.api import *  # noqa: F403
-from llama_toolchain.inference.api import *  # noqa: F403
-from llama_toolchain.memory.api import *  # noqa: F403
-from llama_toolchain.safety.api import *  # noqa: F403
+from llama_stack.agentic_system.api import *  # noqa: F403
+from llama_stack.inference.api import *  # noqa: F403
+from llama_stack.memory.api import *  # noqa: F403
+from llama_stack.safety.api import *  # noqa: F403
 
-from llama_toolchain.tools.base import BaseTool
-from llama_toolchain.tools.builtin import (
+from llama_stack.tools.base import BaseTool
+from llama_stack.tools.builtin import (
     interpret_content_as_attachment,
     SingleMessageBuiltinTool,
 )
diff --git a/llama_toolchain/agentic_system/meta_reference/agentic_system.py b/llama_stack/agentic_system/meta_reference/agentic_system.py
similarity index 93%
rename from llama_toolchain/agentic_system/meta_reference/agentic_system.py
rename to llama_stack/agentic_system/meta_reference/agentic_system.py
index 3990ab58a..7f624cfa6 100644
--- a/llama_toolchain/agentic_system/meta_reference/agentic_system.py
+++ b/llama_stack/agentic_system/meta_reference/agentic_system.py
@@ -10,17 +10,17 @@ import tempfile
 import uuid
 from typing import AsyncGenerator
 
-from llama_toolchain.inference.api import Inference
-from llama_toolchain.memory.api import Memory
-from llama_toolchain.safety.api import Safety
-from llama_toolchain.agentic_system.api import *  # noqa: F403
-from llama_toolchain.tools.builtin import (
+from llama_stack.inference.api import Inference
+from llama_stack.memory.api import Memory
+from llama_stack.safety.api import Safety
+from llama_stack.agentic_system.api import *  # noqa: F403
+from llama_stack.tools.builtin import (
     CodeInterpreterTool,
     PhotogenTool,
     SearchTool,
     WolframAlphaTool,
 )
-from llama_toolchain.tools.safety import with_safety
+from llama_stack.tools.safety import with_safety
 
 from .agent_instance import ChatAgent
 from .config import MetaReferenceImplConfig
diff --git a/llama_toolchain/agentic_system/meta_reference/config.py b/llama_stack/agentic_system/meta_reference/config.py
similarity index 100%
rename from llama_toolchain/agentic_system/meta_reference/config.py
rename to llama_stack/agentic_system/meta_reference/config.py
diff --git a/llama_toolchain/agentic_system/meta_reference/rag/context_retriever.py b/llama_stack/agentic_system/meta_reference/rag/context_retriever.py
similarity index 95%
rename from llama_toolchain/agentic_system/meta_reference/rag/context_retriever.py
rename to llama_stack/agentic_system/meta_reference/rag/context_retriever.py
index afcc6afd1..7723fe5a5 100644
--- a/llama_toolchain/agentic_system/meta_reference/rag/context_retriever.py
+++ b/llama_stack/agentic_system/meta_reference/rag/context_retriever.py
@@ -10,14 +10,14 @@ from jinja2 import Template
 
 from llama_models.llama3.api import *  # noqa: F403
 
-from llama_toolchain.agentic_system.api import (
+from llama_stack.agentic_system.api import (
     DefaultMemoryQueryGeneratorConfig,
     LLMMemoryQueryGeneratorConfig,
     MemoryQueryGenerator,
     MemoryQueryGeneratorConfig,
 )
 from termcolor import cprint  # noqa: F401
-from llama_toolchain.inference.api import *  # noqa: F403
+from llama_stack.inference.api import *  # noqa: F403
 
 
 async def generate_rag_query(
diff --git a/llama_toolchain/agentic_system/meta_reference/safety.py b/llama_stack/agentic_system/meta_reference/safety.py
similarity index 98%
rename from llama_toolchain/agentic_system/meta_reference/safety.py
rename to llama_stack/agentic_system/meta_reference/safety.py
index 4bbb1f2f1..74da5c2cc 100644
--- a/llama_toolchain/agentic_system/meta_reference/safety.py
+++ b/llama_stack/agentic_system/meta_reference/safety.py
@@ -9,7 +9,7 @@ from typing import List
 from llama_models.llama3.api.datatypes import Message, Role, UserMessage
 from termcolor import cprint
 
-from llama_toolchain.safety.api import (
+from llama_stack.safety.api import (
     OnViolationAction,
     RunShieldRequest,
     Safety,
diff --git a/llama_toolchain/agentic_system/providers.py b/llama_stack/agentic_system/providers.py
similarity index 76%
rename from llama_toolchain/agentic_system/providers.py
rename to llama_stack/agentic_system/providers.py
index 79e66d15e..7db33c95d 100644
--- a/llama_toolchain/agentic_system/providers.py
+++ b/llama_stack/agentic_system/providers.py
@@ -6,7 +6,7 @@
 
 from typing import List
 
-from llama_toolchain.core.datatypes import Api, InlineProviderSpec, ProviderSpec
+from llama_stack.core.datatypes import Api, InlineProviderSpec, ProviderSpec
 
 
 def available_providers() -> List[ProviderSpec]:
@@ -23,8 +23,8 @@ def available_providers() -> List[ProviderSpec]:
                 "torch",
                 "transformers",
             ],
-            module="llama_toolchain.agentic_system.meta_reference",
-            config_class="llama_toolchain.agentic_system.meta_reference.MetaReferenceImplConfig",
+            module="llama_stack.agentic_system.meta_reference",
+            config_class="llama_stack.agentic_system.meta_reference.MetaReferenceImplConfig",
             api_dependencies=[
                 Api.inference,
                 Api.safety,
diff --git a/llama_toolchain/cli/__init__.py b/llama_stack/batch_inference/__init__.py
similarity index 100%
rename from llama_toolchain/cli/__init__.py
rename to llama_stack/batch_inference/__init__.py
diff --git a/llama_toolchain/batch_inference/api/__init__.py b/llama_stack/batch_inference/api/__init__.py
similarity index 100%
rename from llama_toolchain/batch_inference/api/__init__.py
rename to llama_stack/batch_inference/api/__init__.py
diff --git a/llama_toolchain/batch_inference/api/api.py b/llama_stack/batch_inference/api/api.py
similarity index 97%
rename from llama_toolchain/batch_inference/api/api.py
rename to llama_stack/batch_inference/api/api.py
index 3d67120dd..3c8f9c18b 100644
--- a/llama_toolchain/batch_inference/api/api.py
+++ b/llama_stack/batch_inference/api/api.py
@@ -11,7 +11,7 @@ from llama_models.schema_utils import json_schema_type, webmethod
 from pydantic import BaseModel, Field
 
 from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_toolchain.inference.api import *  # noqa: F403
+from llama_stack.inference.api import *  # noqa: F403
 
 
 @json_schema_type
diff --git a/llama_toolchain/cli/scripts/__init__.py b/llama_stack/cli/__init__.py
similarity index 100%
rename from llama_toolchain/cli/scripts/__init__.py
rename to llama_stack/cli/__init__.py
diff --git a/llama_toolchain/cli/download.py b/llama_stack/cli/download.py
similarity index 97%
rename from llama_toolchain/cli/download.py
rename to llama_stack/cli/download.py
index 1bfa89fc6..1e75459a1 100644
--- a/llama_toolchain/cli/download.py
+++ b/llama_stack/cli/download.py
@@ -20,7 +20,7 @@ from pydantic import BaseModel
 
 from termcolor import cprint
 
-from llama_toolchain.cli.subcommand import Subcommand
+from llama_stack.cli.subcommand import Subcommand
 
 
 class Download(Subcommand):
@@ -92,7 +92,7 @@ def _hf_download(
     from huggingface_hub import snapshot_download
     from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
 
-    from llama_toolchain.common.model_utils import model_local_dir
+    from llama_stack.common.model_utils import model_local_dir
 
     repo_id = model.huggingface_repo
     if repo_id is None:
@@ -126,7 +126,7 @@ def _hf_download(
 def _meta_download(model: "Model", meta_url: str):
     from llama_models.sku_list import llama_meta_net_info
 
-    from llama_toolchain.common.model_utils import model_local_dir
+    from llama_stack.common.model_utils import model_local_dir
 
     output_dir = Path(model_local_dir(model.descriptor()))
     os.makedirs(output_dir, exist_ok=True)
@@ -188,7 +188,7 @@ class Manifest(BaseModel):
 
 
 def _download_from_manifest(manifest_file: str):
-    from llama_toolchain.common.model_utils import model_local_dir
+    from llama_stack.common.model_utils import model_local_dir
 
     with open(manifest_file, "r") as f:
         d = json.load(f)
diff --git a/llama_toolchain/cli/llama.py b/llama_stack/cli/llama.py
similarity index 100%
rename from llama_toolchain/cli/llama.py
rename to llama_stack/cli/llama.py
diff --git a/llama_toolchain/cli/model/__init__.py b/llama_stack/cli/model/__init__.py
similarity index 100%
rename from llama_toolchain/cli/model/__init__.py
rename to llama_stack/cli/model/__init__.py
diff --git a/llama_toolchain/cli/model/describe.py b/llama_stack/cli/model/describe.py
similarity index 93%
rename from llama_toolchain/cli/model/describe.py
rename to llama_stack/cli/model/describe.py
index 683995f7b..24af7dd14 100644
--- a/llama_toolchain/cli/model/describe.py
+++ b/llama_stack/cli/model/describe.py
@@ -9,12 +9,12 @@ import json
 
 from llama_models.sku_list import resolve_model
 
-from llama_toolchain.cli.subcommand import Subcommand
-from llama_toolchain.cli.table import print_table
-from llama_toolchain.common.serialize import EnumEncoder
-
 from termcolor import colored
 
+from llama_stack.cli.subcommand import Subcommand
+from llama_stack.cli.table import print_table
+from llama_stack.common.serialize import EnumEncoder
+
 
 class ModelDescribe(Subcommand):
     """Show details about a model"""
diff --git a/llama_toolchain/cli/model/download.py b/llama_stack/cli/model/download.py
similarity index 83%
rename from llama_toolchain/cli/model/download.py
rename to llama_stack/cli/model/download.py
index ac3c791b4..a3b8f7796 100644
--- a/llama_toolchain/cli/model/download.py
+++ b/llama_stack/cli/model/download.py
@@ -6,7 +6,7 @@
 
 import argparse
 
-from llama_toolchain.cli.subcommand import Subcommand
+from llama_stack.cli.subcommand import Subcommand
 
 
 class ModelDownload(Subcommand):
@@ -19,6 +19,6 @@ class ModelDownload(Subcommand):
             formatter_class=argparse.RawTextHelpFormatter,
         )
 
-        from llama_toolchain.cli.download import setup_download_parser
+        from llama_stack.cli.download import setup_download_parser
 
         setup_download_parser(self.parser)
diff --git a/llama_toolchain/cli/model/list.py b/llama_stack/cli/model/list.py
similarity index 94%
rename from llama_toolchain/cli/model/list.py
rename to llama_stack/cli/model/list.py
index f989260ab..977590d7a 100644
--- a/llama_toolchain/cli/model/list.py
+++ b/llama_stack/cli/model/list.py
@@ -8,8 +8,8 @@ import argparse
 
 from llama_models.sku_list import all_registered_models
 
-from llama_toolchain.cli.subcommand import Subcommand
-from llama_toolchain.cli.table import print_table
+from llama_stack.cli.subcommand import Subcommand
+from llama_stack.cli.table import print_table
 
 
 class ModelList(Subcommand):
diff --git a/llama_toolchain/cli/model/model.py b/llama_stack/cli/model/model.py
similarity index 73%
rename from llama_toolchain/cli/model/model.py
rename to llama_stack/cli/model/model.py
index 9a14450ad..c222c1d63 100644
--- a/llama_toolchain/cli/model/model.py
+++ b/llama_stack/cli/model/model.py
@@ -6,12 +6,12 @@
 
 import argparse
 
-from llama_toolchain.cli.model.describe import ModelDescribe
-from llama_toolchain.cli.model.download import ModelDownload
-from llama_toolchain.cli.model.list import ModelList
-from llama_toolchain.cli.model.template import ModelTemplate
+from llama_stack.cli.model.describe import ModelDescribe
+from llama_stack.cli.model.download import ModelDownload
+from llama_stack.cli.model.list import ModelList
+from llama_stack.cli.model.template import ModelTemplate
 
-from llama_toolchain.cli.subcommand import Subcommand
+from llama_stack.cli.subcommand import Subcommand
 
 
 class ModelParser(Subcommand):
diff --git a/llama_toolchain/cli/model/template.py b/llama_stack/cli/model/template.py
similarity index 97%
rename from llama_toolchain/cli/model/template.py
rename to llama_stack/cli/model/template.py
index 2776d9703..d828660bb 100644
--- a/llama_toolchain/cli/model/template.py
+++ b/llama_stack/cli/model/template.py
@@ -9,7 +9,7 @@ import textwrap
 
 from termcolor import colored
 
-from llama_toolchain.cli.subcommand import Subcommand
+from llama_stack.cli.subcommand import Subcommand
 
 
 class ModelTemplate(Subcommand):
@@ -75,7 +75,7 @@ class ModelTemplate(Subcommand):
             render_jinja_template,
         )
 
-        from llama_toolchain.cli.table import print_table
+        from llama_stack.cli.table import print_table
 
         if args.name:
             tool_prompt_format = self._prompt_type(args.format)
diff --git a/llama_toolchain/common/__init__.py b/llama_stack/cli/scripts/__init__.py
similarity index 100%
rename from llama_toolchain/common/__init__.py
rename to llama_stack/cli/scripts/__init__.py
diff --git a/llama_toolchain/cli/scripts/install-wheel-from-presigned.sh b/llama_stack/cli/scripts/install-wheel-from-presigned.sh
similarity index 100%
rename from llama_toolchain/cli/scripts/install-wheel-from-presigned.sh
rename to llama_stack/cli/scripts/install-wheel-from-presigned.sh
diff --git a/llama_toolchain/cli/scripts/run.py b/llama_stack/cli/scripts/run.py
similarity index 100%
rename from llama_toolchain/cli/scripts/run.py
rename to llama_stack/cli/scripts/run.py
diff --git a/llama_toolchain/cli/stack/__init__.py b/llama_stack/cli/stack/__init__.py
similarity index 100%
rename from llama_toolchain/cli/stack/__init__.py
rename to llama_stack/cli/stack/__init__.py
diff --git a/llama_toolchain/cli/stack/build.py b/llama_stack/cli/stack/build.py
similarity index 78%
rename from llama_toolchain/cli/stack/build.py
rename to llama_stack/cli/stack/build.py
index 78e013219..da4cb6161 100644
--- a/llama_toolchain/cli/stack/build.py
+++ b/llama_stack/cli/stack/build.py
@@ -6,8 +6,8 @@
 
 import argparse
 
-from llama_toolchain.cli.subcommand import Subcommand
-from llama_toolchain.core.datatypes import *  # noqa: F403
+from llama_stack.cli.subcommand import Subcommand
+from llama_stack.core.datatypes import *  # noqa: F403
 from pathlib import Path
 
 import yaml
@@ -29,7 +29,7 @@ class StackBuild(Subcommand):
         self.parser.add_argument(
             "config",
             type=str,
-            help="Path to a config file to use for the build. You may find example configs in llama_toolchain/configs/distributions",
+            help="Path to a config file to use for the build. You may find example configs in llama_stack/configs/distributions",
         )
 
         self.parser.add_argument(
@@ -44,17 +44,17 @@ class StackBuild(Subcommand):
         import json
         import os
 
-        from llama_toolchain.common.config_dirs import DISTRIBS_BASE_DIR
-        from llama_toolchain.common.serialize import EnumEncoder
-        from llama_toolchain.core.package import ApiInput, build_image, ImageType
+        from llama_stack.common.config_dirs import DISTRIBS_BASE_DIR
+        from llama_stack.common.serialize import EnumEncoder
+        from llama_stack.core.package import ApiInput, build_image, ImageType
 
         from termcolor import cprint
         # save build.yaml spec for building same distribution again
         if build_config.image_type == ImageType.docker.value:
             # docker needs build file to be in the llama-stack repo dir to be able to copy over to the image
-            llama_toolchain_path = Path(os.path.relpath(__file__)).parent.parent.parent
+            llama_stack_path = Path(os.path.relpath(__file__)).parent.parent.parent
             build_dir = (
-                llama_toolchain_path / "configs/distributions" / build_config.image_type
+                llama_stack_path / "configs/distributions" / build_config.image_type
             )
         else:
             build_dir = DISTRIBS_BASE_DIR / build_config.image_type
@@ -74,12 +74,12 @@ class StackBuild(Subcommand):
         )
 
     def _run_stack_build_command(self, args: argparse.Namespace) -> None:
-        from llama_toolchain.common.prompt_for_config import prompt_for_config
-        from llama_toolchain.core.dynamic import instantiate_class_type
+        from llama_stack.common.prompt_for_config import prompt_for_config
+        from llama_stack.core.dynamic import instantiate_class_type
 
         if not args.config:
             self.parser.error(
-                "No config file specified. Please use `llama stack build /path/to/*-build.yaml`. Example config files can be found in llama_toolchain/configs/distributions"
+                "No config file specified. Please use `llama stack build /path/to/*-build.yaml`. Example config files can be found in llama_stack/configs/distributions"
             )
             return
 
diff --git a/llama_toolchain/cli/stack/configure.py b/llama_stack/cli/stack/configure.py
similarity index 90%
rename from llama_toolchain/cli/stack/configure.py
rename to llama_stack/cli/stack/configure.py
index 53c9622e7..6a1e7c740 100644
--- a/llama_toolchain/cli/stack/configure.py
+++ b/llama_stack/cli/stack/configure.py
@@ -13,11 +13,11 @@ import pkg_resources
 import yaml
 from termcolor import cprint
 
-from llama_toolchain.cli.subcommand import Subcommand
-from llama_toolchain.common.config_dirs import BUILDS_BASE_DIR
+from llama_stack.cli.subcommand import Subcommand
+from llama_stack.common.config_dirs import BUILDS_BASE_DIR
 
-from llama_toolchain.common.exec import run_with_pty
-from llama_toolchain.core.datatypes import *  # noqa: F403
+from llama_stack.common.exec import run_with_pty
+from llama_stack.core.datatypes import *  # noqa: F403
 
 import os
 
@@ -49,7 +49,7 @@ class StackConfigure(Subcommand):
         )
 
     def _run_stack_configure_cmd(self, args: argparse.Namespace) -> None:
-        from llama_toolchain.core.package import ImageType
+        from llama_stack.core.package import ImageType
 
         docker_image = None
         build_config_file = Path(args.config)
@@ -66,7 +66,7 @@ class StackConfigure(Subcommand):
            os.makedirs(builds_dir, exist_ok=True)
 
            script = pkg_resources.resource_filename(
-               "llama_toolchain", "core/configure_container.sh"
+               "llama_stack", "core/configure_container.sh"
            )
            script_args = [script, docker_image, str(builds_dir)]
 
@@ -95,8 +95,8 @@ class StackConfigure(Subcommand):
         build_config: BuildConfig,
         output_dir: Optional[str] = None,
     ):
-        from llama_toolchain.common.serialize import EnumEncoder
-        from llama_toolchain.core.configure import configure_api_providers
+        from llama_stack.common.serialize import EnumEncoder
+        from llama_stack.core.configure import configure_api_providers
 
         builds_dir = BUILDS_BASE_DIR / build_config.image_type
         if output_dir:
diff --git a/llama_toolchain/cli/stack/list_apis.py b/llama_stack/cli/stack/list_apis.py
similarity index 87%
rename from llama_toolchain/cli/stack/list_apis.py
rename to llama_stack/cli/stack/list_apis.py
index f13ecefe9..6eed5ca51 100644
--- a/llama_toolchain/cli/stack/list_apis.py
+++ b/llama_stack/cli/stack/list_apis.py
@@ -6,7 +6,7 @@
 
 import argparse
 
-from llama_toolchain.cli.subcommand import Subcommand
+from llama_stack.cli.subcommand import Subcommand
 
 
 class StackListApis(Subcommand):
@@ -25,8 +25,8 @@ class StackListApis(Subcommand):
         pass
 
     def _run_apis_list_cmd(self, args: argparse.Namespace) -> None:
-        from llama_toolchain.cli.table import print_table
-        from llama_toolchain.core.distribution import stack_apis
+        from llama_stack.cli.table import print_table
+        from llama_stack.core.distribution import stack_apis
 
         # eventually, this should query a registry at llama.meta.com/llamastack/distributions
         headers = [
diff --git a/llama_toolchain/cli/stack/list_providers.py b/llama_stack/cli/stack/list_providers.py
similarity index 87%
rename from llama_toolchain/cli/stack/list_providers.py
rename to llama_stack/cli/stack/list_providers.py
index a5640677d..7568c69c9 100644
--- a/llama_toolchain/cli/stack/list_providers.py
+++ b/llama_stack/cli/stack/list_providers.py
@@ -6,7 +6,7 @@
 
 import argparse
 
-from llama_toolchain.cli.subcommand import Subcommand
+from llama_stack.cli.subcommand import Subcommand
 
 
 class StackListProviders(Subcommand):
@@ -22,7 +22,7 @@ class StackListProviders(Subcommand):
         self.parser.set_defaults(func=self._run_providers_list_cmd)
 
     def _add_arguments(self):
-        from llama_toolchain.core.distribution import stack_apis
+        from llama_stack.core.distribution import stack_apis
 
         api_values = [a.value for a in stack_apis()]
         self.parser.add_argument(
@@ -33,8 +33,8 @@ class StackListProviders(Subcommand):
         )
 
     def _run_providers_list_cmd(self, args: argparse.Namespace) -> None:
-        from llama_toolchain.cli.table import print_table
-        from llama_toolchain.core.distribution import Api, api_providers
+        from llama_stack.cli.table import print_table
+        from llama_stack.core.distribution import Api, api_providers
 
         all_providers = api_providers()
         providers_for_api = all_providers[Api(args.api)]
diff --git a/llama_toolchain/cli/stack/run.py b/llama_stack/cli/stack/run.py
similarity index 91%
rename from llama_toolchain/cli/stack/run.py
rename to llama_stack/cli/stack/run.py
index b5900eaba..614132f61 100644
--- a/llama_toolchain/cli/stack/run.py
+++ b/llama_stack/cli/stack/run.py
@@ -11,8 +11,8 @@ from pathlib import Path
 import pkg_resources
 import yaml
 
-from llama_toolchain.cli.subcommand import Subcommand
-from llama_toolchain.core.datatypes import *  # noqa: F403
+from llama_stack.cli.subcommand import Subcommand
+from llama_stack.core.datatypes import *  # noqa: F403
 
 
 class StackRun(Subcommand):
@@ -47,7 +47,7 @@ class StackRun(Subcommand):
         )
 
     def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
-        from llama_toolchain.common.exec import run_with_pty
+        from llama_stack.common.exec import run_with_pty
 
         if not args.config:
             self.parser.error("Must specify a config file to run")
@@ -67,13 +67,13 @@ class StackRun(Subcommand):
 
         if config.docker_image:
             script = pkg_resources.resource_filename(
-                "llama_toolchain",
+                "llama_stack",
                 "core/start_container.sh",
             )
             run_args = [script, config.docker_image]
         else:
             script = pkg_resources.resource_filename(
-                "llama_toolchain",
+                "llama_stack",
                 "core/start_conda_env.sh",
             )
             run_args = [
diff --git a/llama_toolchain/cli/stack/stack.py b/llama_stack/cli/stack/stack.py
similarity index 94%
rename from llama_toolchain/cli/stack/stack.py
rename to llama_stack/cli/stack/stack.py
index 0e4abb5a2..c359d27ec 100644
--- a/llama_toolchain/cli/stack/stack.py
+++ b/llama_stack/cli/stack/stack.py
@@ -6,7 +6,7 @@
 
 import argparse
 
-from llama_toolchain.cli.subcommand import Subcommand
+from llama_stack.cli.subcommand import Subcommand
 
 from .build import StackBuild
 from .configure import StackConfigure
diff --git a/llama_toolchain/cli/subcommand.py b/llama_stack/cli/subcommand.py
similarity index 100%
rename from llama_toolchain/cli/subcommand.py
rename to llama_stack/cli/subcommand.py
diff --git a/llama_toolchain/cli/table.py b/llama_stack/cli/table.py
similarity index 100%
rename from llama_toolchain/cli/table.py
rename to llama_stack/cli/table.py
diff --git a/llama_toolchain/core/__init__.py b/llama_stack/common/__init__.py
similarity index 100%
rename from llama_toolchain/core/__init__.py
rename to llama_stack/common/__init__.py
diff --git a/llama_toolchain/common/config_dirs.py b/llama_stack/common/config_dirs.py
similarity index 100%
rename from llama_toolchain/common/config_dirs.py
rename to llama_stack/common/config_dirs.py
diff --git a/llama_toolchain/common/deployment_types.py b/llama_stack/common/deployment_types.py
similarity index 100%
rename from llama_toolchain/common/deployment_types.py
rename to llama_stack/common/deployment_types.py
diff --git a/llama_toolchain/common/exec.py b/llama_stack/common/exec.py
similarity index 100%
rename from llama_toolchain/common/exec.py
rename to llama_stack/common/exec.py
diff --git a/llama_toolchain/common/model_utils.py b/llama_stack/common/model_utils.py
similarity index 100%
rename from llama_toolchain/common/model_utils.py
rename to llama_stack/common/model_utils.py
diff --git a/llama_toolchain/common/prompt_for_config.py b/llama_stack/common/prompt_for_config.py
similarity index 100%
rename from llama_toolchain/common/prompt_for_config.py
rename to llama_stack/common/prompt_for_config.py
diff --git a/llama_toolchain/common/serialize.py b/llama_stack/common/serialize.py
similarity index 100%
rename from llama_toolchain/common/serialize.py
rename to llama_stack/common/serialize.py
diff --git a/llama_toolchain/common/training_types.py b/llama_stack/common/training_types.py
similarity index 100%
rename from llama_toolchain/common/training_types.py
rename to llama_stack/common/training_types.py
diff --git a/llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml b/llama_stack/configs/distributions/conda/local-conda-example-build.yaml
similarity index 72%
rename from llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml
rename to llama_stack/configs/distributions/conda/local-conda-example-build.yaml
index 2a25cb9dd..d588b6b5f 100644
--- a/llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml
+++ b/llama_stack/configs/distributions/conda/local-conda-example-build.yaml
@@ -1,6 +1,6 @@
 name: local-conda-example
 distribution_spec:
-  description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
   providers:
     inference: meta-reference
     memory: meta-reference-faiss
diff --git a/llama_toolchain/configs/distributions/conda/local-fireworks-conda-example-build.yaml b/llama_stack/configs/distributions/conda/local-fireworks-conda-example-build.yaml
similarity index 100%
rename from llama_toolchain/configs/distributions/conda/local-fireworks-conda-example-build.yaml
rename to llama_stack/configs/distributions/conda/local-fireworks-conda-example-build.yaml
diff --git a/llama_toolchain/configs/distributions/conda/local-ollama-conda-example-build.yaml b/llama_stack/configs/distributions/conda/local-ollama-conda-example-build.yaml
similarity index 100%
rename from llama_toolchain/configs/distributions/conda/local-ollama-conda-example-build.yaml
rename to llama_stack/configs/distributions/conda/local-ollama-conda-example-build.yaml
diff --git a/llama_toolchain/configs/distributions/conda/local-tgi-conda-example-build.yaml b/llama_stack/configs/distributions/conda/local-tgi-conda-example-build.yaml
similarity index 100%
rename from llama_toolchain/configs/distributions/conda/local-tgi-conda-example-build.yaml
rename to llama_stack/configs/distributions/conda/local-tgi-conda-example-build.yaml
diff --git a/llama_toolchain/configs/distributions/conda/local-together-conda-example-build.yaml b/llama_stack/configs/distributions/conda/local-together-conda-example-build.yaml
similarity index 100%
rename from llama_toolchain/configs/distributions/conda/local-together-conda-example-build.yaml
rename to llama_stack/configs/distributions/conda/local-together-conda-example-build.yaml
diff --git a/llama_toolchain/configs/distributions/docker/local-docker-example-build.yaml b/llama_stack/configs/distributions/docker/local-docker-example-build.yaml
similarity index 72%
rename from llama_toolchain/configs/distributions/docker/local-docker-example-build.yaml
rename to llama_stack/configs/distributions/docker/local-docker-example-build.yaml
index 0bdb18802..f868aa98f 100644
--- a/llama_toolchain/configs/distributions/docker/local-docker-example-build.yaml
+++ b/llama_stack/configs/distributions/docker/local-docker-example-build.yaml
@@ -1,6 +1,6 @@
 name: local-docker-example
 distribution_spec:
-  description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
   providers:
     inference: meta-reference
     memory: meta-reference-faiss
diff --git a/llama_toolchain/inference/__init__.py b/llama_stack/core/__init__.py
similarity index 100%
rename from llama_toolchain/inference/__init__.py
rename to llama_stack/core/__init__.py
diff --git a/llama_toolchain/core/build_conda_env.sh b/llama_stack/core/build_conda_env.sh
similarity index 100%
rename from llama_toolchain/core/build_conda_env.sh
rename to llama_stack/core/build_conda_env.sh
diff --git a/llama_toolchain/core/build_container.sh b/llama_stack/core/build_container.sh
similarity index 97%
rename from llama_toolchain/core/build_container.sh
rename to llama_stack/core/build_container.sh
index d829e8399..81cb5d40c 100755
--- a/llama_toolchain/core/build_container.sh
+++ b/llama_stack/core/build_container.sh
@@ -90,7 +90,7 @@ add_to_docker < str:
         if self.adapter:
             return self.adapter.module
-        return f"llama_toolchain.{self.api.value}.client"
+        return f"llama_stack.{self.api.value}.client"
 
     @property
     def pip_packages(self) -> List[str]:
@@ -167,7 +167,7 @@ def remote_provider_spec(
     config_class = (
         adapter.config_class
         if adapter and adapter.config_class
-        else "llama_toolchain.core.datatypes.RemoteProviderConfig"
+        else "llama_stack.core.datatypes.RemoteProviderConfig"
     )
 
     provider_id = remote_provider_id(adapter.adapter_id) if adapter else "remote"
diff --git a/llama_toolchain/core/distribution.py b/llama_stack/core/distribution.py
similarity index 84%
rename from llama_toolchain/core/distribution.py
rename to llama_stack/core/distribution.py
index dc81b53f1..83ac4f045 100644
--- a/llama_toolchain/core/distribution.py
+++ b/llama_stack/core/distribution.py
@@ -8,11 +8,11 @@ import importlib
 import inspect
 from typing import Dict, List
 
-from llama_toolchain.agentic_system.api import AgenticSystem
-from llama_toolchain.inference.api import Inference
-from llama_toolchain.memory.api import Memory
-from llama_toolchain.safety.api import Safety
-from llama_toolchain.telemetry.api import Telemetry
+from llama_stack.agentic_system.api import AgenticSystem
+from llama_stack.inference.api import Inference
+from llama_stack.memory.api import Memory
+from llama_stack.safety.api import Safety
+from llama_stack.telemetry.api import Telemetry
 
 from .datatypes import Api, ApiEndpoint, ProviderSpec, remote_provider_spec
 
@@ -67,7 +67,7 @@ def api_providers() -> Dict[Api, Dict[str, ProviderSpec]]:
     ret = {}
     for api in stack_apis():
         name = api.name.lower()
-        module = importlib.import_module(f"llama_toolchain.{name}.providers")
+        module = importlib.import_module(f"llama_stack.{name}.providers")
         ret[api] = {
             "remote": remote_provider_spec(api),
             **{a.provider_id: a for a in module.available_providers()},
diff --git a/llama_toolchain/core/dynamic.py b/llama_stack/core/dynamic.py
similarity index 97%
rename from llama_toolchain/core/dynamic.py
rename to llama_stack/core/dynamic.py
index 42c0646da..3aee3b24a 100644
--- a/llama_toolchain/core/dynamic.py
+++ b/llama_stack/core/dynamic.py
@@ -7,7 +7,7 @@ import importlib
 from typing import Any, Dict
 
-from llama_toolchain.core.datatypes import *  # noqa: F403
+from
llama_stack.core.datatypes import * # noqa: F403 def instantiate_class_type(fully_qualified_name): diff --git a/llama_toolchain/core/package.py b/llama_stack/core/package.py similarity index 88% rename from llama_toolchain/core/package.py rename to llama_stack/core/package.py index 37dac091d..7e364f517 100644 --- a/llama_toolchain/core/package.py +++ b/llama_stack/core/package.py @@ -12,12 +12,12 @@ from pydantic import BaseModel from termcolor import cprint -from llama_toolchain.common.exec import run_with_pty +from llama_stack.common.exec import run_with_pty -from llama_toolchain.core.datatypes import * # noqa: F403 +from llama_stack.core.datatypes import * # noqa: F403 from pathlib import Path -from llama_toolchain.core.distribution import api_providers, SERVER_DEPENDENCIES +from llama_stack.core.distribution import api_providers, SERVER_DEPENDENCIES class ImageType(Enum): @@ -68,7 +68,7 @@ def build_image(build_config: BuildConfig, build_file_path: Path): if build_config.image_type == ImageType.docker.value: script = pkg_resources.resource_filename( - "llama_toolchain", "core/build_container.sh" + "llama_stack", "core/build_container.sh" ) args = [ script, @@ -79,7 +79,7 @@ def build_image(build_config: BuildConfig, build_file_path: Path): ] else: script = pkg_resources.resource_filename( - "llama_toolchain", "core/build_conda_env.sh" + "llama_stack", "core/build_conda_env.sh" ) args = [ script, diff --git a/llama_toolchain/core/server.py b/llama_stack/core/server.py similarity index 98% rename from llama_toolchain/core/server.py rename to llama_stack/core/server.py index 70273be16..7a40184ec 100644 --- a/llama_toolchain/core/server.py +++ b/llama_stack/core/server.py @@ -39,13 +39,13 @@ from pydantic import BaseModel, ValidationError from termcolor import cprint from typing_extensions import Annotated -from llama_toolchain.telemetry.tracing import ( +from llama_stack.telemetry.tracing import ( end_trace, setup_logger, SpanStatus, start_trace, ) -from llama_toolchain.core.datatypes import * # noqa: F403 +from llama_stack.core.datatypes import * # noqa: F403 from .distribution import api_endpoints, api_providers from .dynamic import instantiate_provider @@ -309,7 +309,7 @@ async def resolve_impls( specs[api] = RouterProviderSpec( api=api, - module=f"llama_toolchain.{api.value.lower()}.router", + module=f"llama_stack.{api.value.lower()}.router", api_dependencies=[], inner_specs=inner_specs, ) diff --git a/llama_toolchain/core/start_conda_env.sh b/llama_stack/core/start_conda_env.sh similarity index 95% rename from llama_toolchain/core/start_conda_env.sh rename to llama_stack/core/start_conda_env.sh index 120dda006..604b1267e 100755 --- a/llama_toolchain/core/start_conda_env.sh +++ b/llama_stack/core/start_conda_env.sh @@ -37,6 +37,6 @@ eval "$(conda shell.bash hook)" conda deactivate && conda activate "$env_name" $CONDA_PREFIX/bin/python \ - -m llama_toolchain.core.server \ + -m llama_stack.core.server \ --yaml_config "$yaml_config" \ --port "$port" "$@" diff --git a/llama_toolchain/core/start_container.sh b/llama_stack/core/start_container.sh similarity index 94% rename from llama_toolchain/core/start_container.sh rename to llama_stack/core/start_container.sh index 676bcedcf..7835e2a79 100755 --- a/llama_toolchain/core/start_container.sh +++ b/llama_stack/core/start_container.sh @@ -38,6 +38,6 @@ podman run -it \ -p $port:$port \ -v "$yaml_config:/app/config.yaml" \ $docker_image \ - python -m llama_toolchain.core.server \ + python -m llama_stack.core.server \ --yaml_config 
/app/config.yaml \ --port $port "$@" diff --git a/llama_toolchain/dataset/api/__init__.py b/llama_stack/dataset/api/__init__.py similarity index 100% rename from llama_toolchain/dataset/api/__init__.py rename to llama_stack/dataset/api/__init__.py diff --git a/llama_toolchain/dataset/api/api.py b/llama_stack/dataset/api/api.py similarity index 100% rename from llama_toolchain/dataset/api/api.py rename to llama_stack/dataset/api/api.py diff --git a/llama_toolchain/evaluations/api/__init__.py b/llama_stack/evaluations/api/__init__.py similarity index 100% rename from llama_toolchain/evaluations/api/__init__.py rename to llama_stack/evaluations/api/__init__.py diff --git a/llama_toolchain/evaluations/api/api.py b/llama_stack/evaluations/api/api.py similarity index 95% rename from llama_toolchain/evaluations/api/api.py rename to llama_stack/evaluations/api/api.py index 898dc2822..c99ed0c72 100644 --- a/llama_toolchain/evaluations/api/api.py +++ b/llama_stack/evaluations/api/api.py @@ -12,8 +12,8 @@ from llama_models.schema_utils import webmethod from pydantic import BaseModel from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_toolchain.dataset.api import * # noqa: F403 -from llama_toolchain.common.training_types import * # noqa: F403 +from llama_stack.dataset.api import * # noqa: F403 +from llama_stack.common.training_types import * # noqa: F403 class TextGenerationMetric(Enum): diff --git a/llama_toolchain/inference/adapters/__init__.py b/llama_stack/inference/__init__.py similarity index 100% rename from llama_toolchain/inference/adapters/__init__.py rename to llama_stack/inference/__init__.py diff --git a/llama_toolchain/memory/__init__.py b/llama_stack/inference/adapters/__init__.py similarity index 100% rename from llama_toolchain/memory/__init__.py rename to llama_stack/inference/adapters/__init__.py diff --git a/llama_toolchain/inference/adapters/fireworks/__init__.py b/llama_stack/inference/adapters/fireworks/__init__.py similarity index 100% rename from llama_toolchain/inference/adapters/fireworks/__init__.py rename to llama_stack/inference/adapters/fireworks/__init__.py diff --git a/llama_toolchain/inference/adapters/fireworks/config.py b/llama_stack/inference/adapters/fireworks/config.py similarity index 100% rename from llama_toolchain/inference/adapters/fireworks/config.py rename to llama_stack/inference/adapters/fireworks/config.py diff --git a/llama_toolchain/inference/adapters/fireworks/fireworks.py b/llama_stack/inference/adapters/fireworks/fireworks.py similarity index 98% rename from llama_toolchain/inference/adapters/fireworks/fireworks.py rename to llama_stack/inference/adapters/fireworks/fireworks.py index e51a730de..6ebb8d5a3 100644 --- a/llama_toolchain/inference/adapters/fireworks/fireworks.py +++ b/llama_stack/inference/adapters/fireworks/fireworks.py @@ -13,8 +13,8 @@ from llama_models.llama3.api.datatypes import Message, StopReason from llama_models.llama3.api.tokenizer import Tokenizer from llama_models.sku_list import resolve_model -from llama_toolchain.inference.api import * # noqa: F403 -from llama_toolchain.inference.prepare_messages import prepare_messages +from llama_stack.inference.api import * # noqa: F403 +from llama_stack.inference.prepare_messages import prepare_messages from .config import FireworksImplConfig diff --git a/llama_toolchain/inference/adapters/ollama/__init__.py b/llama_stack/inference/adapters/ollama/__init__.py similarity index 86% rename from llama_toolchain/inference/adapters/ollama/__init__.py rename to 
llama_stack/inference/adapters/ollama/__init__.py index 8369a00a5..c9d0348ec 100644 --- a/llama_toolchain/inference/adapters/ollama/__init__.py +++ b/llama_stack/inference/adapters/ollama/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_toolchain.core.datatypes import RemoteProviderConfig +from llama_stack.core.datatypes import RemoteProviderConfig async def get_adapter_impl(config: RemoteProviderConfig, _deps): diff --git a/llama_toolchain/inference/adapters/ollama/ollama.py b/llama_stack/inference/adapters/ollama/ollama.py similarity index 98% rename from llama_toolchain/inference/adapters/ollama/ollama.py rename to llama_stack/inference/adapters/ollama/ollama.py index 92fbf7585..236421a2f 100644 --- a/llama_toolchain/inference/adapters/ollama/ollama.py +++ b/llama_stack/inference/adapters/ollama/ollama.py @@ -14,8 +14,8 @@ from llama_models.llama3.api.tokenizer import Tokenizer from llama_models.sku_list import resolve_model from ollama import AsyncClient -from llama_toolchain.inference.api import * # noqa: F403 -from llama_toolchain.inference.prepare_messages import prepare_messages +from llama_stack.inference.api import * # noqa: F403 +from llama_stack.inference.prepare_messages import prepare_messages # TODO: Eventually this will move to the llama cli model list command # mapping of Model SKUs to ollama models diff --git a/llama_toolchain/inference/adapters/tgi/__init__.py b/llama_stack/inference/adapters/tgi/__init__.py similarity index 100% rename from llama_toolchain/inference/adapters/tgi/__init__.py rename to llama_stack/inference/adapters/tgi/__init__.py diff --git a/llama_toolchain/inference/adapters/tgi/config.py b/llama_stack/inference/adapters/tgi/config.py similarity index 100% rename from llama_toolchain/inference/adapters/tgi/config.py rename to llama_stack/inference/adapters/tgi/config.py diff --git a/llama_toolchain/inference/adapters/tgi/tgi.py b/llama_stack/inference/adapters/tgi/tgi.py similarity index 98% rename from llama_toolchain/inference/adapters/tgi/tgi.py rename to llama_stack/inference/adapters/tgi/tgi.py index 7b1028817..7ca2e1b14 100644 --- a/llama_toolchain/inference/adapters/tgi/tgi.py +++ b/llama_stack/inference/adapters/tgi/tgi.py @@ -13,8 +13,8 @@ from huggingface_hub import HfApi, InferenceClient from llama_models.llama3.api.chat_format import ChatFormat from llama_models.llama3.api.datatypes import StopReason from llama_models.llama3.api.tokenizer import Tokenizer -from llama_toolchain.inference.api import * # noqa: F403 -from llama_toolchain.inference.prepare_messages import prepare_messages +from llama_stack.inference.api import * # noqa: F403 +from llama_stack.inference.prepare_messages import prepare_messages from .config import TGIImplConfig diff --git a/llama_toolchain/inference/adapters/together/__init__.py b/llama_stack/inference/adapters/together/__init__.py similarity index 100% rename from llama_toolchain/inference/adapters/together/__init__.py rename to llama_stack/inference/adapters/together/__init__.py diff --git a/llama_toolchain/inference/adapters/together/config.py b/llama_stack/inference/adapters/together/config.py similarity index 100% rename from llama_toolchain/inference/adapters/together/config.py rename to llama_stack/inference/adapters/together/config.py diff --git a/llama_toolchain/inference/adapters/together/together.py b/llama_stack/inference/adapters/together/together.py similarity index 98% rename from 
llama_toolchain/inference/adapters/together/together.py rename to llama_stack/inference/adapters/together/together.py index 76403a85b..739b437ca 100644 --- a/llama_toolchain/inference/adapters/together/together.py +++ b/llama_stack/inference/adapters/together/together.py @@ -13,8 +13,8 @@ from llama_models.llama3.api.tokenizer import Tokenizer from llama_models.sku_list import resolve_model from together import Together -from llama_toolchain.inference.api import * # noqa: F403 -from llama_toolchain.inference.prepare_messages import prepare_messages +from llama_stack.inference.api import * # noqa: F403 +from llama_stack.inference.prepare_messages import prepare_messages from .config import TogetherImplConfig diff --git a/llama_toolchain/inference/api/__init__.py b/llama_stack/inference/api/__init__.py similarity index 100% rename from llama_toolchain/inference/api/__init__.py rename to llama_stack/inference/api/__init__.py diff --git a/llama_toolchain/inference/api/api.py b/llama_stack/inference/api/api.py similarity index 100% rename from llama_toolchain/inference/api/api.py rename to llama_stack/inference/api/api.py diff --git a/llama_toolchain/inference/client.py b/llama_stack/inference/client.py similarity index 98% rename from llama_toolchain/inference/client.py rename to llama_stack/inference/client.py index c57433a8f..8e9b77030 100644 --- a/llama_toolchain/inference/client.py +++ b/llama_stack/inference/client.py @@ -10,11 +10,11 @@ from typing import Any, AsyncGenerator import fire import httpx - -from llama_toolchain.core.datatypes import RemoteProviderConfig from pydantic import BaseModel from termcolor import cprint +from llama_stack.core.datatypes import RemoteProviderConfig + from .api import ( ChatCompletionRequest, ChatCompletionResponse, diff --git a/llama_toolchain/inference/event_logger.py b/llama_stack/inference/event_logger.py similarity index 97% rename from llama_toolchain/inference/event_logger.py rename to llama_stack/inference/event_logger.py index 248ceae27..6e27998a5 100644 --- a/llama_toolchain/inference/event_logger.py +++ b/llama_stack/inference/event_logger.py @@ -4,11 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_toolchain.inference.api import ( +from termcolor import cprint + +from llama_stack.inference.api import ( ChatCompletionResponseEventType, ChatCompletionResponseStreamChunk, ) -from termcolor import cprint class LogEvent: diff --git a/llama_toolchain/inference/meta_reference/__init__.py b/llama_stack/inference/meta_reference/__init__.py similarity index 100% rename from llama_toolchain/inference/meta_reference/__init__.py rename to llama_stack/inference/meta_reference/__init__.py diff --git a/llama_toolchain/inference/meta_reference/config.py b/llama_stack/inference/meta_reference/config.py similarity index 96% rename from llama_toolchain/inference/meta_reference/config.py rename to llama_stack/inference/meta_reference/config.py index a0bbc5820..7da01a0f4 100644 --- a/llama_toolchain/inference/meta_reference/config.py +++ b/llama_stack/inference/meta_reference/config.py @@ -13,7 +13,7 @@ from llama_models.sku_list import all_registered_models, resolve_model from pydantic import BaseModel, Field, field_validator -from llama_toolchain.inference.api import QuantizationConfig +from llama_stack.inference.api import QuantizationConfig @json_schema_type diff --git a/llama_toolchain/inference/meta_reference/generation.py b/llama_stack/inference/meta_reference/generation.py similarity index 98% rename from llama_toolchain/inference/meta_reference/generation.py rename to llama_stack/inference/meta_reference/generation.py index d13b9570d..f7b077f54 100644 --- a/llama_toolchain/inference/meta_reference/generation.py +++ b/llama_stack/inference/meta_reference/generation.py @@ -28,11 +28,11 @@ from llama_models.llama3.api.datatypes import Message, ToolPromptFormat from llama_models.llama3.api.tokenizer import Tokenizer from llama_models.llama3.reference_impl.model import Transformer from llama_models.sku_list import resolve_model - -from llama_toolchain.common.model_utils import model_local_dir -from llama_toolchain.inference.api import QuantizationType from termcolor import cprint +from llama_stack.common.model_utils import model_local_dir +from llama_stack.inference.api import QuantizationType + from .config import MetaReferenceImplConfig diff --git a/llama_toolchain/inference/meta_reference/inference.py b/llama_stack/inference/meta_reference/inference.py similarity index 97% rename from llama_toolchain/inference/meta_reference/inference.py rename to llama_stack/inference/meta_reference/inference.py index 247c08f23..66a97f7fb 100644 --- a/llama_toolchain/inference/meta_reference/inference.py +++ b/llama_stack/inference/meta_reference/inference.py @@ -11,7 +11,7 @@ from typing import AsyncIterator, Union from llama_models.llama3.api.datatypes import StopReason from llama_models.sku_list import resolve_model -from llama_toolchain.inference.api import ( +from llama_stack.inference.api import ( ChatCompletionRequest, ChatCompletionResponse, ChatCompletionResponseEvent, @@ -21,13 +21,13 @@ from llama_toolchain.inference.api import ( ToolCallDelta, ToolCallParseStatus, ) -from llama_toolchain.inference.prepare_messages import prepare_messages +from llama_stack.inference.prepare_messages import prepare_messages from .config import MetaReferenceImplConfig from .model_parallel import LlamaModelParallelGenerator from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_toolchain.inference.api import * # noqa: F403 +from llama_stack.inference.api import * # noqa: F403 # there's a single model parallel process running serving the model. 
for now, # we don't support multiple concurrent requests to this process. diff --git a/llama_toolchain/inference/meta_reference/model_parallel.py b/llama_stack/inference/meta_reference/model_parallel.py similarity index 100% rename from llama_toolchain/inference/meta_reference/model_parallel.py rename to llama_stack/inference/meta_reference/model_parallel.py diff --git a/llama_toolchain/inference/meta_reference/parallel_utils.py b/llama_stack/inference/meta_reference/parallel_utils.py similarity index 100% rename from llama_toolchain/inference/meta_reference/parallel_utils.py rename to llama_stack/inference/meta_reference/parallel_utils.py diff --git a/llama_toolchain/inference/prepare_messages.py b/llama_stack/inference/prepare_messages.py similarity index 97% rename from llama_toolchain/inference/prepare_messages.py rename to llama_stack/inference/prepare_messages.py index 92e94f8d2..ef818daa1 100644 --- a/llama_toolchain/inference/prepare_messages.py +++ b/llama_stack/inference/prepare_messages.py @@ -5,7 +5,7 @@ # the root directory of this source tree. from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_toolchain.inference.api import * # noqa: F403 +from llama_stack.inference.api import * # noqa: F403 from llama_models.llama3.prompt_templates import ( BuiltinToolGenerator, FunctionTagCustomToolGenerator, diff --git a/llama_toolchain/inference/providers.py b/llama_stack/inference/providers.py similarity index 67% rename from llama_toolchain/inference/providers.py rename to llama_stack/inference/providers.py index 928c6ef57..069cda1d2 100644 --- a/llama_toolchain/inference/providers.py +++ b/llama_stack/inference/providers.py @@ -6,7 +6,7 @@ from typing import List -from llama_toolchain.core.datatypes import * # noqa: F403 +from llama_stack.core.datatypes import * # noqa: F403 def available_providers() -> List[ProviderSpec]: @@ -24,15 +24,15 @@ def available_providers() -> List[ProviderSpec]: "transformers", "zmq", ], - module="llama_toolchain.inference.meta_reference", - config_class="llama_toolchain.inference.meta_reference.MetaReferenceImplConfig", + module="llama_stack.inference.meta_reference", + config_class="llama_stack.inference.meta_reference.MetaReferenceImplConfig", ), remote_provider_spec( api=Api.inference, adapter=AdapterSpec( adapter_id="ollama", pip_packages=["ollama"], - module="llama_toolchain.inference.adapters.ollama", + module="llama_stack.inference.adapters.ollama", ), ), remote_provider_spec( @@ -40,8 +40,8 @@ def available_providers() -> List[ProviderSpec]: adapter=AdapterSpec( adapter_id="tgi", pip_packages=["huggingface_hub"], - module="llama_toolchain.inference.adapters.tgi", - config_class="llama_toolchain.inference.adapters.tgi.TGIImplConfig", + module="llama_stack.inference.adapters.tgi", + config_class="llama_stack.inference.adapters.tgi.TGIImplConfig", ), ), remote_provider_spec( @@ -51,8 +51,8 @@ def available_providers() -> List[ProviderSpec]: pip_packages=[ "fireworks-ai", ], - module="llama_toolchain.inference.adapters.fireworks", - config_class="llama_toolchain.inference.adapters.fireworks.FireworksImplConfig", + module="llama_stack.inference.adapters.fireworks", + config_class="llama_stack.inference.adapters.fireworks.FireworksImplConfig", ), ), remote_provider_spec( @@ -62,8 +62,8 @@ def available_providers() -> List[ProviderSpec]: pip_packages=[ "together", ], - module="llama_toolchain.inference.adapters.together", - config_class="llama_toolchain.inference.adapters.together.TogetherImplConfig", + 
module="llama_stack.inference.adapters.together", + config_class="llama_stack.inference.adapters.together.TogetherImplConfig", ), ), ] diff --git a/llama_toolchain/inference/quantization/fp8_impls.py b/llama_stack/inference/quantization/fp8_impls.py similarity index 100% rename from llama_toolchain/inference/quantization/fp8_impls.py rename to llama_stack/inference/quantization/fp8_impls.py diff --git a/llama_toolchain/inference/quantization/loader.py b/llama_stack/inference/quantization/loader.py similarity index 97% rename from llama_toolchain/inference/quantization/loader.py rename to llama_stack/inference/quantization/loader.py index 54827dce9..1b98bb9ad 100644 --- a/llama_toolchain/inference/quantization/loader.py +++ b/llama_stack/inference/quantization/loader.py @@ -14,9 +14,9 @@ import torch from fairscale.nn.model_parallel.mappings import reduce_from_model_parallel_region from llama_models.llama3.api.model import Transformer, TransformerBlock -from llama_toolchain.inference.api import QuantizationType +from llama_stack.inference.api import QuantizationType -from llama_toolchain.inference.api.config import ( +from llama_stack.inference.api.config import ( CheckpointQuantizationFormat, MetaReferenceImplConfig, ) diff --git a/llama_toolchain/inference/quantization/scripts/build_conda.sh b/llama_stack/inference/quantization/scripts/build_conda.sh similarity index 100% rename from llama_toolchain/inference/quantization/scripts/build_conda.sh rename to llama_stack/inference/quantization/scripts/build_conda.sh diff --git a/llama_toolchain/inference/quantization/scripts/quantize_checkpoint.py b/llama_stack/inference/quantization/scripts/quantize_checkpoint.py similarity index 100% rename from llama_toolchain/inference/quantization/scripts/quantize_checkpoint.py rename to llama_stack/inference/quantization/scripts/quantize_checkpoint.py diff --git a/llama_toolchain/inference/quantization/scripts/run_quantize_checkpoint.sh b/llama_stack/inference/quantization/scripts/run_quantize_checkpoint.sh similarity index 100% rename from llama_toolchain/inference/quantization/scripts/run_quantize_checkpoint.sh rename to llama_stack/inference/quantization/scripts/run_quantize_checkpoint.sh diff --git a/llama_toolchain/inference/quantization/test_fp8.py b/llama_stack/inference/quantization/test_fp8.py similarity index 100% rename from llama_toolchain/inference/quantization/test_fp8.py rename to llama_stack/inference/quantization/test_fp8.py diff --git a/llama_toolchain/memory/common/__init__.py b/llama_stack/memory/__init__.py similarity index 100% rename from llama_toolchain/memory/common/__init__.py rename to llama_stack/memory/__init__.py diff --git a/llama_toolchain/memory/adapters/chroma/__init__.py b/llama_stack/memory/adapters/chroma/__init__.py similarity index 86% rename from llama_toolchain/memory/adapters/chroma/__init__.py rename to llama_stack/memory/adapters/chroma/__init__.py index c90a8e8ac..32a9c65a3 100644 --- a/llama_toolchain/memory/adapters/chroma/__init__.py +++ b/llama_stack/memory/adapters/chroma/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_toolchain.core.datatypes import RemoteProviderConfig +from llama_stack.core.datatypes import RemoteProviderConfig async def get_adapter_impl(config: RemoteProviderConfig, _deps): diff --git a/llama_toolchain/memory/adapters/chroma/chroma.py b/llama_stack/memory/adapters/chroma/chroma.py similarity index 97% rename from llama_toolchain/memory/adapters/chroma/chroma.py rename to llama_stack/memory/adapters/chroma/chroma.py index f4952cd0e..0c39fd9c0 100644 --- a/llama_toolchain/memory/adapters/chroma/chroma.py +++ b/llama_stack/memory/adapters/chroma/chroma.py @@ -12,10 +12,10 @@ from urllib.parse import urlparse import chromadb from numpy.typing import NDArray -from llama_toolchain.memory.api import * # noqa: F403 +from llama_stack.memory.api import * # noqa: F403 -from llama_toolchain.memory.common.vector_store import BankWithIndex, EmbeddingIndex +from llama_stack.memory.common.vector_store import BankWithIndex, EmbeddingIndex class ChromaIndex(EmbeddingIndex): diff --git a/llama_toolchain/memory/adapters/pgvector/__init__.py b/llama_stack/memory/adapters/pgvector/__init__.py similarity index 100% rename from llama_toolchain/memory/adapters/pgvector/__init__.py rename to llama_stack/memory/adapters/pgvector/__init__.py diff --git a/llama_toolchain/memory/adapters/pgvector/config.py b/llama_stack/memory/adapters/pgvector/config.py similarity index 100% rename from llama_toolchain/memory/adapters/pgvector/config.py rename to llama_stack/memory/adapters/pgvector/config.py diff --git a/llama_toolchain/memory/adapters/pgvector/pgvector.py b/llama_stack/memory/adapters/pgvector/pgvector.py similarity index 98% rename from llama_toolchain/memory/adapters/pgvector/pgvector.py rename to llama_stack/memory/adapters/pgvector/pgvector.py index 930d7720f..a8dadb0f3 100644 --- a/llama_toolchain/memory/adapters/pgvector/pgvector.py +++ b/llama_stack/memory/adapters/pgvector/pgvector.py @@ -13,10 +13,10 @@ from numpy.typing import NDArray from psycopg2 import sql from psycopg2.extras import execute_values, Json from pydantic import BaseModel -from llama_toolchain.memory.api import * # noqa: F403 +from llama_stack.memory.api import * # noqa: F403 -from llama_toolchain.memory.common.vector_store import ( +from llama_stack.memory.common.vector_store import ( ALL_MINILM_L6_V2_DIMENSION, BankWithIndex, EmbeddingIndex, diff --git a/llama_toolchain/memory/api/__init__.py b/llama_stack/memory/api/__init__.py similarity index 100% rename from llama_toolchain/memory/api/__init__.py rename to llama_stack/memory/api/__init__.py diff --git a/llama_toolchain/memory/api/api.py b/llama_stack/memory/api/api.py similarity index 100% rename from llama_toolchain/memory/api/api.py rename to llama_stack/memory/api/api.py diff --git a/llama_toolchain/memory/client.py b/llama_stack/memory/client.py similarity index 98% rename from llama_toolchain/memory/client.py rename to llama_stack/memory/client.py index c2c04b213..f629fafe8 100644 --- a/llama_toolchain/memory/client.py +++ b/llama_stack/memory/client.py @@ -15,7 +15,7 @@ import fire import httpx from termcolor import cprint -from llama_toolchain.core.datatypes import RemoteProviderConfig +from llama_stack.core.datatypes import RemoteProviderConfig from .api import * # noqa: F403 from .common.file_utils import data_url_from_file diff --git a/llama_toolchain/memory/meta_reference/__init__.py b/llama_stack/memory/common/__init__.py similarity index 100% rename from llama_toolchain/memory/meta_reference/__init__.py rename to llama_stack/memory/common/__init__.py 
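
The chroma move above keeps the remote-adapter convention seen earlier in this patch (see the ollama adapter): each adapter package exposes an async get_adapter_impl(config, _deps) entry point that receives a RemoteProviderConfig and hands back an initialized implementation. A minimal sketch of that entry point under the new package name; the ChromaMemoryAdapter class name and the config.url field are assumptions for illustration, not part of this patch:

    from llama_stack.core.datatypes import RemoteProviderConfig


    async def get_adapter_impl(config: RemoteProviderConfig, _deps):
        # Lazy import keeps the chromadb client dependency out of the
        # common import path until this adapter is actually selected.
        from .chroma import ChromaMemoryAdapter  # hypothetical class name

        impl = ChromaMemoryAdapter(config.url)  # assumes the config carries a url
        await impl.initialize()
        return impl

The same shape applies to the pgvector and inference adapters, which is what lets remote_provider_spec treat every adapter module uniformly via AdapterSpec.module.
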
diff --git a/llama_toolchain/memory/common/file_utils.py b/llama_stack/memory/common/file_utils.py similarity index 100% rename from llama_toolchain/memory/common/file_utils.py rename to llama_stack/memory/common/file_utils.py diff --git a/llama_toolchain/memory/common/vector_store.py b/llama_stack/memory/common/vector_store.py similarity index 98% rename from llama_toolchain/memory/common/vector_store.py rename to llama_stack/memory/common/vector_store.py index baa3fbf21..aca3fd13d 100644 --- a/llama_toolchain/memory/common/vector_store.py +++ b/llama_stack/memory/common/vector_store.py @@ -20,7 +20,7 @@ from pypdf import PdfReader from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_models.llama3.api.tokenizer import Tokenizer -from llama_toolchain.memory.api import * # noqa: F403 +from llama_stack.memory.api import * # noqa: F403 ALL_MINILM_L6_V2_DIMENSION = 384 diff --git a/llama_toolchain/safety/__init__.py b/llama_stack/memory/meta_reference/__init__.py similarity index 100% rename from llama_toolchain/safety/__init__.py rename to llama_stack/memory/meta_reference/__init__.py diff --git a/llama_toolchain/memory/meta_reference/faiss/__init__.py b/llama_stack/memory/meta_reference/faiss/__init__.py similarity index 100% rename from llama_toolchain/memory/meta_reference/faiss/__init__.py rename to llama_stack/memory/meta_reference/faiss/__init__.py diff --git a/llama_toolchain/memory/meta_reference/faiss/config.py b/llama_stack/memory/meta_reference/faiss/config.py similarity index 100% rename from llama_toolchain/memory/meta_reference/faiss/config.py rename to llama_stack/memory/meta_reference/faiss/config.py diff --git a/llama_toolchain/memory/meta_reference/faiss/faiss.py b/llama_stack/memory/meta_reference/faiss/faiss.py similarity index 95% rename from llama_toolchain/memory/meta_reference/faiss/faiss.py rename to llama_stack/memory/meta_reference/faiss/faiss.py index 2dcff4d25..dec8bd2df 100644 --- a/llama_toolchain/memory/meta_reference/faiss/faiss.py +++ b/llama_stack/memory/meta_reference/faiss/faiss.py @@ -15,13 +15,13 @@ from numpy.typing import NDArray from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_toolchain.memory.api import * # noqa: F403 -from llama_toolchain.memory.common.vector_store import ( +from llama_stack.memory.api import * # noqa: F403 +from llama_stack.memory.common.vector_store import ( ALL_MINILM_L6_V2_DIMENSION, BankWithIndex, EmbeddingIndex, ) -from llama_toolchain.telemetry import tracing +from llama_stack.telemetry import tracing from .config import FaissImplConfig logger = logging.getLogger(__name__) diff --git a/llama_toolchain/memory/providers.py b/llama_stack/memory/providers.py similarity index 69% rename from llama_toolchain/memory/providers.py rename to llama_stack/memory/providers.py index cf443f5f3..bffd9eb64 100644 --- a/llama_toolchain/memory/providers.py +++ b/llama_stack/memory/providers.py @@ -6,7 +6,7 @@ from typing import List -from llama_toolchain.core.datatypes import * # noqa: F403 +from llama_stack.core.datatypes import * # noqa: F403 EMBEDDING_DEPS = [ "blobfile", @@ -22,15 +22,15 @@ def available_providers() -> List[ProviderSpec]: api=Api.memory, provider_id="meta-reference-faiss", pip_packages=EMBEDDING_DEPS + ["faiss-cpu"], - module="llama_toolchain.memory.meta_reference.faiss", - config_class="llama_toolchain.memory.meta_reference.faiss.FaissImplConfig", + module="llama_stack.memory.meta_reference.faiss", + config_class="llama_stack.memory.meta_reference.faiss.FaissImplConfig", ), 
remote_provider_spec( Api.memory, AdapterSpec( adapter_id="chromadb", pip_packages=EMBEDDING_DEPS + ["chromadb-client"], - module="llama_toolchain.memory.adapters.chroma", + module="llama_stack.memory.adapters.chroma", ), ), remote_provider_spec( @@ -38,8 +38,8 @@ def available_providers() -> List[ProviderSpec]: AdapterSpec( adapter_id="pgvector", pip_packages=EMBEDDING_DEPS + ["psycopg2-binary"], - module="llama_toolchain.memory.adapters.pgvector", - config_class="llama_toolchain.memory.adapters.pgvector.PGVectorConfig", + module="llama_stack.memory.adapters.pgvector", + config_class="llama_stack.memory.adapters.pgvector.PGVectorConfig", ), ), ] diff --git a/llama_toolchain/memory/router/__init__.py b/llama_stack/memory/router/__init__.py similarity index 90% rename from llama_toolchain/memory/router/__init__.py rename to llama_stack/memory/router/__init__.py index 25c5ac2a8..828be53a8 100644 --- a/llama_toolchain/memory/router/__init__.py +++ b/llama_stack/memory/router/__init__.py @@ -6,7 +6,7 @@ from typing import Any, List, Tuple -from llama_toolchain.core.datatypes import Api +from llama_stack.core.datatypes import Api async def get_router_impl(inner_impls: List[Tuple[str, Any]], deps: List[Api]): diff --git a/llama_toolchain/memory/router/router.py b/llama_stack/memory/router/router.py similarity index 96% rename from llama_toolchain/memory/router/router.py rename to llama_stack/memory/router/router.py index b415fbb96..c63d85d66 100644 --- a/llama_toolchain/memory/router/router.py +++ b/llama_stack/memory/router/router.py @@ -6,8 +6,8 @@ from typing import Any, Dict, List, Tuple -from llama_toolchain.core.datatypes import Api -from llama_toolchain.memory.api import * # noqa: F403 +from llama_stack.core.datatypes import Api +from llama_stack.memory.api import * # noqa: F403 class MemoryRouterImpl(Memory): diff --git a/llama_toolchain/models/api/endpoints.py b/llama_stack/models/api/endpoints.py similarity index 100% rename from llama_toolchain/models/api/endpoints.py rename to llama_stack/models/api/endpoints.py diff --git a/llama_toolchain/post_training/api/__init__.py b/llama_stack/post_training/api/__init__.py similarity index 100% rename from llama_toolchain/post_training/api/__init__.py rename to llama_stack/post_training/api/__init__.py diff --git a/llama_toolchain/post_training/api/api.py b/llama_stack/post_training/api/api.py similarity index 97% rename from llama_toolchain/post_training/api/api.py rename to llama_stack/post_training/api/api.py index 378515f83..d2f183c2b 100644 --- a/llama_toolchain/post_training/api/api.py +++ b/llama_stack/post_training/api/api.py @@ -14,8 +14,8 @@ from llama_models.schema_utils import json_schema_type, webmethod from pydantic import BaseModel, Field from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_toolchain.dataset.api import * # noqa: F403 -from llama_toolchain.common.training_types import * # noqa: F403 +from llama_stack.dataset.api import * # noqa: F403 +from llama_stack.common.training_types import * # noqa: F403 class OptimizerType(Enum): diff --git a/llama_toolchain/reward_scoring/api/__init__.py b/llama_stack/reward_scoring/api/__init__.py similarity index 100% rename from llama_toolchain/reward_scoring/api/__init__.py rename to llama_stack/reward_scoring/api/__init__.py diff --git a/llama_toolchain/reward_scoring/api/api.py b/llama_stack/reward_scoring/api/api.py similarity index 100% rename from llama_toolchain/reward_scoring/api/api.py rename to llama_stack/reward_scoring/api/api.py diff --git 
a/llama_toolchain/safety/meta_reference/shields/contrib/__init__.py b/llama_stack/safety/__init__.py similarity index 100% rename from llama_toolchain/safety/meta_reference/shields/contrib/__init__.py rename to llama_stack/safety/__init__.py diff --git a/llama_toolchain/safety/api/__init__.py b/llama_stack/safety/api/__init__.py similarity index 100% rename from llama_toolchain/safety/api/__init__.py rename to llama_stack/safety/api/__init__.py diff --git a/llama_toolchain/safety/api/api.py b/llama_stack/safety/api/api.py similarity index 96% rename from llama_toolchain/safety/api/api.py rename to llama_stack/safety/api/api.py index 631cfa992..f1abac409 100644 --- a/llama_toolchain/safety/api/api.py +++ b/llama_stack/safety/api/api.py @@ -11,7 +11,7 @@ from llama_models.schema_utils import json_schema_type, webmethod from pydantic import BaseModel, validator from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_toolchain.common.deployment_types import RestAPIExecutionConfig +from llama_stack.common.deployment_types import RestAPIExecutionConfig @json_schema_type diff --git a/llama_toolchain/safety/client.py b/llama_stack/safety/client.py similarity index 97% rename from llama_toolchain/safety/client.py rename to llama_stack/safety/client.py index 26a9813b3..5ae59c66f 100644 --- a/llama_toolchain/safety/client.py +++ b/llama_stack/safety/client.py @@ -13,11 +13,11 @@ import fire import httpx from llama_models.llama3.api.datatypes import UserMessage - -from llama_toolchain.core.datatypes import RemoteProviderConfig from pydantic import BaseModel from termcolor import cprint +from llama_stack.core.datatypes import RemoteProviderConfig + from .api import * # noqa: F403 diff --git a/llama_toolchain/safety/meta_reference/__init__.py b/llama_stack/safety/meta_reference/__init__.py similarity index 100% rename from llama_toolchain/safety/meta_reference/__init__.py rename to llama_stack/safety/meta_reference/__init__.py diff --git a/llama_toolchain/safety/meta_reference/config.py b/llama_stack/safety/meta_reference/config.py similarity index 100% rename from llama_toolchain/safety/meta_reference/config.py rename to llama_stack/safety/meta_reference/config.py diff --git a/llama_toolchain/safety/meta_reference/safety.py b/llama_stack/safety/meta_reference/safety.py similarity index 96% rename from llama_toolchain/safety/meta_reference/safety.py rename to llama_stack/safety/meta_reference/safety.py index 6c75e74e8..209b2975b 100644 --- a/llama_toolchain/safety/meta_reference/safety.py +++ b/llama_stack/safety/meta_reference/safety.py @@ -8,8 +8,8 @@ import asyncio from llama_models.sku_list import resolve_model -from llama_toolchain.common.model_utils import model_local_dir -from llama_toolchain.safety.api import * # noqa +from llama_stack.common.model_utils import model_local_dir +from llama_stack.safety.api import * # noqa from .config import SafetyConfig from .shields import ( diff --git a/llama_toolchain/safety/meta_reference/shields/__init__.py b/llama_stack/safety/meta_reference/shields/__init__.py similarity index 100% rename from llama_toolchain/safety/meta_reference/shields/__init__.py rename to llama_stack/safety/meta_reference/shields/__init__.py diff --git a/llama_toolchain/safety/meta_reference/shields/base.py b/llama_stack/safety/meta_reference/shields/base.py similarity index 97% rename from llama_toolchain/safety/meta_reference/shields/base.py rename to llama_stack/safety/meta_reference/shields/base.py index ed939212d..d6480259a 100644 --- 
a/llama_toolchain/safety/meta_reference/shields/base.py +++ b/llama_stack/safety/meta_reference/shields/base.py @@ -8,7 +8,7 @@ from abc import ABC, abstractmethod from typing import List from llama_models.llama3.api.datatypes import interleaved_text_media_as_str, Message -from llama_toolchain.safety.api import * # noqa: F403 +from llama_stack.safety.api import * # noqa: F403 CANNED_RESPONSE_TEXT = "I can't answer that. Can I help with something else?" diff --git a/llama_toolchain/safety/meta_reference/shields/code_scanner.py b/llama_stack/safety/meta_reference/shields/code_scanner.py similarity index 95% rename from llama_toolchain/safety/meta_reference/shields/code_scanner.py rename to llama_stack/safety/meta_reference/shields/code_scanner.py index 564d15a53..32f52a6dc 100644 --- a/llama_toolchain/safety/meta_reference/shields/code_scanner.py +++ b/llama_stack/safety/meta_reference/shields/code_scanner.py @@ -8,7 +8,7 @@ from codeshield.cs import CodeShield from termcolor import cprint from .base import ShieldResponse, TextShield -from llama_toolchain.safety.api import * # noqa: F403 +from llama_stack.safety.api import * # noqa: F403 class CodeScannerShield(TextShield): diff --git a/llama_toolchain/telemetry/__init__.py b/llama_stack/safety/meta_reference/shields/contrib/__init__.py similarity index 100% rename from llama_toolchain/telemetry/__init__.py rename to llama_stack/safety/meta_reference/shields/contrib/__init__.py diff --git a/llama_toolchain/safety/meta_reference/shields/contrib/third_party_shield.py b/llama_stack/safety/meta_reference/shields/contrib/third_party_shield.py similarity index 93% rename from llama_toolchain/safety/meta_reference/shields/contrib/third_party_shield.py rename to llama_stack/safety/meta_reference/shields/contrib/third_party_shield.py index 61a5977ed..9aa8adea8 100644 --- a/llama_toolchain/safety/meta_reference/shields/contrib/third_party_shield.py +++ b/llama_stack/safety/meta_reference/shields/contrib/third_party_shield.py @@ -8,7 +8,7 @@ from typing import List from llama_models.llama3.api.datatypes import Message -from llama_toolchain.safety.meta_reference.shields.base import ( +from llama_stack.safety.meta_reference.shields.base import ( OnViolationAction, ShieldBase, ShieldResponse, diff --git a/llama_toolchain/safety/meta_reference/shields/llama_guard.py b/llama_stack/safety/meta_reference/shields/llama_guard.py similarity index 99% rename from llama_toolchain/safety/meta_reference/shields/llama_guard.py rename to llama_stack/safety/meta_reference/shields/llama_guard.py index fe04baa00..3cdfeac13 100644 --- a/llama_toolchain/safety/meta_reference/shields/llama_guard.py +++ b/llama_stack/safety/meta_reference/shields/llama_guard.py @@ -14,7 +14,7 @@ from llama_models.llama3.api.datatypes import Message, Role from transformers import AutoModelForCausalLM, AutoTokenizer from .base import CANNED_RESPONSE_TEXT, OnViolationAction, ShieldBase, ShieldResponse -from llama_toolchain.safety.api import * # noqa: F403 +from llama_stack.safety.api import * # noqa: F403 SAFE_RESPONSE = "safe" _INSTANCE = None diff --git a/llama_toolchain/safety/meta_reference/shields/prompt_guard.py b/llama_stack/safety/meta_reference/shields/prompt_guard.py similarity index 99% rename from llama_toolchain/safety/meta_reference/shields/prompt_guard.py rename to llama_stack/safety/meta_reference/shields/prompt_guard.py index a1097a6f7..2e5683a3d 100644 --- a/llama_toolchain/safety/meta_reference/shields/prompt_guard.py +++ 
b/llama_stack/safety/meta_reference/shields/prompt_guard.py @@ -14,7 +14,7 @@ from termcolor import cprint from transformers import AutoModelForSequenceClassification, AutoTokenizer from .base import message_content_as_str, OnViolationAction, ShieldResponse, TextShield -from llama_toolchain.safety.api import * # noqa: F403 +from llama_stack.safety.api import * # noqa: F403 class PromptGuardShield(TextShield): diff --git a/llama_toolchain/safety/providers.py b/llama_stack/safety/providers.py similarity index 71% rename from llama_toolchain/safety/providers.py rename to llama_stack/safety/providers.py index c523e628e..3fb653a34 100644 --- a/llama_toolchain/safety/providers.py +++ b/llama_stack/safety/providers.py @@ -6,7 +6,7 @@ from typing import List -from llama_toolchain.core.datatypes import Api, InlineProviderSpec, ProviderSpec +from llama_stack.core.datatypes import Api, InlineProviderSpec, ProviderSpec def available_providers() -> List[ProviderSpec]: @@ -20,7 +20,7 @@ def available_providers() -> List[ProviderSpec]: "torch", "transformers", ], - module="llama_toolchain.safety.meta_reference", - config_class="llama_toolchain.safety.meta_reference.SafetyConfig", + module="llama_stack.safety.meta_reference", + config_class="llama_stack.safety.meta_reference.SafetyConfig", ), ] diff --git a/llama_stack/stack.py b/llama_stack/stack.py new file mode 100644 index 000000000..69ce8bcd1 --- /dev/null +++ b/llama_stack/stack.py @@ -0,0 +1,34 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_models.llama3.api.datatypes import * # noqa: F403 +from llama_stack.agentic_system.api import * # noqa: F403 +from llama_stack.dataset.api import * # noqa: F403 +from llama_stack.evaluations.api import * # noqa: F403 +from llama_stack.inference.api import * # noqa: F403 +from llama_stack.batch_inference.api import * # noqa: F403 +from llama_stack.memory.api import * # noqa: F403 +from llama_stack.telemetry.api import * # noqa: F403 +from llama_stack.post_training.api import * # noqa: F403 +from llama_stack.reward_scoring.api import * # noqa: F403 +from llama_stack.synthetic_data_generation.api import * # noqa: F403 +from llama_stack.safety.api import * # noqa: F403 + + +class LlamaStack( + Inference, + BatchInference, + AgenticSystem, + RewardScoring, + Safety, + SyntheticDataGeneration, + Datasets, + Telemetry, + PostTraining, + Memory, + Evaluations, +): + pass diff --git a/llama_toolchain/synthetic_data_generation/api/__init__.py b/llama_stack/synthetic_data_generation/api/__init__.py similarity index 100% rename from llama_toolchain/synthetic_data_generation/api/__init__.py rename to llama_stack/synthetic_data_generation/api/__init__.py diff --git a/llama_toolchain/synthetic_data_generation/api/api.py b/llama_stack/synthetic_data_generation/api/api.py similarity index 96% rename from llama_toolchain/synthetic_data_generation/api/api.py rename to llama_stack/synthetic_data_generation/api/api.py index 9a6c487af..f6059fc46 100644 --- a/llama_toolchain/synthetic_data_generation/api/api.py +++ b/llama_stack/synthetic_data_generation/api/api.py @@ -13,7 +13,7 @@ from llama_models.schema_utils import json_schema_type, webmethod from pydantic import BaseModel from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_toolchain.reward_scoring.api import * # noqa: F403 +from llama_stack.reward_scoring.api import * # noqa: F403 class 
FilteringFunction(Enum): diff --git a/llama_toolchain/tools/__init__.py b/llama_stack/telemetry/__init__.py similarity index 100% rename from llama_toolchain/tools/__init__.py rename to llama_stack/telemetry/__init__.py diff --git a/llama_toolchain/telemetry/api/__init__.py b/llama_stack/telemetry/api/__init__.py similarity index 100% rename from llama_toolchain/telemetry/api/__init__.py rename to llama_stack/telemetry/api/__init__.py diff --git a/llama_toolchain/telemetry/api/api.py b/llama_stack/telemetry/api/api.py similarity index 100% rename from llama_toolchain/telemetry/api/api.py rename to llama_stack/telemetry/api/api.py diff --git a/llama_toolchain/telemetry/console/__init__.py b/llama_stack/telemetry/console/__init__.py similarity index 100% rename from llama_toolchain/telemetry/console/__init__.py rename to llama_stack/telemetry/console/__init__.py diff --git a/llama_toolchain/telemetry/console/config.py b/llama_stack/telemetry/console/config.py similarity index 100% rename from llama_toolchain/telemetry/console/config.py rename to llama_stack/telemetry/console/config.py diff --git a/llama_toolchain/telemetry/console/console.py b/llama_stack/telemetry/console/console.py similarity index 97% rename from llama_toolchain/telemetry/console/console.py rename to llama_stack/telemetry/console/console.py index 2e7b9980d..9b855818e 100644 --- a/llama_toolchain/telemetry/console/console.py +++ b/llama_stack/telemetry/console/console.py @@ -6,7 +6,7 @@ from typing import Optional -from llama_toolchain.telemetry.api import * # noqa: F403 +from llama_stack.telemetry.api import * # noqa: F403 from .config import ConsoleConfig diff --git a/llama_toolchain/telemetry/providers.py b/llama_stack/telemetry/providers.py similarity index 69% rename from llama_toolchain/telemetry/providers.py rename to llama_stack/telemetry/providers.py index 00038e569..1359dbbbd 100644 --- a/llama_toolchain/telemetry/providers.py +++ b/llama_stack/telemetry/providers.py @@ -6,7 +6,7 @@ from typing import List -from llama_toolchain.core.datatypes import * # noqa: F403 +from llama_stack.core.datatypes import * # noqa: F403 def available_providers() -> List[ProviderSpec]: @@ -15,7 +15,7 @@ def available_providers() -> List[ProviderSpec]: api=Api.telemetry, provider_id="console", pip_packages=[], - module="llama_toolchain.telemetry.console", - config_class="llama_toolchain.telemetry.console.ConsoleConfig", + module="llama_stack.telemetry.console", + config_class="llama_stack.telemetry.console.ConsoleConfig", ), ] diff --git a/llama_toolchain/telemetry/tracing.py b/llama_stack/telemetry/tracing.py similarity index 99% rename from llama_toolchain/telemetry/tracing.py rename to llama_stack/telemetry/tracing.py index 6afe5c2fb..8375ff32c 100644 --- a/llama_toolchain/telemetry/tracing.py +++ b/llama_stack/telemetry/tracing.py @@ -15,7 +15,7 @@ from functools import wraps from typing import Any, Dict, List -from llama_toolchain.telemetry.api import * # noqa: F403 +from llama_stack.telemetry.api import * # noqa: F403 def generate_short_uuid(len: int = 12): diff --git a/llama_toolchain/tools/custom/__init__.py b/llama_stack/tools/__init__.py similarity index 100% rename from llama_toolchain/tools/custom/__init__.py rename to llama_stack/tools/__init__.py diff --git a/llama_toolchain/tools/base.py b/llama_stack/tools/base.py similarity index 90% rename from llama_toolchain/tools/base.py rename to llama_stack/tools/base.py index 324cce0e2..f81085a15 100644 --- a/llama_toolchain/tools/base.py +++ b/llama_stack/tools/base.py @@ 
-7,7 +7,7 @@ from abc import ABC, abstractmethod from typing import List -from llama_toolchain.inference.api import Message +from llama_stack.inference.api import Message class BaseTool(ABC): diff --git a/llama_toolchain/tools/builtin.py b/llama_stack/tools/builtin.py similarity index 99% rename from llama_toolchain/tools/builtin.py rename to llama_stack/tools/builtin.py index 56fda3723..45ac97b88 100644 --- a/llama_toolchain/tools/builtin.py +++ b/llama_stack/tools/builtin.py @@ -21,8 +21,8 @@ from .ipython_tool.code_execution import ( TOOLS_ATTACHMENT_KEY_REGEX, ) -from llama_toolchain.inference.api import * # noqa: F403 -from llama_toolchain.agentic_system.api import * # noqa: F403 +from llama_stack.inference.api import * # noqa: F403 +from llama_stack.agentic_system.api import * # noqa: F403 from .base import BaseTool diff --git a/llama_toolchain/tools/ipython_tool/__init__.py b/llama_stack/tools/custom/__init__.py similarity index 100% rename from llama_toolchain/tools/ipython_tool/__init__.py rename to llama_stack/tools/custom/__init__.py diff --git a/llama_toolchain/tools/custom/datatypes.py b/llama_stack/tools/custom/datatypes.py similarity index 97% rename from llama_toolchain/tools/custom/datatypes.py rename to llama_stack/tools/custom/datatypes.py index 05b142d6f..8ae1d678d 100644 --- a/llama_toolchain/tools/custom/datatypes.py +++ b/llama_stack/tools/custom/datatypes.py @@ -10,7 +10,7 @@ from abc import abstractmethod from typing import Dict, List from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_toolchain.agentic_system.api import * # noqa: F403 +from llama_stack.agentic_system.api import * # noqa: F403 class CustomTool: diff --git a/llama_stack/tools/ipython_tool/__init__.py b/llama_stack/tools/ipython_tool/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/llama_stack/tools/ipython_tool/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
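
With tools/base.py now resolving Message from llama_stack.inference.api, custom tools are written against the renamed package as well. The sketch below is illustrative only; this patch does not show BaseTool's abstract interface, so the get_name()/run() signatures are assumptions:

    from typing import List

    from llama_stack.inference.api import Message
    from llama_stack.tools.base import BaseTool


    class EchoTool(BaseTool):  # hypothetical example tool, not part of this patch
        def get_name(self) -> str:
            return "echo"

        async def run(self, messages: List[Message]) -> List[Message]:
            # A real tool would perform some action here; this one just
            # returns its input unchanged.
            return messages
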
diff --git a/llama_toolchain/tools/ipython_tool/code_env_prefix.py b/llama_stack/tools/ipython_tool/code_env_prefix.py
similarity index 100%
rename from llama_toolchain/tools/ipython_tool/code_env_prefix.py
rename to llama_stack/tools/ipython_tool/code_env_prefix.py
diff --git a/llama_toolchain/tools/ipython_tool/code_execution.py b/llama_stack/tools/ipython_tool/code_execution.py
similarity index 100%
rename from llama_toolchain/tools/ipython_tool/code_execution.py
rename to llama_stack/tools/ipython_tool/code_execution.py
diff --git a/llama_toolchain/tools/ipython_tool/matplotlib_custom_backend.py b/llama_stack/tools/ipython_tool/matplotlib_custom_backend.py
similarity index 100%
rename from llama_toolchain/tools/ipython_tool/matplotlib_custom_backend.py
rename to llama_stack/tools/ipython_tool/matplotlib_custom_backend.py
diff --git a/llama_toolchain/tools/ipython_tool/utils.py b/llama_stack/tools/ipython_tool/utils.py
similarity index 100%
rename from llama_toolchain/tools/ipython_tool/utils.py
rename to llama_stack/tools/ipython_tool/utils.py
diff --git a/llama_toolchain/tools/safety.py b/llama_stack/tools/safety.py
similarity index 88%
rename from llama_toolchain/tools/safety.py
rename to llama_stack/tools/safety.py
index 24051af8a..5a33bbadb 100644
--- a/llama_toolchain/tools/safety.py
+++ b/llama_stack/tools/safety.py
@@ -6,10 +6,10 @@

 from typing import List

-from llama_toolchain.agentic_system.meta_reference.safety import ShieldRunnerMixin
+from llama_stack.agentic_system.meta_reference.safety import ShieldRunnerMixin

-from llama_toolchain.inference.api import Message
-from llama_toolchain.safety.api import Safety, ShieldDefinition
+from llama_stack.inference.api import Message
+from llama_stack.safety.api import Safety, ShieldDefinition

 from .builtin import BaseTool
diff --git a/llama_toolchain/agentic_system/__init__.py b/llama_toolchain/agentic_system/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/llama_toolchain/stack.py b/llama_toolchain/stack.py
deleted file mode 100644
index 1e2976ab3..000000000
--- a/llama_toolchain/stack.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_toolchain.agentic_system.api import *  # noqa: F403
-from llama_toolchain.dataset.api import *  # noqa: F403
-from llama_toolchain.evaluations.api import *  # noqa: F403
-from llama_toolchain.inference.api import *  # noqa: F403
-from llama_toolchain.batch_inference.api import *  # noqa: F403
-from llama_toolchain.memory.api import *  # noqa: F403
-from llama_toolchain.telemetry.api import *  # noqa: F403
-from llama_toolchain.post_training.api import *  # noqa: F403
-from llama_toolchain.reward_scoring.api import *  # noqa: F403
-from llama_toolchain.synthetic_data_generation.api import *  # noqa: F403
-from llama_toolchain.safety.api import *  # noqa: F403
-
-
-class LlamaStack(
-    Inference,
-    BatchInference,
-    AgenticSystem,
-    RewardScoring,
-    Safety,
-    SyntheticDataGeneration,
-    Datasets,
-    Telemetry,
-    PostTraining,
-    Memory,
-    Evaluations,
-):
-    pass
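The deleted module is a pure aggregation point: LlamaStack inherits every API class so that a single type exposes the whole endpoint surface, which is what the OpenAPI generator imports (see the rfcs/openapi_generator/generate.py hunk below). A minimal sketch of the pattern, with hypothetical two-API scope and method names:

from typing import Protocol

class Inference(Protocol):
    def chat_completion(self, request: object) -> object: ...

class Safety(Protocol):
    def run_shields(self, request: object) -> object: ...

class MiniStack(Inference, Safety):
    """Union of the API surfaces; introspecting one class yields all endpoints."""

# e.g. a generator can enumerate every declared endpoint from the one class:
endpoints = [name for name in dir(MiniStack) if not name.startswith("_")]
# -> ['chat_completion', 'run_shields']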
diff --git a/rfcs/RFC-0001-llama-stack.md b/rfcs/RFC-0001-llama-stack.md
index 805e8cd84..a5fd83075 100644
--- a/rfcs/RFC-0001-llama-stack.md
+++ b/rfcs/RFC-0001-llama-stack.md
@@ -1,19 +1,19 @@
 # The Llama Stack API

 **Authors:**
-* Meta: @raghotham, @ashwinb, @hjshah, @jspisak 
+* Meta: @raghotham, @ashwinb, @hjshah, @jspisak

 ## Summary

 As part of the Llama 3.1 release, Meta is releasing an RFC for ‘Llama Stack’, a comprehensive set of interfaces / API for ML developers building on top of Llama foundation models. We are looking for feedback on where the API can be improved, any corner cases we may have missed and your general thoughts on how useful this will be. Ultimately, our hope is to create a standard for working with Llama models in order to simplify the developer experience and foster innovation across the Llama ecosystem.

 ## Motivation

-Llama models were always intended to work as part of an overall system that can orchestrate several components, including calling external tools. Our vision is to go beyond the foundation models and give developers access to a broader system that gives them the flexibility to design and create custom offerings that align with their vision. This thinking started last year when we first introduced a system-level safety model. Meta has continued to release new components for orchestration at the system level and, most recently in Llama 3.1, we’ve introduced the Llama Guard 3 safety model that is multilingual, a prompt injection filter, Prompt Guard and refreshed v3 of our CyberSec Evals. We are also releasing a reference implementation of an agentic system to demonstrate how all the pieces fit together. 
+Llama models were always intended to work as part of an overall system that can orchestrate several components, including calling external tools. Our vision is to go beyond the foundation models and give developers access to a broader system that gives them the flexibility to design and create custom offerings that align with their vision. This thinking started last year when we first introduced a system-level safety model. Meta has continued to release new components for orchestration at the system level and, most recently in Llama 3.1, we’ve introduced the Llama Guard 3 safety model that is multilingual, a prompt injection filter, Prompt Guard and refreshed v3 of our CyberSec Evals. We are also releasing a reference implementation of an agentic system to demonstrate how all the pieces fit together.

-While building the reference implementation, we realized that having a clean and consistent way to interface between components could be valuable not only for us but for anyone leveraging Llama models and other components as part of their system. We’ve also heard from the community as they face a similar challenge as components exist with overlapping functionality and there are incompatible interfaces and yet don't cover the end-to-end model life cycle. 
+While building the reference implementation, we realized that having a clean and consistent way to interface between components could be valuable not only for us but for anyone leveraging Llama models and other components as part of their system. We’ve also heard from the community as they face a similar challenge as components exist with overlapping functionality and there are incompatible interfaces and yet don't cover the end-to-end model life cycle.

 With these motivations, we engaged folks in industry, startups, and the broader developer community to help better define the interfaces of these components. We’re releasing this Llama Stack RFC as a set of standardized and opinionated interfaces for how to surface canonical toolchain components (like inference, fine-tuning, evals, synthetic data generation) and agentic applications to ML developers. Our hope is to have these become well adopted across the ecosystem, which should help with easier interoperability. We would like for builders of multiple components to provide implementations to these standard APIs so that there can be vertically integrated “distributions” of the Llama Stack that can work out of the box easily.

-We welcome feedback and ways to improve the proposal. We’re excited to grow the ecosystem around Llama and lower barriers for both developers and platform providers. 
+We welcome feedback and ways to improve the proposal. We’re excited to grow the ecosystem around Llama and lower barriers for both developers and platform providers.

 ## Design decisions

 Meta releases weights of both the pretrained and instruction fine-tuned Llama models to support several use cases. These weights can be improved - fine tuned and aligned - with curated datasets to then be deployed for inference to support specific applications. The curated datasets can be produced manually by humans or synthetically by other models or by leveraging human feedback by collecting usage data of the application itself. This results in a continuous improvement cycle where the model gets better over time. This is the model life cycle.
@@ -42,8 +42,8 @@ Note that as of today, in the OSS world, such a “loop” is often coded explic
 **Let's consider an example:**

 1. The user asks the system "Who played the NBA finals last year?"
-1. The model "understands" that this question needs to be answered using web search. It answers this abstractly with a message of the form "Please call the search tool for me with the query: 'List finalist teams for NBA in the last year' ". Note that the model by itself does not call the tool (of course!) 
-1. The executor consults the set of tool implementations which have been configured by the developer to find an implementation for the "search tool". If it does not find it, it returns an error to the model. Otherwise, it executes this tool and returns the result of this tool back to the model. 
+1. The model "understands" that this question needs to be answered using web search. It answers this abstractly with a message of the form "Please call the search tool for me with the query: 'List finalist teams for NBA in the last year' ". Note that the model by itself does not call the tool (of course!)
+1. The executor consults the set of tool implementations which have been configured by the developer to find an implementation for the "search tool". If it does not find it, it returns an error to the model. Otherwise, it executes this tool and returns the result of this tool back to the model.
 1. The model reasons once again (using all the messages above) and decides to send a final response "In 2023, Denver Nuggets played against the Miami Heat in the NBA finals." to the executor
 1. The executor returns the response directly to the user (since there is no tool call to be executed.)
@@ -73,14 +73,14 @@ The API is defined in the [YAML](RFC-0001-llama-stack-assets/llama-stack-spec.ya
 ## Sample implementations

-To prove out the API, we implemented a handful of use cases to make things more concrete. The [llama-agentic-system](https://github.com/meta-llama/llama-agentic-system) repository contains [6 different examples](https://github.com/meta-llama/llama-agentic-system/tree/main/examples/scripts) ranging from very basic to a multi turn agent. 
+To prove out the API, we implemented a handful of use cases to make things more concrete. The [llama-agentic-system](https://github.com/meta-llama/llama-agentic-system) repository contains [6 different examples](https://github.com/meta-llama/llama-agentic-system/tree/main/examples/scripts) ranging from very basic to a multi turn agent.

-There is also a sample inference endpoint implementation in the [llama-toolchain](https://github.com/meta-llama/llama-toolchain/blob/main/llama_toolchain/inference/server.py) repository.
+There is also a sample inference endpoint implementation in the [llama-toolchain](https://github.com/meta-llama/llama-toolchain/blob/main/llama_stack/inference/server.py) repository.

 ## Limitations

 The reference implementation for Llama Stack APIs to date only includes sample implementations using the inference API. We are planning to flesh out the design of Llama Stack Distributions (distros) by combining capabilities from different providers into a single vertically integrated stack. We plan to implement other APIs and, of course, we’d love contributions!!

-Thank you in advance for your feedback, support and contributions to make this a better API. 
+Thank you in advance for your feedback, support and contributions to make this a better API.

 Cheers!
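The executor loop in the RFC's NBA-finals example above is easier to follow as code than as prose. A compact sketch of the control flow it describes; everything here (message shapes, method names) is illustrative, not the actual llama_stack API:

def run_turn(model, tools: dict, user_message: str) -> str:
    """Executor loop: alternate between the model and the configured tools."""
    messages = [{"role": "user", "content": user_message}]
    while True:
        reply = model.chat(messages)          # model answers or asks for a tool
        messages.append(reply)
        call = reply.get("tool_call")
        if call is None:
            return reply["content"]           # no tool call: return directly to the user
        tool = tools.get(call["name"])
        if tool is None:                      # unknown tool: report an error to the model
            result = f"error: no tool named {call['name']!r} is configured"
        else:
            result = tool(call["arguments"])  # execute the tool, feed the result back
        messages.append({"role": "tool", "content": result})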
diff --git a/rfcs/openapi_generator/README.md b/rfcs/openapi_generator/README.md
index 023486534..9d407905d 100644
--- a/rfcs/openapi_generator/README.md
+++ b/rfcs/openapi_generator/README.md
@@ -1,4 +1,4 @@
-The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_toolchain/[]/api/endpoints.py` using the `generate.py` utility.
+The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack/[]/api/endpoints.py` using the `generate.py` utility.

 Please install the following packages before running the script:
diff --git a/rfcs/openapi_generator/generate.py b/rfcs/openapi_generator/generate.py
index 279389a47..bdc67fbed 100644
--- a/rfcs/openapi_generator/generate.py
+++ b/rfcs/openapi_generator/generate.py
@@ -31,7 +31,7 @@ from .pyopenapi.utility import Specification

 schema_utils.json_schema_type = json_schema_type

-from llama_toolchain.stack import LlamaStack
+from llama_stack.stack import LlamaStack


 # TODO: this should be fixed in the generator itself so it reads appropriate annotations
diff --git a/setup.py b/setup.py
index 7273bee51..f7f06bdf4 100644
--- a/setup.py
+++ b/setup.py
@@ -15,15 +15,15 @@ def read_requirements():

 setup(
-    name="llama_toolchain",
+    name="llama_stack",
     version="0.0.16",
     author="Meta Llama",
     author_email="llama-oss@meta.com",
     description="Llama toolchain",
     entry_points={
         "console_scripts": [
-            "llama = llama_toolchain.cli.llama:main",
-            "install-wheel-from-presigned = llama_toolchain.cli.scripts.run:install_wheel_from_presigned",
+            "llama = llama_stack.cli.llama:main",
+            "install-wheel-from-presigned = llama_stack.cli.scripts.run:install_wheel_from_presigned",
         ]
     },
     long_description=open("README.md").read(),
diff --git a/tests/example_custom_tool.py b/tests/example_custom_tool.py
index ec338982e..f03f18e39 100644
--- a/tests/example_custom_tool.py
+++ b/tests/example_custom_tool.py
@@ -7,7 +7,7 @@
 from typing import Dict

 from llama_models.llama3.api.datatypes import ToolParamDefinition
-from llama_toolchain.tools.custom.datatypes import SingleMessageCustomTool
+from llama_stack.tools.custom.datatypes import SingleMessageCustomTool


 class GetBoilingPointTool(SingleMessageCustomTool):
diff --git a/tests/test_e2e.py b/tests/test_e2e.py
index ea0246f20..24fc651bd 100644
--- a/tests/test_e2e.py
+++ b/tests/test_e2e.py
@@ -11,12 +11,12 @@ import os
 import unittest

-from llama_toolchain.agentic_system.event_logger import EventLogger, LogEvent
-from llama_toolchain.agentic_system.utils import get_agent_system_instance
+from llama_stack.agentic_system.event_logger import EventLogger, LogEvent
+from llama_stack.agentic_system.utils import get_agent_system_instance

 from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_toolchain.agentic_system.api.datatypes import StepType
-from llama_toolchain.tools.custom.datatypes import CustomTool
+from llama_stack.agentic_system.api.datatypes import StepType
+from llama_stack.tools.custom.datatypes import CustomTool

 from tests.example_custom_tool import GetBoilingPointTool
diff --git a/tests/test_inference.py b/tests/test_inference.py
index 800046355..ba062046d 100644
--- a/tests/test_inference.py
+++ b/tests/test_inference.py
@@ -1,3 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
 # Run this test using the following command:
 # python -m unittest tests/test_inference.py

@@ -19,12 +25,12 @@ from llama_models.llama3.api.datatypes import (
     UserMessage,
 )

-from llama_toolchain.inference.api import (
+from llama_stack.inference.api import (
     ChatCompletionRequest,
     ChatCompletionResponseEventType,
 )
-from llama_toolchain.inference.meta_reference.config import MetaReferenceImplConfig
-from llama_toolchain.inference.meta_reference.inference import get_provider_impl
+from llama_stack.inference.meta_reference.config import MetaReferenceImplConfig
+from llama_stack.inference.meta_reference.inference import get_provider_impl

 MODEL = "Meta-Llama3.1-8B-Instruct"
diff --git a/tests/test_ollama_inference.py b/tests/test_ollama_inference.py
index c3cef3a10..878e52991 100644
--- a/tests/test_ollama_inference.py
+++ b/tests/test_ollama_inference.py
@@ -1,3 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
 import textwrap
 import unittest
 from datetime import datetime
@@ -14,12 +20,12 @@ from llama_models.llama3.api.datatypes import (
     ToolResponseMessage,
     UserMessage,
 )

-from llama_toolchain.inference.api import (
+from llama_stack.inference.api import (
     ChatCompletionRequest,
     ChatCompletionResponseEventType,
 )
-from llama_toolchain.inference.ollama.config import OllamaImplConfig
-from llama_toolchain.inference.ollama.ollama import get_provider_impl
+from llama_stack.inference.ollama.config import OllamaImplConfig
+from llama_stack.inference.ollama.ollama import get_provider_impl


 class OllamaInferenceTests(unittest.IsolatedAsyncioTestCase):
diff --git a/tests/test_prepare_messages.py b/tests/test_prepare_messages.py
index 49624b04d..df3473b4c 100644
--- a/tests/test_prepare_messages.py
+++ b/tests/test_prepare_messages.py
@@ -1,8 +1,14 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
 import unittest

 from llama_models.llama3.api import *  # noqa: F403
-from llama_toolchain.inference.api import *  # noqa: F403
-from llama_toolchain.inference.prepare_messages import prepare_messages
+from llama_stack.inference.api import *  # noqa: F403
+from llama_stack.inference.prepare_messages import prepare_messages

 MODEL = "Meta-Llama3.1-8B-Instruct"
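Since every hunk in this patch is the same mechanical substitution, downstream projects can usually port themselves the same way. A minimal sketch, assuming a plain textual rename is safe for the target tree (review the result before committing):

import pathlib

for path in pathlib.Path(".").rglob("*.py"):
    text = path.read_text()
    if "llama_toolchain" in text:
        # Rewrite import statements and dotted-string references in one pass.
        path.write_text(text.replace("llama_toolchain", "llama_stack"))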