From 2cf731faea3ea6250eda938010793e1bf9912a6f Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Mon, 16 Sep 2024 17:21:08 -0700
Subject: [PATCH] llama_toolchain -> llama_stack

---
 MANIFEST.in | 6 ++--
 docs/cli_reference.md | 20 +++++------
 docs/getting_started.md | 22 ++++++------
 {llama_toolchain => llama_stack}/__init__.py | 0
 .../agentic_system}/__init__.py | 0
 .../agentic_system/api/__init__.py | 0
 .../agentic_system/api/api.py | 8 ++---
 .../agentic_system/client.py | 4 +--
 .../agentic_system/event_logger.py | 5 +--
 .../execute_with_custom_tools.py | 10 +++---
 .../agentic_system/meta_reference/__init__.py | 2 +-
 .../meta_reference/agent_instance.py | 12 +++----
 .../meta_reference/agentic_system.py | 12 +++----
 .../agentic_system/meta_reference/config.py | 0
 .../meta_reference/rag/context_retriever.py | 4 +--
 .../agentic_system/meta_reference/safety.py | 2 +-
 .../agentic_system/providers.py | 6 ++--
 .../batch_inference}/__init__.py | 0
 .../batch_inference/api/__init__.py | 0
 .../batch_inference/api/api.py | 2 +-
 .../scripts => llama_stack/cli}/__init__.py | 0
 .../cli/download.py | 8 ++---
 {llama_toolchain => llama_stack}/cli/llama.py | 0
 .../cli/model/__init__.py | 0
 .../cli/model/describe.py | 8 ++---
 .../cli/model/download.py | 4 +--
 .../cli/model/list.py | 4 +--
 .../cli/model/model.py | 10 +++---
 .../cli/model/template.py | 4 +--
 .../cli/scripts}/__init__.py | 0
 .../scripts/install-wheel-from-presigned.sh | 0
 .../cli/scripts/run.py | 0
 .../cli/stack/__init__.py | 0
 .../cli/stack/build.py | 22 ++++++------
 .../cli/stack/configure.py | 16 ++++-----
 .../cli/stack/list_apis.py | 6 ++--
 .../cli/stack/list_providers.py | 8 ++---
 .../cli/stack/run.py | 10 +++---
 .../cli/stack/stack.py | 2 +-
 .../cli/subcommand.py | 0
 {llama_toolchain => llama_stack}/cli/table.py | 0
 .../core => llama_stack/common}/__init__.py | 0
 .../common/config_dirs.py | 0
 .../common/deployment_types.py | 0
 .../common/exec.py | 0
 .../common/model_utils.py | 0
 .../common/prompt_for_config.py | 0
 .../common/serialize.py | 0
 .../common/training_types.py | 0
 .../conda/local-conda-example-build.yaml | 2 +-
 .../local-fireworks-conda-example-build.yaml | 0
 .../local-ollama-conda-example-build.yaml | 0
 .../conda/local-tgi-conda-example-build.yaml | 0
 .../local-together-conda-example-build.yaml | 0
 .../docker/local-docker-example-build.yaml | 2 +-
 .../core}/__init__.py | 0
 .../core/build_conda_env.sh | 0
 .../core/build_container.sh | 2 +-
 .../core/common.sh | 0
 .../core/configure.py | 8 ++---
 .../core/configure_container.sh | 0
 .../core/datatypes.py | 4 +--
 .../core/distribution.py | 12 +++----
 .../core/dynamic.py | 2 +-
 .../core/package.py | 10 +++---
 .../core/server.py | 6 ++--
 .../core/start_conda_env.sh | 2 +-
 .../core/start_container.sh | 2 +-
 .../dataset/api/__init__.py | 0
 .../dataset/api/api.py | 0
 .../evaluations/api/__init__.py | 0
 .../evaluations/api/api.py | 4 +--
 .../inference}/__init__.py | 0
 .../inference/adapters}/__init__.py | 0
 .../inference/adapters/fireworks/__init__.py | 0
 .../inference/adapters/fireworks/config.py | 0
 .../inference/adapters/fireworks/fireworks.py | 4 +--
 .../inference/adapters/ollama/__init__.py | 2 +-
 .../inference/adapters/ollama/ollama.py | 4 +--
 .../inference/adapters/tgi/__init__.py | 0
 .../inference/adapters/tgi/config.py | 0
 .../inference/adapters/tgi/tgi.py | 4 +--
 .../inference/adapters/together/__init__.py | 0
 .../inference/adapters/together/config.py | 0
 .../inference/adapters/together/together.py | 4 +--
 .../inference/api/__init__.py | 0
 .../inference/api/api.py | 0
 .../inference/client.py | 4 +--
 .../inference/event_logger.py | 5 +--
 .../inference/meta_reference/__init__.py | 0
 .../inference/meta_reference/config.py | 2 +-
 .../inference/meta_reference/generation.py | 6 ++--
 .../inference/meta_reference/inference.py | 6 ++--
 .../meta_reference/model_parallel.py | 0
 .../meta_reference/parallel_utils.py | 0
 .../inference/prepare_messages.py | 2 +-
 .../inference/providers.py | 20 +++++------
 .../inference/quantization/fp8_impls.py | 0
 .../inference/quantization/loader.py | 4 +--
 .../quantization/scripts/build_conda.sh | 0
 .../scripts/quantize_checkpoint.py | 0
 .../scripts/run_quantize_checkpoint.sh | 0
 .../inference/quantization/test_fp8.py | 0
 .../common => llama_stack/memory}/__init__.py | 0
 .../memory/adapters/chroma/__init__.py | 2 +-
 .../memory/adapters/chroma/chroma.py | 4 +--
 .../memory/adapters/pgvector/__init__.py | 0
 .../memory/adapters/pgvector/config.py | 0
 .../memory/adapters/pgvector/pgvector.py | 4 +--
 .../memory/api/__init__.py | 0
 .../memory/api/api.py | 0
 .../memory/client.py | 2 +-
 .../memory/common}/__init__.py | 0
 .../memory/common/file_utils.py | 0
 .../memory/common/vector_store.py | 2 +-
 .../memory/meta_reference}/__init__.py | 0
 .../memory/meta_reference/faiss/__init__.py | 0
 .../memory/meta_reference/faiss/config.py | 0
 .../memory/meta_reference/faiss/faiss.py | 6 ++--
 .../memory/providers.py | 12 +++----
 .../memory/router/__init__.py | 2 +-
 .../memory/router/router.py | 4 +--
 .../models/api/endpoints.py | 0
 .../post_training/api/__init__.py | 0
 .../post_training/api/api.py | 4 +--
 .../reward_scoring/api/__init__.py | 0
 .../reward_scoring/api/api.py | 0
 .../safety}/__init__.py | 0
 .../safety/api/__init__.py | 0
 .../safety/api/api.py | 2 +-
 .../safety/client.py | 4 +--
 .../safety/meta_reference/__init__.py | 0
 .../safety/meta_reference/config.py | 0
 .../safety/meta_reference/safety.py | 4 +--
 .../safety/meta_reference/shields/__init__.py | 0
 .../safety/meta_reference/shields/base.py | 2 +-
 .../meta_reference/shields/code_scanner.py | 2 +-
 .../shields/contrib}/__init__.py | 0
 .../shields/contrib/third_party_shield.py | 2 +-
 .../meta_reference/shields/llama_guard.py | 2 +-
 .../meta_reference/shields/prompt_guard.py | 2 +-
 .../safety/providers.py | 6 ++--
 llama_stack/stack.py | 34 +++++++++++++++++++
 .../synthetic_data_generation/api/__init__.py | 0
 .../synthetic_data_generation/api/api.py | 2 +-
 .../telemetry}/__init__.py | 0
 .../telemetry/api/__init__.py | 0
 .../telemetry/api/api.py | 0
 .../telemetry/console/__init__.py | 0
 .../telemetry/console/config.py | 0
 .../telemetry/console/console.py | 2 +-
 .../telemetry/providers.py | 6 ++--
 .../telemetry/tracing.py | 2 +-
 .../custom => llama_stack/tools}/__init__.py | 0
 .../tools/base.py | 2 +-
 .../tools/builtin.py | 4 +--
 .../tools/custom}/__init__.py | 0
 .../tools/custom/datatypes.py | 2 +-
 llama_stack/tools/ipython_tool/__init__.py | 5 +++
 .../tools/ipython_tool/code_env_prefix.py | 0
 .../tools/ipython_tool/code_execution.py | 0
 .../ipython_tool/matplotlib_custom_backend.py | 0
 .../tools/ipython_tool/utils.py | 0
 .../tools/safety.py | 6 ++--
 llama_toolchain/agentic_system/__init__.py | 0
 llama_toolchain/stack.py | 34 -------------------
 rfcs/RFC-0001-llama-stack.md | 18 +++++-----
 rfcs/openapi_generator/README.md | 2 +-
 rfcs/openapi_generator/generate.py | 2 +-
 setup.py | 6 ++--
 tests/example_custom_tool.py | 2 +-
 tests/test_e2e.py | 8 ++---
 tests/test_inference.py | 12 +++++--
 tests/test_ollama_inference.py | 12 +++++--
 tests/test_prepare_messages.py | 10 ++++--
 175 files changed, 300 insertions(+), 279 deletions(-)
 rename {llama_toolchain => llama_stack}/__init__.py (100%)
 rename {llama_toolchain/batch_inference => llama_stack/agentic_system}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/agentic_system/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/agentic_system/api/api.py (98%)
 rename {llama_toolchain => llama_stack}/agentic_system/client.py (97%)
 rename {llama_toolchain => llama_stack}/agentic_system/event_logger.py (98%)
 rename {llama_toolchain => llama_stack}/agentic_system/execute_with_custom_tools.py (91%)
 rename {llama_toolchain => llama_stack}/agentic_system/meta_reference/__init__.py (92%)
 rename {llama_toolchain => llama_stack}/agentic_system/meta_reference/agent_instance.py (98%)
 rename {llama_toolchain => llama_stack}/agentic_system/meta_reference/agentic_system.py (93%)
 rename {llama_toolchain => llama_stack}/agentic_system/meta_reference/config.py (100%)
 rename {llama_toolchain => llama_stack}/agentic_system/meta_reference/rag/context_retriever.py (95%)
 rename {llama_toolchain => llama_stack}/agentic_system/meta_reference/safety.py (98%)
 rename {llama_toolchain => llama_stack}/agentic_system/providers.py (76%)
 rename {llama_toolchain/cli => llama_stack/batch_inference}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/batch_inference/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/batch_inference/api/api.py (97%)
 rename {llama_toolchain/cli/scripts => llama_stack/cli}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/cli/download.py (97%)
 rename {llama_toolchain => llama_stack}/cli/llama.py (100%)
 rename {llama_toolchain => llama_stack}/cli/model/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/cli/model/describe.py (93%)
 rename {llama_toolchain => llama_stack}/cli/model/download.py (83%)
 rename {llama_toolchain => llama_stack}/cli/model/list.py (94%)
 rename {llama_toolchain => llama_stack}/cli/model/model.py (73%)
 rename {llama_toolchain => llama_stack}/cli/model/template.py (97%)
 rename {llama_toolchain/common => llama_stack/cli/scripts}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/cli/scripts/install-wheel-from-presigned.sh (100%)
 rename {llama_toolchain => llama_stack}/cli/scripts/run.py (100%)
 rename {llama_toolchain => llama_stack}/cli/stack/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/cli/stack/build.py (78%)
 rename {llama_toolchain => llama_stack}/cli/stack/configure.py (90%)
 rename {llama_toolchain => llama_stack}/cli/stack/list_apis.py (87%)
 rename {llama_toolchain => llama_stack}/cli/stack/list_providers.py (87%)
 rename {llama_toolchain => llama_stack}/cli/stack/run.py (91%)
 rename {llama_toolchain => llama_stack}/cli/stack/stack.py (94%)
 rename {llama_toolchain => llama_stack}/cli/subcommand.py (100%)
 rename {llama_toolchain => llama_stack}/cli/table.py (100%)
 rename {llama_toolchain/core => llama_stack/common}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/common/config_dirs.py (100%)
 rename {llama_toolchain => llama_stack}/common/deployment_types.py (100%)
 rename {llama_toolchain => llama_stack}/common/exec.py (100%)
 rename {llama_toolchain => llama_stack}/common/model_utils.py (100%)
 rename {llama_toolchain => llama_stack}/common/prompt_for_config.py (100%)
 rename {llama_toolchain => llama_stack}/common/serialize.py (100%)
 rename {llama_toolchain => llama_stack}/common/training_types.py (100%)
 rename {llama_toolchain => llama_stack}/configs/distributions/conda/local-conda-example-build.yaml (72%)
 rename {llama_toolchain => llama_stack}/configs/distributions/conda/local-fireworks-conda-example-build.yaml (100%)
 rename {llama_toolchain => llama_stack}/configs/distributions/conda/local-ollama-conda-example-build.yaml (100%)
 rename {llama_toolchain => llama_stack}/configs/distributions/conda/local-tgi-conda-example-build.yaml (100%)
 rename {llama_toolchain => llama_stack}/configs/distributions/conda/local-together-conda-example-build.yaml (100%)
 rename {llama_toolchain => llama_stack}/configs/distributions/docker/local-docker-example-build.yaml (72%)
 rename {llama_toolchain/inference => llama_stack/core}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/core/build_conda_env.sh (100%)
 rename {llama_toolchain => llama_stack}/core/build_container.sh (97%)
 rename {llama_toolchain => llama_stack}/core/common.sh (100%)
 rename {llama_toolchain => llama_stack}/core/configure.py (92%)
 rename {llama_toolchain => llama_stack}/core/configure_container.sh (100%)
 rename {llama_toolchain => llama_stack}/core/datatypes.py (98%)
 rename {llama_toolchain => llama_stack}/core/distribution.py (84%)
 rename {llama_toolchain => llama_stack}/core/dynamic.py (97%)
 rename {llama_toolchain => llama_stack}/core/package.py (88%)
 rename {llama_toolchain => llama_stack}/core/server.py (98%)
 rename {llama_toolchain => llama_stack}/core/start_conda_env.sh (95%)
 rename {llama_toolchain => llama_stack}/core/start_container.sh (94%)
 rename {llama_toolchain => llama_stack}/dataset/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/dataset/api/api.py (100%)
 rename {llama_toolchain => llama_stack}/evaluations/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/evaluations/api/api.py (95%)
 rename {llama_toolchain/inference/adapters => llama_stack/inference}/__init__.py (100%)
 rename {llama_toolchain/memory => llama_stack/inference/adapters}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/inference/adapters/fireworks/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/inference/adapters/fireworks/config.py (100%)
 rename {llama_toolchain => llama_stack}/inference/adapters/fireworks/fireworks.py (98%)
 rename {llama_toolchain => llama_stack}/inference/adapters/ollama/__init__.py (86%)
 rename {llama_toolchain => llama_stack}/inference/adapters/ollama/ollama.py (98%)
 rename {llama_toolchain => llama_stack}/inference/adapters/tgi/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/inference/adapters/tgi/config.py (100%)
 rename {llama_toolchain => llama_stack}/inference/adapters/tgi/tgi.py (98%)
 rename {llama_toolchain => llama_stack}/inference/adapters/together/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/inference/adapters/together/config.py (100%)
 rename {llama_toolchain => llama_stack}/inference/adapters/together/together.py (98%)
 rename {llama_toolchain => llama_stack}/inference/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/inference/api/api.py (100%)
 rename {llama_toolchain => llama_stack}/inference/client.py (98%)
 rename {llama_toolchain => llama_stack}/inference/event_logger.py (97%)
 rename {llama_toolchain => llama_stack}/inference/meta_reference/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/inference/meta_reference/config.py (96%)
 rename {llama_toolchain => llama_stack}/inference/meta_reference/generation.py (98%)
 rename {llama_toolchain => llama_stack}/inference/meta_reference/inference.py (97%)
 rename {llama_toolchain => llama_stack}/inference/meta_reference/model_parallel.py (100%)
 rename {llama_toolchain => llama_stack}/inference/meta_reference/parallel_utils.py (100%)
 rename {llama_toolchain => llama_stack}/inference/prepare_messages.py (97%)
 rename {llama_toolchain => llama_stack}/inference/providers.py (67%)
 rename {llama_toolchain => llama_stack}/inference/quantization/fp8_impls.py (100%)
 rename {llama_toolchain => llama_stack}/inference/quantization/loader.py (97%)
 rename {llama_toolchain => llama_stack}/inference/quantization/scripts/build_conda.sh (100%)
 rename {llama_toolchain => llama_stack}/inference/quantization/scripts/quantize_checkpoint.py (100%)
 rename {llama_toolchain => llama_stack}/inference/quantization/scripts/run_quantize_checkpoint.sh (100%)
 rename {llama_toolchain => llama_stack}/inference/quantization/test_fp8.py (100%)
 rename {llama_toolchain/memory/common => llama_stack/memory}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/memory/adapters/chroma/__init__.py (86%)
 rename {llama_toolchain => llama_stack}/memory/adapters/chroma/chroma.py (97%)
 rename {llama_toolchain => llama_stack}/memory/adapters/pgvector/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/memory/adapters/pgvector/config.py (100%)
 rename {llama_toolchain => llama_stack}/memory/adapters/pgvector/pgvector.py (98%)
 rename {llama_toolchain => llama_stack}/memory/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/memory/api/api.py (100%)
 rename {llama_toolchain => llama_stack}/memory/client.py (98%)
 rename {llama_toolchain/memory/meta_reference => llama_stack/memory/common}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/memory/common/file_utils.py (100%)
 rename {llama_toolchain => llama_stack}/memory/common/vector_store.py (98%)
 rename {llama_toolchain/safety => llama_stack/memory/meta_reference}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/memory/meta_reference/faiss/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/memory/meta_reference/faiss/config.py (100%)
 rename {llama_toolchain => llama_stack}/memory/meta_reference/faiss/faiss.py (95%)
 rename {llama_toolchain => llama_stack}/memory/providers.py (69%)
 rename {llama_toolchain => llama_stack}/memory/router/__init__.py (90%)
 rename {llama_toolchain => llama_stack}/memory/router/router.py (96%)
 rename {llama_toolchain => llama_stack}/models/api/endpoints.py (100%)
 rename {llama_toolchain => llama_stack}/post_training/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/post_training/api/api.py (97%)
 rename {llama_toolchain => llama_stack}/reward_scoring/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/reward_scoring/api/api.py (100%)
 rename {llama_toolchain/safety/meta_reference/shields/contrib => llama_stack/safety}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/safety/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/safety/api/api.py (96%)
 rename {llama_toolchain => llama_stack}/safety/client.py (97%)
 rename {llama_toolchain => llama_stack}/safety/meta_reference/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/safety/meta_reference/config.py (100%)
 rename {llama_toolchain => llama_stack}/safety/meta_reference/safety.py (96%)
 rename {llama_toolchain => llama_stack}/safety/meta_reference/shields/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/safety/meta_reference/shields/base.py (97%)
 rename {llama_toolchain => llama_stack}/safety/meta_reference/shields/code_scanner.py (95%)
 rename {llama_toolchain/telemetry => llama_stack/safety/meta_reference/shields/contrib}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/safety/meta_reference/shields/contrib/third_party_shield.py (93%)
 rename {llama_toolchain => llama_stack}/safety/meta_reference/shields/llama_guard.py (99%)
 rename {llama_toolchain => llama_stack}/safety/meta_reference/shields/prompt_guard.py (99%)
 rename {llama_toolchain => llama_stack}/safety/providers.py (71%)
 create mode 100644 llama_stack/stack.py
 rename {llama_toolchain => llama_stack}/synthetic_data_generation/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/synthetic_data_generation/api/api.py (96%)
 rename {llama_toolchain/tools => llama_stack/telemetry}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/telemetry/api/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/telemetry/api/api.py (100%)
 rename {llama_toolchain => llama_stack}/telemetry/console/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/telemetry/console/config.py (100%)
 rename {llama_toolchain => llama_stack}/telemetry/console/console.py (97%)
 rename {llama_toolchain => llama_stack}/telemetry/providers.py (69%)
 rename {llama_toolchain => llama_stack}/telemetry/tracing.py (99%)
 rename {llama_toolchain/tools/custom => llama_stack/tools}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/tools/base.py (90%)
 rename {llama_toolchain => llama_stack}/tools/builtin.py (99%)
 rename {llama_toolchain/tools/ipython_tool => llama_stack/tools/custom}/__init__.py (100%)
 rename {llama_toolchain => llama_stack}/tools/custom/datatypes.py (97%)
 create mode 100644 llama_stack/tools/ipython_tool/__init__.py
 rename {llama_toolchain => llama_stack}/tools/ipython_tool/code_env_prefix.py (100%)
 rename {llama_toolchain => llama_stack}/tools/ipython_tool/code_execution.py (100%)
 rename {llama_toolchain => llama_stack}/tools/ipython_tool/matplotlib_custom_backend.py (100%)
 rename {llama_toolchain => llama_stack}/tools/ipython_tool/utils.py (100%)
 rename {llama_toolchain => llama_stack}/tools/safety.py (88%)
 delete mode 100644 llama_toolchain/agentic_system/__init__.py
 delete mode 100644 llama_toolchain/stack.py

diff --git a/MANIFEST.in b/MANIFEST.in
index 4b76f85fe..bacea3148 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,4 @@
 include requirements.txt
-include llama_toolchain/data/*.yaml
-include llama_toolchain/core/*.sh
-include llama_toolchain/cli/scripts/*.sh
+include llama_stack/data/*.yaml
+include llama_stack/core/*.sh
+include llama_stack/cli/scripts/*.sh
diff --git a/docs/cli_reference.md b/docs/cli_reference.md
index 8921fc941..e62d03117 100644
--- a/docs/cli_reference.md
+++ b/docs/cli_reference.md
@@ -276,16 +276,16 @@ The following command and specifications allows you to get started with building
 ```
 llama stack build
 ```
-- You will be required to pass in a file path to the build.config file (e.g. `./llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml`). We provide some example build config files for configuring different types of distributions in the `./llama_toolchain/configs/distributions/` folder.
+- You will be required to pass in a file path to the build.config file (e.g. `./llama_stack/configs/distributions/conda/local-conda-example-build.yaml`). We provide some example build config files for configuring different types of distributions in the `./llama_stack/configs/distributions/` folder.
 
 The file will be of the contents
 ```
-$ cat ./llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml
+$ cat ./llama_stack/configs/distributions/conda/local-conda-example-build.yaml
 
 name: 8b-instruct
 distribution_spec:
   distribution_type: local
-  description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
   docker_image: null
   providers:
     inference: meta-reference
@@ -311,7 +311,7 @@ After this step is complete, a file named `8b-instruct-build.yaml` will be gener
 To specify a different API provider, we can change the `distribution_spec` in our `-build.yaml` config. For example, the following build spec allows you to build a distribution using TGI as the inference API provider.
 
 ```
-$ cat ./llama_toolchain/configs/distributions/conda/local-tgi-conda-example-build.yaml
+$ cat ./llama_stack/configs/distributions/conda/local-tgi-conda-example-build.yaml
 
 name: local-tgi-conda-example
 distribution_spec:
@@ -328,7 +328,7 @@ image_type: conda
 The following command allows you to build a distribution with TGI as the inference API provider, with the name `tgi`.
 
 ```
-llama stack build --config ./llama_toolchain/configs/distributions/conda/local-tgi-conda-example-build.yaml --name tgi
+llama stack build --config ./llama_stack/configs/distributions/conda/local-tgi-conda-example-build.yaml --name tgi
 ```
 
 We provide some example build configs to help you get started with building with different API providers.
@@ -337,11 +337,11 @@ We provide some example build configs to help you get started with building with
 To build a docker image, simply change the `image_type` to `docker` in our `-build.yaml` file, and run `llama stack build --config -build.yaml`.
 
 ```
-$ cat ./llama_toolchain/configs/distributions/docker/local-docker-example-build.yaml
+$ cat ./llama_stack/configs/distributions/docker/local-docker-example-build.yaml
 
 name: local-docker-example
 distribution_spec:
-  description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
   docker_image: null
   providers:
     inference: meta-reference
@@ -354,7 +354,7 @@ image_type: docker
 The following command allows you to build a Docker image with the name `docker-local`
 
 ```
-llama stack build --config ./llama_toolchain/configs/distributions/docker/local-docker-example-build.yaml --name docker-local
+llama stack build --config ./llama_stack/configs/distributions/docker/local-docker-example-build.yaml --name docker-local
 Dockerfile created successfully in /tmp/tmp.I0ifS2c46A/Dockerfile
 FROM python:3.10-slim
 WORKDIR /app
@@ -482,7 +482,7 @@ Once the server is setup, we can test it with a client to see the example output
 cd /path/to/llama-stack
 conda activate  # any environment containing the llama-toolchain pip package will work
-python -m llama_toolchain.inference.client localhost 5000
+python -m llama_stack.inference.client localhost 5000
 ```
 
 This will run the chat completion client and query the distribution’s /inference/chat_completion API.
 
@@ -500,7 +500,7 @@ You know what's even more hilarious? People like you who think they can just Goo
 Similarly you can test safety (if you configured llama-guard and/or prompt-guard shields) by:
 
 ```
-python -m llama_toolchain.safety.client localhost 5000
+python -m llama_stack.safety.client localhost 5000
 ```
 
 You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/sdk_examples) repo.
diff --git a/docs/getting_started.md b/docs/getting_started.md
index a312b8f33..fe6b1c573 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -9,7 +9,7 @@ This guides allows you to quickly get started with building and running a Llama
 **`llama stack build`**
 
 ```
-llama stack build --config ./llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml --name my-local-llama-stack
+llama stack build --config ./llama_stack/configs/distributions/conda/local-conda-example-build.yaml --name my-local-llama-stack
 ...
 ...
 Build spec configuration saved at ~/.llama/distributions/conda/my-local-llama-stack-build.yaml
@@ -97,16 +97,16 @@ The following command and specifications allows you to get started with building
 ```
 llama stack build
 ```
-- You will be required to pass in a file path to the build.config file (e.g. `./llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml`). We provide some example build config files for configuring different types of distributions in the `./llama_toolchain/configs/distributions/` folder.
+- You will be required to pass in a file path to the build.config file (e.g. `./llama_stack/configs/distributions/conda/local-conda-example-build.yaml`). We provide some example build config files for configuring different types of distributions in the `./llama_stack/configs/distributions/` folder.
 
 The file will be of the contents
 ```
-$ cat ./llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml
+$ cat ./llama_stack/configs/distributions/conda/local-conda-example-build.yaml
 
 name: 8b-instruct
 distribution_spec:
   distribution_type: local
-  description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
   docker_image: null
   providers:
     inference: meta-reference
@@ -132,7 +132,7 @@ After this step is complete, a file named `8b-instruct-build.yaml` will be gener
 To specify a different API provider, we can change the `distribution_spec` in our `-build.yaml` config. For example, the following build spec allows you to build a distribution using TGI as the inference API provider.
 
 ```
-$ cat ./llama_toolchain/configs/distributions/conda/local-tgi-conda-example-build.yaml
+$ cat ./llama_stack/configs/distributions/conda/local-tgi-conda-example-build.yaml
 
 name: local-tgi-conda-example
 distribution_spec:
@@ -149,7 +149,7 @@ image_type: conda
 The following command allows you to build a distribution with TGI as the inference API provider, with the name `tgi`.
 
 ```
-llama stack build --config ./llama_toolchain/configs/distributions/conda/local-tgi-conda-example-build.yaml --name tgi
+llama stack build --config ./llama_stack/configs/distributions/conda/local-tgi-conda-example-build.yaml --name tgi
 ```
 
 We provide some example build configs to help you get started with building with different API providers.
@@ -158,11 +158,11 @@ We provide some example build configs to help you get started with building with
 To build a docker image, simply change the `image_type` to `docker` in our `-build.yaml` file, and run `llama stack build --config -build.yaml`.
 
 ```
-$ cat ./llama_toolchain/configs/distributions/docker/local-docker-example-build.yaml
+$ cat ./llama_stack/configs/distributions/docker/local-docker-example-build.yaml
 
 name: local-docker-example
 distribution_spec:
-  description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
   docker_image: null
   providers:
     inference: meta-reference
@@ -175,7 +175,7 @@ image_type: docker
 The following command allows you to build a Docker image with the name `docker-local`
 
 ```
-llama stack build --config ./llama_toolchain/configs/distributions/docker/local-docker-example-build.yaml --name docker-local
+llama stack build --config ./llama_stack/configs/distributions/docker/local-docker-example-build.yaml --name docker-local
 Dockerfile created successfully in /tmp/tmp.I0ifS2c46A/Dockerfile
 FROM python:3.10-slim
 WORKDIR /app
@@ -296,7 +296,7 @@ Once the server is setup, we can test it with a client to see the example output
 cd /path/to/llama-stack
 conda activate  # any environment containing the llama-toolchain pip package will work
-python -m llama_toolchain.inference.client localhost 5000
+python -m llama_stack.inference.client localhost 5000
 ```
 
 This will run the chat completion client and query the distribution’s /inference/chat_completion API.
 
@@ -314,7 +314,7 @@ You know what's even more hilarious? People like you who think they can just Goo
 Similarly you can test safety (if you configured llama-guard and/or prompt-guard shields) by:
 
 ```
-python -m llama_toolchain.safety.client localhost 5000
+python -m llama_stack.safety.client localhost 5000
 ```
 
 You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/sdk_examples) repo.
diff --git a/llama_toolchain/__init__.py b/llama_stack/__init__.py
similarity index 100%
rename from llama_toolchain/__init__.py
rename to llama_stack/__init__.py
diff --git a/llama_toolchain/batch_inference/__init__.py b/llama_stack/agentic_system/__init__.py
similarity index 100%
rename from llama_toolchain/batch_inference/__init__.py
rename to llama_stack/agentic_system/__init__.py
diff --git a/llama_toolchain/agentic_system/api/__init__.py b/llama_stack/agentic_system/api/__init__.py
similarity index 100%
rename from llama_toolchain/agentic_system/api/__init__.py
rename to llama_stack/agentic_system/api/__init__.py
diff --git a/llama_toolchain/agentic_system/api/api.py b/llama_stack/agentic_system/api/api.py
similarity index 98%
rename from llama_toolchain/agentic_system/api/api.py
rename to llama_stack/agentic_system/api/api.py
index 95af3727b..50920315f 100644
--- a/llama_toolchain/agentic_system/api/api.py
+++ b/llama_stack/agentic_system/api/api.py
@@ -14,10 +14,10 @@ from pydantic import BaseModel, ConfigDict, Field
 from typing_extensions import Annotated
 
 from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_toolchain.common.deployment_types import *  # noqa: F403
-from llama_toolchain.inference.api import *  # noqa: F403
-from llama_toolchain.safety.api import *  # noqa: F403
-from llama_toolchain.memory.api import *  # noqa: F403
+from llama_stack.common.deployment_types import *  # noqa: F403
+from llama_stack.inference.api import *  # noqa: F403
+from llama_stack.safety.api import *  # noqa: F403
+from llama_stack.memory.api import *  # noqa: F403
 
 
 @json_schema_type
diff --git a/llama_toolchain/agentic_system/client.py b/llama_stack/agentic_system/client.py
similarity index 97%
rename from llama_toolchain/agentic_system/client.py
rename to llama_stack/agentic_system/client.py
index 52cf0dee2..49d887439 100644
--- a/llama_toolchain/agentic_system/client.py
+++ b/llama_stack/agentic_system/client.py
@@ -16,7 +16,7 @@ from pydantic import BaseModel
 from termcolor import cprint
 
 from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_toolchain.core.datatypes import RemoteProviderConfig
+from llama_stack.core.datatypes import RemoteProviderConfig
 
 from .api import *  # noqa: F403
 from .event_logger import EventLogger
@@ -186,7 +186,7 @@ async def run_rag(host: str, port: int):
     ]
 
     # Alternatively, you can pre-populate the memory bank with documents for example,
-    # using `llama_toolchain.memory.client`. Then you can grab the bank_id
+    # using `llama_stack.memory.client`. Then you can grab the bank_id
     # from the output of that run.
     tool_definitions = [
         MemoryToolDefinition(
diff --git a/llama_toolchain/agentic_system/event_logger.py b/llama_stack/agentic_system/event_logger.py
similarity index 98%
rename from llama_toolchain/agentic_system/event_logger.py
rename to llama_stack/agentic_system/event_logger.py
index 3d15ee239..c0bd89ee2 100644
--- a/llama_toolchain/agentic_system/event_logger.py
+++ b/llama_stack/agentic_system/event_logger.py
@@ -11,10 +11,7 @@ from llama_models.llama3.api.tool_utils import ToolUtils
 
 from termcolor import cprint
 
-from llama_toolchain.agentic_system.api import (
-    AgenticSystemTurnResponseEventType,
-    StepType,
-)
+from llama_stack.agentic_system.api import AgenticSystemTurnResponseEventType, StepType
 
 
 class LogEvent:
diff --git a/llama_toolchain/agentic_system/execute_with_custom_tools.py b/llama_stack/agentic_system/execute_with_custom_tools.py
similarity index 91%
rename from llama_toolchain/agentic_system/execute_with_custom_tools.py
rename to llama_stack/agentic_system/execute_with_custom_tools.py
index e8038bc20..fe9ef17b3 100644
--- a/llama_toolchain/agentic_system/execute_with_custom_tools.py
+++ b/llama_stack/agentic_system/execute_with_custom_tools.py
@@ -7,14 +7,14 @@
 from typing import AsyncGenerator, List
 
 from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_toolchain.agentic_system.api import *  # noqa: F403
-from llama_toolchain.memory.api import *  # noqa: F403
-from llama_toolchain.safety.api import *  # noqa: F403
+from llama_stack.agentic_system.api import *  # noqa: F403
+from llama_stack.memory.api import *  # noqa: F403
+from llama_stack.safety.api import *  # noqa: F403
 
-from llama_toolchain.agentic_system.api import (
+from llama_stack.agentic_system.api import (
     AgenticSystemTurnResponseEventType as EventType,
 )
-from llama_toolchain.tools.custom.datatypes import CustomTool
+from llama_stack.tools.custom.datatypes import CustomTool
 
 
 class AgentWithCustomToolExecutor:
diff --git a/llama_toolchain/agentic_system/meta_reference/__init__.py b/llama_stack/agentic_system/meta_reference/__init__.py
similarity index 92%
rename from llama_toolchain/agentic_system/meta_reference/__init__.py
rename to llama_stack/agentic_system/meta_reference/__init__.py
index b49cc4c84..686708c0c 100644
--- a/llama_toolchain/agentic_system/meta_reference/__init__.py
+++ b/llama_stack/agentic_system/meta_reference/__init__.py
@@ -6,7 +6,7 @@
 
 from typing import Dict
 
-from llama_toolchain.core.datatypes import Api, ProviderSpec
+from llama_stack.core.datatypes import Api, ProviderSpec
 
 from .config import MetaReferenceImplConfig
 
diff --git a/llama_toolchain/agentic_system/meta_reference/agent_instance.py b/llama_stack/agentic_system/meta_reference/agent_instance.py
similarity index 98%
rename from llama_toolchain/agentic_system/meta_reference/agent_instance.py
rename to llama_stack/agentic_system/meta_reference/agent_instance.py
index 202f42a3c..141174cce 100644
--- a/llama_toolchain/agentic_system/meta_reference/agent_instance.py
+++ b/llama_stack/agentic_system/meta_reference/agent_instance.py
@@ -20,13 +20,13 @@ import httpx
 
 from termcolor import cprint
 
-from llama_toolchain.agentic_system.api import *  # noqa: F403
-from llama_toolchain.inference.api import *  # noqa: F403
-from llama_toolchain.memory.api import *  # noqa: F403
-from llama_toolchain.safety.api import *  # noqa: F403
+from llama_stack.agentic_system.api import *  # noqa: F403
+from llama_stack.inference.api import *  # noqa: F403
+from llama_stack.memory.api import *  # noqa: F403
+from llama_stack.safety.api import *  # noqa: F403
 
-from llama_toolchain.tools.base import BaseTool
-from llama_toolchain.tools.builtin import (
+from llama_stack.tools.base import BaseTool
+from llama_stack.tools.builtin import (
     interpret_content_as_attachment,
     SingleMessageBuiltinTool,
 )
diff --git a/llama_toolchain/agentic_system/meta_reference/agentic_system.py b/llama_stack/agentic_system/meta_reference/agentic_system.py
similarity index 93%
rename from llama_toolchain/agentic_system/meta_reference/agentic_system.py
rename to llama_stack/agentic_system/meta_reference/agentic_system.py
index 3990ab58a..7f624cfa6 100644
--- a/llama_toolchain/agentic_system/meta_reference/agentic_system.py
+++ b/llama_stack/agentic_system/meta_reference/agentic_system.py
@@ -10,17 +10,17 @@ import tempfile
 import uuid
 from typing import AsyncGenerator
 
-from llama_toolchain.inference.api import Inference
-from llama_toolchain.memory.api import Memory
-from llama_toolchain.safety.api import Safety
-from llama_toolchain.agentic_system.api import *  # noqa: F403
-from llama_toolchain.tools.builtin import (
+from llama_stack.inference.api import Inference
+from llama_stack.memory.api import Memory
+from llama_stack.safety.api import Safety
+from llama_stack.agentic_system.api import *  # noqa: F403
+from llama_stack.tools.builtin import (
     CodeInterpreterTool,
     PhotogenTool,
     SearchTool,
     WolframAlphaTool,
 )
-from llama_toolchain.tools.safety import with_safety
+from llama_stack.tools.safety import with_safety
 
 from .agent_instance import ChatAgent
 from .config import MetaReferenceImplConfig
diff --git a/llama_toolchain/agentic_system/meta_reference/config.py b/llama_stack/agentic_system/meta_reference/config.py
similarity index 100%
rename from llama_toolchain/agentic_system/meta_reference/config.py
rename to llama_stack/agentic_system/meta_reference/config.py
diff --git a/llama_toolchain/agentic_system/meta_reference/rag/context_retriever.py b/llama_stack/agentic_system/meta_reference/rag/context_retriever.py
similarity index 95%
rename from llama_toolchain/agentic_system/meta_reference/rag/context_retriever.py
rename to llama_stack/agentic_system/meta_reference/rag/context_retriever.py
index afcc6afd1..7723fe5a5 100644
--- a/llama_toolchain/agentic_system/meta_reference/rag/context_retriever.py
+++ b/llama_stack/agentic_system/meta_reference/rag/context_retriever.py
@@ -10,14 +10,14 @@ from jinja2 import Template
 
 from llama_models.llama3.api import *  # noqa: F403
 
-from llama_toolchain.agentic_system.api import (
+from llama_stack.agentic_system.api import (
     DefaultMemoryQueryGeneratorConfig,
     LLMMemoryQueryGeneratorConfig,
     MemoryQueryGenerator,
     MemoryQueryGeneratorConfig,
 )
 from termcolor import cprint  # noqa: F401
-from llama_toolchain.inference.api import *  # noqa: F403
+from llama_stack.inference.api import *  # noqa: F403
 
 
 async def generate_rag_query(
diff --git a/llama_toolchain/agentic_system/meta_reference/safety.py b/llama_stack/agentic_system/meta_reference/safety.py
similarity index 98%
rename from llama_toolchain/agentic_system/meta_reference/safety.py
rename to llama_stack/agentic_system/meta_reference/safety.py
index 4bbb1f2f1..74da5c2cc 100644
--- a/llama_toolchain/agentic_system/meta_reference/safety.py
+++ b/llama_stack/agentic_system/meta_reference/safety.py
@@ -9,7 +9,7 @@ from typing import List
 from llama_models.llama3.api.datatypes import Message, Role, UserMessage
 from termcolor import cprint
 
-from llama_toolchain.safety.api import (
+from llama_stack.safety.api import (
     OnViolationAction,
     RunShieldRequest,
     Safety,
diff --git a/llama_toolchain/agentic_system/providers.py b/llama_stack/agentic_system/providers.py
similarity index 76%
rename from llama_toolchain/agentic_system/providers.py
rename to llama_stack/agentic_system/providers.py
index 79e66d15e..7db33c95d 100644
--- a/llama_toolchain/agentic_system/providers.py
+++ b/llama_stack/agentic_system/providers.py
@@ -6,7 +6,7 @@
 
 from typing import List
 
-from llama_toolchain.core.datatypes import Api, InlineProviderSpec, ProviderSpec
+from llama_stack.core.datatypes import Api, InlineProviderSpec, ProviderSpec
 
 
 def available_providers() -> List[ProviderSpec]:
@@ -23,8 +23,8 @@ def available_providers() -> List[ProviderSpec]:
                 "torch",
                 "transformers",
             ],
-            module="llama_toolchain.agentic_system.meta_reference",
-            config_class="llama_toolchain.agentic_system.meta_reference.MetaReferenceImplConfig",
+            module="llama_stack.agentic_system.meta_reference",
+            config_class="llama_stack.agentic_system.meta_reference.MetaReferenceImplConfig",
             api_dependencies=[
                 Api.inference,
                 Api.safety,
diff --git a/llama_toolchain/cli/__init__.py b/llama_stack/batch_inference/__init__.py
similarity index 100%
rename from llama_toolchain/cli/__init__.py
rename to llama_stack/batch_inference/__init__.py
diff --git a/llama_toolchain/batch_inference/api/__init__.py b/llama_stack/batch_inference/api/__init__.py
similarity index 100%
rename from llama_toolchain/batch_inference/api/__init__.py
rename to llama_stack/batch_inference/api/__init__.py
diff --git a/llama_toolchain/batch_inference/api/api.py b/llama_stack/batch_inference/api/api.py
similarity index 97%
rename from llama_toolchain/batch_inference/api/api.py
rename to llama_stack/batch_inference/api/api.py
index 3d67120dd..3c8f9c18b 100644
--- a/llama_toolchain/batch_inference/api/api.py
+++ b/llama_stack/batch_inference/api/api.py
@@ -11,7 +11,7 @@ from llama_models.schema_utils import json_schema_type, webmethod
 from pydantic import BaseModel, Field
 
 from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_toolchain.inference.api import *  # noqa: F403
+from llama_stack.inference.api import *  # noqa: F403
 
 
 @json_schema_type
diff --git a/llama_toolchain/cli/scripts/__init__.py b/llama_stack/cli/__init__.py
similarity index 100%
rename from llama_toolchain/cli/scripts/__init__.py
rename to llama_stack/cli/__init__.py
diff --git a/llama_toolchain/cli/download.py b/llama_stack/cli/download.py
similarity index 97%
rename from llama_toolchain/cli/download.py
rename to llama_stack/cli/download.py
index 1bfa89fc6..1e75459a1 100644
--- a/llama_toolchain/cli/download.py
+++ b/llama_stack/cli/download.py
@@ -20,7 +20,7 @@ from pydantic import BaseModel
 
 from termcolor import cprint
 
-from llama_toolchain.cli.subcommand import Subcommand
+from llama_stack.cli.subcommand import Subcommand
 
 
 class Download(Subcommand):
@@ -92,7 +92,7 @@ def _hf_download(
     from huggingface_hub import snapshot_download
     from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
 
-    from llama_toolchain.common.model_utils import model_local_dir
+    from llama_stack.common.model_utils import model_local_dir
 
     repo_id = model.huggingface_repo
     if repo_id is None:
@@ -126,7 +126,7 @@ def _hf_download(
 def _meta_download(model: "Model", meta_url: str):
     from llama_models.sku_list import llama_meta_net_info
 
-    from llama_toolchain.common.model_utils import model_local_dir
+    from llama_stack.common.model_utils import model_local_dir
 
     output_dir = Path(model_local_dir(model.descriptor()))
     os.makedirs(output_dir, exist_ok=True)
@@ -188,7 +188,7 @@ class Manifest(BaseModel):
 
 
 def _download_from_manifest(manifest_file: str):
-    from llama_toolchain.common.model_utils import model_local_dir
+    from llama_stack.common.model_utils import model_local_dir
 
     with open(manifest_file, "r") as f:
         d = json.load(f)
diff --git a/llama_toolchain/cli/llama.py b/llama_stack/cli/llama.py
similarity index 100%
rename from llama_toolchain/cli/llama.py
rename to llama_stack/cli/llama.py
diff --git a/llama_toolchain/cli/model/__init__.py b/llama_stack/cli/model/__init__.py
similarity index 100%
rename from llama_toolchain/cli/model/__init__.py
rename to llama_stack/cli/model/__init__.py
diff --git a/llama_toolchain/cli/model/describe.py b/llama_stack/cli/model/describe.py
similarity index 93%
rename from llama_toolchain/cli/model/describe.py
rename to llama_stack/cli/model/describe.py
index 683995f7b..24af7dd14 100644
--- a/llama_toolchain/cli/model/describe.py
+++ b/llama_stack/cli/model/describe.py
@@ -9,12 +9,12 @@ import json
 
 from llama_models.sku_list import resolve_model
 
-from llama_toolchain.cli.subcommand import Subcommand
-from llama_toolchain.cli.table import print_table
-from llama_toolchain.common.serialize import EnumEncoder
-
 from termcolor import colored
 
+from llama_stack.cli.subcommand import Subcommand
+from llama_stack.cli.table import print_table
+from llama_stack.common.serialize import EnumEncoder
+
 
 class ModelDescribe(Subcommand):
     """Show details about a model"""
diff --git a/llama_toolchain/cli/model/download.py b/llama_stack/cli/model/download.py
similarity index 83%
rename from llama_toolchain/cli/model/download.py
rename to llama_stack/cli/model/download.py
index ac3c791b4..a3b8f7796 100644
--- a/llama_toolchain/cli/model/download.py
+++ b/llama_stack/cli/model/download.py
@@ -6,7 +6,7 @@
 
 import argparse
 
-from llama_toolchain.cli.subcommand import Subcommand
+from llama_stack.cli.subcommand import Subcommand
 
 
 class ModelDownload(Subcommand):
@@ -19,6 +19,6 @@ class ModelDownload(Subcommand):
             formatter_class=argparse.RawTextHelpFormatter,
         )
 
-        from llama_toolchain.cli.download import setup_download_parser
+        from llama_stack.cli.download import setup_download_parser
 
         setup_download_parser(self.parser)
diff --git a/llama_toolchain/cli/model/list.py b/llama_stack/cli/model/list.py
similarity index 94%
rename from llama_toolchain/cli/model/list.py
rename to llama_stack/cli/model/list.py
index f989260ab..977590d7a 100644
--- a/llama_toolchain/cli/model/list.py
+++ b/llama_stack/cli/model/list.py
@@ -8,8 +8,8 @@ import argparse
 
 from llama_models.sku_list import all_registered_models
 
-from llama_toolchain.cli.subcommand import Subcommand
-from llama_toolchain.cli.table import print_table
+from llama_stack.cli.subcommand import Subcommand
+from llama_stack.cli.table import print_table
 
 
 class ModelList(Subcommand):
diff --git a/llama_toolchain/cli/model/model.py b/llama_stack/cli/model/model.py
similarity index 73%
rename from llama_toolchain/cli/model/model.py
rename to llama_stack/cli/model/model.py
index 9a14450ad..c222c1d63 100644
--- a/llama_toolchain/cli/model/model.py
+++ b/llama_stack/cli/model/model.py
@@ -6,12 +6,12 @@
 
 import argparse
 
-from llama_toolchain.cli.model.describe import ModelDescribe
-from llama_toolchain.cli.model.download import ModelDownload
-from llama_toolchain.cli.model.list import ModelList
-from llama_toolchain.cli.model.template import ModelTemplate
+from llama_stack.cli.model.describe import ModelDescribe
+from llama_stack.cli.model.download import ModelDownload
+from llama_stack.cli.model.list import ModelList
+from llama_stack.cli.model.template import ModelTemplate
 
-from llama_toolchain.cli.subcommand import Subcommand
+from llama_stack.cli.subcommand import Subcommand
 
 
 class ModelParser(Subcommand):
diff --git a/llama_toolchain/cli/model/template.py b/llama_stack/cli/model/template.py
similarity index 97%
rename from llama_toolchain/cli/model/template.py
rename to llama_stack/cli/model/template.py
index 2776d9703..d828660bb 100644
--- a/llama_toolchain/cli/model/template.py
+++ b/llama_stack/cli/model/template.py
@@ -9,7 +9,7 @@ import textwrap
 
 from termcolor import colored
 
-from llama_toolchain.cli.subcommand import Subcommand
+from llama_stack.cli.subcommand import Subcommand
 
 
 class ModelTemplate(Subcommand):
@@ -75,7 +75,7 @@ class ModelTemplate(Subcommand):
             render_jinja_template,
         )
 
-        from llama_toolchain.cli.table import print_table
+        from llama_stack.cli.table import print_table
 
         if args.name:
             tool_prompt_format = self._prompt_type(args.format)
diff --git a/llama_toolchain/common/__init__.py b/llama_stack/cli/scripts/__init__.py
similarity index 100%
rename from llama_toolchain/common/__init__.py
rename to llama_stack/cli/scripts/__init__.py
diff --git a/llama_toolchain/cli/scripts/install-wheel-from-presigned.sh b/llama_stack/cli/scripts/install-wheel-from-presigned.sh
similarity index 100%
rename from llama_toolchain/cli/scripts/install-wheel-from-presigned.sh
rename to llama_stack/cli/scripts/install-wheel-from-presigned.sh
diff --git a/llama_toolchain/cli/scripts/run.py b/llama_stack/cli/scripts/run.py
similarity index 100%
rename from llama_toolchain/cli/scripts/run.py
rename to llama_stack/cli/scripts/run.py
diff --git a/llama_toolchain/cli/stack/__init__.py b/llama_stack/cli/stack/__init__.py
similarity index 100%
rename from llama_toolchain/cli/stack/__init__.py
rename to llama_stack/cli/stack/__init__.py
diff --git a/llama_toolchain/cli/stack/build.py b/llama_stack/cli/stack/build.py
similarity index 78%
rename from llama_toolchain/cli/stack/build.py
rename to llama_stack/cli/stack/build.py
index 78e013219..da4cb6161 100644
--- a/llama_toolchain/cli/stack/build.py
+++ b/llama_stack/cli/stack/build.py
@@ -6,8 +6,8 @@
 
 import argparse
 
-from llama_toolchain.cli.subcommand import Subcommand
-from llama_toolchain.core.datatypes import *  # noqa: F403
+from llama_stack.cli.subcommand import Subcommand
+from llama_stack.core.datatypes import *  # noqa: F403
 from pathlib import Path
 
 import yaml
@@ -29,7 +29,7 @@ class StackBuild(Subcommand):
         self.parser.add_argument(
             "config",
             type=str,
-            help="Path to a config file to use for the build. You may find example configs in llama_toolchain/configs/distributions",
+            help="Path to a config file to use for the build. You may find example configs in llama_stack/configs/distributions",
         )
 
         self.parser.add_argument(
@@ -44,17 +44,17 @@ class StackBuild(Subcommand):
         import json
         import os
 
-        from llama_toolchain.common.config_dirs import DISTRIBS_BASE_DIR
-        from llama_toolchain.common.serialize import EnumEncoder
-        from llama_toolchain.core.package import ApiInput, build_image, ImageType
+        from llama_stack.common.config_dirs import DISTRIBS_BASE_DIR
+        from llama_stack.common.serialize import EnumEncoder
+        from llama_stack.core.package import ApiInput, build_image, ImageType
 
         from termcolor import cprint
         # save build.yaml spec for building same distribution again
         if build_config.image_type == ImageType.docker.value:
             # docker needs build file to be in the llama-stack repo dir to be able to copy over to the image
-            llama_toolchain_path = Path(os.path.relpath(__file__)).parent.parent.parent
+            llama_stack_path = Path(os.path.relpath(__file__)).parent.parent.parent
             build_dir = (
-                llama_toolchain_path / "configs/distributions" / build_config.image_type
+                llama_stack_path / "configs/distributions" / build_config.image_type
             )
         else:
             build_dir = DISTRIBS_BASE_DIR / build_config.image_type
@@ -74,12 +74,12 @@ class StackBuild(Subcommand):
         )
 
     def _run_stack_build_command(self, args: argparse.Namespace) -> None:
-        from llama_toolchain.common.prompt_for_config import prompt_for_config
-        from llama_toolchain.core.dynamic import instantiate_class_type
+        from llama_stack.common.prompt_for_config import prompt_for_config
+        from llama_stack.core.dynamic import instantiate_class_type
 
         if not args.config:
             self.parser.error(
-                "No config file specified. Please use `llama stack build /path/to/*-build.yaml`. Example config files can be found in llama_toolchain/configs/distributions"
+                "No config file specified. Please use `llama stack build /path/to/*-build.yaml`. Example config files can be found in llama_stack/configs/distributions"
             )
             return
 
diff --git a/llama_toolchain/cli/stack/configure.py b/llama_stack/cli/stack/configure.py
similarity index 90%
rename from llama_toolchain/cli/stack/configure.py
rename to llama_stack/cli/stack/configure.py
index 53c9622e7..6a1e7c740 100644
--- a/llama_toolchain/cli/stack/configure.py
+++ b/llama_stack/cli/stack/configure.py
@@ -13,11 +13,11 @@ import pkg_resources
 import yaml
 from termcolor import cprint
 
-from llama_toolchain.cli.subcommand import Subcommand
-from llama_toolchain.common.config_dirs import BUILDS_BASE_DIR
+from llama_stack.cli.subcommand import Subcommand
+from llama_stack.common.config_dirs import BUILDS_BASE_DIR
 
-from llama_toolchain.common.exec import run_with_pty
-from llama_toolchain.core.datatypes import *  # noqa: F403
+from llama_stack.common.exec import run_with_pty
+from llama_stack.core.datatypes import *  # noqa: F403
 
 import os
 
@@ -49,7 +49,7 @@ class StackConfigure(Subcommand):
         )
 
     def _run_stack_configure_cmd(self, args: argparse.Namespace) -> None:
-        from llama_toolchain.core.package import ImageType
+        from llama_stack.core.package import ImageType
 
         docker_image = None
         build_config_file = Path(args.config)
@@ -66,7 +66,7 @@ class StackConfigure(Subcommand):
            os.makedirs(builds_dir, exist_ok=True)
 
            script = pkg_resources.resource_filename(
-               "llama_toolchain", "core/configure_container.sh"
+               "llama_stack", "core/configure_container.sh"
            )
            script_args = [script, docker_image, str(builds_dir)]
 
@@ -95,8 +95,8 @@ class StackConfigure(Subcommand):
         build_config: BuildConfig,
         output_dir: Optional[str] = None,
     ):
-        from llama_toolchain.common.serialize import EnumEncoder
-        from llama_toolchain.core.configure import configure_api_providers
+        from llama_stack.common.serialize import EnumEncoder
+        from llama_stack.core.configure import configure_api_providers
 
         builds_dir = BUILDS_BASE_DIR / build_config.image_type
         if output_dir:
diff --git a/llama_toolchain/cli/stack/list_apis.py b/llama_stack/cli/stack/list_apis.py
similarity index 87%
rename from llama_toolchain/cli/stack/list_apis.py
rename to llama_stack/cli/stack/list_apis.py
index f13ecefe9..6eed5ca51 100644
--- a/llama_toolchain/cli/stack/list_apis.py
+++ b/llama_stack/cli/stack/list_apis.py
@@ -6,7 +6,7 @@
 
 import argparse
 
-from llama_toolchain.cli.subcommand import Subcommand
+from llama_stack.cli.subcommand import Subcommand
 
 
 class StackListApis(Subcommand):
@@ -25,8 +25,8 @@ class StackListApis(Subcommand):
         pass
 
     def _run_apis_list_cmd(self, args: argparse.Namespace) -> None:
-        from llama_toolchain.cli.table import print_table
-        from llama_toolchain.core.distribution import stack_apis
+        from llama_stack.cli.table import print_table
+        from llama_stack.core.distribution import stack_apis
 
         # eventually, this should query a registry at llama.meta.com/llamastack/distributions
         headers = [
diff --git a/llama_toolchain/cli/stack/list_providers.py b/llama_stack/cli/stack/list_providers.py
similarity index 87%
rename from llama_toolchain/cli/stack/list_providers.py
rename to llama_stack/cli/stack/list_providers.py
index a5640677d..7568c69c9 100644
--- a/llama_toolchain/cli/stack/list_providers.py
+++ b/llama_stack/cli/stack/list_providers.py
@@ -6,7 +6,7 @@
 
 import argparse
 
-from llama_toolchain.cli.subcommand import Subcommand
+from llama_stack.cli.subcommand import Subcommand
 
 
 class StackListProviders(Subcommand):
@@ -22,7 +22,7 @@ class StackListProviders(Subcommand):
         self.parser.set_defaults(func=self._run_providers_list_cmd)
 
     def _add_arguments(self):
-        from llama_toolchain.core.distribution import stack_apis
+        from llama_stack.core.distribution import stack_apis
 
         api_values = [a.value for a in stack_apis()]
         self.parser.add_argument(
@@ -33,8 +33,8 @@ class StackListProviders(Subcommand):
         )
 
     def _run_providers_list_cmd(self, args: argparse.Namespace) -> None:
-        from llama_toolchain.cli.table import print_table
-        from llama_toolchain.core.distribution import Api, api_providers
+        from llama_stack.cli.table import print_table
+        from llama_stack.core.distribution import Api, api_providers
 
         all_providers = api_providers()
         providers_for_api = all_providers[Api(args.api)]
diff --git a/llama_toolchain/cli/stack/run.py b/llama_stack/cli/stack/run.py
similarity index 91%
rename from llama_toolchain/cli/stack/run.py
rename to llama_stack/cli/stack/run.py
index b5900eaba..614132f61 100644
--- a/llama_toolchain/cli/stack/run.py
+++ b/llama_stack/cli/stack/run.py
@@ -11,8 +11,8 @@ from pathlib import Path
 import pkg_resources
 import yaml
 
-from llama_toolchain.cli.subcommand import Subcommand
-from llama_toolchain.core.datatypes import *  # noqa: F403
+from llama_stack.cli.subcommand import Subcommand
+from llama_stack.core.datatypes import *  # noqa: F403
 
 
 class StackRun(Subcommand):
@@ -47,7 +47,7 @@ class StackRun(Subcommand):
         )
 
     def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
-        from llama_toolchain.common.exec import run_with_pty
+        from llama_stack.common.exec import run_with_pty
 
         if not args.config:
             self.parser.error("Must specify a config file to run")
@@ -67,13 +67,13 @@ class StackRun(Subcommand):
 
         if config.docker_image:
             script = pkg_resources.resource_filename(
-                "llama_toolchain",
+                "llama_stack",
                 "core/start_container.sh",
             )
             run_args = [script, config.docker_image]
         else:
             script = pkg_resources.resource_filename(
-                "llama_toolchain",
+                "llama_stack",
                 "core/start_conda_env.sh",
             )
             run_args = [
diff --git a/llama_toolchain/cli/stack/stack.py b/llama_stack/cli/stack/stack.py
similarity index 94%
rename from llama_toolchain/cli/stack/stack.py
rename to llama_stack/cli/stack/stack.py
index 0e4abb5a2..c359d27ec 100644
--- a/llama_toolchain/cli/stack/stack.py
+++ b/llama_stack/cli/stack/stack.py
@@ -6,7 +6,7 @@
 
 import argparse
 
-from llama_toolchain.cli.subcommand import Subcommand
+from llama_stack.cli.subcommand import Subcommand
 
 from .build import StackBuild
 from .configure import StackConfigure
diff --git a/llama_toolchain/cli/subcommand.py b/llama_stack/cli/subcommand.py
similarity index 100%
rename from llama_toolchain/cli/subcommand.py
rename to llama_stack/cli/subcommand.py
diff --git a/llama_toolchain/cli/table.py b/llama_stack/cli/table.py
similarity index 100%
rename from llama_toolchain/cli/table.py
rename to llama_stack/cli/table.py
diff --git a/llama_toolchain/core/__init__.py b/llama_stack/common/__init__.py
similarity index 100%
rename from llama_toolchain/core/__init__.py
rename to llama_stack/common/__init__.py
diff --git a/llama_toolchain/common/config_dirs.py b/llama_stack/common/config_dirs.py
similarity index 100%
rename from llama_toolchain/common/config_dirs.py
rename to llama_stack/common/config_dirs.py
diff --git a/llama_toolchain/common/deployment_types.py b/llama_stack/common/deployment_types.py
similarity index 100%
rename from llama_toolchain/common/deployment_types.py
rename to llama_stack/common/deployment_types.py
diff --git a/llama_toolchain/common/exec.py b/llama_stack/common/exec.py
similarity index 100%
rename from llama_toolchain/common/exec.py
rename to llama_stack/common/exec.py
diff --git a/llama_toolchain/common/model_utils.py b/llama_stack/common/model_utils.py
similarity index 100%
rename from llama_toolchain/common/model_utils.py
rename to llama_stack/common/model_utils.py
diff --git a/llama_toolchain/common/prompt_for_config.py b/llama_stack/common/prompt_for_config.py
similarity index 100%
rename from llama_toolchain/common/prompt_for_config.py
rename to llama_stack/common/prompt_for_config.py
diff --git a/llama_toolchain/common/serialize.py b/llama_stack/common/serialize.py
similarity index 100%
rename from llama_toolchain/common/serialize.py
rename to llama_stack/common/serialize.py
diff --git a/llama_toolchain/common/training_types.py b/llama_stack/common/training_types.py
similarity index 100%
rename from llama_toolchain/common/training_types.py
rename to llama_stack/common/training_types.py
diff --git a/llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml b/llama_stack/configs/distributions/conda/local-conda-example-build.yaml
similarity index 72%
rename from llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml
rename to llama_stack/configs/distributions/conda/local-conda-example-build.yaml
index 2a25cb9dd..d588b6b5f 100644
--- a/llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml
+++ b/llama_stack/configs/distributions/conda/local-conda-example-build.yaml
@@ -1,6 +1,6 @@
 name: local-conda-example
 distribution_spec:
-  description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
   providers:
     inference: meta-reference
     memory: meta-reference-faiss
diff --git a/llama_toolchain/configs/distributions/conda/local-fireworks-conda-example-build.yaml b/llama_stack/configs/distributions/conda/local-fireworks-conda-example-build.yaml
similarity index 100%
rename from llama_toolchain/configs/distributions/conda/local-fireworks-conda-example-build.yaml
rename to llama_stack/configs/distributions/conda/local-fireworks-conda-example-build.yaml
diff --git a/llama_toolchain/configs/distributions/conda/local-ollama-conda-example-build.yaml b/llama_stack/configs/distributions/conda/local-ollama-conda-example-build.yaml
similarity index 100%
rename from llama_toolchain/configs/distributions/conda/local-ollama-conda-example-build.yaml
rename to llama_stack/configs/distributions/conda/local-ollama-conda-example-build.yaml
diff --git a/llama_toolchain/configs/distributions/conda/local-tgi-conda-example-build.yaml b/llama_stack/configs/distributions/conda/local-tgi-conda-example-build.yaml
similarity index 100%
rename from llama_toolchain/configs/distributions/conda/local-tgi-conda-example-build.yaml
rename to llama_stack/configs/distributions/conda/local-tgi-conda-example-build.yaml
diff --git a/llama_toolchain/configs/distributions/conda/local-together-conda-example-build.yaml b/llama_stack/configs/distributions/conda/local-together-conda-example-build.yaml
similarity index 100%
rename from llama_toolchain/configs/distributions/conda/local-together-conda-example-build.yaml
rename to llama_stack/configs/distributions/conda/local-together-conda-example-build.yaml
diff --git a/llama_toolchain/configs/distributions/docker/local-docker-example-build.yaml b/llama_stack/configs/distributions/docker/local-docker-example-build.yaml
similarity index 72%
rename from llama_toolchain/configs/distributions/docker/local-docker-example-build.yaml
rename to llama_stack/configs/distributions/docker/local-docker-example-build.yaml
index 0bdb18802..f868aa98f 100644
--- a/llama_toolchain/configs/distributions/docker/local-docker-example-build.yaml
+++ b/llama_stack/configs/distributions/docker/local-docker-example-build.yaml
@@ -1,6 +1,6 @@
 name: local-docker-example
 distribution_spec:
-  description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
   providers:
     inference: meta-reference
     memory: meta-reference-faiss
diff --git a/llama_toolchain/inference/__init__.py b/llama_stack/core/__init__.py
similarity index 100%
rename from llama_toolchain/inference/__init__.py
rename to llama_stack/core/__init__.py
diff --git a/llama_toolchain/core/build_conda_env.sh b/llama_stack/core/build_conda_env.sh
similarity index 100%
rename from llama_toolchain/core/build_conda_env.sh
rename to llama_stack/core/build_conda_env.sh
diff --git a/llama_toolchain/core/build_container.sh b/llama_stack/core/build_container.sh
similarity index 97%
rename from llama_toolchain/core/build_container.sh
rename to llama_stack/core/build_container.sh
index d829e8399..81cb5d40c 100755
--- a/llama_toolchain/core/build_container.sh
+++ b/llama_stack/core/build_container.sh
@@ -90,7 +90,7 @@ add_to_docker < str:
         if self.adapter:
             return self.adapter.module
-        return f"llama_toolchain.{self.api.value}.client"
+        return f"llama_stack.{self.api.value}.client"
 
     @property
     def pip_packages(self) -> List[str]:
@@ -167,7 +167,7 @@ def remote_provider_spec(
     config_class = (
         adapter.config_class
         if adapter and adapter.config_class
-        else "llama_toolchain.core.datatypes.RemoteProviderConfig"
+        else "llama_stack.core.datatypes.RemoteProviderConfig"
     )
 
     provider_id = remote_provider_id(adapter.adapter_id) if adapter else "remote"
diff --git a/llama_toolchain/core/distribution.py b/llama_stack/core/distribution.py
similarity index 84%
rename from llama_toolchain/core/distribution.py
rename to llama_stack/core/distribution.py
index dc81b53f1..83ac4f045 100644
--- a/llama_toolchain/core/distribution.py
+++ b/llama_stack/core/distribution.py
@@ -8,11 +8,11 @@ import importlib
 import inspect
 from typing import Dict, List
 
-from llama_toolchain.agentic_system.api import AgenticSystem
-from llama_toolchain.inference.api import Inference
-from llama_toolchain.memory.api import Memory
-from llama_toolchain.safety.api import Safety
-from llama_toolchain.telemetry.api import Telemetry
+from llama_stack.agentic_system.api import AgenticSystem
+from llama_stack.inference.api import Inference
+from llama_stack.memory.api import Memory
+from llama_stack.safety.api import Safety
+from llama_stack.telemetry.api import Telemetry
 
 from .datatypes import Api, ApiEndpoint, ProviderSpec, remote_provider_spec
 
@@ -67,7 +67,7 @@ def api_providers() -> Dict[Api, Dict[str, ProviderSpec]]:
     ret = {}
     for api in stack_apis():
         name = api.name.lower()
-        module = importlib.import_module(f"llama_toolchain.{name}.providers")
+        module = importlib.import_module(f"llama_stack.{name}.providers")
         ret[api] = {
             "remote": remote_provider_spec(api),
             **{a.provider_id: a for a in module.available_providers()},
diff --git a/llama_toolchain/core/dynamic.py b/llama_stack/core/dynamic.py
similarity index 97%
rename from llama_toolchain/core/dynamic.py
rename to llama_stack/core/dynamic.py
index 42c0646da..3aee3b24a 100644
--- a/llama_toolchain/core/dynamic.py
+++ b/llama_stack/core/dynamic.py
@@ -7,7 +7,7 @@ import importlib
 from typing import Any, Dict
 
-from llama_toolchain.core.datatypes import *  # noqa: F403
+from
llama_stack.core.datatypes import * # noqa: F403 def instantiate_class_type(fully_qualified_name): diff --git a/llama_toolchain/core/package.py b/llama_stack/core/package.py similarity index 88% rename from llama_toolchain/core/package.py rename to llama_stack/core/package.py index 37dac091d..7e364f517 100644 --- a/llama_toolchain/core/package.py +++ b/llama_stack/core/package.py @@ -12,12 +12,12 @@ from pydantic import BaseModel from termcolor import cprint -from llama_toolchain.common.exec import run_with_pty +from llama_stack.common.exec import run_with_pty -from llama_toolchain.core.datatypes import * # noqa: F403 +from llama_stack.core.datatypes import * # noqa: F403 from pathlib import Path -from llama_toolchain.core.distribution import api_providers, SERVER_DEPENDENCIES +from llama_stack.core.distribution import api_providers, SERVER_DEPENDENCIES class ImageType(Enum): @@ -68,7 +68,7 @@ def build_image(build_config: BuildConfig, build_file_path: Path): if build_config.image_type == ImageType.docker.value: script = pkg_resources.resource_filename( - "llama_toolchain", "core/build_container.sh" + "llama_stack", "core/build_container.sh" ) args = [ script, @@ -79,7 +79,7 @@ def build_image(build_config: BuildConfig, build_file_path: Path): ] else: script = pkg_resources.resource_filename( - "llama_toolchain", "core/build_conda_env.sh" + "llama_stack", "core/build_conda_env.sh" ) args = [ script, diff --git a/llama_toolchain/core/server.py b/llama_stack/core/server.py similarity index 98% rename from llama_toolchain/core/server.py rename to llama_stack/core/server.py index 70273be16..7a40184ec 100644 --- a/llama_toolchain/core/server.py +++ b/llama_stack/core/server.py @@ -39,13 +39,13 @@ from pydantic import BaseModel, ValidationError from termcolor import cprint from typing_extensions import Annotated -from llama_toolchain.telemetry.tracing import ( +from llama_stack.telemetry.tracing import ( end_trace, setup_logger, SpanStatus, start_trace, ) -from llama_toolchain.core.datatypes import * # noqa: F403 +from llama_stack.core.datatypes import * # noqa: F403 from .distribution import api_endpoints, api_providers from .dynamic import instantiate_provider @@ -309,7 +309,7 @@ async def resolve_impls( specs[api] = RouterProviderSpec( api=api, - module=f"llama_toolchain.{api.value.lower()}.router", + module=f"llama_stack.{api.value.lower()}.router", api_dependencies=[], inner_specs=inner_specs, ) diff --git a/llama_toolchain/core/start_conda_env.sh b/llama_stack/core/start_conda_env.sh similarity index 95% rename from llama_toolchain/core/start_conda_env.sh rename to llama_stack/core/start_conda_env.sh index 120dda006..604b1267e 100755 --- a/llama_toolchain/core/start_conda_env.sh +++ b/llama_stack/core/start_conda_env.sh @@ -37,6 +37,6 @@ eval "$(conda shell.bash hook)" conda deactivate && conda activate "$env_name" $CONDA_PREFIX/bin/python \ - -m llama_toolchain.core.server \ + -m llama_stack.core.server \ --yaml_config "$yaml_config" \ --port "$port" "$@" diff --git a/llama_toolchain/core/start_container.sh b/llama_stack/core/start_container.sh similarity index 94% rename from llama_toolchain/core/start_container.sh rename to llama_stack/core/start_container.sh index 676bcedcf..7835e2a79 100755 --- a/llama_toolchain/core/start_container.sh +++ b/llama_stack/core/start_container.sh @@ -38,6 +38,6 @@ podman run -it \ -p $port:$port \ -v "$yaml_config:/app/config.yaml" \ $docker_image \ - python -m llama_toolchain.core.server \ + python -m llama_stack.core.server \ --yaml_config 
/app/config.yaml \ --port $port "$@" diff --git a/llama_toolchain/dataset/api/__init__.py b/llama_stack/dataset/api/__init__.py similarity index 100% rename from llama_toolchain/dataset/api/__init__.py rename to llama_stack/dataset/api/__init__.py diff --git a/llama_toolchain/dataset/api/api.py b/llama_stack/dataset/api/api.py similarity index 100% rename from llama_toolchain/dataset/api/api.py rename to llama_stack/dataset/api/api.py diff --git a/llama_toolchain/evaluations/api/__init__.py b/llama_stack/evaluations/api/__init__.py similarity index 100% rename from llama_toolchain/evaluations/api/__init__.py rename to llama_stack/evaluations/api/__init__.py diff --git a/llama_toolchain/evaluations/api/api.py b/llama_stack/evaluations/api/api.py similarity index 95% rename from llama_toolchain/evaluations/api/api.py rename to llama_stack/evaluations/api/api.py index 898dc2822..c99ed0c72 100644 --- a/llama_toolchain/evaluations/api/api.py +++ b/llama_stack/evaluations/api/api.py @@ -12,8 +12,8 @@ from llama_models.schema_utils import webmethod from pydantic import BaseModel from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_toolchain.dataset.api import * # noqa: F403 -from llama_toolchain.common.training_types import * # noqa: F403 +from llama_stack.dataset.api import * # noqa: F403 +from llama_stack.common.training_types import * # noqa: F403 class TextGenerationMetric(Enum): diff --git a/llama_toolchain/inference/adapters/__init__.py b/llama_stack/inference/__init__.py similarity index 100% rename from llama_toolchain/inference/adapters/__init__.py rename to llama_stack/inference/__init__.py diff --git a/llama_toolchain/memory/__init__.py b/llama_stack/inference/adapters/__init__.py similarity index 100% rename from llama_toolchain/memory/__init__.py rename to llama_stack/inference/adapters/__init__.py diff --git a/llama_toolchain/inference/adapters/fireworks/__init__.py b/llama_stack/inference/adapters/fireworks/__init__.py similarity index 100% rename from llama_toolchain/inference/adapters/fireworks/__init__.py rename to llama_stack/inference/adapters/fireworks/__init__.py diff --git a/llama_toolchain/inference/adapters/fireworks/config.py b/llama_stack/inference/adapters/fireworks/config.py similarity index 100% rename from llama_toolchain/inference/adapters/fireworks/config.py rename to llama_stack/inference/adapters/fireworks/config.py diff --git a/llama_toolchain/inference/adapters/fireworks/fireworks.py b/llama_stack/inference/adapters/fireworks/fireworks.py similarity index 98% rename from llama_toolchain/inference/adapters/fireworks/fireworks.py rename to llama_stack/inference/adapters/fireworks/fireworks.py index e51a730de..6ebb8d5a3 100644 --- a/llama_toolchain/inference/adapters/fireworks/fireworks.py +++ b/llama_stack/inference/adapters/fireworks/fireworks.py @@ -13,8 +13,8 @@ from llama_models.llama3.api.datatypes import Message, StopReason from llama_models.llama3.api.tokenizer import Tokenizer from llama_models.sku_list import resolve_model -from llama_toolchain.inference.api import * # noqa: F403 -from llama_toolchain.inference.prepare_messages import prepare_messages +from llama_stack.inference.api import * # noqa: F403 +from llama_stack.inference.prepare_messages import prepare_messages from .config import FireworksImplConfig diff --git a/llama_toolchain/inference/adapters/ollama/__init__.py b/llama_stack/inference/adapters/ollama/__init__.py similarity index 86% rename from llama_toolchain/inference/adapters/ollama/__init__.py rename to 
llama_stack/inference/adapters/ollama/__init__.py index 8369a00a5..c9d0348ec 100644 --- a/llama_toolchain/inference/adapters/ollama/__init__.py +++ b/llama_stack/inference/adapters/ollama/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_toolchain.core.datatypes import RemoteProviderConfig +from llama_stack.core.datatypes import RemoteProviderConfig async def get_adapter_impl(config: RemoteProviderConfig, _deps): diff --git a/llama_toolchain/inference/adapters/ollama/ollama.py b/llama_stack/inference/adapters/ollama/ollama.py similarity index 98% rename from llama_toolchain/inference/adapters/ollama/ollama.py rename to llama_stack/inference/adapters/ollama/ollama.py index 92fbf7585..236421a2f 100644 --- a/llama_toolchain/inference/adapters/ollama/ollama.py +++ b/llama_stack/inference/adapters/ollama/ollama.py @@ -14,8 +14,8 @@ from llama_models.llama3.api.tokenizer import Tokenizer from llama_models.sku_list import resolve_model from ollama import AsyncClient -from llama_toolchain.inference.api import * # noqa: F403 -from llama_toolchain.inference.prepare_messages import prepare_messages +from llama_stack.inference.api import * # noqa: F403 +from llama_stack.inference.prepare_messages import prepare_messages # TODO: Eventually this will move to the llama cli model list command # mapping of Model SKUs to ollama models diff --git a/llama_toolchain/inference/adapters/tgi/__init__.py b/llama_stack/inference/adapters/tgi/__init__.py similarity index 100% rename from llama_toolchain/inference/adapters/tgi/__init__.py rename to llama_stack/inference/adapters/tgi/__init__.py diff --git a/llama_toolchain/inference/adapters/tgi/config.py b/llama_stack/inference/adapters/tgi/config.py similarity index 100% rename from llama_toolchain/inference/adapters/tgi/config.py rename to llama_stack/inference/adapters/tgi/config.py diff --git a/llama_toolchain/inference/adapters/tgi/tgi.py b/llama_stack/inference/adapters/tgi/tgi.py similarity index 98% rename from llama_toolchain/inference/adapters/tgi/tgi.py rename to llama_stack/inference/adapters/tgi/tgi.py index 7b1028817..7ca2e1b14 100644 --- a/llama_toolchain/inference/adapters/tgi/tgi.py +++ b/llama_stack/inference/adapters/tgi/tgi.py @@ -13,8 +13,8 @@ from huggingface_hub import HfApi, InferenceClient from llama_models.llama3.api.chat_format import ChatFormat from llama_models.llama3.api.datatypes import StopReason from llama_models.llama3.api.tokenizer import Tokenizer -from llama_toolchain.inference.api import * # noqa: F403 -from llama_toolchain.inference.prepare_messages import prepare_messages +from llama_stack.inference.api import * # noqa: F403 +from llama_stack.inference.prepare_messages import prepare_messages from .config import TGIImplConfig diff --git a/llama_toolchain/inference/adapters/together/__init__.py b/llama_stack/inference/adapters/together/__init__.py similarity index 100% rename from llama_toolchain/inference/adapters/together/__init__.py rename to llama_stack/inference/adapters/together/__init__.py diff --git a/llama_toolchain/inference/adapters/together/config.py b/llama_stack/inference/adapters/together/config.py similarity index 100% rename from llama_toolchain/inference/adapters/together/config.py rename to llama_stack/inference/adapters/together/config.py diff --git a/llama_toolchain/inference/adapters/together/together.py b/llama_stack/inference/adapters/together/together.py similarity index 98% rename from 
llama_toolchain/inference/adapters/together/together.py rename to llama_stack/inference/adapters/together/together.py index 76403a85b..739b437ca 100644 --- a/llama_toolchain/inference/adapters/together/together.py +++ b/llama_stack/inference/adapters/together/together.py @@ -13,8 +13,8 @@ from llama_models.llama3.api.tokenizer import Tokenizer from llama_models.sku_list import resolve_model from together import Together -from llama_toolchain.inference.api import * # noqa: F403 -from llama_toolchain.inference.prepare_messages import prepare_messages +from llama_stack.inference.api import * # noqa: F403 +from llama_stack.inference.prepare_messages import prepare_messages from .config import TogetherImplConfig diff --git a/llama_toolchain/inference/api/__init__.py b/llama_stack/inference/api/__init__.py similarity index 100% rename from llama_toolchain/inference/api/__init__.py rename to llama_stack/inference/api/__init__.py diff --git a/llama_toolchain/inference/api/api.py b/llama_stack/inference/api/api.py similarity index 100% rename from llama_toolchain/inference/api/api.py rename to llama_stack/inference/api/api.py diff --git a/llama_toolchain/inference/client.py b/llama_stack/inference/client.py similarity index 98% rename from llama_toolchain/inference/client.py rename to llama_stack/inference/client.py index c57433a8f..8e9b77030 100644 --- a/llama_toolchain/inference/client.py +++ b/llama_stack/inference/client.py @@ -10,11 +10,11 @@ from typing import Any, AsyncGenerator import fire import httpx - -from llama_toolchain.core.datatypes import RemoteProviderConfig from pydantic import BaseModel from termcolor import cprint +from llama_stack.core.datatypes import RemoteProviderConfig + from .api import ( ChatCompletionRequest, ChatCompletionResponse, diff --git a/llama_toolchain/inference/event_logger.py b/llama_stack/inference/event_logger.py similarity index 97% rename from llama_toolchain/inference/event_logger.py rename to llama_stack/inference/event_logger.py index 248ceae27..6e27998a5 100644 --- a/llama_toolchain/inference/event_logger.py +++ b/llama_stack/inference/event_logger.py @@ -4,11 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_toolchain.inference.api import ( +from termcolor import cprint + +from llama_stack.inference.api import ( ChatCompletionResponseEventType, ChatCompletionResponseStreamChunk, ) -from termcolor import cprint class LogEvent: diff --git a/llama_toolchain/inference/meta_reference/__init__.py b/llama_stack/inference/meta_reference/__init__.py similarity index 100% rename from llama_toolchain/inference/meta_reference/__init__.py rename to llama_stack/inference/meta_reference/__init__.py diff --git a/llama_toolchain/inference/meta_reference/config.py b/llama_stack/inference/meta_reference/config.py similarity index 96% rename from llama_toolchain/inference/meta_reference/config.py rename to llama_stack/inference/meta_reference/config.py index a0bbc5820..7da01a0f4 100644 --- a/llama_toolchain/inference/meta_reference/config.py +++ b/llama_stack/inference/meta_reference/config.py @@ -13,7 +13,7 @@ from llama_models.sku_list import all_registered_models, resolve_model from pydantic import BaseModel, Field, field_validator -from llama_toolchain.inference.api import QuantizationConfig +from llama_stack.inference.api import QuantizationConfig @json_schema_type diff --git a/llama_toolchain/inference/meta_reference/generation.py b/llama_stack/inference/meta_reference/generation.py similarity index 98% rename from llama_toolchain/inference/meta_reference/generation.py rename to llama_stack/inference/meta_reference/generation.py index d13b9570d..f7b077f54 100644 --- a/llama_toolchain/inference/meta_reference/generation.py +++ b/llama_stack/inference/meta_reference/generation.py @@ -28,11 +28,11 @@ from llama_models.llama3.api.datatypes import Message, ToolPromptFormat from llama_models.llama3.api.tokenizer import Tokenizer from llama_models.llama3.reference_impl.model import Transformer from llama_models.sku_list import resolve_model - -from llama_toolchain.common.model_utils import model_local_dir -from llama_toolchain.inference.api import QuantizationType from termcolor import cprint +from llama_stack.common.model_utils import model_local_dir +from llama_stack.inference.api import QuantizationType + from .config import MetaReferenceImplConfig diff --git a/llama_toolchain/inference/meta_reference/inference.py b/llama_stack/inference/meta_reference/inference.py similarity index 97% rename from llama_toolchain/inference/meta_reference/inference.py rename to llama_stack/inference/meta_reference/inference.py index 247c08f23..66a97f7fb 100644 --- a/llama_toolchain/inference/meta_reference/inference.py +++ b/llama_stack/inference/meta_reference/inference.py @@ -11,7 +11,7 @@ from typing import AsyncIterator, Union from llama_models.llama3.api.datatypes import StopReason from llama_models.sku_list import resolve_model -from llama_toolchain.inference.api import ( +from llama_stack.inference.api import ( ChatCompletionRequest, ChatCompletionResponse, ChatCompletionResponseEvent, @@ -21,13 +21,13 @@ from llama_toolchain.inference.api import ( ToolCallDelta, ToolCallParseStatus, ) -from llama_toolchain.inference.prepare_messages import prepare_messages +from llama_stack.inference.prepare_messages import prepare_messages from .config import MetaReferenceImplConfig from .model_parallel import LlamaModelParallelGenerator from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_toolchain.inference.api import * # noqa: F403 +from llama_stack.inference.api import * # noqa: F403 # there's a single model parallel process running serving the model. 
for now, # we don't support multiple concurrent requests to this process. diff --git a/llama_toolchain/inference/meta_reference/model_parallel.py b/llama_stack/inference/meta_reference/model_parallel.py similarity index 100% rename from llama_toolchain/inference/meta_reference/model_parallel.py rename to llama_stack/inference/meta_reference/model_parallel.py diff --git a/llama_toolchain/inference/meta_reference/parallel_utils.py b/llama_stack/inference/meta_reference/parallel_utils.py similarity index 100% rename from llama_toolchain/inference/meta_reference/parallel_utils.py rename to llama_stack/inference/meta_reference/parallel_utils.py diff --git a/llama_toolchain/inference/prepare_messages.py b/llama_stack/inference/prepare_messages.py similarity index 97% rename from llama_toolchain/inference/prepare_messages.py rename to llama_stack/inference/prepare_messages.py index 92e94f8d2..ef818daa1 100644 --- a/llama_toolchain/inference/prepare_messages.py +++ b/llama_stack/inference/prepare_messages.py @@ -5,7 +5,7 @@ # the root directory of this source tree. from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_toolchain.inference.api import * # noqa: F403 +from llama_stack.inference.api import * # noqa: F403 from llama_models.llama3.prompt_templates import ( BuiltinToolGenerator, FunctionTagCustomToolGenerator, diff --git a/llama_toolchain/inference/providers.py b/llama_stack/inference/providers.py similarity index 67% rename from llama_toolchain/inference/providers.py rename to llama_stack/inference/providers.py index 928c6ef57..069cda1d2 100644 --- a/llama_toolchain/inference/providers.py +++ b/llama_stack/inference/providers.py @@ -6,7 +6,7 @@ from typing import List -from llama_toolchain.core.datatypes import * # noqa: F403 +from llama_stack.core.datatypes import * # noqa: F403 def available_providers() -> List[ProviderSpec]: @@ -24,15 +24,15 @@ def available_providers() -> List[ProviderSpec]: "transformers", "zmq", ], - module="llama_toolchain.inference.meta_reference", - config_class="llama_toolchain.inference.meta_reference.MetaReferenceImplConfig", + module="llama_stack.inference.meta_reference", + config_class="llama_stack.inference.meta_reference.MetaReferenceImplConfig", ), remote_provider_spec( api=Api.inference, adapter=AdapterSpec( adapter_id="ollama", pip_packages=["ollama"], - module="llama_toolchain.inference.adapters.ollama", + module="llama_stack.inference.adapters.ollama", ), ), remote_provider_spec( @@ -40,8 +40,8 @@ def available_providers() -> List[ProviderSpec]: adapter=AdapterSpec( adapter_id="tgi", pip_packages=["huggingface_hub"], - module="llama_toolchain.inference.adapters.tgi", - config_class="llama_toolchain.inference.adapters.tgi.TGIImplConfig", + module="llama_stack.inference.adapters.tgi", + config_class="llama_stack.inference.adapters.tgi.TGIImplConfig", ), ), remote_provider_spec( @@ -51,8 +51,8 @@ def available_providers() -> List[ProviderSpec]: pip_packages=[ "fireworks-ai", ], - module="llama_toolchain.inference.adapters.fireworks", - config_class="llama_toolchain.inference.adapters.fireworks.FireworksImplConfig", + module="llama_stack.inference.adapters.fireworks", + config_class="llama_stack.inference.adapters.fireworks.FireworksImplConfig", ), ), remote_provider_spec( @@ -62,8 +62,8 @@ def available_providers() -> List[ProviderSpec]: pip_packages=[ "together", ], - module="llama_toolchain.inference.adapters.together", - config_class="llama_toolchain.inference.adapters.together.TogetherImplConfig", + 
module="llama_stack.inference.adapters.together", + config_class="llama_stack.inference.adapters.together.TogetherImplConfig", ), ), ] diff --git a/llama_toolchain/inference/quantization/fp8_impls.py b/llama_stack/inference/quantization/fp8_impls.py similarity index 100% rename from llama_toolchain/inference/quantization/fp8_impls.py rename to llama_stack/inference/quantization/fp8_impls.py diff --git a/llama_toolchain/inference/quantization/loader.py b/llama_stack/inference/quantization/loader.py similarity index 97% rename from llama_toolchain/inference/quantization/loader.py rename to llama_stack/inference/quantization/loader.py index 54827dce9..1b98bb9ad 100644 --- a/llama_toolchain/inference/quantization/loader.py +++ b/llama_stack/inference/quantization/loader.py @@ -14,9 +14,9 @@ import torch from fairscale.nn.model_parallel.mappings import reduce_from_model_parallel_region from llama_models.llama3.api.model import Transformer, TransformerBlock -from llama_toolchain.inference.api import QuantizationType +from llama_stack.inference.api import QuantizationType -from llama_toolchain.inference.api.config import ( +from llama_stack.inference.api.config import ( CheckpointQuantizationFormat, MetaReferenceImplConfig, ) diff --git a/llama_toolchain/inference/quantization/scripts/build_conda.sh b/llama_stack/inference/quantization/scripts/build_conda.sh similarity index 100% rename from llama_toolchain/inference/quantization/scripts/build_conda.sh rename to llama_stack/inference/quantization/scripts/build_conda.sh diff --git a/llama_toolchain/inference/quantization/scripts/quantize_checkpoint.py b/llama_stack/inference/quantization/scripts/quantize_checkpoint.py similarity index 100% rename from llama_toolchain/inference/quantization/scripts/quantize_checkpoint.py rename to llama_stack/inference/quantization/scripts/quantize_checkpoint.py diff --git a/llama_toolchain/inference/quantization/scripts/run_quantize_checkpoint.sh b/llama_stack/inference/quantization/scripts/run_quantize_checkpoint.sh similarity index 100% rename from llama_toolchain/inference/quantization/scripts/run_quantize_checkpoint.sh rename to llama_stack/inference/quantization/scripts/run_quantize_checkpoint.sh diff --git a/llama_toolchain/inference/quantization/test_fp8.py b/llama_stack/inference/quantization/test_fp8.py similarity index 100% rename from llama_toolchain/inference/quantization/test_fp8.py rename to llama_stack/inference/quantization/test_fp8.py diff --git a/llama_toolchain/memory/common/__init__.py b/llama_stack/memory/__init__.py similarity index 100% rename from llama_toolchain/memory/common/__init__.py rename to llama_stack/memory/__init__.py diff --git a/llama_toolchain/memory/adapters/chroma/__init__.py b/llama_stack/memory/adapters/chroma/__init__.py similarity index 86% rename from llama_toolchain/memory/adapters/chroma/__init__.py rename to llama_stack/memory/adapters/chroma/__init__.py index c90a8e8ac..32a9c65a3 100644 --- a/llama_toolchain/memory/adapters/chroma/__init__.py +++ b/llama_stack/memory/adapters/chroma/__init__.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_toolchain.core.datatypes import RemoteProviderConfig +from llama_stack.core.datatypes import RemoteProviderConfig async def get_adapter_impl(config: RemoteProviderConfig, _deps): diff --git a/llama_toolchain/memory/adapters/chroma/chroma.py b/llama_stack/memory/adapters/chroma/chroma.py similarity index 97% rename from llama_toolchain/memory/adapters/chroma/chroma.py rename to llama_stack/memory/adapters/chroma/chroma.py index f4952cd0e..0c39fd9c0 100644 --- a/llama_toolchain/memory/adapters/chroma/chroma.py +++ b/llama_stack/memory/adapters/chroma/chroma.py @@ -12,10 +12,10 @@ from urllib.parse import urlparse import chromadb from numpy.typing import NDArray -from llama_toolchain.memory.api import * # noqa: F403 +from llama_stack.memory.api import * # noqa: F403 -from llama_toolchain.memory.common.vector_store import BankWithIndex, EmbeddingIndex +from llama_stack.memory.common.vector_store import BankWithIndex, EmbeddingIndex class ChromaIndex(EmbeddingIndex): diff --git a/llama_toolchain/memory/adapters/pgvector/__init__.py b/llama_stack/memory/adapters/pgvector/__init__.py similarity index 100% rename from llama_toolchain/memory/adapters/pgvector/__init__.py rename to llama_stack/memory/adapters/pgvector/__init__.py diff --git a/llama_toolchain/memory/adapters/pgvector/config.py b/llama_stack/memory/adapters/pgvector/config.py similarity index 100% rename from llama_toolchain/memory/adapters/pgvector/config.py rename to llama_stack/memory/adapters/pgvector/config.py diff --git a/llama_toolchain/memory/adapters/pgvector/pgvector.py b/llama_stack/memory/adapters/pgvector/pgvector.py similarity index 98% rename from llama_toolchain/memory/adapters/pgvector/pgvector.py rename to llama_stack/memory/adapters/pgvector/pgvector.py index 930d7720f..a8dadb0f3 100644 --- a/llama_toolchain/memory/adapters/pgvector/pgvector.py +++ b/llama_stack/memory/adapters/pgvector/pgvector.py @@ -13,10 +13,10 @@ from numpy.typing import NDArray from psycopg2 import sql from psycopg2.extras import execute_values, Json from pydantic import BaseModel -from llama_toolchain.memory.api import * # noqa: F403 +from llama_stack.memory.api import * # noqa: F403 -from llama_toolchain.memory.common.vector_store import ( +from llama_stack.memory.common.vector_store import ( ALL_MINILM_L6_V2_DIMENSION, BankWithIndex, EmbeddingIndex, diff --git a/llama_toolchain/memory/api/__init__.py b/llama_stack/memory/api/__init__.py similarity index 100% rename from llama_toolchain/memory/api/__init__.py rename to llama_stack/memory/api/__init__.py diff --git a/llama_toolchain/memory/api/api.py b/llama_stack/memory/api/api.py similarity index 100% rename from llama_toolchain/memory/api/api.py rename to llama_stack/memory/api/api.py diff --git a/llama_toolchain/memory/client.py b/llama_stack/memory/client.py similarity index 98% rename from llama_toolchain/memory/client.py rename to llama_stack/memory/client.py index c2c04b213..f629fafe8 100644 --- a/llama_toolchain/memory/client.py +++ b/llama_stack/memory/client.py @@ -15,7 +15,7 @@ import fire import httpx from termcolor import cprint -from llama_toolchain.core.datatypes import RemoteProviderConfig +from llama_stack.core.datatypes import RemoteProviderConfig from .api import * # noqa: F403 from .common.file_utils import data_url_from_file diff --git a/llama_toolchain/memory/meta_reference/__init__.py b/llama_stack/memory/common/__init__.py similarity index 100% rename from llama_toolchain/memory/meta_reference/__init__.py rename to llama_stack/memory/common/__init__.py 
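
The chroma move above keeps the remote-adapter convention seen earlier in this patch (see the ollama adapter): each adapter package exposes an async get_adapter_impl(config, _deps) entry point that receives a RemoteProviderConfig and hands back an initialized implementation. A minimal sketch of that entry point under the new package name; the ChromaMemoryAdapter class name and the config.url field are assumptions for illustration, not part of this patch:

    from llama_stack.core.datatypes import RemoteProviderConfig


    async def get_adapter_impl(config: RemoteProviderConfig, _deps):
        # Lazy import keeps the chromadb client dependency out of the
        # common import path until this adapter is actually selected.
        from .chroma import ChromaMemoryAdapter  # hypothetical class name

        impl = ChromaMemoryAdapter(config.url)  # assumes the config carries a url
        await impl.initialize()
        return impl

The same shape applies to the pgvector and inference adapters, which is what lets remote_provider_spec treat every adapter module uniformly via AdapterSpec.module.
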
diff --git a/llama_toolchain/memory/common/file_utils.py b/llama_stack/memory/common/file_utils.py similarity index 100% rename from llama_toolchain/memory/common/file_utils.py rename to llama_stack/memory/common/file_utils.py diff --git a/llama_toolchain/memory/common/vector_store.py b/llama_stack/memory/common/vector_store.py similarity index 98% rename from llama_toolchain/memory/common/vector_store.py rename to llama_stack/memory/common/vector_store.py index baa3fbf21..aca3fd13d 100644 --- a/llama_toolchain/memory/common/vector_store.py +++ b/llama_stack/memory/common/vector_store.py @@ -20,7 +20,7 @@ from pypdf import PdfReader from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_models.llama3.api.tokenizer import Tokenizer -from llama_toolchain.memory.api import * # noqa: F403 +from llama_stack.memory.api import * # noqa: F403 ALL_MINILM_L6_V2_DIMENSION = 384 diff --git a/llama_toolchain/safety/__init__.py b/llama_stack/memory/meta_reference/__init__.py similarity index 100% rename from llama_toolchain/safety/__init__.py rename to llama_stack/memory/meta_reference/__init__.py diff --git a/llama_toolchain/memory/meta_reference/faiss/__init__.py b/llama_stack/memory/meta_reference/faiss/__init__.py similarity index 100% rename from llama_toolchain/memory/meta_reference/faiss/__init__.py rename to llama_stack/memory/meta_reference/faiss/__init__.py diff --git a/llama_toolchain/memory/meta_reference/faiss/config.py b/llama_stack/memory/meta_reference/faiss/config.py similarity index 100% rename from llama_toolchain/memory/meta_reference/faiss/config.py rename to llama_stack/memory/meta_reference/faiss/config.py diff --git a/llama_toolchain/memory/meta_reference/faiss/faiss.py b/llama_stack/memory/meta_reference/faiss/faiss.py similarity index 95% rename from llama_toolchain/memory/meta_reference/faiss/faiss.py rename to llama_stack/memory/meta_reference/faiss/faiss.py index 2dcff4d25..dec8bd2df 100644 --- a/llama_toolchain/memory/meta_reference/faiss/faiss.py +++ b/llama_stack/memory/meta_reference/faiss/faiss.py @@ -15,13 +15,13 @@ from numpy.typing import NDArray from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_toolchain.memory.api import * # noqa: F403 -from llama_toolchain.memory.common.vector_store import ( +from llama_stack.memory.api import * # noqa: F403 +from llama_stack.memory.common.vector_store import ( ALL_MINILM_L6_V2_DIMENSION, BankWithIndex, EmbeddingIndex, ) -from llama_toolchain.telemetry import tracing +from llama_stack.telemetry import tracing from .config import FaissImplConfig logger = logging.getLogger(__name__) diff --git a/llama_toolchain/memory/providers.py b/llama_stack/memory/providers.py similarity index 69% rename from llama_toolchain/memory/providers.py rename to llama_stack/memory/providers.py index cf443f5f3..bffd9eb64 100644 --- a/llama_toolchain/memory/providers.py +++ b/llama_stack/memory/providers.py @@ -6,7 +6,7 @@ from typing import List -from llama_toolchain.core.datatypes import * # noqa: F403 +from llama_stack.core.datatypes import * # noqa: F403 EMBEDDING_DEPS = [ "blobfile", @@ -22,15 +22,15 @@ def available_providers() -> List[ProviderSpec]: api=Api.memory, provider_id="meta-reference-faiss", pip_packages=EMBEDDING_DEPS + ["faiss-cpu"], - module="llama_toolchain.memory.meta_reference.faiss", - config_class="llama_toolchain.memory.meta_reference.faiss.FaissImplConfig", + module="llama_stack.memory.meta_reference.faiss", + config_class="llama_stack.memory.meta_reference.faiss.FaissImplConfig", ), 
remote_provider_spec( Api.memory, AdapterSpec( adapter_id="chromadb", pip_packages=EMBEDDING_DEPS + ["chromadb-client"], - module="llama_toolchain.memory.adapters.chroma", + module="llama_stack.memory.adapters.chroma", ), ), remote_provider_spec( @@ -38,8 +38,8 @@ def available_providers() -> List[ProviderSpec]: AdapterSpec( adapter_id="pgvector", pip_packages=EMBEDDING_DEPS + ["psycopg2-binary"], - module="llama_toolchain.memory.adapters.pgvector", - config_class="llama_toolchain.memory.adapters.pgvector.PGVectorConfig", + module="llama_stack.memory.adapters.pgvector", + config_class="llama_stack.memory.adapters.pgvector.PGVectorConfig", ), ), ] diff --git a/llama_toolchain/memory/router/__init__.py b/llama_stack/memory/router/__init__.py similarity index 90% rename from llama_toolchain/memory/router/__init__.py rename to llama_stack/memory/router/__init__.py index 25c5ac2a8..828be53a8 100644 --- a/llama_toolchain/memory/router/__init__.py +++ b/llama_stack/memory/router/__init__.py @@ -6,7 +6,7 @@ from typing import Any, List, Tuple -from llama_toolchain.core.datatypes import Api +from llama_stack.core.datatypes import Api async def get_router_impl(inner_impls: List[Tuple[str, Any]], deps: List[Api]): diff --git a/llama_toolchain/memory/router/router.py b/llama_stack/memory/router/router.py similarity index 96% rename from llama_toolchain/memory/router/router.py rename to llama_stack/memory/router/router.py index b415fbb96..c63d85d66 100644 --- a/llama_toolchain/memory/router/router.py +++ b/llama_stack/memory/router/router.py @@ -6,8 +6,8 @@ from typing import Any, Dict, List, Tuple -from llama_toolchain.core.datatypes import Api -from llama_toolchain.memory.api import * # noqa: F403 +from llama_stack.core.datatypes import Api +from llama_stack.memory.api import * # noqa: F403 class MemoryRouterImpl(Memory): diff --git a/llama_toolchain/models/api/endpoints.py b/llama_stack/models/api/endpoints.py similarity index 100% rename from llama_toolchain/models/api/endpoints.py rename to llama_stack/models/api/endpoints.py diff --git a/llama_toolchain/post_training/api/__init__.py b/llama_stack/post_training/api/__init__.py similarity index 100% rename from llama_toolchain/post_training/api/__init__.py rename to llama_stack/post_training/api/__init__.py diff --git a/llama_toolchain/post_training/api/api.py b/llama_stack/post_training/api/api.py similarity index 97% rename from llama_toolchain/post_training/api/api.py rename to llama_stack/post_training/api/api.py index 378515f83..d2f183c2b 100644 --- a/llama_toolchain/post_training/api/api.py +++ b/llama_stack/post_training/api/api.py @@ -14,8 +14,8 @@ from llama_models.schema_utils import json_schema_type, webmethod from pydantic import BaseModel, Field from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_toolchain.dataset.api import * # noqa: F403 -from llama_toolchain.common.training_types import * # noqa: F403 +from llama_stack.dataset.api import * # noqa: F403 +from llama_stack.common.training_types import * # noqa: F403 class OptimizerType(Enum): diff --git a/llama_toolchain/reward_scoring/api/__init__.py b/llama_stack/reward_scoring/api/__init__.py similarity index 100% rename from llama_toolchain/reward_scoring/api/__init__.py rename to llama_stack/reward_scoring/api/__init__.py diff --git a/llama_toolchain/reward_scoring/api/api.py b/llama_stack/reward_scoring/api/api.py similarity index 100% rename from llama_toolchain/reward_scoring/api/api.py rename to llama_stack/reward_scoring/api/api.py diff --git 
a/llama_toolchain/safety/meta_reference/shields/contrib/__init__.py b/llama_stack/safety/__init__.py similarity index 100% rename from llama_toolchain/safety/meta_reference/shields/contrib/__init__.py rename to llama_stack/safety/__init__.py diff --git a/llama_toolchain/safety/api/__init__.py b/llama_stack/safety/api/__init__.py similarity index 100% rename from llama_toolchain/safety/api/__init__.py rename to llama_stack/safety/api/__init__.py diff --git a/llama_toolchain/safety/api/api.py b/llama_stack/safety/api/api.py similarity index 96% rename from llama_toolchain/safety/api/api.py rename to llama_stack/safety/api/api.py index 631cfa992..f1abac409 100644 --- a/llama_toolchain/safety/api/api.py +++ b/llama_stack/safety/api/api.py @@ -11,7 +11,7 @@ from llama_models.schema_utils import json_schema_type, webmethod from pydantic import BaseModel, validator from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_toolchain.common.deployment_types import RestAPIExecutionConfig +from llama_stack.common.deployment_types import RestAPIExecutionConfig @json_schema_type diff --git a/llama_toolchain/safety/client.py b/llama_stack/safety/client.py similarity index 97% rename from llama_toolchain/safety/client.py rename to llama_stack/safety/client.py index 26a9813b3..5ae59c66f 100644 --- a/llama_toolchain/safety/client.py +++ b/llama_stack/safety/client.py @@ -13,11 +13,11 @@ import fire import httpx from llama_models.llama3.api.datatypes import UserMessage - -from llama_toolchain.core.datatypes import RemoteProviderConfig from pydantic import BaseModel from termcolor import cprint +from llama_stack.core.datatypes import RemoteProviderConfig + from .api import * # noqa: F403 diff --git a/llama_toolchain/safety/meta_reference/__init__.py b/llama_stack/safety/meta_reference/__init__.py similarity index 100% rename from llama_toolchain/safety/meta_reference/__init__.py rename to llama_stack/safety/meta_reference/__init__.py diff --git a/llama_toolchain/safety/meta_reference/config.py b/llama_stack/safety/meta_reference/config.py similarity index 100% rename from llama_toolchain/safety/meta_reference/config.py rename to llama_stack/safety/meta_reference/config.py diff --git a/llama_toolchain/safety/meta_reference/safety.py b/llama_stack/safety/meta_reference/safety.py similarity index 96% rename from llama_toolchain/safety/meta_reference/safety.py rename to llama_stack/safety/meta_reference/safety.py index 6c75e74e8..209b2975b 100644 --- a/llama_toolchain/safety/meta_reference/safety.py +++ b/llama_stack/safety/meta_reference/safety.py @@ -8,8 +8,8 @@ import asyncio from llama_models.sku_list import resolve_model -from llama_toolchain.common.model_utils import model_local_dir -from llama_toolchain.safety.api import * # noqa +from llama_stack.common.model_utils import model_local_dir +from llama_stack.safety.api import * # noqa from .config import SafetyConfig from .shields import ( diff --git a/llama_toolchain/safety/meta_reference/shields/__init__.py b/llama_stack/safety/meta_reference/shields/__init__.py similarity index 100% rename from llama_toolchain/safety/meta_reference/shields/__init__.py rename to llama_stack/safety/meta_reference/shields/__init__.py diff --git a/llama_toolchain/safety/meta_reference/shields/base.py b/llama_stack/safety/meta_reference/shields/base.py similarity index 97% rename from llama_toolchain/safety/meta_reference/shields/base.py rename to llama_stack/safety/meta_reference/shields/base.py index ed939212d..d6480259a 100644 --- 
a/llama_toolchain/safety/meta_reference/shields/base.py +++ b/llama_stack/safety/meta_reference/shields/base.py @@ -8,7 +8,7 @@ from abc import ABC, abstractmethod from typing import List from llama_models.llama3.api.datatypes import interleaved_text_media_as_str, Message -from llama_toolchain.safety.api import * # noqa: F403 +from llama_stack.safety.api import * # noqa: F403 CANNED_RESPONSE_TEXT = "I can't answer that. Can I help with something else?" diff --git a/llama_toolchain/safety/meta_reference/shields/code_scanner.py b/llama_stack/safety/meta_reference/shields/code_scanner.py similarity index 95% rename from llama_toolchain/safety/meta_reference/shields/code_scanner.py rename to llama_stack/safety/meta_reference/shields/code_scanner.py index 564d15a53..32f52a6dc 100644 --- a/llama_toolchain/safety/meta_reference/shields/code_scanner.py +++ b/llama_stack/safety/meta_reference/shields/code_scanner.py @@ -8,7 +8,7 @@ from codeshield.cs import CodeShield from termcolor import cprint from .base import ShieldResponse, TextShield -from llama_toolchain.safety.api import * # noqa: F403 +from llama_stack.safety.api import * # noqa: F403 class CodeScannerShield(TextShield): diff --git a/llama_toolchain/telemetry/__init__.py b/llama_stack/safety/meta_reference/shields/contrib/__init__.py similarity index 100% rename from llama_toolchain/telemetry/__init__.py rename to llama_stack/safety/meta_reference/shields/contrib/__init__.py diff --git a/llama_toolchain/safety/meta_reference/shields/contrib/third_party_shield.py b/llama_stack/safety/meta_reference/shields/contrib/third_party_shield.py similarity index 93% rename from llama_toolchain/safety/meta_reference/shields/contrib/third_party_shield.py rename to llama_stack/safety/meta_reference/shields/contrib/third_party_shield.py index 61a5977ed..9aa8adea8 100644 --- a/llama_toolchain/safety/meta_reference/shields/contrib/third_party_shield.py +++ b/llama_stack/safety/meta_reference/shields/contrib/third_party_shield.py @@ -8,7 +8,7 @@ from typing import List from llama_models.llama3.api.datatypes import Message -from llama_toolchain.safety.meta_reference.shields.base import ( +from llama_stack.safety.meta_reference.shields.base import ( OnViolationAction, ShieldBase, ShieldResponse, diff --git a/llama_toolchain/safety/meta_reference/shields/llama_guard.py b/llama_stack/safety/meta_reference/shields/llama_guard.py similarity index 99% rename from llama_toolchain/safety/meta_reference/shields/llama_guard.py rename to llama_stack/safety/meta_reference/shields/llama_guard.py index fe04baa00..3cdfeac13 100644 --- a/llama_toolchain/safety/meta_reference/shields/llama_guard.py +++ b/llama_stack/safety/meta_reference/shields/llama_guard.py @@ -14,7 +14,7 @@ from llama_models.llama3.api.datatypes import Message, Role from transformers import AutoModelForCausalLM, AutoTokenizer from .base import CANNED_RESPONSE_TEXT, OnViolationAction, ShieldBase, ShieldResponse -from llama_toolchain.safety.api import * # noqa: F403 +from llama_stack.safety.api import * # noqa: F403 SAFE_RESPONSE = "safe" _INSTANCE = None diff --git a/llama_toolchain/safety/meta_reference/shields/prompt_guard.py b/llama_stack/safety/meta_reference/shields/prompt_guard.py similarity index 99% rename from llama_toolchain/safety/meta_reference/shields/prompt_guard.py rename to llama_stack/safety/meta_reference/shields/prompt_guard.py index a1097a6f7..2e5683a3d 100644 --- a/llama_toolchain/safety/meta_reference/shields/prompt_guard.py +++ 
b/llama_stack/safety/meta_reference/shields/prompt_guard.py @@ -14,7 +14,7 @@ from termcolor import cprint from transformers import AutoModelForSequenceClassification, AutoTokenizer from .base import message_content_as_str, OnViolationAction, ShieldResponse, TextShield -from llama_toolchain.safety.api import * # noqa: F403 +from llama_stack.safety.api import * # noqa: F403 class PromptGuardShield(TextShield): diff --git a/llama_toolchain/safety/providers.py b/llama_stack/safety/providers.py similarity index 71% rename from llama_toolchain/safety/providers.py rename to llama_stack/safety/providers.py index c523e628e..3fb653a34 100644 --- a/llama_toolchain/safety/providers.py +++ b/llama_stack/safety/providers.py @@ -6,7 +6,7 @@ from typing import List -from llama_toolchain.core.datatypes import Api, InlineProviderSpec, ProviderSpec +from llama_stack.core.datatypes import Api, InlineProviderSpec, ProviderSpec def available_providers() -> List[ProviderSpec]: @@ -20,7 +20,7 @@ def available_providers() -> List[ProviderSpec]: "torch", "transformers", ], - module="llama_toolchain.safety.meta_reference", - config_class="llama_toolchain.safety.meta_reference.SafetyConfig", + module="llama_stack.safety.meta_reference", + config_class="llama_stack.safety.meta_reference.SafetyConfig", ), ] diff --git a/llama_stack/stack.py b/llama_stack/stack.py new file mode 100644 index 000000000..69ce8bcd1 --- /dev/null +++ b/llama_stack/stack.py @@ -0,0 +1,34 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_models.llama3.api.datatypes import * # noqa: F403 +from llama_stack.agentic_system.api import * # noqa: F403 +from llama_stack.dataset.api import * # noqa: F403 +from llama_stack.evaluations.api import * # noqa: F403 +from llama_stack.inference.api import * # noqa: F403 +from llama_stack.batch_inference.api import * # noqa: F403 +from llama_stack.memory.api import * # noqa: F403 +from llama_stack.telemetry.api import * # noqa: F403 +from llama_stack.post_training.api import * # noqa: F403 +from llama_stack.reward_scoring.api import * # noqa: F403 +from llama_stack.synthetic_data_generation.api import * # noqa: F403 +from llama_stack.safety.api import * # noqa: F403 + + +class LlamaStack( + Inference, + BatchInference, + AgenticSystem, + RewardScoring, + Safety, + SyntheticDataGeneration, + Datasets, + Telemetry, + PostTraining, + Memory, + Evaluations, +): + pass diff --git a/llama_toolchain/synthetic_data_generation/api/__init__.py b/llama_stack/synthetic_data_generation/api/__init__.py similarity index 100% rename from llama_toolchain/synthetic_data_generation/api/__init__.py rename to llama_stack/synthetic_data_generation/api/__init__.py diff --git a/llama_toolchain/synthetic_data_generation/api/api.py b/llama_stack/synthetic_data_generation/api/api.py similarity index 96% rename from llama_toolchain/synthetic_data_generation/api/api.py rename to llama_stack/synthetic_data_generation/api/api.py index 9a6c487af..f6059fc46 100644 --- a/llama_toolchain/synthetic_data_generation/api/api.py +++ b/llama_stack/synthetic_data_generation/api/api.py @@ -13,7 +13,7 @@ from llama_models.schema_utils import json_schema_type, webmethod from pydantic import BaseModel from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_toolchain.reward_scoring.api import * # noqa: F403 +from llama_stack.reward_scoring.api import * # noqa: F403 class 
FilteringFunction(Enum): diff --git a/llama_toolchain/tools/__init__.py b/llama_stack/telemetry/__init__.py similarity index 100% rename from llama_toolchain/tools/__init__.py rename to llama_stack/telemetry/__init__.py diff --git a/llama_toolchain/telemetry/api/__init__.py b/llama_stack/telemetry/api/__init__.py similarity index 100% rename from llama_toolchain/telemetry/api/__init__.py rename to llama_stack/telemetry/api/__init__.py diff --git a/llama_toolchain/telemetry/api/api.py b/llama_stack/telemetry/api/api.py similarity index 100% rename from llama_toolchain/telemetry/api/api.py rename to llama_stack/telemetry/api/api.py diff --git a/llama_toolchain/telemetry/console/__init__.py b/llama_stack/telemetry/console/__init__.py similarity index 100% rename from llama_toolchain/telemetry/console/__init__.py rename to llama_stack/telemetry/console/__init__.py diff --git a/llama_toolchain/telemetry/console/config.py b/llama_stack/telemetry/console/config.py similarity index 100% rename from llama_toolchain/telemetry/console/config.py rename to llama_stack/telemetry/console/config.py diff --git a/llama_toolchain/telemetry/console/console.py b/llama_stack/telemetry/console/console.py similarity index 97% rename from llama_toolchain/telemetry/console/console.py rename to llama_stack/telemetry/console/console.py index 2e7b9980d..9b855818e 100644 --- a/llama_toolchain/telemetry/console/console.py +++ b/llama_stack/telemetry/console/console.py @@ -6,7 +6,7 @@ from typing import Optional -from llama_toolchain.telemetry.api import * # noqa: F403 +from llama_stack.telemetry.api import * # noqa: F403 from .config import ConsoleConfig diff --git a/llama_toolchain/telemetry/providers.py b/llama_stack/telemetry/providers.py similarity index 69% rename from llama_toolchain/telemetry/providers.py rename to llama_stack/telemetry/providers.py index 00038e569..1359dbbbd 100644 --- a/llama_toolchain/telemetry/providers.py +++ b/llama_stack/telemetry/providers.py @@ -6,7 +6,7 @@ from typing import List -from llama_toolchain.core.datatypes import * # noqa: F403 +from llama_stack.core.datatypes import * # noqa: F403 def available_providers() -> List[ProviderSpec]: @@ -15,7 +15,7 @@ def available_providers() -> List[ProviderSpec]: api=Api.telemetry, provider_id="console", pip_packages=[], - module="llama_toolchain.telemetry.console", - config_class="llama_toolchain.telemetry.console.ConsoleConfig", + module="llama_stack.telemetry.console", + config_class="llama_stack.telemetry.console.ConsoleConfig", ), ] diff --git a/llama_toolchain/telemetry/tracing.py b/llama_stack/telemetry/tracing.py similarity index 99% rename from llama_toolchain/telemetry/tracing.py rename to llama_stack/telemetry/tracing.py index 6afe5c2fb..8375ff32c 100644 --- a/llama_toolchain/telemetry/tracing.py +++ b/llama_stack/telemetry/tracing.py @@ -15,7 +15,7 @@ from functools import wraps from typing import Any, Dict, List -from llama_toolchain.telemetry.api import * # noqa: F403 +from llama_stack.telemetry.api import * # noqa: F403 def generate_short_uuid(len: int = 12): diff --git a/llama_toolchain/tools/custom/__init__.py b/llama_stack/tools/__init__.py similarity index 100% rename from llama_toolchain/tools/custom/__init__.py rename to llama_stack/tools/__init__.py diff --git a/llama_toolchain/tools/base.py b/llama_stack/tools/base.py similarity index 90% rename from llama_toolchain/tools/base.py rename to llama_stack/tools/base.py index 324cce0e2..f81085a15 100644 --- a/llama_toolchain/tools/base.py +++ b/llama_stack/tools/base.py @@ 
-7,7 +7,7 @@ from abc import ABC, abstractmethod from typing import List -from llama_toolchain.inference.api import Message +from llama_stack.inference.api import Message class BaseTool(ABC): diff --git a/llama_toolchain/tools/builtin.py b/llama_stack/tools/builtin.py similarity index 99% rename from llama_toolchain/tools/builtin.py rename to llama_stack/tools/builtin.py index 56fda3723..45ac97b88 100644 --- a/llama_toolchain/tools/builtin.py +++ b/llama_stack/tools/builtin.py @@ -21,8 +21,8 @@ from .ipython_tool.code_execution import ( TOOLS_ATTACHMENT_KEY_REGEX, ) -from llama_toolchain.inference.api import * # noqa: F403 -from llama_toolchain.agentic_system.api import * # noqa: F403 +from llama_stack.inference.api import * # noqa: F403 +from llama_stack.agentic_system.api import * # noqa: F403 from .base import BaseTool diff --git a/llama_toolchain/tools/ipython_tool/__init__.py b/llama_stack/tools/custom/__init__.py similarity index 100% rename from llama_toolchain/tools/ipython_tool/__init__.py rename to llama_stack/tools/custom/__init__.py diff --git a/llama_toolchain/tools/custom/datatypes.py b/llama_stack/tools/custom/datatypes.py similarity index 97% rename from llama_toolchain/tools/custom/datatypes.py rename to llama_stack/tools/custom/datatypes.py index 05b142d6f..8ae1d678d 100644 --- a/llama_toolchain/tools/custom/datatypes.py +++ b/llama_stack/tools/custom/datatypes.py @@ -10,7 +10,7 @@ from abc import abstractmethod from typing import Dict, List from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_toolchain.agentic_system.api import * # noqa: F403 +from llama_stack.agentic_system.api import * # noqa: F403 class CustomTool: diff --git a/llama_stack/tools/ipython_tool/__init__.py b/llama_stack/tools/ipython_tool/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/llama_stack/tools/ipython_tool/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
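
With tools/base.py now resolving Message from llama_stack.inference.api, custom tools are written against the renamed package as well. The sketch below is illustrative only; this patch does not show BaseTool's abstract interface, so the get_name()/run() signatures are assumptions:

    from typing import List

    from llama_stack.inference.api import Message
    from llama_stack.tools.base import BaseTool


    class EchoTool(BaseTool):  # hypothetical example tool, not part of this patch
        def get_name(self) -> str:
            return "echo"

        async def run(self, messages: List[Message]) -> List[Message]:
            # A real tool would perform some action here; this one just
            # returns its input unchanged.
            return messages
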
diff --git a/llama_toolchain/tools/ipython_tool/code_env_prefix.py b/llama_stack/tools/ipython_tool/code_env_prefix.py
similarity index 100%
rename from llama_toolchain/tools/ipython_tool/code_env_prefix.py
rename to llama_stack/tools/ipython_tool/code_env_prefix.py
diff --git a/llama_toolchain/tools/ipython_tool/code_execution.py b/llama_stack/tools/ipython_tool/code_execution.py
similarity index 100%
rename from llama_toolchain/tools/ipython_tool/code_execution.py
rename to llama_stack/tools/ipython_tool/code_execution.py
diff --git a/llama_toolchain/tools/ipython_tool/matplotlib_custom_backend.py b/llama_stack/tools/ipython_tool/matplotlib_custom_backend.py
similarity index 100%
rename from llama_toolchain/tools/ipython_tool/matplotlib_custom_backend.py
rename to llama_stack/tools/ipython_tool/matplotlib_custom_backend.py
diff --git a/llama_toolchain/tools/ipython_tool/utils.py b/llama_stack/tools/ipython_tool/utils.py
similarity index 100%
rename from llama_toolchain/tools/ipython_tool/utils.py
rename to llama_stack/tools/ipython_tool/utils.py
diff --git a/llama_toolchain/tools/safety.py b/llama_stack/tools/safety.py
similarity index 88%
rename from llama_toolchain/tools/safety.py
rename to llama_stack/tools/safety.py
index 24051af8a..5a33bbadb 100644
--- a/llama_toolchain/tools/safety.py
+++ b/llama_stack/tools/safety.py
@@ -6,10 +6,10 @@

 from typing import List

-from llama_toolchain.agentic_system.meta_reference.safety import ShieldRunnerMixin
+from llama_stack.agentic_system.meta_reference.safety import ShieldRunnerMixin

-from llama_toolchain.inference.api import Message
-from llama_toolchain.safety.api import Safety, ShieldDefinition
+from llama_stack.inference.api import Message
+from llama_stack.safety.api import Safety, ShieldDefinition

 from .builtin import BaseTool
diff --git a/llama_toolchain/agentic_system/__init__.py b/llama_toolchain/agentic_system/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/llama_toolchain/stack.py b/llama_toolchain/stack.py
deleted file mode 100644
index 1e2976ab3..000000000
--- a/llama_toolchain/stack.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_toolchain.agentic_system.api import *  # noqa: F403
-from llama_toolchain.dataset.api import *  # noqa: F403
-from llama_toolchain.evaluations.api import *  # noqa: F403
-from llama_toolchain.inference.api import *  # noqa: F403
-from llama_toolchain.batch_inference.api import *  # noqa: F403
-from llama_toolchain.memory.api import *  # noqa: F403
-from llama_toolchain.telemetry.api import *  # noqa: F403
-from llama_toolchain.post_training.api import *  # noqa: F403
-from llama_toolchain.reward_scoring.api import *  # noqa: F403
-from llama_toolchain.synthetic_data_generation.api import *  # noqa: F403
-from llama_toolchain.safety.api import *  # noqa: F403
-
-
-class LlamaStack(
-    Inference,
-    BatchInference,
-    AgenticSystem,
-    RewardScoring,
-    Safety,
-    SyntheticDataGeneration,
-    Datasets,
-    Telemetry,
-    PostTraining,
-    Memory,
-    Evaluations,
-):
-    pass
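The deleted module is a pure aggregation point: LlamaStack inherits every API class so that a single type exposes the whole endpoint surface, which is what the OpenAPI generator imports (see the rfcs/openapi_generator/generate.py hunk below). A minimal sketch of the pattern, with hypothetical two-API scope and method names:

from typing import Protocol

class Inference(Protocol):
    def chat_completion(self, request: object) -> object: ...

class Safety(Protocol):
    def run_shields(self, request: object) -> object: ...

class MiniStack(Inference, Safety):
    """Union of the API surfaces; introspecting one class yields all endpoints."""

# e.g. a generator can enumerate every declared endpoint from the one class:
endpoints = [name for name in dir(MiniStack) if not name.startswith("_")]
# -> ['chat_completion', 'run_shields']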
diff --git a/rfcs/RFC-0001-llama-stack.md b/rfcs/RFC-0001-llama-stack.md
index 805e8cd84..a5fd83075 100644
--- a/rfcs/RFC-0001-llama-stack.md
+++ b/rfcs/RFC-0001-llama-stack.md
@@ -1,19 +1,19 @@
 # The Llama Stack API

 **Authors:**
-* Meta: @raghotham, @ashwinb, @hjshah, @jspisak 
+* Meta: @raghotham, @ashwinb, @hjshah, @jspisak

 ## Summary

 As part of the Llama 3.1 release, Meta is releasing an RFC for ‘Llama Stack’, a comprehensive set of interfaces / API for ML developers building on top of Llama foundation models. We are looking for feedback on where the API can be improved, any corner cases we may have missed and your general thoughts on how useful this will be. Ultimately, our hope is to create a standard for working with Llama models in order to simplify the developer experience and foster innovation across the Llama ecosystem.

 ## Motivation

-Llama models were always intended to work as part of an overall system that can orchestrate several components, including calling external tools. Our vision is to go beyond the foundation models and give developers access to a broader system that gives them the flexibility to design and create custom offerings that align with their vision. This thinking started last year when we first introduced a system-level safety model. Meta has continued to release new components for orchestration at the system level and, most recently in Llama 3.1, we’ve introduced the Llama Guard 3 safety model that is multilingual, a prompt injection filter, Prompt Guard and refreshed v3 of our CyberSec Evals. We are also releasing a reference implementation of an agentic system to demonstrate how all the pieces fit together. 
+Llama models were always intended to work as part of an overall system that can orchestrate several components, including calling external tools. Our vision is to go beyond the foundation models and give developers access to a broader system that gives them the flexibility to design and create custom offerings that align with their vision. This thinking started last year when we first introduced a system-level safety model. Meta has continued to release new components for orchestration at the system level and, most recently in Llama 3.1, we’ve introduced the Llama Guard 3 safety model that is multilingual, a prompt injection filter, Prompt Guard and refreshed v3 of our CyberSec Evals. We are also releasing a reference implementation of an agentic system to demonstrate how all the pieces fit together.

-While building the reference implementation, we realized that having a clean and consistent way to interface between components could be valuable not only for us but for anyone leveraging Llama models and other components as part of their system. We’ve also heard from the community as they face a similar challenge as components exist with overlapping functionality and there are incompatible interfaces and yet don't cover the end-to-end model life cycle. 
+While building the reference implementation, we realized that having a clean and consistent way to interface between components could be valuable not only for us but for anyone leveraging Llama models and other components as part of their system. We’ve also heard from the community as they face a similar challenge as components exist with overlapping functionality and there are incompatible interfaces and yet don't cover the end-to-end model life cycle.

 With these motivations, we engaged folks in industry, startups, and the broader developer community to help better define the interfaces of these components. We’re releasing this Llama Stack RFC as a set of standardized and opinionated interfaces for how to surface canonical toolchain components (like inference, fine-tuning, evals, synthetic data generation) and agentic applications to ML developers. Our hope is to have these become well adopted across the ecosystem, which should help with easier interoperability. We would like for builders of multiple components to provide implementations to these standard APIs so that there can be vertically integrated “distributions” of the Llama Stack that can work out of the box easily.

-We welcome feedback and ways to improve the proposal. We’re excited to grow the ecosystem around Llama and lower barriers for both developers and platform providers. 
+We welcome feedback and ways to improve the proposal. We’re excited to grow the ecosystem around Llama and lower barriers for both developers and platform providers.

 ## Design decisions

 Meta releases weights of both the pretrained and instruction fine-tuned Llama models to support several use cases. These weights can be improved - fine tuned and aligned - with curated datasets to then be deployed for inference to support specific applications. The curated datasets can be produced manually by humans or synthetically by other models or by leveraging human feedback by collecting usage data of the application itself. This results in a continuous improvement cycle where the model gets better over time. This is the model life cycle.
@@ -42,8 +42,8 @@ Note that as of today, in the OSS world, such a “loop” is often coded explic
 **Let's consider an example:**

 1. The user asks the system "Who played the NBA finals last year?"
-1. The model "understands" that this question needs to be answered using web search. It answers this abstractly with a message of the form "Please call the search tool for me with the query: 'List finalist teams for NBA in the last year' ". Note that the model by itself does not call the tool (of course!) 
-1. The executor consults the set of tool implementations which have been configured by the developer to find an implementation for the "search tool". If it does not find it, it returns an error to the model. Otherwise, it executes this tool and returns the result of this tool back to the model. 
+1. The model "understands" that this question needs to be answered using web search. It answers this abstractly with a message of the form "Please call the search tool for me with the query: 'List finalist teams for NBA in the last year' ". Note that the model by itself does not call the tool (of course!)
+1. The executor consults the set of tool implementations which have been configured by the developer to find an implementation for the "search tool". If it does not find it, it returns an error to the model. Otherwise, it executes this tool and returns the result of this tool back to the model.
 1. The model reasons once again (using all the messages above) and decides to send a final response "In 2023, Denver Nuggets played against the Miami Heat in the NBA finals." to the executor
 1. The executor returns the response directly to the user (since there is no tool call to be executed.)
@@ -73,14 +73,14 @@ The API is defined in the [YAML](RFC-0001-llama-stack-assets/llama-stack-spec.ya
 ## Sample implementations

-To prove out the API, we implemented a handful of use cases to make things more concrete. The [llama-agentic-system](https://github.com/meta-llama/llama-agentic-system) repository contains [6 different examples](https://github.com/meta-llama/llama-agentic-system/tree/main/examples/scripts) ranging from very basic to a multi turn agent. 
+To prove out the API, we implemented a handful of use cases to make things more concrete. The [llama-agentic-system](https://github.com/meta-llama/llama-agentic-system) repository contains [6 different examples](https://github.com/meta-llama/llama-agentic-system/tree/main/examples/scripts) ranging from very basic to a multi turn agent.

-There is also a sample inference endpoint implementation in the [llama-toolchain](https://github.com/meta-llama/llama-toolchain/blob/main/llama_toolchain/inference/server.py) repository.
+There is also a sample inference endpoint implementation in the [llama-toolchain](https://github.com/meta-llama/llama-toolchain/blob/main/llama_stack/inference/server.py) repository.

 ## Limitations

 The reference implementation for Llama Stack APIs to date only includes sample implementations using the inference API. We are planning to flesh out the design of Llama Stack Distributions (distros) by combining capabilities from different providers into a single vertically integrated stack. We plan to implement other APIs and, of course, we’d love contributions!!

-Thank you in advance for your feedback, support and contributions to make this a better API. 
+Thank you in advance for your feedback, support and contributions to make this a better API.

 Cheers!
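The executor loop in the RFC's NBA-finals example above is easier to follow as code than as prose. A compact sketch of the control flow it describes; everything here (message shapes, method names) is illustrative, not the actual llama_stack API:

def run_turn(model, tools: dict, user_message: str) -> str:
    """Executor loop: alternate between the model and the configured tools."""
    messages = [{"role": "user", "content": user_message}]
    while True:
        reply = model.chat(messages)          # model answers or asks for a tool
        messages.append(reply)
        call = reply.get("tool_call")
        if call is None:
            return reply["content"]           # no tool call: return directly to the user
        tool = tools.get(call["name"])
        if tool is None:                      # unknown tool: report an error to the model
            result = f"error: no tool named {call['name']!r} is configured"
        else:
            result = tool(call["arguments"])  # execute the tool, feed the result back
        messages.append({"role": "tool", "content": result})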
diff --git a/rfcs/openapi_generator/README.md b/rfcs/openapi_generator/README.md
index 023486534..9d407905d 100644
--- a/rfcs/openapi_generator/README.md
+++ b/rfcs/openapi_generator/README.md
@@ -1,4 +1,4 @@
-The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_toolchain/[]/api/endpoints.py` using the `generate.py` utility.
+The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack/[]/api/endpoints.py` using the `generate.py` utility.

 Please install the following packages before running the script:
diff --git a/rfcs/openapi_generator/generate.py b/rfcs/openapi_generator/generate.py
index 279389a47..bdc67fbed 100644
--- a/rfcs/openapi_generator/generate.py
+++ b/rfcs/openapi_generator/generate.py
@@ -31,7 +31,7 @@ from .pyopenapi.utility import Specification

 schema_utils.json_schema_type = json_schema_type

-from llama_toolchain.stack import LlamaStack
+from llama_stack.stack import LlamaStack


 # TODO: this should be fixed in the generator itself so it reads appropriate annotations
diff --git a/setup.py b/setup.py
index 7273bee51..f7f06bdf4 100644
--- a/setup.py
+++ b/setup.py
@@ -15,15 +15,15 @@ def read_requirements():

 setup(
-    name="llama_toolchain",
+    name="llama_stack",
     version="0.0.16",
     author="Meta Llama",
     author_email="llama-oss@meta.com",
     description="Llama toolchain",
     entry_points={
         "console_scripts": [
-            "llama = llama_toolchain.cli.llama:main",
-            "install-wheel-from-presigned = llama_toolchain.cli.scripts.run:install_wheel_from_presigned",
+            "llama = llama_stack.cli.llama:main",
+            "install-wheel-from-presigned = llama_stack.cli.scripts.run:install_wheel_from_presigned",
         ]
     },
     long_description=open("README.md").read(),
diff --git a/tests/example_custom_tool.py b/tests/example_custom_tool.py
index ec338982e..f03f18e39 100644
--- a/tests/example_custom_tool.py
+++ b/tests/example_custom_tool.py
@@ -7,7 +7,7 @@
 from typing import Dict

 from llama_models.llama3.api.datatypes import ToolParamDefinition
-from llama_toolchain.tools.custom.datatypes import SingleMessageCustomTool
+from llama_stack.tools.custom.datatypes import SingleMessageCustomTool


 class GetBoilingPointTool(SingleMessageCustomTool):
diff --git a/tests/test_e2e.py b/tests/test_e2e.py
index ea0246f20..24fc651bd 100644
--- a/tests/test_e2e.py
+++ b/tests/test_e2e.py
@@ -11,12 +11,12 @@ import os
 import unittest

-from llama_toolchain.agentic_system.event_logger import EventLogger, LogEvent
-from llama_toolchain.agentic_system.utils import get_agent_system_instance
+from llama_stack.agentic_system.event_logger import EventLogger, LogEvent
+from llama_stack.agentic_system.utils import get_agent_system_instance

 from llama_models.llama3.api.datatypes import *  # noqa: F403
-from llama_toolchain.agentic_system.api.datatypes import StepType
-from llama_toolchain.tools.custom.datatypes import CustomTool
+from llama_stack.agentic_system.api.datatypes import StepType
+from llama_stack.tools.custom.datatypes import CustomTool

 from tests.example_custom_tool import GetBoilingPointTool
diff --git a/tests/test_inference.py b/tests/test_inference.py
index 800046355..ba062046d 100644
--- a/tests/test_inference.py
+++ b/tests/test_inference.py
@@ -1,3 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
 # Run this test using the following command:
 # python -m unittest tests/test_inference.py

@@ -19,12 +25,12 @@ from llama_models.llama3.api.datatypes import (
     UserMessage,
 )

-from llama_toolchain.inference.api import (
+from llama_stack.inference.api import (
     ChatCompletionRequest,
     ChatCompletionResponseEventType,
 )
-from llama_toolchain.inference.meta_reference.config import MetaReferenceImplConfig
-from llama_toolchain.inference.meta_reference.inference import get_provider_impl
+from llama_stack.inference.meta_reference.config import MetaReferenceImplConfig
+from llama_stack.inference.meta_reference.inference import get_provider_impl

 MODEL = "Meta-Llama3.1-8B-Instruct"
diff --git a/tests/test_ollama_inference.py b/tests/test_ollama_inference.py
index c3cef3a10..878e52991 100644
--- a/tests/test_ollama_inference.py
+++ b/tests/test_ollama_inference.py
@@ -1,3 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
 import textwrap
 import unittest
 from datetime import datetime
@@ -14,12 +20,12 @@ from llama_models.llama3.api.datatypes import (
     ToolResponseMessage,
     UserMessage,
 )

-from llama_toolchain.inference.api import (
+from llama_stack.inference.api import (
     ChatCompletionRequest,
     ChatCompletionResponseEventType,
 )
-from llama_toolchain.inference.ollama.config import OllamaImplConfig
-from llama_toolchain.inference.ollama.ollama import get_provider_impl
+from llama_stack.inference.ollama.config import OllamaImplConfig
+from llama_stack.inference.ollama.ollama import get_provider_impl


 class OllamaInferenceTests(unittest.IsolatedAsyncioTestCase):
diff --git a/tests/test_prepare_messages.py b/tests/test_prepare_messages.py
index 49624b04d..df3473b4c 100644
--- a/tests/test_prepare_messages.py
+++ b/tests/test_prepare_messages.py
@@ -1,8 +1,14 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
 import unittest

 from llama_models.llama3.api import *  # noqa: F403
-from llama_toolchain.inference.api import *  # noqa: F403
-from llama_toolchain.inference.prepare_messages import prepare_messages
+from llama_stack.inference.api import *  # noqa: F403
+from llama_stack.inference.prepare_messages import prepare_messages

 MODEL = "Meta-Llama3.1-8B-Instruct"
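Since every hunk in this patch is the same mechanical substitution, downstream projects can usually port themselves the same way. A minimal sketch, assuming a plain textual rename is safe for the target tree (review the result before committing):

import pathlib

for path in pathlib.Path(".").rglob("*.py"):
    text = path.read_text()
    if "llama_toolchain" in text:
        # Rewrite import statements and dotted-string references in one pass.
        path.write_text(text.replace("llama_toolchain", "llama_stack"))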