From 4d8bfcea1b9c2ed2e9788bd1f21706a5cced13b1 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Tue, 29 Oct 2024 14:14:05 -0700
Subject: [PATCH] docs

---
 docs/source/api_providers/index.md            |  3 +-
 docs/source/api_providers/memory_api.md       | 53 ++++++++++++++++
 .../distributions/meta-reference-gpu.md       | 61 +++----------------
 3 files changed, 65 insertions(+), 52 deletions(-)
 create mode 100644 docs/source/api_providers/memory_api.md

diff --git a/docs/source/api_providers/index.md b/docs/source/api_providers/index.md
index f4352b043..134752151 100644
--- a/docs/source/api_providers/index.md
+++ b/docs/source/api_providers/index.md
@@ -7,7 +7,8 @@ As an example, for Inference, we could have the implementation be backed by open
 A provider can also be just a pointer to a remote REST service -- for example, cloud providers or dedicated inference providers could serve these APIs.
 
 ```{toctree}
-:maxdepth: 2
+:maxdepth: 1
 
 new_api_provider
+memory_api
 ```
diff --git a/docs/source/api_providers/memory_api.md b/docs/source/api_providers/memory_api.md
new file mode 100644
index 000000000..bed688c4b
--- /dev/null
+++ b/docs/source/api_providers/memory_api.md
@@ -0,0 +1,53 @@
+# Memory API Providers
+
+### Start Distribution With pgvector/chromadb Memory Provider
+
+##### pgvector
+1. Start running the pgvector server:
+
+```
+docker run --network host --name mypostgres -it -p 5432:5432 -e POSTGRES_PASSWORD=mysecretpassword -e POSTGRES_USER=postgres -e POSTGRES_DB=postgres pgvector/pgvector:pg16
+```
+
+2. Edit the `run.yaml` file to point to the pgvector server.
+```
+memory:
+  - provider_id: pgvector
+    provider_type: remote::pgvector
+    config:
+      host: 127.0.0.1
+      port: 5432
+      db: postgres
+      user: postgres
+      password: mysecretpassword
+```
+
+> [!NOTE]
+> If you get a `RuntimeError: Vector extension is not installed.` error, you will need to run `CREATE EXTENSION IF NOT EXISTS vector;` to install the vector extension. E.g.
+
+```
+docker exec -it mypostgres ./bin/psql -U postgres
+postgres=# CREATE EXTENSION IF NOT EXISTS vector;
+postgres=# SELECT extname from pg_extension;
+ extname
+```
+
+3. Run `docker compose up` with the updated `run.yaml` file.
+
+##### chromadb
+1. Start running the chromadb server:
+```
+docker run -it --network host --name chromadb -p 6000:6000 -v ./chroma_vdb:/chroma/chroma -e IS_PERSISTENT=TRUE chromadb/chroma:latest
+```
+
+2. Edit the `run.yaml` file to point to the chromadb server.
+```
+memory:
+  - provider_id: chromadb
+    provider_type: remote::chromadb
+    config:
+      host: localhost
+      port: 6000
+```
+
+3. Run `docker compose up` with the updated `run.yaml` file.
diff --git a/docs/source/getting_started/distributions/meta-reference-gpu.md b/docs/source/getting_started/distributions/meta-reference-gpu.md
index 5c576122f..c6405173b 100644
--- a/docs/source/getting_started/distributions/meta-reference-gpu.md
+++ b/docs/source/getting_started/distributions/meta-reference-gpu.md
@@ -17,13 +17,10 @@ Llama3.1-8B Llama3.2-11B-Vision-Instruct Llama3.2-1B-Instruct Llama3
 Llama3.1-8B-Instruct Llama3.2-1B Llama3.2-3B-Instruct Llama-Guard-3-1B Prompt-Guard-86M
 ```
 
-### Start the Distribution (Single Node GPU)
+### Docker: Start the Distribution
 
 ```
-$ cd distributions/meta-reference-gpu
-$ ls
-build.yaml compose.yaml README.md run.yaml
-$ docker compose up
+$ cd distributions/meta-reference-gpu && docker compose up
 ```
 
 > [!NOTE]
@@ -40,60 +37,22 @@ This will download and start running a pre-built docker container. Alternatively
 docker run -it -p 5000:5000 -v ~/.llama:/root/.llama -v ./run.yaml:/root/my-run.yaml --gpus=all distribution-meta-reference-gpu --yaml_config /root/my-run.yaml
 ```
 
-### Alternative (Build and start distribution locally via conda)
-- You may checkout the [Getting Started](../../docs/getting_started.md) for more details on building locally via conda and starting up a meta-reference distribution. 
+### Conda: Start the Distribution
 
-### Start Distribution With pgvector/chromadb Memory Provider
-##### pgvector
-1. Start running the pgvector server:
+1. Install the `llama` CLI. See [CLI Reference]()
+
+2. Build the `meta-reference-gpu` distribution
 
 ```
-docker run --network host --name mypostgres -it -p 5432:5432 -e POSTGRES_PASSWORD=mysecretpassword -e POSTGRES_USER=postgres -e POSTGRES_DB=postgres pgvector/pgvector:pg16
+$ llama stack build --template meta-reference-gpu --image-type conda
 ```
 
-2. Edit the `run.yaml` file to point to the pgvector server.
+3. Start running the distribution
 ```
-memory:
-  - provider_id: pgvector
-    provider_type: remote::pgvector
-    config:
-      host: 127.0.0.1
-      port: 5432
-      db: postgres
-      user: postgres
-      password: mysecretpassword
+$ cd distributions/meta-reference-gpu
+$ llama stack run ./run.yaml
 ```
 
-> [!NOTE]
-> If you get a `RuntimeError: Vector extension is not installed.`. You will need to run `CREATE EXTENSION IF NOT EXISTS vector;` to include the vector extension. E.g. 
-
-```
-docker exec -it mypostgres ./bin/psql -U postgres
-postgres=# CREATE EXTENSION IF NOT EXISTS vector;
-postgres=# SELECT extname from pg_extension;
- extname
-```
-
-3. Run `docker compose up` with the updated `run.yaml` file.
-
-##### chromadb
-1. Start running chromadb server
-```
-docker run -it --network host --name chromadb -p 6000:6000 -v ./chroma_vdb:/chroma/chroma -e IS_PERSISTENT=TRUE chromadb/chroma:latest
-```
-
-2. Edit the `run.yaml` file to point to the chromadb server.
-```
-memory:
-  - provider_id: remote::chromadb
-    provider_type: remote::chromadb
-    config:
-      host: localhost
-      port: 6000
-```
-
-3. Run `docker compose up` with the updated `run.yaml` file.
-
 ### Serving a new model
 You may change the `config.model` in `run.yaml` to update the model currently being served by the distribution. Make sure you have the model checkpoint downloaded in your `~/.llama`.
 ```