From c2d2a80b0ae9caefb77fd78f9799493e06f9f5b5 Mon Sep 17 00:00:00 2001
From: Reid <61492567+reidliu41@users.noreply.github.com>
Date: Fri, 28 Feb 2025 08:46:38 +0800
Subject: [PATCH] docs: update the output of llama-stack-client models list
 (#1271)

# What does this PR do?
Update the documentation so that the sample output of `llama-stack-client models list` matches the current CLI, which now renders a rich table with `model_type`, `identifier`, `provider_resource_id`, `metadata`, and `provider_id` columns followed by a total-model count. Also refresh the `ollama ps` example to reference `llama3.2:3b-instruct-fp16`.

## Test Plan
Ran `ollama ps` and `llama-stack-client models list` against a local Ollama-backed Llama Stack server; the outputs shown in the updated docs were captured from those runs.

Signed-off-by: reidliu
Co-authored-by: reidliu
---
 .../self_hosted_distro/ollama.md              | 20 +++++++++++--------
 docs/source/getting_started/index.md          | 16 ++++++++++-----
 .../llama_stack_client_cli_reference.md       | 14 ++++++++-----
 llama_stack/templates/ollama/doc_template.md  | 20 +++++++++++--------
 4 files changed, 44 insertions(+), 26 deletions(-)

diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md
index 80d84b402..8f23cef43 100644
--- a/docs/source/distributions/self_hosted_distro/ollama.md
+++ b/docs/source/distributions/self_hosted_distro/ollama.md
@@ -141,17 +141,21 @@ ollama run
 To make sure that the model is being served correctly, run `ollama ps` to get a list of models being served by ollama.
 ```
 $ ollama ps
-
-NAME                         ID              SIZE     PROCESSOR    UNTIL
-llama3.1:8b-instruct-fp16    4aacac419454    17 GB    100% GPU     4 minutes from now
+NAME                         ID              SIZE      PROCESSOR    UNTIL
+llama3.2:3b-instruct-fp16    195a8c01d91e    8.6 GB    100% GPU     9 minutes from now
 ```
 
 To verify that the model served by ollama is correctly connected to Llama Stack server
 ```bash
 $ llama-stack-client models list
-+----------------------+----------------------+---------------+-----------------------------------------------+
-| identifier           | llama_model          | provider_id   | metadata                                      |
-+======================+======================+===============+===============================================+
-| Llama3.1-8B-Instruct | Llama3.1-8B-Instruct | ollama0       | {'ollama_model': 'llama3.1:8b-instruct-fp16'} |
-+----------------------+----------------------+---------------+-----------------------------------------------+
+
+Available Models
+
+┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ model_type   ┃ identifier                           ┃ provider_resource_id         ┃ metadata  ┃ provider_id ┃
+┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ llm          │ meta-llama/Llama-3.2-3B-Instruct     │ llama3.2:3b-instruct-fp16    │           │ ollama      │
+└──────────────┴──────────────────────────────────────┴──────────────────────────────┴───────────┴─────────────┘
+
+Total models: 1
 ```
diff --git a/docs/source/getting_started/index.md b/docs/source/getting_started/index.md
index f017a9723..ecef20d55 100644
--- a/docs/source/getting_started/index.md
+++ b/docs/source/getting_started/index.md
@@ -102,12 +102,18 @@ Let's use the `llama-stack-client` CLI to check the connectivity to the server.
 $ llama-stack-client configure --endpoint http://localhost:$LLAMA_STACK_PORT
 > Enter the API key (leave empty if no key is needed):
 Done! You can now use the Llama Stack Client CLI with endpoint http://localhost:8321
+
 $ llama-stack-client models list
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓
-┃ identifier                       ┃ provider_id ┃ provider_resource_id      ┃ metadata ┃
-┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩
-│ meta-llama/Llama-3.2-3B-Instruct │ ollama      │ llama3.2:3b-instruct-fp16 │          │
-└──────────────────────────────────┴─────────────┴───────────────────────────┴──────────┘
+
+Available Models
+
+┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ model_type   ┃ identifier                           ┃ provider_resource_id         ┃ metadata  ┃ provider_id ┃
+┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ llm          │ meta-llama/Llama-3.2-3B-Instruct     │ llama3.2:3b-instruct-fp16    │           │ ollama      │
+└──────────────┴──────────────────────────────────────┴──────────────────────────────┴───────────┴─────────────┘
+
+Total models: 1
 ```
 
 You can test basic Llama inference completion using the CLI too.
diff --git a/docs/source/references/llama_stack_client_cli_reference.md b/docs/source/references/llama_stack_client_cli_reference.md
index bf99f2b57..26b81cf92 100644
--- a/docs/source/references/llama_stack_client_cli_reference.md
+++ b/docs/source/references/llama_stack_client_cli_reference.md
@@ -58,11 +58,15 @@ llama-stack-client providers list
 llama-stack-client models list
 ```
 ```
-+----------------------+----------------------+---------------+----------------------------------------------------------+
-| identifier           | llama_model          | provider_id   | metadata                                                 |
-+======================+======================+===============+==========================================================+
-| Llama3.1-8B-Instruct | Llama3.1-8B-Instruct | tgi0          | {'huggingface_repo': 'meta-llama/Llama-3.1-8B-Instruct'} |
-+----------------------+----------------------+---------------+----------------------------------------------------------+
+Available Models
+
+┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ model_type   ┃ identifier                           ┃ provider_resource_id         ┃ metadata  ┃ provider_id ┃
+┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ llm          │ meta-llama/Llama-3.2-3B-Instruct     │ llama3.2:3b-instruct-fp16    │           │ ollama      │
+└──────────────┴──────────────────────────────────────┴──────────────────────────────┴───────────┴─────────────┘
+
+Total models: 1
 ```
 
 ### `llama-stack-client models get`
diff --git a/llama_stack/templates/ollama/doc_template.md b/llama_stack/templates/ollama/doc_template.md
index 1d95e4b65..e5444d3da 100644
--- a/llama_stack/templates/ollama/doc_template.md
+++ b/llama_stack/templates/ollama/doc_template.md
@@ -130,17 +130,21 @@ ollama run
 To make sure that the model is being served correctly, run `ollama ps` to get a list of models being served by ollama.
 ```
 $ ollama ps
-
-NAME                         ID              SIZE     PROCESSOR    UNTIL
-llama3.1:8b-instruct-fp16    4aacac419454    17 GB    100% GPU     4 minutes from now
+NAME                         ID              SIZE      PROCESSOR    UNTIL
+llama3.2:3b-instruct-fp16    195a8c01d91e    8.6 GB    100% GPU     9 minutes from now
 ```
 
 To verify that the model served by ollama is correctly connected to Llama Stack server
 ```bash
 $ llama-stack-client models list
-+----------------------+----------------------+---------------+-----------------------------------------------+
-| identifier           | llama_model          | provider_id   | metadata                                      |
-+======================+======================+===============+===============================================+
-| Llama3.1-8B-Instruct | Llama3.1-8B-Instruct | ollama0       | {'ollama_model': 'llama3.1:8b-instruct-fp16'} |
-+----------------------+----------------------+---------------+-----------------------------------------------+
+
+Available Models
+
+┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ model_type   ┃ identifier                           ┃ provider_resource_id         ┃ metadata  ┃ provider_id ┃
+┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ llm          │ meta-llama/Llama-3.2-3B-Instruct     │ llama3.2:3b-instruct-fp16    │           │ ollama      │
+└──────────────┴──────────────────────────────────────┴──────────────────────────────┴───────────┴─────────────┘
+
+Total models: 1
 ```
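
To reproduce the output shown in the updated docs, a minimal verification sketch (assumptions: Ollama is installed locally, a Llama Stack server is already running on port 8321 as in the docs, and the `llama-stack-client` CLI is on the PATH; the model tag and `--keepalive` flag mirror the docs this patch touches):

```bash
# Serve the model referenced by the updated docs; --keepalive keeps it
# loaded long enough to inspect (exit the interactive prompt afterwards).
ollama run llama3.2:3b-instruct-fp16 --keepalive 60m

# Confirm ollama is serving the model.
ollama ps

# Point the client at the running Llama Stack server, then list the
# registered models to see the rich-table output captured in this patch.
llama-stack-client configure --endpoint http://localhost:8321
llama-stack-client models list
```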