diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ab9c4d82e..7e05c683a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -22,6 +22,7 @@ pip install -r requirements.txt pip install sphinx-autobuild # This will start a local server (usually at http://127.0.0.1:8000) that automatically rebuilds and refreshes when you make changes to the documentation. +make html sphinx-autobuild source build/html ``` diff --git a/docs/source/getting_started/distributions/remote_hosted_distro/index.md b/docs/source/getting_started/distributions/remote_hosted_distro/index.md index 719f2f301..76d5fdf27 100644 --- a/docs/source/getting_started/distributions/remote_hosted_distro/index.md +++ b/docs/source/getting_started/distributions/remote_hosted_distro/index.md @@ -1,15 +1,42 @@ # Remote-Hosted Distribution -Remote Hosted distributions are distributions connecting to remote hosted services through Llama Stack server. Inference is done through remote providers. These are useful if you have an API key for a remote inference provider like Fireworks, Together, etc. +Remote-Hosted distributions are available endpoints serving the Llama Stack API that you can connect to directly. 
-| **Distribution** | **Llama Stack Docker** | Start This Distribution | **Inference** | **Agents** | **Memory** | **Safety** | **Telemetry** | -|:----------------: |:------------------------------------------: |:-----------------------: |:------------------: |:------------------: |:------------------: |:------------------: |:------------------: | -| Together | [llamastack/distribution-together](https://hub.docker.com/repository/docker/llamastack/distribution-together/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/remote_hosted_distro/together.html) | remote::together | meta-reference | remote::weaviate | meta-reference | meta-reference | -| Fireworks | [llamastack/distribution-fireworks](https://hub.docker.com/repository/docker/llamastack/distribution-fireworks/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/remote_hosted_distro/fireworks.html) | remote::fireworks | meta-reference | remote::weaviate | meta-reference | meta-reference | +| Distribution | Endpoint | Inference | Agents | Memory | Safety | Telemetry | +|-------------|----------|-----------|---------|---------|---------|------------| +| Together | [https://llama-stack.together.ai](https://llama-stack.together.ai) | remote::together | meta-reference | remote::weaviate | meta-reference | meta-reference | +| Fireworks | [https://llamastack-preview.fireworks.ai](https://llamastack-preview.fireworks.ai) | remote::fireworks | meta-reference | remote::weaviate | meta-reference | meta-reference | -```{toctree} -:maxdepth: 1 +## Connecting to Remote-Hosted Distributions -fireworks -together +You can use `llama-stack-client` to interact with these endpoints. 
For example, to list the available models served by the Fireworks endpoint: + +```bash +$ pip install llama-stack-client +$ llama-stack-client configure --endpoint https://llamastack-preview.fireworks.ai +$ llama-stack-client models list ``` + +You will see output similar to the following: +``` +$ llama-stack-client models list ++------------------------------+------------------------------+---------------+------------+ +| identifier | llama_model | provider_id | metadata | ++==============================+==============================+===============+============+ +| Llama3.1-8B-Instruct | Llama3.1-8B-Instruct | fireworks0 | {} | ++------------------------------+------------------------------+---------------+------------+ +| Llama3.1-70B-Instruct | Llama3.1-70B-Instruct | fireworks0 | {} | ++------------------------------+------------------------------+---------------+------------+ +| Llama3.1-405B-Instruct | Llama3.1-405B-Instruct | fireworks0 | {} | ++------------------------------+------------------------------+---------------+------------+ +| Llama3.2-1B-Instruct | Llama3.2-1B-Instruct | fireworks0 | {} | ++------------------------------+------------------------------+---------------+------------+ +| Llama3.2-3B-Instruct | Llama3.2-3B-Instruct | fireworks0 | {} | ++------------------------------+------------------------------+---------------+------------+ +| Llama3.2-11B-Vision-Instruct | Llama3.2-11B-Vision-Instruct | fireworks0 | {} | ++------------------------------+------------------------------+---------------+------------+ +| Llama3.2-90B-Vision-Instruct | Llama3.2-90B-Vision-Instruct | fireworks0 | {} | ++------------------------------+------------------------------+---------------+------------+ +``` + +Check out the [llama-stack-client-python](https://github.com/meta-llama/llama-stack-client-python/blob/main/docs/cli_reference.md) repo for more details on how to use the `llama-stack-client` CLI. 
Check out [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main) for example applications built on top of Llama Stack. diff --git a/docs/source/getting_started/distributions/remote_hosted_distro/bedrock.md b/docs/source/getting_started/distributions/self_hosted_distro/bedrock.md similarity index 100% rename from docs/source/getting_started/distributions/remote_hosted_distro/bedrock.md rename to docs/source/getting_started/distributions/self_hosted_distro/bedrock.md diff --git a/docs/source/getting_started/distributions/remote_hosted_distro/fireworks.md b/docs/source/getting_started/distributions/self_hosted_distro/fireworks.md similarity index 100% rename from docs/source/getting_started/distributions/remote_hosted_distro/fireworks.md rename to docs/source/getting_started/distributions/self_hosted_distro/fireworks.md diff --git a/docs/source/getting_started/distributions/self_hosted_distro/index.md b/docs/source/getting_started/distributions/self_hosted_distro/index.md index a2f3876ec..ed6ab5d7f 100644 --- a/docs/source/getting_started/distributions/self_hosted_distro/index.md +++ b/docs/source/getting_started/distributions/self_hosted_distro/index.md @@ -8,6 +8,10 @@ We offer deployable distributions where you can host your own Llama Stack server | Meta Reference Quantized | [llamastack/distribution-meta-reference-quantized-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-quantized-gpu/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/self_hosted_distro/meta-reference-quantized-gpu.html) | meta-reference-quantized | meta-reference | meta-reference; remote::pgvector; remote::chromadb | meta-reference | meta-reference | | Ollama | [llamastack/distribution-ollama](https://hub.docker.com/repository/docker/llamastack/distribution-ollama/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/self_hosted_distro/ollama.html) | 
remote::ollama | meta-reference | remote::pgvector; remote::chromadb | meta-reference | meta-reference | | TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/self_hosted_distro/tgi.html) | remote::tgi | meta-reference | meta-reference; remote::pgvector; remote::chromadb | meta-reference | meta-reference | +| Together | [llamastack/distribution-together](https://hub.docker.com/repository/docker/llamastack/distribution-together/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/self_hosted_distro/together.html) | remote::together | meta-reference | remote::weaviate | meta-reference | meta-reference | +| Fireworks | [llamastack/distribution-fireworks](https://hub.docker.com/repository/docker/llamastack/distribution-fireworks/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/self_hosted_distro/fireworks.html) | remote::fireworks | meta-reference | remote::weaviate | meta-reference | meta-reference | +| Bedrock | [llamastack/distribution-bedrock](https://hub.docker.com/repository/docker/llamastack/distribution-bedrock/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/self_hosted_distro/bedrock.html) | remote::bedrock | meta-reference | remote::weaviate | meta-reference | meta-reference | + ```{toctree} :maxdepth: 1 @@ -17,4 +21,7 @@ meta-reference-quantized-gpu ollama tgi dell-tgi +together +fireworks +bedrock ``` diff --git a/docs/source/getting_started/distributions/remote_hosted_distro/together.md b/docs/source/getting_started/distributions/self_hosted_distro/together.md similarity index 100% rename from docs/source/getting_started/distributions/remote_hosted_distro/together.md rename to docs/source/getting_started/distributions/self_hosted_distro/together.md diff --git 
a/llama_stack/providers/tests/eval/fixtures.py b/llama_stack/providers/tests/eval/fixtures.py index 22181f3b2..810239440 100644 --- a/llama_stack/providers/tests/eval/fixtures.py +++ b/llama_stack/providers/tests/eval/fixtures.py @@ -52,11 +52,4 @@ async def eval_stack(request): provider_data, ) - return ( - impls[Api.eval], - impls[Api.eval_tasks], - impls[Api.scoring], - impls[Api.scoring_functions], - impls[Api.datasetio], - impls[Api.datasets], - ) + return impls diff --git a/llama_stack/providers/tests/eval/provider_config_example.yaml b/llama_stack/providers/tests/eval/provider_config_example.yaml deleted file mode 100644 index 38f7512f1..000000000 --- a/llama_stack/providers/tests/eval/provider_config_example.yaml +++ /dev/null @@ -1,22 +0,0 @@ -providers: - datasetio: - - provider_id: test-meta - provider_type: meta-reference - config: {} - scoring: - - provider_id: test-meta - provider_type: meta-reference - config: {} - eval: - - provider_id: test-meta - provider_type: meta-reference - config: {} - inference: - - provider_id: test-tgi - provider_type: remote::tgi - config: - url: http://127.0.0.1:5009 - - provider_id: test-tgi-2 - provider_type: remote::tgi - config: - url: http://127.0.0.1:5010 diff --git a/llama_stack/providers/tests/eval/test_eval.py b/llama_stack/providers/tests/eval/test_eval.py index 721421d37..88f577cd8 100644 --- a/llama_stack/providers/tests/eval/test_eval.py +++ b/llama_stack/providers/tests/eval/test_eval.py @@ -30,19 +30,23 @@ class Testeval: async def test_eval_tasks_list(self, eval_stack): # NOTE: this needs you to ensure that you are starting from a clean state # but so far we don't have an unregister API unfortunately, so be careful - _, eval_tasks_impl, _, _, _, _ = eval_stack + eval_tasks_impl = eval_stack[Api.eval_tasks] response = await eval_tasks_impl.list_eval_tasks() assert isinstance(response, list) @pytest.mark.asyncio async def test_eval_evaluate_rows(self, eval_stack): - eval_impl, eval_tasks_impl, _, _, 
datasetio_impl, datasets_impl = eval_stack + eval_impl, eval_tasks_impl, datasetio_impl, datasets_impl = ( + eval_stack[Api.eval], + eval_stack[Api.eval_tasks], + eval_stack[Api.datasetio], + eval_stack[Api.datasets], + ) await register_dataset( datasets_impl, for_generation=True, dataset_id="test_dataset_for_eval" ) - response = await datasets_impl.list_datasets() - assert len(response) >= 1 + assert len(response) == 1 rows = await datasetio_impl.get_rows_paginated( dataset_id="test_dataset_for_eval", rows_in_page=3, @@ -79,7 +83,11 @@ class Testeval: @pytest.mark.asyncio async def test_eval_run_eval(self, eval_stack): - eval_impl, eval_tasks_impl, _, _, datasetio_impl, datasets_impl = eval_stack + eval_impl, eval_tasks_impl, datasets_impl = ( + eval_stack[Api.eval], + eval_stack[Api.eval_tasks], + eval_stack[Api.datasets], + ) await register_dataset( datasets_impl, for_generation=True, dataset_id="test_dataset_for_eval" )