diff --git a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md new file mode 100644 index 000000000..84b85b91c --- /dev/null +++ b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md @@ -0,0 +1,125 @@ +--- +orphan: true +--- + +# Meta Reference GPU Distribution + +```{toctree} +:maxdepth: 2 +:hidden: + +self +``` + +The `llamastack/distribution-meta-reference-gpu` distribution consists of the following provider configurations: + +| API | Provider(s) | +|-----|-------------| +| agents | `inline::meta-reference` | +| datasetio | `remote::huggingface`, `inline::localfs` | +| eval | `inline::meta-reference` | +| inference | `inline::meta-reference` | +| safety | `inline::llama-guard` | +| scoring | `inline::basic`, `inline::llm-as-judge`, `inline::braintrust` | +| telemetry | `inline::meta-reference` | +| tool_runtime | `remote::brave-search`, `remote::tavily-search`, `inline::rag-runtime`, `remote::model-context-protocol` | +| vector_io | `inline::faiss`, `remote::chromadb`, `remote::pgvector` | + + +Note that you need access to NVIDIA GPUs to run this distribution. This distribution is not compatible with CPU-only machines or machines with AMD GPUs. + +### Environment Variables + +The following environment variables can be configured: + +- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `8321`) +- `INFERENCE_MODEL`: Inference model loaded into the Meta Reference server (default: `meta-llama/Llama-3.2-3B-Instruct`) +- `INFERENCE_CHECKPOINT_DIR`: Directory containing the Meta Reference model checkpoint (default: `null`) +- `SAFETY_MODEL`: Name of the safety (Llama-Guard) model to use (default: `meta-llama/Llama-Guard-3-1B`) +- `SAFETY_CHECKPOINT_DIR`: Directory containing the Llama-Guard model checkpoint (default: `null`) + + +## Prerequisite: Downloading Models + +Please use `llama model list --downloaded` to check that you have Llama model checkpoints downloaded in `~/.llama` before proceeding. See the [installation guide](../../references/llama_cli_reference/download_models.md) to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints.
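+
+For example, a hedged sketch of downloading one checkpoint (the model ID is illustrative, and `--source meta` assumes you have a signed download URL from Meta):
+
+```bash
+# Downloads the checkpoint into ~/.llama; the CLI prompts for the signed URL
+llama model download --source meta --model-id Llama3.2-3B-Instruct
+```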
+ +``` +$ llama model list --downloaded +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ +┃ Model ┃ Size ┃ Modified Time ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ +│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ +└─────────────────────────────────────────┴──────────┴─────────────────────┘ +``` + +## Running the Distribution + +You can run the distribution via venv, or via Docker, which has a pre-built image. + +### Via Docker + +This method allows you to get started quickly without having to build the distribution code. + +```bash +LLAMA_STACK_PORT=8321 +docker run \ + -it \ + --pull always \ + --gpus all \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ~/.llama:/root/.llama \ + llamastack/distribution-meta-reference-gpu \ + --port $LLAMA_STACK_PORT \ + --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct +``` + +If you are using Llama Stack Safety / Shield APIs, use: + +```bash +docker run \ + -it \ + --pull always \ + --gpus all \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ~/.llama:/root/.llama \ + llamastack/distribution-meta-reference-gpu \ + --port $LLAMA_STACK_PORT \ + --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ + --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B +``` + +### Via venv + +Make sure you have run `uv pip install llama-stack` and have the Llama Stack CLI available. + +```bash +llama stack build --distro meta-reference-gpu --image-type venv +llama stack run distributions/meta-reference-gpu/run.yaml \ + --port 8321 \ + --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct +``` + +If you are using Llama Stack Safety / Shield APIs, use: + +```bash +llama stack run distributions/meta-reference-gpu/run-with-safety.yaml \ + --port 8321 \ + --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ + --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B +``` diff --git a/docs/docs/distributions/self_hosted_distro/nvidia.md b/docs/docs/distributions/self_hosted_distro/nvidia.md new file mode 100644 index 000000000..fba411640 --- /dev/null +++ b/docs/docs/distributions/self_hosted_distro/nvidia.md @@ -0,0 +1,171 @@ +--- +orphan: true +--- + +# NVIDIA Distribution + +The `llamastack/distribution-nvidia` distribution consists of the following provider configurations.
+ +| API | Provider(s) | +|-----|-------------| +| agents | `inline::meta-reference` | +| datasetio | `inline::localfs`, `remote::nvidia` | +| eval | `remote::nvidia` | +| files | `inline::localfs` | +| inference | `remote::nvidia` | +| post_training | `remote::nvidia` | +| safety | `remote::nvidia` | +| scoring | `inline::basic` | +| telemetry | `inline::meta-reference` | +| tool_runtime | `inline::rag-runtime` | +| vector_io | `inline::faiss` | + + +### Environment Variables + +The following environment variables can be configured: + +- `NVIDIA_API_KEY`: NVIDIA API Key (default: ``) +- `NVIDIA_APPEND_API_VERSION`: Whether to append the API version to the base_url (default: `True`) +- `NVIDIA_DATASET_NAMESPACE`: NVIDIA Dataset Namespace (default: `default`) +- `NVIDIA_PROJECT_ID`: NVIDIA Project ID (default: `test-project`) +- `NVIDIA_CUSTOMIZER_URL`: NVIDIA Customizer URL (default: `https://customizer.api.nvidia.com`) +- `NVIDIA_OUTPUT_MODEL_DIR`: NVIDIA Output Model Directory (default: `test-example-model@v1`) +- `GUARDRAILS_SERVICE_URL`: URL for the NeMo Guardrails Service (default: `http://0.0.0.0:7331`) +- `NVIDIA_GUARDRAILS_CONFIG_ID`: NVIDIA Guardrail Configuration ID (default: `self-check`) +- `NVIDIA_EVALUATOR_URL`: URL for the NeMo Evaluator Service (default: `http://0.0.0.0:7331`) +- `INFERENCE_MODEL`: Inference model (default: `Llama3.1-8B-Instruct`) +- `SAFETY_MODEL`: Name of the model to use for safety (default: `meta/llama-3.1-8b-instruct`) + +### Models + +The following models are available by default: + +- `meta/llama3-8b-instruct` +- `meta/llama3-70b-instruct` +- `meta/llama-3.1-8b-instruct` +- `meta/llama-3.1-70b-instruct` +- `meta/llama-3.1-405b-instruct` +- `meta/llama-3.2-1b-instruct` +- `meta/llama-3.2-3b-instruct` +- `meta/llama-3.2-11b-vision-instruct` +- `meta/llama-3.2-90b-vision-instruct` +- `meta/llama-3.3-70b-instruct` +- `nvidia/vila` +- `nvidia/llama-3.2-nv-embedqa-1b-v2` +- `nvidia/nv-embedqa-e5-v5` +- `nvidia/nv-embedqa-mistral-7b-v2` +- `snowflake/arctic-embed-l` + + +## Prerequisites +### NVIDIA API Keys + +Make sure you have access to an NVIDIA API key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/). Use this key for the `NVIDIA_API_KEY` environment variable. + +### Deploy NeMo Microservices Platform +The NVIDIA NeMo microservices platform supports end-to-end microservice deployment of a complete AI flywheel on your Kubernetes cluster through the NeMo Microservices Helm Chart. Please reference the [NVIDIA NeMo Microservices documentation](https://docs.nvidia.com/nemo/microservices/latest/about/index.html) for platform prerequisites and instructions to install and deploy the platform. + +## Supported Services +Each Llama Stack API corresponds to a specific NeMo microservice. The core microservices (Customizer, Evaluator, Guardrails) are exposed by the same endpoint. The platform components (Data Store) are each exposed by separate endpoints. + +### Inference: NVIDIA NIM +NVIDIA NIM is used for running inference with registered models. There are two ways to access NVIDIA NIMs: + 1. Hosted (default): Preview APIs hosted at https://integrate.api.nvidia.com (requires an API key) + 2. Self-hosted: NVIDIA NIMs that run on your own infrastructure. + +The deployed platform includes the NIM Proxy microservice, which provides access to your NIMs (for example, to run inference on a model). Set the `NVIDIA_BASE_URL` environment variable to use your NVIDIA NIM Proxy deployment.
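+
+A minimal sketch of wiring this up (the self-hosted hostname below is a placeholder, not a real endpoint):
+
+```bash
+# Hosted NIMs (default): just supply your API key
+export NVIDIA_API_KEY="nvapi-..."
+
+# Self-hosted NIMs: point the stack at your NIM Proxy service instead
+export NVIDIA_BASE_URL="http://nim-proxy.test"
+```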
+ +### Datasetio API: NeMo Data Store +The NeMo Data Store microservice serves as the default file storage solution for the NeMo microservices platform. It exposes APIs compatible with the Hugging Face Hub client (`HfApi`), so you can use the client to interact with Data Store. The `NVIDIA_DATASETS_URL` environment variable should point to your NeMo Data Store endpoint. + +See the [NVIDIA Datasetio docs](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/datasetio/nvidia/README.md) for supported features and example usage. + +### Eval API: NeMo Evaluator +The NeMo Evaluator microservice supports evaluation of LLMs. Launching an Evaluation job with NeMo Evaluator requires an Evaluation Config (an object that contains metadata needed by the job). A Llama Stack Benchmark maps to an Evaluation Config, so registering a Benchmark creates an Evaluation Config in NeMo Evaluator. The `NVIDIA_EVALUATOR_URL` environment variable should point to your NeMo Microservices endpoint. + +See the [NVIDIA Eval docs](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/eval/nvidia/README.md) for supported features and example usage. + +### Post-Training API: NeMo Customizer +The NeMo Customizer microservice supports fine-tuning models. See [this list of supported models](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/post_training/nvidia/models.py) that can be fine-tuned using Llama Stack. The `NVIDIA_CUSTOMIZER_URL` environment variable should point to your NeMo Microservices endpoint. + +See the [NVIDIA Post-Training docs](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/post_training/nvidia/README.md) for supported features and example usage. + +### Safety API: NeMo Guardrails +The NeMo Guardrails microservice sits between your application and the LLM, and adds checks and content moderation to a model. The `GUARDRAILS_SERVICE_URL` environment variable should point to your NeMo Microservices endpoint. + +See the [NVIDIA Safety docs](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/safety/nvidia/README.md) for supported features and example usage. + +## Deploying models +To use a registered model with the Llama Stack APIs, ensure the corresponding NIM is deployed to your environment. For example, you can use the NIM Proxy microservice to deploy `meta/llama-3.2-1b-instruct`. + +Note: For improved inference speeds, NIM must use the `fast_outlines` guided decoding system (specified in the request body). This is the default if you deployed the platform with the NeMo Microservices Helm Chart. +```sh +# URL to NeMo NIM Proxy service +export NEMO_URL="http://nemo.test" + +curl --location "$NEMO_URL/v1/deployment/model-deployments" \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "name": "llama-3.2-1b-instruct", + "namespace": "meta", + "config": { + "model": "meta/llama-3.2-1b-instruct", + "nim_deployment": { + "image_name": "nvcr.io/nim/meta/llama-3.2-1b-instruct", + "image_tag": "1.8.3", + "pvc_size": "25Gi", + "gpu": 1, + "additional_envs": { + "NIM_GUIDED_DECODING_BACKEND": "fast_outlines" + } + } + } + }' +``` +This NIM deployment should take approximately 10 minutes to go live. [See the docs](https://docs.nvidia.com/nemo/microservices/latest/get-started/tutorials/deploy-nims.html) for more information on how to deploy a NIM and verify it's available for inference.
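+
+One way to check on the deployment is to query the same deployment path used above; this assumes the NeMo deployment API supports `GET` on individual deployments (verify against the NeMo docs for your platform version):
+
+```sh
+export NEMO_URL="http://nemo.test"
+
+# Inspect the deployment created above and look for a ready status
+curl "$NEMO_URL/v1/deployment/model-deployments/meta/llama-3.2-1b-instruct" \
+  -H 'accept: application/json'
+```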
+ +You can also remove a deployed NIM to free up GPU resources, if needed. +```sh +export NEMO_URL="http://nemo.test" + +curl -X DELETE "$NEMO_URL/v1/deployment/model-deployments/meta/llama-3.1-8b-instruct" +``` + +## Running Llama Stack with NVIDIA + +You can run Llama Stack with NVIDIA via venv (building from source) or via Docker, which has a pre-built image. + +### Via Docker + +This method allows you to get started quickly without having to build the distribution code. + +```bash +LLAMA_STACK_PORT=8321 +docker run \ + -it \ + --pull always \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ./run.yaml:/root/my-run.yaml \ + llamastack/distribution-nvidia \ + --config /root/my-run.yaml \ + --port $LLAMA_STACK_PORT \ + --env NVIDIA_API_KEY=$NVIDIA_API_KEY +``` + +### Via venv + +If you've set up your local development environment, you can also build the image using your local virtual environment. + +```bash +INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct +llama stack build --distro nvidia --image-type venv +llama stack run ./run.yaml \ + --port 8321 \ + --env NVIDIA_API_KEY=$NVIDIA_API_KEY \ + --env INFERENCE_MODEL=$INFERENCE_MODEL +``` + +## Example Notebooks +For examples of how to use the NVIDIA Distribution to run inference, fine-tune, evaluate, and run safety checks on your LLMs, see the example notebooks in [docs/notebooks/nvidia](https://github.com/meta-llama/llama-stack/tree/main/docs/notebooks/nvidia). diff --git a/docs/source/providers/agents/index.md b/docs/docs/providers/agents/index.mdx similarity index 50% rename from docs/source/providers/agents/index.md rename to docs/docs/providers/agents/index.mdx index a2c48d4b9..df020f284 100644 --- a/docs/source/providers/agents/index.md +++ b/docs/docs/providers/agents/index.mdx @@ -1,3 +1,16 @@ +--- +description: "Agents API for creating and interacting with agentic systems. + + Main functionalities provided by this API: + - Create agents with specific instructions and ability to use tools. + - Interactions with agents are grouped into sessions (\"threads\"), and each interaction is called a \"turn\". + - Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details). + - Agents can be provided with various shields (see the Safety API for more details). + - Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details." +sidebar_label: Agents +title: Agents +--- + # Agents ## Overview @@ -15,8 +28,4 @@ This section contains documentation for all available providers for the **agents ## Providers -```{toctree} -:maxdepth: 1 - -inline_meta-reference -``` +- [Meta-Reference](./inline_meta-reference) diff --git a/docs/source/providers/agents/inline_meta-reference.md b/docs/docs/providers/agents/inline_meta-reference.mdx similarity index 80% rename from docs/source/providers/agents/inline_meta-reference.md rename to docs/docs/providers/agents/inline_meta-reference.mdx index 5f64f79e1..fd961745f 100644 --- a/docs/source/providers/agents/inline_meta-reference.md +++ b/docs/docs/providers/agents/inline_meta-reference.mdx @@ -1,3 +1,9 @@ +--- +description: "Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks."
+sidebar_label: Meta-Reference +title: inline::meta-reference +--- + # inline::meta-reference ## Description @@ -20,6 +26,4 @@ persistence_store: responses_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/responses_store.db - ``` - diff --git a/docs/source/providers/batches/index.md b/docs/docs/providers/batches/index.mdx similarity index 51% rename from docs/source/providers/batches/index.md rename to docs/docs/providers/batches/index.mdx index d6d2fa9a3..a966cc153 100644 --- a/docs/source/providers/batches/index.md +++ b/docs/docs/providers/batches/index.mdx @@ -1,3 +1,18 @@ +--- +description: "The Batches API enables efficient processing of multiple requests in a single operation, + particularly useful for processing large datasets, batch evaluation workflows, and + cost-effective inference at scale. + + The API is designed to allow use of openai client libraries for seamless integration. + + This API provides the following extensions: + - idempotent batch creation + + Note: This API is currently under active development and may undergo changes." +sidebar_label: Batches +title: Batches +--- + # Batches ## Overview @@ -17,8 +32,4 @@ This section contains documentation for all available providers for the **batche ## Providers -```{toctree} -:maxdepth: 1 - -inline_reference -``` +- [Reference](./inline_reference) diff --git a/docs/source/providers/batches/inline_reference.md b/docs/docs/providers/batches/inline_reference.mdx similarity index 86% rename from docs/source/providers/batches/inline_reference.md rename to docs/docs/providers/batches/inline_reference.mdx index a58e5124d..f43800555 100644 --- a/docs/source/providers/batches/inline_reference.md +++ b/docs/docs/providers/batches/inline_reference.mdx @@ -1,3 +1,9 @@ +--- +description: "Reference implementation of batches API with KVStore persistence." +sidebar_label: Reference +title: inline::reference +--- + # inline::reference ## Description @@ -18,6 +24,4 @@ Reference implementation of batches API with KVStore persistence. kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/batches.db - ``` - diff --git a/docs/docs/providers/datasetio/index.mdx b/docs/docs/providers/datasetio/index.mdx new file mode 100644 index 000000000..6c02acf6c --- /dev/null +++ b/docs/docs/providers/datasetio/index.mdx @@ -0,0 +1,16 @@ +--- +sidebar_label: Datasetio +title: Datasetio +--- + +# Datasetio + +## Overview + +This section contains documentation for all available providers for the **datasetio** API.
+ +## Providers + +- [Localfs](./inline_localfs) +- [Remote - Huggingface](./remote_huggingface) +- [Remote - Nvidia](./remote_nvidia) diff --git a/docs/source/providers/datasetio/inline_localfs.md b/docs/docs/providers/datasetio/inline_localfs.mdx similarity index 78% rename from docs/source/providers/datasetio/inline_localfs.md rename to docs/docs/providers/datasetio/inline_localfs.mdx index 87a0c795c..b02a3a3bd 100644 --- a/docs/source/providers/datasetio/inline_localfs.md +++ b/docs/docs/providers/datasetio/inline_localfs.mdx @@ -1,3 +1,9 @@ +--- +description: "Local filesystem-based dataset I/O provider for reading and writing datasets to local storage." +sidebar_label: Localfs +title: inline::localfs +--- + # inline::localfs ## Description @@ -16,6 +22,4 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db - ``` - diff --git a/docs/source/providers/datasetio/remote_huggingface.md b/docs/docs/providers/datasetio/remote_huggingface.mdx similarity index 77% rename from docs/source/providers/datasetio/remote_huggingface.md rename to docs/docs/providers/datasetio/remote_huggingface.mdx index 3711f7396..82597d999 100644 --- a/docs/source/providers/datasetio/remote_huggingface.md +++ b/docs/docs/providers/datasetio/remote_huggingface.mdx @@ -1,3 +1,9 @@ +--- +description: "HuggingFace datasets provider for accessing and managing datasets from the HuggingFace Hub." +sidebar_label: Remote - Huggingface +title: remote::huggingface +--- + # remote::huggingface ## Description @@ -16,6 +22,4 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db - ``` - diff --git a/docs/source/providers/datasetio/remote_nvidia.md b/docs/docs/providers/datasetio/remote_nvidia.mdx similarity index 83% rename from docs/source/providers/datasetio/remote_nvidia.md rename to docs/docs/providers/datasetio/remote_nvidia.mdx index 1ad1cdb32..35a7dacee 100644 --- a/docs/source/providers/datasetio/remote_nvidia.md +++ b/docs/docs/providers/datasetio/remote_nvidia.mdx @@ -1,3 +1,9 @@ +--- +description: "NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform." +sidebar_label: Remote - Nvidia +title: remote::nvidia +--- + # remote::nvidia ## Description @@ -20,6 +26,4 @@ api_key: ${env.NVIDIA_API_KEY:=} dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} project_id: ${env.NVIDIA_PROJECT_ID:=test-project} datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test} - ``` - diff --git a/docs/docs/providers/eval/index.mdx b/docs/docs/providers/eval/index.mdx new file mode 100644 index 000000000..e486814de --- /dev/null +++ b/docs/docs/providers/eval/index.mdx @@ -0,0 +1,18 @@ +--- +description: "Llama Stack Evaluation API for running evaluations on model and agent candidates." +sidebar_label: Eval +title: Eval +--- + +# Eval + +## Overview + +Llama Stack Evaluation API for running evaluations on model and agent candidates. + +This section contains documentation for all available providers for the **eval** API. 
+ +## Providers + +- [Meta-Reference](./inline_meta-reference) +- [Remote - Nvidia](./remote_nvidia) diff --git a/docs/source/providers/eval/inline_meta-reference.md b/docs/docs/providers/eval/inline_meta-reference.mdx similarity index 76% rename from docs/source/providers/eval/inline_meta-reference.md rename to docs/docs/providers/eval/inline_meta-reference.mdx index 606883c72..b0eb589e0 100644 --- a/docs/source/providers/eval/inline_meta-reference.md +++ b/docs/docs/providers/eval/inline_meta-reference.mdx @@ -1,3 +1,9 @@ +--- +description: "Meta's reference implementation of evaluation tasks with support for multiple languages and evaluation metrics." +sidebar_label: Meta-Reference +title: inline::meta-reference +--- + # inline::meta-reference ## Description @@ -16,6 +22,4 @@ Meta's reference implementation of evaluation tasks with support for multiple la kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db - ``` - diff --git a/docs/source/providers/eval/remote_nvidia.md b/docs/docs/providers/eval/remote_nvidia.mdx similarity index 74% rename from docs/source/providers/eval/remote_nvidia.md rename to docs/docs/providers/eval/remote_nvidia.mdx index cb764b511..36bb4726b 100644 --- a/docs/source/providers/eval/remote_nvidia.md +++ b/docs/docs/providers/eval/remote_nvidia.mdx @@ -1,3 +1,9 @@ +--- +description: "NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform." +sidebar_label: Remote - Nvidia +title: remote::nvidia +--- + # remote::nvidia ## Description @@ -14,6 +20,4 @@ NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform. ```yaml evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} - ``` - diff --git a/docs/source/providers/external/external-providers-guide.md b/docs/docs/providers/external/external-providers-guide.mdx similarity index 99% rename from docs/source/providers/external/external-providers-guide.md rename to docs/docs/providers/external/external-providers-guide.mdx index e2d4ebea9..eb30afd93 100644 --- a/docs/source/providers/external/external-providers-guide.md +++ b/docs/docs/providers/external/external-providers-guide.mdx @@ -283,4 +283,4 @@ additional_pip_packages: No other steps are required other than `llama stack build` and `llama stack run`. The build process will use `module` to install all of the provider dependencies, retrieve the spec, etc. -The provider will now be available in Llama Stack with the type `remote::ramalama`. \ No newline at end of file +The provider will now be available in Llama Stack with the type `remote::ramalama`. diff --git a/docs/source/providers/external/external-providers-list.md b/docs/docs/providers/external/external-providers-list.mdx similarity index 100% rename from docs/source/providers/external/external-providers-list.md rename to docs/docs/providers/external/external-providers-list.mdx diff --git a/docs/source/providers/external/index.md b/docs/docs/providers/external/index.mdx similarity index 68% rename from docs/source/providers/external/index.md rename to docs/docs/providers/external/index.mdx index 989a7f5b8..375a97c82 100644 --- a/docs/source/providers/external/index.md +++ b/docs/docs/providers/external/index.mdx @@ -5,9 +5,7 @@ Llama Stack supports external providers that live outside of the main codebase. 
- Share providers with others without contributing to the main codebase - Keep provider-specific code separate from the core Llama Stack code -```{toctree} -:maxdepth: 1 +## External Provider Documentation -external-providers-list -external-providers-guide -``` \ No newline at end of file +- [Known External Providers](external-providers-list) +- [Creating External Providers](external-providers-guide) diff --git a/docs/source/providers/files/index.md b/docs/docs/providers/files/index.mdx similarity index 55% rename from docs/source/providers/files/index.md rename to docs/docs/providers/files/index.mdx index 128953223..98bd89633 100644 --- a/docs/source/providers/files/index.md +++ b/docs/docs/providers/files/index.mdx @@ -1,3 +1,8 @@ +--- +sidebar_label: Files +title: Files +--- + # Files ## Overview @@ -6,9 +11,5 @@ This section contains documentation for all available providers for the **files* ## Providers -```{toctree} -:maxdepth: 1 - -inline_localfs -remote_s3 -``` +- [Localfs](./inline_localfs) +- [Remote - S3](./remote_s3) diff --git a/docs/source/providers/files/inline_localfs.md b/docs/docs/providers/files/inline_localfs.mdx similarity index 82% rename from docs/source/providers/files/inline_localfs.md rename to docs/docs/providers/files/inline_localfs.mdx index 09267b7d8..86d141f93 100644 --- a/docs/source/providers/files/inline_localfs.md +++ b/docs/docs/providers/files/inline_localfs.mdx @@ -1,3 +1,9 @@ +--- +description: "Local filesystem-based file storage provider for managing files and documents locally." +sidebar_label: Localfs +title: inline::localfs +--- + # inline::localfs ## Description @@ -19,6 +25,4 @@ storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/dummy/files} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/files_metadata.db - ``` - diff --git a/docs/source/providers/files/remote_s3.md b/docs/docs/providers/files/remote_s3.mdx similarity index 89% rename from docs/source/providers/files/remote_s3.md rename to docs/docs/providers/files/remote_s3.mdx index 2e3cebabd..353cedbfb 100644 --- a/docs/source/providers/files/remote_s3.md +++ b/docs/docs/providers/files/remote_s3.mdx @@ -1,3 +1,9 @@ +--- +description: "AWS S3-based file storage provider for scalable cloud file management with metadata persistence." +sidebar_label: Remote - S3 +title: remote::s3 +--- + # remote::s3 ## Description @@ -28,6 +34,4 @@ auto_create_bucket: ${env.S3_AUTO_CREATE_BUCKET:=false} metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/s3_files_metadata.db - ``` - diff --git a/docs/source/providers/index.md b/docs/docs/providers/index.mdx similarity index 57% rename from docs/source/providers/index.md rename to docs/docs/providers/index.mdx index 3f66ecd0c..d275ac1a3 100644 --- a/docs/source/providers/index.md +++ b/docs/docs/providers/index.mdx @@ -1,3 +1,10 @@ +--- +title: API Providers +description: Ecosystem of providers for swapping implementations across the same API +sidebar_label: Overview +sidebar_position: 1 +--- + # API Providers The goal of Llama Stack is to build an ecosystem where users can easily swap out different implementations for the same API. Examples for these include: @@ -12,17 +19,15 @@ Providers come in two flavors: Importantly, Llama Stack always strives to provide at least one fully inline provider for each API so you can iterate on a fully featured environment locally. 
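+
+To make the swap concrete, here is a hedged sketch of how a run configuration selects a provider per API (the `provider_id`/`provider_type`/`config` fields follow the sample configurations shown later in this section; the exact file layout can vary by distribution):
+
+```yaml
+providers:
+  inference:
+  - provider_id: ollama
+    provider_type: remote::ollama  # remote: points at a separate Ollama server
+    config:
+      url: ${env.OLLAMA_URL:=http://localhost:11434}
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss   # inline: runs inside the Llama Stack process
+    config:
+      kvstore:
+        type: sqlite
+        db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db
+```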
-```{toctree} -:maxdepth: 1 +## Provider Categories -external/index -openai -inference/index -agents/index -datasetio/index -safety/index -telemetry/index -vector_io/index -tool_runtime/index -files/index -``` +- **[External Providers](./external/)** - Guide for building and using external providers +- **[OpenAI Compatibility](./openai)** - OpenAI API compatibility layer +- **[Inference](./inference/)** - LLM and embedding model providers +- **[Agents](./agents/)** - Agentic system providers +- **[DatasetIO](./datasetio/)** - Dataset and data loader providers +- **[Safety](./safety/)** - Content moderation and safety providers +- **[Telemetry](./telemetry/)** - Monitoring and observability providers +- **[Vector IO](./vector_io/)** - Vector database providers +- **[Tool Runtime](./tool_runtime/)** - Tool and protocol providers +- **[Files](./files/)** - File system and storage providers diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx new file mode 100644 index 000000000..a9365c5f2 --- /dev/null +++ b/docs/docs/providers/inference/index.mdx @@ -0,0 +1,48 @@ +--- +description: "Llama Stack Inference API for generating completions, chat completions, and embeddings. + + This API provides the raw interface to the underlying models. Two kinds of models are supported: + - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search." +sidebar_label: Inference +title: Inference +--- + +# Inference + +## Overview + +Llama Stack Inference API for generating completions, chat completions, and embeddings. + + This API provides the raw interface to the underlying models. Two kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. + +This section contains documentation for all available providers for the **inference** API.
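+
+As a quick orientation before the provider list, here is a minimal chat-completion sketch against a locally running stack, using the OpenAI-compatible endpoint (see the OpenAI Compatibility page in this section; the port, path, and model ID below are assumptions based on the defaults used elsewhere in these docs):
+
+```python
+from openai import OpenAI
+
+# Llama Stack exposes an OpenAI-compatible API under /v1/openai/v1
+client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")
+
+response = client.chat.completions.create(
+    model="meta-llama/Llama-3.2-3B-Instruct",
+    messages=[{"role": "user", "content": "Hello!"}],
+)
+print(response.choices[0].message.content)
+```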
+ +## Providers + +- [Meta-Reference](./inline_meta-reference) +- [Sentence-Transformers](./inline_sentence-transformers) +- [Remote - Anthropic](./remote_anthropic) +- [Remote - Azure](./remote_azure) +- [Remote - Bedrock](./remote_bedrock) +- [Remote - Cerebras](./remote_cerebras) +- [Remote - Databricks](./remote_databricks) +- [Remote - Fireworks](./remote_fireworks) +- [Remote - Gemini](./remote_gemini) +- [Remote - Groq](./remote_groq) +- [Remote - Hf - Endpoint](./remote_hf_endpoint) +- [Remote - Hf - Serverless](./remote_hf_serverless) +- [Remote - Llama-Openai-Compat](./remote_llama-openai-compat) +- [Remote - Nvidia](./remote_nvidia) +- [Remote - Ollama](./remote_ollama) +- [Remote - Openai](./remote_openai) +- [Remote - Passthrough](./remote_passthrough) +- [Remote - Runpod](./remote_runpod) +- [Remote - Sambanova](./remote_sambanova) +- [Remote - Tgi](./remote_tgi) +- [Remote - Together](./remote_together) +- [Remote - Vertexai](./remote_vertexai) +- [Remote - Vllm](./remote_vllm) +- [Remote - Watsonx](./remote_watsonx) diff --git a/docs/source/providers/inference/inline_meta-reference.md b/docs/docs/providers/inference/inline_meta-reference.mdx similarity index 84% rename from docs/source/providers/inference/inline_meta-reference.md rename to docs/docs/providers/inference/inline_meta-reference.mdx index eca12a839..328586f9a 100644 --- a/docs/source/providers/inference/inline_meta-reference.md +++ b/docs/docs/providers/inference/inline_meta-reference.mdx @@ -1,3 +1,9 @@ +--- +description: "Meta's reference implementation of inference with support for various model formats and optimization techniques." +sidebar_label: Meta-Reference +title: inline::meta-reference +--- + # inline::meta-reference ## Description @@ -27,6 +33,4 @@ quantization: model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} max_batch_size: ${env.MAX_BATCH_SIZE:=1} max_seq_len: ${env.MAX_SEQ_LEN:=4096} - ``` - diff --git a/docs/docs/providers/inference/inline_sentence-transformers.mdx b/docs/docs/providers/inference/inline_sentence-transformers.mdx new file mode 100644 index 000000000..0e207bbdb --- /dev/null +++ b/docs/docs/providers/inference/inline_sentence-transformers.mdx @@ -0,0 +1,17 @@ +--- +description: "Sentence Transformers inference provider for text embeddings and similarity search." +sidebar_label: Sentence-Transformers +title: inline::sentence-transformers +--- + +# inline::sentence-transformers + +## Description + +Sentence Transformers inference provider for text embeddings and similarity search. + +## Sample Configuration + +```yaml +{} +``` diff --git a/docs/source/providers/inference/remote_anthropic.md b/docs/docs/providers/inference/remote_anthropic.mdx similarity index 69% rename from docs/source/providers/inference/remote_anthropic.md rename to docs/docs/providers/inference/remote_anthropic.mdx index 4680608b1..6bd636c92 100644 --- a/docs/source/providers/inference/remote_anthropic.md +++ b/docs/docs/providers/inference/remote_anthropic.mdx @@ -1,3 +1,9 @@ +--- +description: "Anthropic inference provider for accessing Claude models and Anthropic's AI services." 
+sidebar_label: Remote - Anthropic +title: remote::anthropic +--- + # remote::anthropic ## Description @@ -14,6 +20,4 @@ Anthropic inference provider for accessing Claude models and Anthropic's AI serv ```yaml api_key: ${env.ANTHROPIC_API_KEY:=} - ``` - diff --git a/docs/source/providers/inference/remote_azure.md b/docs/docs/providers/inference/remote_azure.mdx similarity index 78% rename from docs/source/providers/inference/remote_azure.md rename to docs/docs/providers/inference/remote_azure.mdx index 19f8f418b..0eb0ea755 100644 --- a/docs/source/providers/inference/remote_azure.md +++ b/docs/docs/providers/inference/remote_azure.mdx @@ -1,3 +1,12 @@ +--- +description: | + Azure OpenAI inference provider for accessing GPT models and other Azure services. + Provider documentation + https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview +sidebar_label: Remote - Azure +title: remote::azure +--- + # remote::azure ## Description @@ -24,6 +33,4 @@ api_key: ${env.AZURE_API_KEY:=} api_base: ${env.AZURE_API_BASE:=} api_version: ${env.AZURE_API_VERSION:=} api_type: ${env.AZURE_API_TYPE:=} - ``` - diff --git a/docs/source/providers/inference/remote_bedrock.md b/docs/docs/providers/inference/remote_bedrock.mdx similarity index 91% rename from docs/source/providers/inference/remote_bedrock.md rename to docs/docs/providers/inference/remote_bedrock.mdx index 216dd4adb..04c2154a9 100644 --- a/docs/source/providers/inference/remote_bedrock.md +++ b/docs/docs/providers/inference/remote_bedrock.mdx @@ -1,3 +1,9 @@ +--- +description: "AWS Bedrock inference provider for accessing various AI models through AWS's managed service." +sidebar_label: Remote - Bedrock +title: remote::bedrock +--- + # remote::bedrock ## Description @@ -23,6 +29,4 @@ AWS Bedrock inference provider for accessing various AI models through AWS's man ```yaml {} - ``` - diff --git a/docs/source/providers/inference/remote_cerebras.md b/docs/docs/providers/inference/remote_cerebras.mdx similarity index 76% rename from docs/source/providers/inference/remote_cerebras.md rename to docs/docs/providers/inference/remote_cerebras.mdx index 3bd3dda25..d9cc93aef 100644 --- a/docs/source/providers/inference/remote_cerebras.md +++ b/docs/docs/providers/inference/remote_cerebras.mdx @@ -1,3 +1,9 @@ +--- +description: "Cerebras inference provider for running models on Cerebras Cloud platform." +sidebar_label: Remote - Cerebras +title: remote::cerebras +--- + # remote::cerebras ## Description @@ -16,6 +22,4 @@ Cerebras inference provider for running models on Cerebras Cloud platform. ```yaml base_url: https://api.cerebras.ai api_key: ${env.CEREBRAS_API_KEY:=} - ``` - diff --git a/docs/source/providers/inference/remote_databricks.md b/docs/docs/providers/inference/remote_databricks.mdx similarity index 75% rename from docs/source/providers/inference/remote_databricks.md rename to docs/docs/providers/inference/remote_databricks.mdx index 3b418f7d4..7f736db9d 100644 --- a/docs/source/providers/inference/remote_databricks.md +++ b/docs/docs/providers/inference/remote_databricks.mdx @@ -1,3 +1,9 @@ +--- +description: "Databricks inference provider for running models on Databricks' unified analytics platform." 
+sidebar_label: Remote - Databricks +title: remote::databricks +--- + # remote::databricks ## Description @@ -16,6 +22,4 @@ Databricks inference provider for running models on Databricks' unified analytic ```yaml url: ${env.DATABRICKS_HOST:=} api_token: ${env.DATABRICKS_TOKEN:=} - ``` - diff --git a/docs/source/providers/inference/remote_fireworks.md b/docs/docs/providers/inference/remote_fireworks.mdx similarity index 80% rename from docs/source/providers/inference/remote_fireworks.md rename to docs/docs/providers/inference/remote_fireworks.mdx index 28dbf1d3f..d2c3a664e 100644 --- a/docs/source/providers/inference/remote_fireworks.md +++ b/docs/docs/providers/inference/remote_fireworks.mdx @@ -1,3 +1,9 @@ +--- +description: "Fireworks AI inference provider for Llama models and other AI models on the Fireworks platform." +sidebar_label: Remote - Fireworks +title: remote::fireworks +--- + # remote::fireworks ## Description @@ -17,6 +23,4 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire ```yaml url: https://api.fireworks.ai/inference/v1 api_key: ${env.FIREWORKS_API_KEY:=} - ``` - diff --git a/docs/source/providers/inference/remote_gemini.md b/docs/docs/providers/inference/remote_gemini.mdx similarity index 70% rename from docs/source/providers/inference/remote_gemini.md rename to docs/docs/providers/inference/remote_gemini.mdx index 14b3223f2..0505c69da 100644 --- a/docs/source/providers/inference/remote_gemini.md +++ b/docs/docs/providers/inference/remote_gemini.mdx @@ -1,3 +1,9 @@ +--- +description: "Google Gemini inference provider for accessing Gemini models and Google's AI services." +sidebar_label: Remote - Gemini +title: remote::gemini +--- + # remote::gemini ## Description @@ -14,6 +20,4 @@ Google Gemini inference provider for accessing Gemini models and Google's AI ser ```yaml api_key: ${env.GEMINI_API_KEY:=} - ``` - diff --git a/docs/source/providers/inference/remote_groq.md b/docs/docs/providers/inference/remote_groq.mdx similarity index 76% rename from docs/source/providers/inference/remote_groq.md rename to docs/docs/providers/inference/remote_groq.mdx index 68bd4d5b3..1797035c1 100644 --- a/docs/source/providers/inference/remote_groq.md +++ b/docs/docs/providers/inference/remote_groq.mdx @@ -1,3 +1,9 @@ +--- +description: "Groq inference provider for ultra-fast inference using Groq's LPU technology." +sidebar_label: Remote - Groq +title: remote::groq +--- + # remote::groq ## Description @@ -16,6 +22,4 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology. ```yaml url: https://api.groq.com api_key: ${env.GROQ_API_KEY:=} - ``` - diff --git a/docs/source/providers/inference/remote_hf_endpoint.md b/docs/docs/providers/inference/remote_hf_endpoint.mdx similarity index 59% rename from docs/source/providers/inference/remote_hf_endpoint.md rename to docs/docs/providers/inference/remote_hf_endpoint.mdx index 8aaf13476..771b24f8d 100644 --- a/docs/source/providers/inference/remote_hf_endpoint.md +++ b/docs/docs/providers/inference/remote_hf_endpoint.mdx @@ -1,3 +1,9 @@ +--- +description: "HuggingFace Inference Endpoints provider for dedicated model serving." +sidebar_label: Remote - Hf - Endpoint +title: remote::hf::endpoint +--- + # remote::hf::endpoint ## Description @@ -8,7 +14,7 @@ HuggingFace Inference Endpoints provider for dedicated model serving. 
| Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `endpoint_name` | `` | No | | The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided. | +| `endpoint_name` | `` | No | | The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided. | | `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | ## Sample Configuration @@ -16,6 +22,4 @@ HuggingFace Inference Endpoints provider for dedicated model serving. ```yaml endpoint_name: ${env.INFERENCE_ENDPOINT_NAME} api_token: ${env.HF_API_TOKEN} - ``` - diff --git a/docs/source/providers/inference/remote_hf_serverless.md b/docs/docs/providers/inference/remote_hf_serverless.mdx similarity index 79% rename from docs/source/providers/inference/remote_hf_serverless.md rename to docs/docs/providers/inference/remote_hf_serverless.mdx index 6764590b8..1a89b8e3e 100644 --- a/docs/source/providers/inference/remote_hf_serverless.md +++ b/docs/docs/providers/inference/remote_hf_serverless.mdx @@ -1,3 +1,9 @@ +--- +description: "HuggingFace Inference API serverless provider for on-demand model inference." +sidebar_label: Remote - Hf - Serverless +title: remote::hf::serverless +--- + # remote::hf::serverless ## Description @@ -16,6 +22,4 @@ HuggingFace Inference API serverless provider for on-demand model inference. ```yaml huggingface_repo: ${env.INFERENCE_MODEL} api_token: ${env.HF_API_TOKEN} - ``` - diff --git a/docs/source/providers/inference/remote_llama-openai-compat.md b/docs/docs/providers/inference/remote_llama-openai-compat.mdx similarity index 75% rename from docs/source/providers/inference/remote_llama-openai-compat.md rename to docs/docs/providers/inference/remote_llama-openai-compat.mdx index 5c97aebc3..cb624ad87 100644 --- a/docs/source/providers/inference/remote_llama-openai-compat.md +++ b/docs/docs/providers/inference/remote_llama-openai-compat.mdx @@ -1,3 +1,9 @@ +--- +description: "Llama OpenAI-compatible provider for using Llama models with OpenAI API format." +sidebar_label: Remote - Llama-Openai-Compat +title: remote::llama-openai-compat +--- + # remote::llama-openai-compat ## Description @@ -16,6 +22,4 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format. ```yaml openai_compat_api_base: https://api.llama.com/compat/v1/ api_key: ${env.LLAMA_API_KEY} - ``` - diff --git a/docs/source/providers/inference/remote_nvidia.md b/docs/docs/providers/inference/remote_nvidia.mdx similarity index 85% rename from docs/source/providers/inference/remote_nvidia.md rename to docs/docs/providers/inference/remote_nvidia.mdx index 1b12839df..4a8be5d03 100644 --- a/docs/source/providers/inference/remote_nvidia.md +++ b/docs/docs/providers/inference/remote_nvidia.mdx @@ -1,3 +1,9 @@ +--- +description: "NVIDIA inference provider for accessing NVIDIA NIM models and AI services." +sidebar_label: Remote - Nvidia +title: remote::nvidia +--- + # remote::nvidia ## Description @@ -19,6 +25,4 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services. 
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} api_key: ${env.NVIDIA_API_KEY:=} append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} - ``` - diff --git a/docs/source/providers/inference/remote_ollama.md b/docs/docs/providers/inference/remote_ollama.mdx similarity index 75% rename from docs/source/providers/inference/remote_ollama.md rename to docs/docs/providers/inference/remote_ollama.mdx index f9f0a7622..5d9a4ad6c 100644 --- a/docs/source/providers/inference/remote_ollama.md +++ b/docs/docs/providers/inference/remote_ollama.mdx @@ -1,3 +1,9 @@ +--- +description: "Ollama inference provider for running local models through the Ollama runtime." +sidebar_label: Remote - Ollama +title: remote::ollama +--- + # remote::ollama ## Description @@ -15,6 +21,4 @@ Ollama inference provider for running local models through the Ollama runtime. ```yaml url: ${env.OLLAMA_URL:=http://localhost:11434} - ``` - diff --git a/docs/source/providers/inference/remote_openai.md b/docs/docs/providers/inference/remote_openai.mdx similarity index 77% rename from docs/source/providers/inference/remote_openai.md rename to docs/docs/providers/inference/remote_openai.mdx index 18a74caea..56ca94233 100644 --- a/docs/source/providers/inference/remote_openai.md +++ b/docs/docs/providers/inference/remote_openai.mdx @@ -1,3 +1,9 @@ +--- +description: "OpenAI inference provider for accessing GPT models and other OpenAI services." +sidebar_label: Remote - Openai +title: remote::openai +--- + # remote::openai ## Description @@ -16,6 +22,4 @@ OpenAI inference provider for accessing GPT models and other OpenAI services. ```yaml api_key: ${env.OPENAI_API_KEY:=} base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1} - ``` - diff --git a/docs/source/providers/inference/remote_passthrough.md b/docs/docs/providers/inference/remote_passthrough.mdx similarity index 74% rename from docs/source/providers/inference/remote_passthrough.md rename to docs/docs/providers/inference/remote_passthrough.mdx index 9005e5339..972cc2a08 100644 --- a/docs/source/providers/inference/remote_passthrough.md +++ b/docs/docs/providers/inference/remote_passthrough.mdx @@ -1,3 +1,9 @@ +--- +description: "Passthrough inference provider for connecting to any external inference service not directly supported." +sidebar_label: Remote - Passthrough +title: remote::passthrough +--- + # remote::passthrough ## Description @@ -16,6 +22,4 @@ Passthrough inference provider for connecting to any external inference service ```yaml url: ${env.PASSTHROUGH_URL} api_key: ${env.PASSTHROUGH_API_KEY} - ``` - diff --git a/docs/source/providers/inference/remote_runpod.md b/docs/docs/providers/inference/remote_runpod.mdx similarity index 75% rename from docs/source/providers/inference/remote_runpod.md rename to docs/docs/providers/inference/remote_runpod.mdx index ff1c0bcb6..2e8847dc5 100644 --- a/docs/source/providers/inference/remote_runpod.md +++ b/docs/docs/providers/inference/remote_runpod.mdx @@ -1,3 +1,9 @@ +--- +description: "RunPod inference provider for running models on RunPod's cloud GPU platform." +sidebar_label: Remote - Runpod +title: remote::runpod +--- + # remote::runpod ## Description @@ -16,6 +22,4 @@ RunPod inference provider for running models on RunPod's cloud GPU platform. 
```yaml url: ${env.RUNPOD_URL:=} api_token: ${env.RUNPOD_API_TOKEN} - ``` - diff --git a/docs/source/providers/inference/remote_sambanova-openai-compat.md b/docs/docs/providers/inference/remote_sambanova-openai-compat.mdx similarity index 99% rename from docs/source/providers/inference/remote_sambanova-openai-compat.md rename to docs/docs/providers/inference/remote_sambanova-openai-compat.mdx index 3074a5885..9b4716d7e 100644 --- a/docs/source/providers/inference/remote_sambanova-openai-compat.md +++ b/docs/docs/providers/inference/remote_sambanova-openai-compat.mdx @@ -18,4 +18,3 @@ openai_compat_api_base: https://api.sambanova.ai/v1 api_key: ${env.SAMBANOVA_API_KEY:=} ``` - diff --git a/docs/source/providers/inference/remote_sambanova.md b/docs/docs/providers/inference/remote_sambanova.mdx similarity index 76% rename from docs/source/providers/inference/remote_sambanova.md rename to docs/docs/providers/inference/remote_sambanova.mdx index 9d15c97d5..6ee28b400 100644 --- a/docs/source/providers/inference/remote_sambanova.md +++ b/docs/docs/providers/inference/remote_sambanova.mdx @@ -1,3 +1,9 @@ +--- +description: "SambaNova inference provider for running models on SambaNova's dataflow architecture." +sidebar_label: Remote - Sambanova +title: remote::sambanova +--- + # remote::sambanova ## Description @@ -16,6 +22,4 @@ SambaNova inference provider for running models on SambaNova's dataflow architec ```yaml url: https://api.sambanova.ai/v1 api_key: ${env.SAMBANOVA_API_KEY:=} - ``` - diff --git a/docs/source/providers/inference/remote_tgi.md b/docs/docs/providers/inference/remote_tgi.mdx similarity index 71% rename from docs/source/providers/inference/remote_tgi.md rename to docs/docs/providers/inference/remote_tgi.mdx index 104bb4aab..3a348056f 100644 --- a/docs/source/providers/inference/remote_tgi.md +++ b/docs/docs/providers/inference/remote_tgi.mdx @@ -1,3 +1,9 @@ +--- +description: "Text Generation Inference (TGI) provider for HuggingFace model serving." +sidebar_label: Remote - Tgi +title: remote::tgi +--- + # remote::tgi ## Description @@ -14,6 +20,4 @@ Text Generation Inference (TGI) provider for HuggingFace model serving. ```yaml url: ${env.TGI_URL:=} - ``` - diff --git a/docs/source/providers/inference/remote_together.md b/docs/docs/providers/inference/remote_together.mdx similarity index 80% rename from docs/source/providers/inference/remote_together.md rename to docs/docs/providers/inference/remote_together.mdx index be764e635..da232a45b 100644 --- a/docs/source/providers/inference/remote_together.md +++ b/docs/docs/providers/inference/remote_together.mdx @@ -1,3 +1,9 @@ +--- +description: "Together AI inference provider for open-source models and collaborative AI development." 
+sidebar_label: Remote - Together +title: remote::together +--- + # remote::together ## Description @@ -17,6 +23,4 @@ Together AI inference provider for open-source models and collaborative AI devel ```yaml url: https://api.together.xyz/v1 api_key: ${env.TOGETHER_API_KEY:=} - ``` - diff --git a/docs/source/providers/inference/remote_vertexai.md b/docs/docs/providers/inference/remote_vertexai.mdx similarity index 56% rename from docs/source/providers/inference/remote_vertexai.md rename to docs/docs/providers/inference/remote_vertexai.mdx index 962bbd76f..13a910d43 100644 --- a/docs/source/providers/inference/remote_vertexai.md +++ b/docs/docs/providers/inference/remote_vertexai.mdx @@ -1,3 +1,29 @@ +--- +description: | + Google Vertex AI inference provider enables you to use Google's Gemini models through Google Cloud's Vertex AI platform, providing several advantages: + + • Enterprise-grade security: Uses Google Cloud's security controls and IAM + • Better integration: Seamless integration with other Google Cloud services + • Advanced features: Access to additional Vertex AI features like model tuning and monitoring + • Authentication: Uses Google Cloud Application Default Credentials (ADC) instead of API keys + + Configuration: + - Set VERTEX_AI_PROJECT environment variable (required) + - Set VERTEX_AI_LOCATION environment variable (optional, defaults to us-central1) + - Use Google Cloud Application Default Credentials or service account key + + Authentication Setup: + Option 1 (Recommended): gcloud auth application-default login + Option 2: Set GOOGLE_APPLICATION_CREDENTIALS to service account key path + + Available Models: + - vertex_ai/gemini-2.0-flash + - vertex_ai/gemini-2.5-flash + - vertex_ai/gemini-2.5-pro +sidebar_label: Remote - Vertexai +title: remote::vertexai +--- + # remote::vertexai ## Description @@ -35,6 +61,4 @@ Available Models: ```yaml project: ${env.VERTEX_AI_PROJECT:=} location: ${env.VERTEX_AI_LOCATION:=us-central1} - ``` - diff --git a/docs/source/providers/inference/remote_vllm.md b/docs/docs/providers/inference/remote_vllm.mdx similarity index 86% rename from docs/source/providers/inference/remote_vllm.md rename to docs/docs/providers/inference/remote_vllm.mdx index 172d35873..77b8e1355 100644 --- a/docs/source/providers/inference/remote_vllm.md +++ b/docs/docs/providers/inference/remote_vllm.mdx @@ -1,3 +1,9 @@ +--- +description: "Remote vLLM inference provider for connecting to vLLM servers." +sidebar_label: Remote - Vllm +title: remote::vllm +--- + # remote::vllm ## Description @@ -21,6 +27,4 @@ url: ${env.VLLM_URL:=} max_tokens: ${env.VLLM_MAX_TOKENS:=4096} api_token: ${env.VLLM_API_TOKEN:=fake} tls_verify: ${env.VLLM_TLS_VERIFY:=true} - ``` - diff --git a/docs/source/providers/inference/remote_watsonx.md b/docs/docs/providers/inference/remote_watsonx.mdx similarity index 82% rename from docs/source/providers/inference/remote_watsonx.md rename to docs/docs/providers/inference/remote_watsonx.mdx index e885a07fc..1ceccc3ed 100644 --- a/docs/source/providers/inference/remote_watsonx.md +++ b/docs/docs/providers/inference/remote_watsonx.mdx @@ -1,3 +1,9 @@ +--- +description: "IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform." 
+sidebar_label: Remote - Watsonx +title: remote::watsonx +--- + # remote::watsonx ## Description @@ -19,6 +25,4 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} api_key: ${env.WATSONX_API_KEY:=} project_id: ${env.WATSONX_PROJECT_ID:=} - ``` - diff --git a/docs/source/providers/openai.md b/docs/docs/providers/openai.mdx similarity index 90% rename from docs/source/providers/openai.md rename to docs/docs/providers/openai.mdx index 44a615456..1a907152f 100644 --- a/docs/source/providers/openai.md +++ b/docs/docs/providers/openai.mdx @@ -42,9 +42,7 @@ models = client.models.list() #### Responses -:::{note} -The Responses API implementation is still in active development. While it is quite usable, there are still unimplemented parts of the API. We'd love feedback on any use-cases you try that do not work to help prioritize the pieces left to implement. Please open issues in the [meta-llama/llama-stack](https://github.com/meta-llama/llama-stack) GitHub repository with details of anything that does not work. -::: +> **Note:** The Responses API implementation is still in active development. While it is quite usable, there are still unimplemented parts of the API. We'd love feedback on any use-cases you try that do not work to help prioritize the pieces left to implement. Please open issues in the [meta-llama/llama-stack](https://github.com/meta-llama/llama-stack) GitHub repository with details of anything that does not work. ##### Simple inference diff --git a/docs/docs/providers/post_training/index.mdx b/docs/docs/providers/post_training/index.mdx new file mode 100644 index 000000000..6faab7a05 --- /dev/null +++ b/docs/docs/providers/post_training/index.mdx @@ -0,0 +1,17 @@ +--- +sidebar_label: Post Training +title: Post_Training +--- + +# Post_Training + +## Overview + +This section contains documentation for all available providers for the **post_training** API. + +## Providers + +- [Huggingface-Gpu](./inline_huggingface-gpu) +- [Torchtune-Cpu](./inline_torchtune-cpu) +- [Torchtune-Gpu](./inline_torchtune-gpu) +- [Remote - Nvidia](./remote_nvidia) diff --git a/docs/source/providers/post_training/inline_huggingface-cpu.md b/docs/docs/providers/post_training/inline_huggingface-cpu.mdx similarity index 99% rename from docs/source/providers/post_training/inline_huggingface-cpu.md rename to docs/docs/providers/post_training/inline_huggingface-cpu.mdx index e663fe8f8..9b5f2a091 100644 --- a/docs/source/providers/post_training/inline_huggingface-cpu.md +++ b/docs/docs/providers/post_training/inline_huggingface-cpu.mdx @@ -38,4 +38,3 @@ device: cpu dpo_output_dir: ~/.llama/dummy/dpo_output ``` - diff --git a/docs/source/providers/post_training/inline_huggingface-gpu.md b/docs/docs/providers/post_training/inline_huggingface-gpu.mdx similarity index 74% rename from docs/source/providers/post_training/inline_huggingface-gpu.md rename to docs/docs/providers/post_training/inline_huggingface-gpu.mdx index 21bf965fe..530ac5c4d 100644 --- a/docs/source/providers/post_training/inline_huggingface-gpu.md +++ b/docs/docs/providers/post_training/inline_huggingface-gpu.mdx @@ -1,3 +1,9 @@ +--- +description: "HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem." 
+sidebar_label: Huggingface-Gpu +title: inline::huggingface-gpu +--- + # inline::huggingface-gpu ## Description @@ -11,11 +17,8 @@ HuggingFace-based post-training provider for fine-tuning models using the Huggin | `device` | `` | No | cuda | | | `distributed_backend` | `Literal['fsdp', 'deepspeed'` | No | | | | `checkpoint_format` | `Literal['full_state', 'huggingface'` | No | huggingface | | -| `chat_template` | `` | No | <|user|> -{input} -<|assistant|> -{output} | | -| `model_specific_config` | `` | No | {'trust_remote_code': True, 'attn_implementation': 'sdpa'} | | +| `chat_template` | `` | No | <|user|><br/>{input}<br/><|assistant|><br/>{output} | | +| `model_specific_config` | `` | No | {'trust_remote_code': True, 'attn_implementation': 'sdpa'} | | | `max_seq_length` | `` | No | 2048 | | | `gradient_checkpointing` | `` | No | False | | | `save_total_limit` | `` | No | 3 | | @@ -36,6 +39,4 @@ checkpoint_format: huggingface distributed_backend: null device: cpu dpo_output_dir: ~/.llama/dummy/dpo_output - ``` - diff --git a/docs/source/providers/post_training/inline_huggingface.md b/docs/docs/providers/post_training/inline_huggingface.mdx similarity index 99% rename from docs/source/providers/post_training/inline_huggingface.md rename to docs/docs/providers/post_training/inline_huggingface.mdx index 8b10fe79c..37c113c6f 100644 --- a/docs/source/providers/post_training/inline_huggingface.md +++ b/docs/docs/providers/post_training/inline_huggingface.mdx @@ -38,4 +38,3 @@ device: cpu dpo_output_dir: ~/.llama/dummy/dpo_output ``` - diff --git a/docs/source/providers/post_training/inline_torchtune-cpu.md b/docs/docs/providers/post_training/inline_torchtune-cpu.mdx similarity index 70% rename from docs/source/providers/post_training/inline_torchtune-cpu.md rename to docs/docs/providers/post_training/inline_torchtune-cpu.mdx index 7204e56e8..f789392fc 100644 --- a/docs/source/providers/post_training/inline_torchtune-cpu.md +++ b/docs/docs/providers/post_training/inline_torchtune-cpu.mdx @@ -1,3 +1,9 @@ +--- +description: "TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework." +sidebar_label: Torchtune-Cpu +title: inline::torchtune-cpu +--- + # inline::torchtune-cpu ## Description @@ -15,6 +21,4 @@ TorchTune-based post-training provider for fine-tuning and optimizing models usi ```yaml checkpoint_format: meta - ``` - diff --git a/docs/source/providers/post_training/inline_torchtune-gpu.md b/docs/docs/providers/post_training/inline_torchtune-gpu.mdx similarity index 70% rename from docs/source/providers/post_training/inline_torchtune-gpu.md rename to docs/docs/providers/post_training/inline_torchtune-gpu.mdx index 98b94f6f6..bd87797af 100644 --- a/docs/source/providers/post_training/inline_torchtune-gpu.md +++ b/docs/docs/providers/post_training/inline_torchtune-gpu.mdx @@ -1,3 +1,9 @@ +--- +description: "TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework." 
+sidebar_label: Torchtune-Gpu +title: inline::torchtune-gpu +--- + # inline::torchtune-gpu ## Description @@ -15,6 +21,4 @@ TorchTune-based post-training provider for fine-tuning and optimizing models usi ```yaml checkpoint_format: meta - ``` - diff --git a/docs/source/providers/post_training/inline_torchtune.md b/docs/docs/providers/post_training/inline_torchtune.md similarity index 100% rename from docs/source/providers/post_training/inline_torchtune.md rename to docs/docs/providers/post_training/inline_torchtune.md diff --git a/docs/source/providers/post_training/remote_nvidia.md b/docs/docs/providers/post_training/remote_nvidia.mdx similarity index 87% rename from docs/source/providers/post_training/remote_nvidia.md rename to docs/docs/providers/post_training/remote_nvidia.mdx index 9a381d872..448ac4c75 100644 --- a/docs/source/providers/post_training/remote_nvidia.md +++ b/docs/docs/providers/post_training/remote_nvidia.mdx @@ -1,3 +1,9 @@ +--- +description: "NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform." +sidebar_label: Remote - Nvidia +title: remote::nvidia +--- + # remote::nvidia ## Description @@ -23,6 +29,4 @@ api_key: ${env.NVIDIA_API_KEY:=} dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} project_id: ${env.NVIDIA_PROJECT_ID:=test-project} customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} - ``` - diff --git a/docs/docs/providers/safety/index.mdx b/docs/docs/providers/safety/index.mdx new file mode 100644 index 000000000..591b92b8d --- /dev/null +++ b/docs/docs/providers/safety/index.mdx @@ -0,0 +1,19 @@ +--- +sidebar_label: Safety +title: Safety +--- + +# Safety + +## Overview + +This section contains documentation for all available providers for the **safety** API. + +## Providers + +- [Code-Scanner](./inline_code-scanner) +- [Llama-Guard](./inline_llama-guard) +- [Prompt-Guard](./inline_prompt-guard) +- [Remote - Bedrock](./remote_bedrock) +- [Remote - Nvidia](./remote_nvidia) +- [Remote - Sambanova](./remote_sambanova) diff --git a/docs/source/providers/safety/inline_code-scanner.md b/docs/docs/providers/safety/inline_code-scanner.mdx similarity index 50% rename from docs/source/providers/safety/inline_code-scanner.md rename to docs/docs/providers/safety/inline_code-scanner.mdx index 3a3e90b3d..3fc3c38a4 100644 --- a/docs/source/providers/safety/inline_code-scanner.md +++ b/docs/docs/providers/safety/inline_code-scanner.mdx @@ -1,3 +1,9 @@ +--- +description: "Code Scanner safety provider for detecting security vulnerabilities and unsafe code patterns." +sidebar_label: Code-Scanner +title: inline::code-scanner +--- + # inline::code-scanner ## Description @@ -8,6 +14,4 @@ Code Scanner safety provider for detecting security vulnerabilities and unsafe c ```yaml {} - ``` - diff --git a/docs/source/providers/safety/inline_llama-guard.md b/docs/docs/providers/safety/inline_llama-guard.mdx similarity index 67% rename from docs/source/providers/safety/inline_llama-guard.md rename to docs/docs/providers/safety/inline_llama-guard.mdx index 4f57898ec..65866c9b2 100644 --- a/docs/source/providers/safety/inline_llama-guard.md +++ b/docs/docs/providers/safety/inline_llama-guard.mdx @@ -1,3 +1,9 @@ +--- +description: "Llama Guard safety provider for content moderation and safety filtering using Meta's Llama Guard model." 
+sidebar_label: Llama-Guard +title: inline::llama-guard +--- + # inline::llama-guard ## Description @@ -14,6 +20,4 @@ Llama Guard safety provider for content moderation and safety filtering using Me ```yaml excluded_categories: [] - ``` - diff --git a/docs/source/providers/safety/inline_prompt-guard.md b/docs/docs/providers/safety/inline_prompt-guard.mdx similarity index 68% rename from docs/source/providers/safety/inline_prompt-guard.md rename to docs/docs/providers/safety/inline_prompt-guard.mdx index 10a6b8d3f..c52e03e4b 100644 --- a/docs/source/providers/safety/inline_prompt-guard.md +++ b/docs/docs/providers/safety/inline_prompt-guard.mdx @@ -1,3 +1,9 @@ +--- +description: "Prompt Guard safety provider for detecting and filtering unsafe prompts and content." +sidebar_label: Prompt-Guard +title: inline::prompt-guard +--- + # inline::prompt-guard ## Description @@ -14,6 +20,4 @@ Prompt Guard safety provider for detecting and filtering unsafe prompts and cont ```yaml guard_type: injection - ``` - diff --git a/docs/source/providers/safety/remote_bedrock.md b/docs/docs/providers/safety/remote_bedrock.mdx similarity index 92% rename from docs/source/providers/safety/remote_bedrock.md rename to docs/docs/providers/safety/remote_bedrock.mdx index 99d77dd72..5461d7cdc 100644 --- a/docs/source/providers/safety/remote_bedrock.md +++ b/docs/docs/providers/safety/remote_bedrock.mdx @@ -1,3 +1,9 @@ +--- +description: "AWS Bedrock safety provider for content moderation using AWS's safety services." +sidebar_label: Remote - Bedrock +title: remote::bedrock +--- + # remote::bedrock ## Description @@ -23,6 +29,4 @@ AWS Bedrock safety provider for content moderation using AWS's safety services. ```yaml {} - ``` - diff --git a/docs/source/providers/safety/remote_nvidia.md b/docs/docs/providers/safety/remote_nvidia.mdx similarity index 81% rename from docs/source/providers/safety/remote_nvidia.md rename to docs/docs/providers/safety/remote_nvidia.mdx index 40ae744a4..0f665e60a 100644 --- a/docs/source/providers/safety/remote_nvidia.md +++ b/docs/docs/providers/safety/remote_nvidia.mdx @@ -1,3 +1,9 @@ +--- +description: "NVIDIA's safety provider for content moderation and safety filtering." +sidebar_label: Remote - Nvidia +title: remote::nvidia +--- + # remote::nvidia ## Description @@ -16,6 +22,4 @@ NVIDIA's safety provider for content moderation and safety filtering. ```yaml guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} - ``` - diff --git a/docs/source/providers/safety/remote_sambanova.md b/docs/docs/providers/safety/remote_sambanova.mdx similarity index 77% rename from docs/source/providers/safety/remote_sambanova.md rename to docs/docs/providers/safety/remote_sambanova.mdx index 7e608f1b7..da70fce6c 100644 --- a/docs/source/providers/safety/remote_sambanova.md +++ b/docs/docs/providers/safety/remote_sambanova.mdx @@ -1,3 +1,9 @@ +--- +description: "SambaNova's safety provider for content moderation and safety filtering." +sidebar_label: Remote - Sambanova +title: remote::sambanova +--- + # remote::sambanova ## Description @@ -16,6 +22,4 @@ SambaNova's safety provider for content moderation and safety filtering. 
```yaml url: https://api.sambanova.ai/v1 api_key: ${env.SAMBANOVA_API_KEY:=} - ``` - diff --git a/docs/docs/providers/scoring/index.mdx b/docs/docs/providers/scoring/index.mdx new file mode 100644 index 000000000..cb9bc539b --- /dev/null +++ b/docs/docs/providers/scoring/index.mdx @@ -0,0 +1,16 @@ +--- +sidebar_label: Scoring +title: Scoring +--- + +# Scoring + +## Overview + +This section contains documentation for all available providers for the **scoring** API. + +## Providers + +- [Basic](./inline_basic) +- [Braintrust](./inline_braintrust) +- [Llm-As-Judge](./inline_llm-as-judge) diff --git a/docs/source/providers/scoring/inline_basic.md b/docs/docs/providers/scoring/inline_basic.mdx similarity index 51% rename from docs/source/providers/scoring/inline_basic.md rename to docs/docs/providers/scoring/inline_basic.mdx index e9e50cff4..cbafbc40c 100644 --- a/docs/source/providers/scoring/inline_basic.md +++ b/docs/docs/providers/scoring/inline_basic.mdx @@ -1,3 +1,9 @@ +--- +description: "Basic scoring provider for simple evaluation metrics and scoring functions." +sidebar_label: Basic +title: inline::basic +--- + # inline::basic ## Description @@ -8,6 +14,4 @@ Basic scoring provider for simple evaluation metrics and scoring functions. ```yaml {} - ``` - diff --git a/docs/source/providers/scoring/inline_braintrust.md b/docs/docs/providers/scoring/inline_braintrust.mdx similarity index 70% rename from docs/source/providers/scoring/inline_braintrust.md rename to docs/docs/providers/scoring/inline_braintrust.mdx index 70a6a1e26..d12f9de25 100644 --- a/docs/source/providers/scoring/inline_braintrust.md +++ b/docs/docs/providers/scoring/inline_braintrust.mdx @@ -1,3 +1,9 @@ +--- +description: "Braintrust scoring provider for evaluation and scoring using the Braintrust platform." +sidebar_label: Braintrust +title: inline::braintrust +--- + # inline::braintrust ## Description @@ -14,6 +20,4 @@ Braintrust scoring provider for evaluation and scoring using the Braintrust plat ```yaml openai_api_key: ${env.OPENAI_API_KEY:=} - ``` - diff --git a/docs/source/providers/scoring/inline_llm-as-judge.md b/docs/docs/providers/scoring/inline_llm-as-judge.mdx similarity index 50% rename from docs/source/providers/scoring/inline_llm-as-judge.md rename to docs/docs/providers/scoring/inline_llm-as-judge.mdx index 971e02897..22f326623 100644 --- a/docs/source/providers/scoring/inline_llm-as-judge.md +++ b/docs/docs/providers/scoring/inline_llm-as-judge.mdx @@ -1,3 +1,9 @@ +--- +description: "LLM-as-judge scoring provider that uses language models to evaluate and score responses." 
+sidebar_label: Llm-As-Judge +title: inline::llm-as-judge +--- + # inline::llm-as-judge ## Description @@ -8,6 +14,4 @@ LLM-as-judge scoring provider that uses language models to evaluate and score re ```yaml {} - ``` - diff --git a/docs/source/providers/telemetry/index.md b/docs/docs/providers/telemetry/index.mdx similarity index 58% rename from docs/source/providers/telemetry/index.md rename to docs/docs/providers/telemetry/index.mdx index c7fbfed73..a13481e13 100644 --- a/docs/source/providers/telemetry/index.md +++ b/docs/docs/providers/telemetry/index.mdx @@ -1,3 +1,8 @@ +--- +sidebar_label: Telemetry +title: Telemetry +--- + # Telemetry ## Overview @@ -6,8 +11,4 @@ This section contains documentation for all available providers for the **teleme ## Providers -```{toctree} -:maxdepth: 1 - -inline_meta-reference -``` +- [Meta-Reference](./inline_meta-reference) diff --git a/docs/source/providers/telemetry/inline_meta-reference.md b/docs/docs/providers/telemetry/inline_meta-reference.mdx similarity index 73% rename from docs/source/providers/telemetry/inline_meta-reference.md rename to docs/docs/providers/telemetry/inline_meta-reference.mdx index 3e5f4b842..13fab87f3 100644 --- a/docs/source/providers/telemetry/inline_meta-reference.md +++ b/docs/docs/providers/telemetry/inline_meta-reference.mdx @@ -1,3 +1,9 @@ +--- +description: "Meta's reference implementation of telemetry and observability using OpenTelemetry." +sidebar_label: Meta-Reference +title: inline::meta-reference +--- + # inline::meta-reference ## Description @@ -10,7 +16,7 @@ Meta's reference implementation of telemetry and observability using OpenTelemet |-------|------|----------|---------|-------------| | `otel_exporter_otlp_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL (base URL for traces, metrics, and logs). If not set, the SDK will use OTEL_EXPORTER_OTLP_ENDPOINT environment variable. | | `service_name` | `` | No | ​ | The service name to use for telemetry | -| `sinks` | `list[inline.telemetry.meta_reference.config.TelemetrySink` | No | [, ] | List of telemetry sinks to enable (possible values: otel_trace, otel_metric, sqlite, console) | +| `sinks` | `list[inline.telemetry.meta_reference.config.TelemetrySink]` | No | [<TelemetrySink.CONSOLE: 'console'>, <TelemetrySink.SQLITE: 'sqlite'>] | List of telemetry sinks to enable (possible values: otel_trace, otel_metric, sqlite, console) | | `sqlite_db_path` | `` | No | ~/.llama/runtime/trace_store.db | The path to the SQLite database to use for storing traces | ## Sample Configuration @@ -20,6 +26,4 @@ service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" sinks: ${env.TELEMETRY_SINKS:=console,sqlite} sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/trace_store.db otel_exporter_otlp_endpoint: ${env.OTEL_EXPORTER_OTLP_ENDPOINT:=} - ``` - diff --git a/docs/docs/providers/tool_runtime/index.mdx b/docs/docs/providers/tool_runtime/index.mdx new file mode 100644 index 000000000..051a8c436 --- /dev/null +++ b/docs/docs/providers/tool_runtime/index.mdx @@ -0,0 +1,19 @@ +--- +sidebar_label: Tool Runtime +title: Tool_Runtime +--- + +# Tool_Runtime + +## Overview + +This section contains documentation for all available providers for the **tool_runtime** API.
+ +## Providers + +- [Rag-Runtime](./inline_rag-runtime) +- [Remote - Bing-Search](./remote_bing-search) +- [Remote - Brave-Search](./remote_brave-search) +- [Remote - Model-Context-Protocol](./remote_model-context-protocol) +- [Remote - Tavily-Search](./remote_tavily-search) +- [Remote - Wolfram-Alpha](./remote_wolfram-alpha) diff --git a/docs/source/providers/tool_runtime/inline_rag-runtime.md b/docs/docs/providers/tool_runtime/inline_rag-runtime.mdx similarity index 50% rename from docs/source/providers/tool_runtime/inline_rag-runtime.md rename to docs/docs/providers/tool_runtime/inline_rag-runtime.mdx index 784b4fdad..97428c2e3 100644 --- a/docs/source/providers/tool_runtime/inline_rag-runtime.md +++ b/docs/docs/providers/tool_runtime/inline_rag-runtime.mdx @@ -1,3 +1,9 @@ +--- +description: "RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search." +sidebar_label: Rag-Runtime +title: inline::rag-runtime +--- + # inline::rag-runtime ## Description @@ -8,6 +14,4 @@ RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunki ```yaml {} - ``` - diff --git a/docs/source/providers/tool_runtime/remote_bing-search.md b/docs/docs/providers/tool_runtime/remote_bing-search.mdx similarity index 70% rename from docs/source/providers/tool_runtime/remote_bing-search.md rename to docs/docs/providers/tool_runtime/remote_bing-search.mdx index 0d5df7679..ec06bc20f 100644 --- a/docs/source/providers/tool_runtime/remote_bing-search.md +++ b/docs/docs/providers/tool_runtime/remote_bing-search.mdx @@ -1,3 +1,9 @@ +--- +description: "Bing Search tool for web search capabilities using Microsoft's search engine." +sidebar_label: Remote - Bing-Search +title: remote::bing-search +--- + # remote::bing-search ## Description @@ -15,6 +21,4 @@ Bing Search tool for web search capabilities using Microsoft's search engine. ```yaml api_key: ${env.BING_API_KEY:} - ``` - diff --git a/docs/source/providers/tool_runtime/remote_brave-search.md b/docs/docs/providers/tool_runtime/remote_brave-search.mdx similarity index 74% rename from docs/source/providers/tool_runtime/remote_brave-search.md rename to docs/docs/providers/tool_runtime/remote_brave-search.mdx index 26bc4010d..3aeed67d5 100644 --- a/docs/source/providers/tool_runtime/remote_brave-search.md +++ b/docs/docs/providers/tool_runtime/remote_brave-search.mdx @@ -1,3 +1,9 @@ +--- +description: "Brave Search tool for web search capabilities with privacy-focused results." +sidebar_label: Remote - Brave-Search +title: remote::brave-search +--- + # remote::brave-search ## Description @@ -16,6 +22,4 @@ Brave Search tool for web search capabilities with privacy-focused results. ```yaml api_key: ${env.BRAVE_SEARCH_API_KEY:=} max_results: 3 - ``` - diff --git a/docs/docs/providers/tool_runtime/remote_model-context-protocol.mdx b/docs/docs/providers/tool_runtime/remote_model-context-protocol.mdx new file mode 100644 index 000000000..869ca275a --- /dev/null +++ b/docs/docs/providers/tool_runtime/remote_model-context-protocol.mdx @@ -0,0 +1,17 @@ +--- +description: "Model Context Protocol (MCP) tool for standardized tool calling and context management." +sidebar_label: Remote - Model-Context-Protocol +title: remote::model-context-protocol +--- + +# remote::model-context-protocol + +## Description + +Model Context Protocol (MCP) tool for standardized tool calling and context management. 
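+Since MCP tools live on the external server, a toolgroup is typically registered against this provider at runtime rather than baked into the run config. Below is a minimal sketch, assuming a running Llama Stack server, the `llama_stack_client` package, and an MCP server at an illustrative URI; the toolgroup ID is hypothetical:
+
+```python
+from llama_stack_client import LlamaStackClient
+
+client = LlamaStackClient(base_url="http://localhost:8321")
+
+# Register a hypothetical MCP server as a toolgroup; its tools are
+# discovered from the server and exposed through the tool_runtime API.
+client.toolgroups.register(
+    toolgroup_id="mcp::my-server",
+    provider_id="model-context-protocol",
+    mcp_endpoint={"uri": "http://localhost:8000/sse"},
+)
+```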
+ +## Sample Configuration + +```yaml +{} +``` diff --git a/docs/source/providers/tool_runtime/remote_tavily-search.md b/docs/docs/providers/tool_runtime/remote_tavily-search.mdx similarity index 74% rename from docs/source/providers/tool_runtime/remote_tavily-search.md rename to docs/docs/providers/tool_runtime/remote_tavily-search.mdx index 3dc31534d..fdca31bbe 100644 --- a/docs/source/providers/tool_runtime/remote_tavily-search.md +++ b/docs/docs/providers/tool_runtime/remote_tavily-search.mdx @@ -1,3 +1,9 @@ +--- +description: "Tavily Search tool for AI-optimized web search with structured results." +sidebar_label: Remote - Tavily-Search +title: remote::tavily-search +--- + # remote::tavily-search ## Description @@ -16,6 +22,4 @@ Tavily Search tool for AI-optimized web search with structured results. ```yaml api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 - ``` - diff --git a/docs/source/providers/tool_runtime/remote_wolfram-alpha.md b/docs/docs/providers/tool_runtime/remote_wolfram-alpha.mdx similarity index 68% rename from docs/source/providers/tool_runtime/remote_wolfram-alpha.md rename to docs/docs/providers/tool_runtime/remote_wolfram-alpha.mdx index 325c189fd..96bc41789 100644 --- a/docs/source/providers/tool_runtime/remote_wolfram-alpha.md +++ b/docs/docs/providers/tool_runtime/remote_wolfram-alpha.mdx @@ -1,3 +1,9 @@ +--- +description: "Wolfram Alpha tool for computational knowledge and mathematical calculations." +sidebar_label: Remote - Wolfram-Alpha +title: remote::wolfram-alpha +--- + # remote::wolfram-alpha ## Description @@ -14,6 +20,4 @@ Wolfram Alpha tool for computational knowledge and mathematical calculations. ```yaml api_key: ${env.WOLFRAM_ALPHA_API_KEY:=} - ``` - diff --git a/docs/docs/providers/vector_io/index.mdx b/docs/docs/providers/vector_io/index.mdx new file mode 100644 index 000000000..58dca2d13 --- /dev/null +++ b/docs/docs/providers/vector_io/index.mdx @@ -0,0 +1,25 @@ +--- +sidebar_label: Vector Io +title: Vector_Io +--- + +# Vector_Io + +## Overview + +This section contains documentation for all available providers for the **vector_io** API. + +## Providers + +- [Chromadb](./inline_chromadb) +- [Faiss](./inline_faiss) +- [Meta-Reference](./inline_meta-reference) +- [Milvus](./inline_milvus) +- [Qdrant](./inline_qdrant) +- [Sqlite-Vec](./inline_sqlite-vec) +- [Sqlite Vec](./inline_sqlite_vec) +- [Remote - Chromadb](./remote_chromadb) +- [Remote - Milvus](./remote_milvus) +- [Remote - Pgvector](./remote_pgvector) +- [Remote - Qdrant](./remote_qdrant) +- [Remote - Weaviate](./remote_weaviate) diff --git a/docs/source/providers/vector_io/inline_chromadb.md b/docs/docs/providers/vector_io/inline_chromadb.mdx similarity index 60% rename from docs/source/providers/vector_io/inline_chromadb.md rename to docs/docs/providers/vector_io/inline_chromadb.mdx index 518e3f689..a1858eacc 100644 --- a/docs/source/providers/vector_io/inline_chromadb.md +++ b/docs/docs/providers/vector_io/inline_chromadb.mdx @@ -1,3 +1,40 @@ +--- +description: | + [Chroma](https://www.trychroma.com/) is an inline and remote vector + database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database. + That means you're not limited to storing vectors in memory or in a separate service. 
+ + ## Features + Chroma supports: + - Store embeddings and their metadata + - Vector search + - Full-text search + - Document storage + - Metadata filtering + - Multi-modal retrieval + + ## Usage + + To use Chroma in your Llama Stack project, follow these steps: + + 1. Install the necessary dependencies. + 2. Configure your Llama Stack project to use Chroma. + 3. Start storing and querying vectors. + + ## Installation + + You can install Chroma using pip: + + ```bash + pip install chromadb + ``` + + ## Documentation + See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general. +sidebar_label: Chromadb +title: inline::chromadb +--- + # inline::chromadb ## Description @@ -51,6 +88,4 @@ db_path: ${env.CHROMADB_PATH} kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/chroma_inline_registry.db - ``` - diff --git a/docs/source/providers/vector_io/inline_faiss.md b/docs/docs/providers/vector_io/inline_faiss.mdx similarity index 55% rename from docs/source/providers/vector_io/inline_faiss.md rename to docs/docs/providers/vector_io/inline_faiss.mdx index cfa18a839..03bc2a928 100644 --- a/docs/source/providers/vector_io/inline_faiss.md +++ b/docs/docs/providers/vector_io/inline_faiss.mdx @@ -1,3 +1,49 @@ +--- +description: | + [Faiss](https://github.com/facebookresearch/faiss) is an inline vector database provider for Llama Stack. It + allows you to store and query vectors directly in memory. + That means you'll get fast and efficient vector retrieval. + + ## Features + + - Lightweight and easy to use + - Fully integrated with Llama Stack + - GPU support + - **Vector search** - FAISS supports pure vector similarity search using embeddings + + ## Search Modes + + **Supported:** + - **Vector Search** (`mode="vector"`): Performs vector similarity search using embeddings + + **Not Supported:** + - **Keyword Search** (`mode="keyword"`): Not supported by FAISS + - **Hybrid Search** (`mode="hybrid"`): Not supported by FAISS + + > **Note**: FAISS is designed as a pure vector similarity search library. See the [FAISS GitHub repository](https://github.com/facebookresearch/faiss) for more details about FAISS's core functionality. + + ## Usage + + To use Faiss in your Llama Stack project, follow these steps: + + 1. Install the necessary dependencies. + 2. Configure your Llama Stack project to use Faiss. + 3. Start storing and querying vectors. + + ## Installation + + You can install Faiss using pip: + + ```bash + pip install faiss-cpu + ``` + ## Documentation + See [Faiss' documentation](https://faiss.ai/) or the [Faiss Wiki](https://github.com/facebookresearch/faiss/wiki) for + more details about Faiss in general. +sidebar_label: Faiss +title: inline::faiss +--- + # inline::faiss ## Description @@ -57,6 +103,4 @@ more details about Faiss in general. kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db - ``` - diff --git a/docs/source/providers/vector_io/inline_meta-reference.md b/docs/docs/providers/vector_io/inline_meta-reference.mdx similarity index 80% rename from docs/source/providers/vector_io/inline_meta-reference.md rename to docs/docs/providers/vector_io/inline_meta-reference.mdx index 6f269c441..bcad86750 100644 --- a/docs/source/providers/vector_io/inline_meta-reference.md +++ b/docs/docs/providers/vector_io/inline_meta-reference.mdx @@ -1,3 +1,9 @@ +--- +description: "Meta's reference implementation of a vector database."
+sidebar_label: Meta-Reference +title: inline::meta-reference +--- + # inline::meta-reference ## Description @@ -16,12 +22,9 @@ Meta's reference implementation of a vector database. kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db - ``` - ## Deprecation Notice -```{warning} +:::warning Please use the `inline::faiss` provider instead. -``` - +::: diff --git a/docs/source/providers/vector_io/inline_milvus.md b/docs/docs/providers/vector_io/inline_milvus.mdx similarity index 87% rename from docs/source/providers/vector_io/inline_milvus.md rename to docs/docs/providers/vector_io/inline_milvus.mdx index 33ea4d179..7e6f15c81 100644 --- a/docs/source/providers/vector_io/inline_milvus.md +++ b/docs/docs/providers/vector_io/inline_milvus.mdx @@ -1,3 +1,9 @@ +--- +description: "Please refer to the remote provider documentation." +sidebar_label: Milvus +title: inline::milvus +--- + # inline::milvus ## Description @@ -21,6 +27,4 @@ db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy}/milvus.db kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_registry.db - ``` - diff --git a/docs/source/providers/vector_io/inline_qdrant.md b/docs/docs/providers/vector_io/inline_qdrant.mdx similarity index 56% rename from docs/source/providers/vector_io/inline_qdrant.md rename to docs/docs/providers/vector_io/inline_qdrant.mdx index b5072d220..5c9ab10f2 100644 --- a/docs/source/providers/vector_io/inline_qdrant.md +++ b/docs/docs/providers/vector_io/inline_qdrant.mdx @@ -1,3 +1,50 @@ +--- +description: | + [Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It + allows you to store and query vectors directly in memory. + That means you'll get fast and efficient vector retrieval. + + > By default, Qdrant stores vectors in RAM, delivering incredibly fast access for datasets that fit comfortably in + > memory. But when your dataset exceeds RAM capacity, Qdrant offers Memmap as an alternative. + > + > \[[An Introduction to Vector Databases](https://qdrant.tech/articles/what-is-a-vector-database/)\] + + + + ## Features + + - Lightweight and easy to use + - Fully integrated with Llama Stack + - Apache 2.0 license terms + - Store embeddings and their metadata + - Supports search by + [Keyword](https://qdrant.tech/articles/qdrant-introduces-full-text-filters-and-indexes/) + and [Hybrid](https://qdrant.tech/articles/hybrid-search/#building-a-hybrid-search-system-in-qdrant) search + - [Multilingual and Multimodal retrieval](https://qdrant.tech/documentation/multimodal-search/) + - [Metadata filtering](https://qdrant.tech/articles/vector-search-filtering/) + - [GPU support](https://qdrant.tech/documentation/guides/running-with-gpu/) + + ## Usage + + To use Qdrant in your Llama Stack project, follow these steps: + + 1. Install the necessary dependencies. + 2. Configure your Llama Stack project to use Qdrant. + 3. Start storing and querying vectors. + + ## Installation + + You can install Qdrant using Docker: + + ```bash + docker pull qdrant/qdrant + ``` + ## Documentation + See the [Qdrant documentation](https://qdrant.tech/documentation/) for more details about Qdrant in general.
+sidebar_label: Qdrant +title: inline::qdrant +--- + # inline::qdrant ## Description @@ -60,6 +107,4 @@ path: ${env.QDRANT_PATH:=~/.llama/~/.llama/dummy}/qdrant.db kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/qdrant_registry.db - ``` - diff --git a/docs/source/providers/vector_io/inline_sqlite-vec.md b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx similarity index 51% rename from docs/source/providers/vector_io/inline_sqlite-vec.md rename to docs/docs/providers/vector_io/inline_sqlite-vec.mdx index 854bb9d08..aa6992a56 100644 --- a/docs/source/providers/vector_io/inline_sqlite-vec.md +++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx @@ -1,3 +1,205 @@ +--- +description: | + [SQLite-Vec](https://github.com/asg017/sqlite-vec) is an inline vector database provider for Llama Stack. It + allows you to store and query vectors directly within an SQLite database. + That means you're not limited to storing vectors in memory or in a separate service. + + ## Features + + - Lightweight and easy to use + - Fully integrated with Llama Stack + - Uses disk-based storage for persistence, allowing for larger vector storage + + ### Comparison to Faiss + + The choice between Faiss and sqlite-vec should be made based on the needs of your application, + as they have different strengths. + + #### Choosing the Right Provider + + Scenario | Recommended Tool | Reason + -- |-----------------| -- + Online Analytical Processing (OLAP) | Faiss | Fast, in-memory searches + Online Transaction Processing (OLTP) | sqlite-vec | Frequent writes and reads + Frequent writes | sqlite-vec | Efficient disk-based storage and incremental indexing + Large datasets | sqlite-vec | Disk-based storage for larger vector storage + Datasets that can fit in memory, frequent reads | Faiss | Optimized for speed, indexing, and GPU acceleration + + #### Empirical Example + + Consider the histogram below in which 10,000 randomly generated strings were inserted + in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`. + + ```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png + :alt: Comparison of SQLite-Vec and Faiss write times + :width: 400px + ``` + + You will notice that the average write time for `sqlite-vec` was 788ms, compared to + 47,640ms for Faiss. While the number is jarring, if you look at the distribution of Faiss write times, you can see that it is rather + uniformly spread across the [1500, 100000] interval. + + Looking at each individual write in the order that the documents are inserted, you'll see the increase in + write time as Faiss reindexes the vectors after each write. + ```{image} ../../../../_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png + :alt: Comparison of SQLite-Vec and Faiss write times + :width: 400px + ``` + + In comparison, the read times for Faiss were on average 10% faster than for sqlite-vec. + The modes of the two distributions highlight the difference further: Faiss + will likely yield faster read performance. + + ```{image} ../../../../_static/providers/vector_io/read_time_comparison_sqlite-vec-faiss.png + :alt: Comparison of SQLite-Vec and Faiss read times + :width: 400px + ``` + + ## Usage + + To use sqlite-vec in your Llama Stack project, follow these steps: + + 1. Install the necessary dependencies. + 2. Configure your Llama Stack project to use SQLite-Vec. + 3. Start storing and querying vectors. + + The SQLite-vec provider supports three search modes: + + 1.
**Vector Search** (`mode="vector"`): Performs pure vector similarity search using the embeddings. + 2. **Keyword Search** (`mode="keyword"`): Performs full-text search using SQLite's FTS5. + 3. **Hybrid Search** (`mode="hybrid"`): Combines both vector and keyword search for better results. First performs keyword search to get candidate matches, then applies vector similarity search on those candidates. + + Example with hybrid search: + ```python + response = await vector_io.query_chunks( + vector_db_id="my_db", + query="your query here", + params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7}, + ) + + # Using RRF ranker + response = await vector_io.query_chunks( + vector_db_id="my_db", + query="your query here", + params={ + "mode": "hybrid", + "max_chunks": 3, + "score_threshold": 0.7, + "ranker": {"type": "rrf", "impact_factor": 60.0}, + }, + ) + + # Using weighted ranker + response = await vector_io.query_chunks( + vector_db_id="my_db", + query="your query here", + params={ + "mode": "hybrid", + "max_chunks": 3, + "score_threshold": 0.7, + "ranker": {"type": "weighted", "alpha": 0.7}, # 70% vector, 30% keyword + }, + ) + ``` + + Example with explicit vector search: + ```python + response = await vector_io.query_chunks( + vector_db_id="my_db", + query="your query here", + params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7}, + ) + ``` + + Example with keyword search: + ```python + response = await vector_io.query_chunks( + vector_db_id="my_db", + query="your query here", + params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7}, + ) + ``` + + ## Supported Search Modes + + The SQLite vector store supports three search modes: + + 1. **Vector Search** (`mode="vector"`): Uses vector similarity to find relevant chunks + 2. **Keyword Search** (`mode="keyword"`): Uses keyword matching to find relevant chunks + 3. **Hybrid Search** (`mode="hybrid"`): Combines both vector and keyword scores using a ranker + + ### Hybrid Search + + Hybrid search combines the strengths of both vector and keyword search by: + - Computing vector similarity scores + - Computing keyword match scores + - Using a ranker to combine these scores + + Two ranker types are supported: + + 1. **RRF (Reciprocal Rank Fusion)**: + - Combines ranks from both vector and keyword results + - Uses an impact factor (default: 60.0) to control the weight of higher-ranked results + - Good for balancing between vector and keyword results + - The default impact factor of 60.0 comes from the original RRF paper by Cormack et al. (2009) [^1], which found this value to provide optimal performance across various retrieval tasks + + 2. 
**Weighted**: + - Linearly combines normalized vector and keyword scores + - Uses an alpha parameter (0-1) to control the blend: + - alpha=0: Only use keyword scores + - alpha=1: Only use vector scores + - alpha=0.5: Equal weight to both (default) + + Example using RAGQueryConfig with different search modes: + + ```python + from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker + + # Vector search + config = RAGQueryConfig(mode="vector", max_chunks=5) + + # Keyword search + config = RAGQueryConfig(mode="keyword", max_chunks=5) + + # Hybrid search with custom RRF ranker + config = RAGQueryConfig( + mode="hybrid", + max_chunks=5, + ranker=RRFRanker(impact_factor=50.0), # Custom impact factor + ) + + # Hybrid search with weighted ranker + config = RAGQueryConfig( + mode="hybrid", + max_chunks=5, + ranker=WeightedRanker(alpha=0.7), # 70% vector, 30% keyword + ) + + # Hybrid search with default RRF ranker + config = RAGQueryConfig( + mode="hybrid", max_chunks=5 + ) # Will use RRF with impact_factor=60.0 + ``` + + Note: The ranker configuration is only used in hybrid mode. For vector or keyword modes, the ranker parameter is ignored. + + ## Installation + + You can install SQLite-Vec using pip: + + ```bash + pip install sqlite-vec + ``` + + ## Documentation + + See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) for more details about sqlite-vec in general. + + [^1]: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). [Reciprocal rank fusion outperforms condorcet and individual rank learning methods](https://dl.acm.org/doi/10.1145/1571941.1572114). In Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval (pp. 758-759). +sidebar_label: Sqlite-Vec +title: inline::sqlite-vec +--- + # inline::sqlite-vec ## Description @@ -215,6 +417,4 @@ db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec_registry.db - ``` - diff --git a/docs/source/providers/vector_io/inline_sqlite_vec.md b/docs/docs/providers/vector_io/inline_sqlite_vec.mdx similarity index 85% rename from docs/source/providers/vector_io/inline_sqlite_vec.md rename to docs/docs/providers/vector_io/inline_sqlite_vec.mdx index 9e5654a50..7f69f617d 100644 --- a/docs/source/providers/vector_io/inline_sqlite_vec.md +++ b/docs/docs/providers/vector_io/inline_sqlite_vec.mdx @@ -1,3 +1,9 @@ +--- +description: "Please refer to the sqlite-vec provider documentation." +sidebar_label: Sqlite Vec +title: inline::sqlite_vec +--- + # inline::sqlite_vec ## Description @@ -20,12 +26,9 @@ db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec_registry.db - ``` - ## Deprecation Notice -```{warning} +:::warning Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead. -``` - +::: diff --git a/docs/source/providers/vector_io/remote_chromadb.md b/docs/docs/providers/vector_io/remote_chromadb.mdx similarity index 59% rename from docs/source/providers/vector_io/remote_chromadb.md rename to docs/docs/providers/vector_io/remote_chromadb.mdx index badfebe90..807771003 100644 --- a/docs/source/providers/vector_io/remote_chromadb.md +++ b/docs/docs/providers/vector_io/remote_chromadb.mdx @@ -1,3 +1,40 @@ +--- +description: | + [Chroma](https://www.trychroma.com/) is an inline and remote vector + database provider for Llama Stack. 
It allows you to store and query vectors directly within a Chroma database. + That means you're not limited to storing vectors in memory or in a separate service. + + ## Features + Chroma supports: + - Store embeddings and their metadata + - Vector search + - Full-text search + - Document storage + - Metadata filtering + - Multi-modal retrieval + + ## Usage + + To use Chroma in your Llama Stack project, follow these steps: + + 1. Install the necessary dependencies. + 2. Configure your Llama Stack project to use Chroma. + 3. Start storing and querying vectors. + + ## Installation + + You can install Chroma using pip: + + ```bash + pip install chromadb + ``` + + ## Documentation + See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general. +sidebar_label: Remote - Chromadb +title: remote::chromadb +--- + # remote::chromadb ## Description @@ -50,6 +87,4 @@ url: ${env.CHROMADB_URL} kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/chroma_remote_registry.db - ``` - diff --git a/docs/source/providers/vector_io/remote_milvus.md b/docs/docs/providers/vector_io/remote_milvus.mdx similarity index 50% rename from docs/source/providers/vector_io/remote_milvus.md rename to docs/docs/providers/vector_io/remote_milvus.mdx index 8974ada10..ae796837d 100644 --- a/docs/source/providers/vector_io/remote_milvus.md +++ b/docs/docs/providers/vector_io/remote_milvus.mdx @@ -1,3 +1,204 @@ +--- +description: | + [Milvus](https://milvus.io/) is an inline and remote vector database provider for Llama Stack. It + allows you to store and query vectors directly within a Milvus database. + That means you're not limited to storing vectors in memory or in a separate service. + + ## Features + + - Easy to use + - Fully integrated with Llama Stack + - Supports all search modes: vector, keyword, and hybrid search (both inline and remote configurations) + + ## Usage + + To use Milvus in your Llama Stack project, follow these steps: + + 1. Install the necessary dependencies. + 2. Configure your Llama Stack project to use Milvus. + 3. Start storing and querying vectors (a minimal registration sketch follows below).
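+ Once the provider is configured, registering a vector database against it is the usual first step. The sketch below is illustrative, assuming a running Llama Stack server with this provider enabled and the `llama_stack_client` package; the database ID and embedding model are placeholders:
+
+ ```python
+ from llama_stack_client import LlamaStackClient
+
+ client = LlamaStackClient(base_url="http://localhost:8321")
+
+ # Register a Milvus-backed vector DB; it can then be used for
+ # ingestion and queries through the vector_io API.
+ client.vector_dbs.register(
+     vector_db_id="my_documents",
+     embedding_model="all-MiniLM-L6-v2",
+     embedding_dimension=384,
+     provider_id="milvus",
+ )
+ ```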
+ + ## Installation + + If you want to use inline Milvus, you can install: + + ```bash + pip install pymilvus[milvus-lite] + ``` + + If you want to use remote Milvus, you can install: + + ```bash + pip install pymilvus + ``` + + ## Configuration + + In Llama Stack, Milvus can be configured in two ways: + - **Inline (Local) Configuration** - Uses Milvus-Lite for local storage + - **Remote Configuration** - Connects to a remote Milvus server + + ### Inline (Local) Configuration + + The simplest method is local configuration, which requires setting `db_path`, a path for locally storing Milvus-Lite files: + + ```yaml + vector_io: + - provider_id: milvus + provider_type: inline::milvus + config: + db_path: ~/.llama/distributions/together/milvus_store.db + ``` + + ### Remote Configuration + + Remote configuration is suitable for larger data storage requirements: + + #### Standard Remote Connection + + ```yaml + vector_io: + - provider_id: milvus + provider_type: remote::milvus + config: + uri: "http://<host>:<port>" + token: "<user>:<password>" + ``` + + #### TLS-Enabled Remote Connection (One-way TLS) + + For connections to Milvus instances with one-way TLS enabled: + + ```yaml + vector_io: + - provider_id: milvus + provider_type: remote::milvus + config: + uri: "https://<host>:<port>" + token: "<user>:<password>" + secure: True + server_pem_path: "/path/to/server.pem" + ``` + + #### Mutual TLS (mTLS) Remote Connection + + For connections to Milvus instances with mutual TLS (mTLS) enabled: + + ```yaml + vector_io: + - provider_id: milvus + provider_type: remote::milvus + config: + uri: "https://<host>:<port>" + token: "<user>:<password>" + secure: True + ca_pem_path: "/path/to/ca.pem" + client_pem_path: "/path/to/client.pem" + client_key_path: "/path/to/client.key" + ``` + + #### Key Parameters for TLS Configuration + + - **`secure`**: Enables TLS encryption when set to `true`. Defaults to `false`. + - **`server_pem_path`**: Path to the **server certificate** for verifying the server's identity (used in one-way TLS). + - **`ca_pem_path`**: Path to the **Certificate Authority (CA) certificate** for validating the server certificate (required in mTLS). + - **`client_pem_path`**: Path to the **client certificate** file (required for mTLS). + - **`client_key_path`**: Path to the **client private key** file (required for mTLS). + + ## Search Modes + + Milvus supports three different search modes for both inline and remote configurations: + + ### Vector Search + Vector search uses semantic similarity to find the most relevant chunks based on embedding vectors. This is the default search mode and works well for finding conceptually similar content. + + ```python + # Vector search example + search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="What is machine learning?", + search_mode="vector", + max_num_results=5, + ) + ``` + + ### Keyword Search + Keyword search uses traditional text-based matching to find chunks containing specific terms or phrases. This is useful when you need exact term matches. + + ```python + # Keyword search example + search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="Python programming language", + search_mode="keyword", + max_num_results=5, + ) + ``` + + ### Hybrid Search + Hybrid search combines both vector and keyword search methods to provide more comprehensive results. It leverages the strengths of both semantic similarity and exact term matching.
+ + #### Basic Hybrid Search + ```python + # Basic hybrid search example (uses RRF ranker with default impact_factor=60.0) + search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="neural networks in Python", + search_mode="hybrid", + max_num_results=5, + ) + ``` + + **Note**: The default `impact_factor` value of 60.0 was empirically determined to be optimal in the original RRF research paper: ["Reciprocal Rank Fusion outperforms Condorcet and individual Rank Learning Methods"](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) (Cormack et al., 2009). + + #### Hybrid Search with RRF (Reciprocal Rank Fusion) Ranker + RRF combines rankings from vector and keyword search by using reciprocal ranks. The impact factor controls how much weight is given to higher-ranked results. + + ```python + # Hybrid search with custom RRF parameters + search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="neural networks in Python", + search_mode="hybrid", + max_num_results=5, + ranking_options={ + "ranker": { + "type": "rrf", + "impact_factor": 100.0, # Higher values give more weight to top-ranked results + } + }, + ) + ``` + + #### Hybrid Search with Weighted Ranker + Weighted ranker linearly combines normalized scores from vector and keyword search. The alpha parameter controls the balance between the two search methods. + + ```python + # Hybrid search with weighted ranker + search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="neural networks in Python", + search_mode="hybrid", + max_num_results=5, + ranking_options={ + "ranker": { + "type": "weighted", + "alpha": 0.7, # 70% vector search, 30% keyword search + } + }, + ) + ``` + + For detailed documentation on RRF and Weighted rankers, please refer to the [Milvus Reranking Guide](https://milvus.io/docs/reranking.md). + + ## Documentation + See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general. + + For more details on TLS configuration, refer to the [TLS setup guide](https://milvus.io/docs/tls.md). +sidebar_label: Remote - Milvus +title: remote::milvus +--- + # remote::milvus ## Description @@ -208,12 +409,11 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi | `token` | `str \| None` | No | | The token of the Milvus server | | `consistency_level` | `` | No | Strong | The consistency level of the Milvus server | | `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | Config for KV store backend | -| `config` | `dict` | No | {} | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. | - -```{note} - This configuration class accepts additional fields beyond those listed above. You can pass any additional configuration options that will be forwarded to the underlying provider. - ``` +| `config` | `dict` | No | {} | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. | +:::note +This configuration class accepts additional fields beyond those listed above. 
You can pass any additional configuration options that will be forwarded to the underlying provider. +::: ## Sample Configuration @@ -223,6 +423,4 @@ token: ${env.MILVUS_TOKEN} kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/milvus_remote_registry.db - ``` - diff --git a/docs/source/providers/vector_io/remote_pgvector.md b/docs/docs/providers/vector_io/remote_pgvector.mdx similarity index 53% rename from docs/source/providers/vector_io/remote_pgvector.md rename to docs/docs/providers/vector_io/remote_pgvector.mdx index 6312edabc..d21810c68 100644 --- a/docs/source/providers/vector_io/remote_pgvector.md +++ b/docs/docs/providers/vector_io/remote_pgvector.mdx @@ -1,3 +1,108 @@ +--- +description: | + [PGVector](https://github.com/pgvector/pgvector) is a remote vector database provider for Llama Stack. It + allows you to store and query vectors directly within a PostgreSQL database. + That means you're not limited to storing vectors in memory or in a separate service. + + ## Features + + - Easy to use + - Fully integrated with Llama Stack + + There are three implementations of search for PGVectorIndex available: + + 1. Vector Search: + - How it works: + - Uses PostgreSQL's vector extension (pgvector) to perform similarity search + - Compares query embeddings against stored embeddings using Cosine distance or other distance metrics + - Eg. SQL query: SELECT document, embedding <=> %s::vector AS distance FROM table ORDER BY distance + + - Characteristics: + - Semantic understanding - finds documents similar in meaning even if they don't share keywords + - Works with high-dimensional vector embeddings (typically 768, 1024, or higher dimensions) + - Best for: Finding conceptually related content, handling synonyms, cross-language search + + 2. Keyword Search + - How it works: + - Uses PostgreSQL's full-text search capabilities with tsvector and ts_rank + - Converts text to searchable tokens using to_tsvector('english', text). Default language is English. + - Eg. SQL query: SELECT document, ts_rank(tokenized_content, plainto_tsquery('english', %s)) AS score + + - Characteristics: + - Lexical matching - finds exact keyword matches and variations + - Uses GIN (Generalized Inverted Index) for fast text search performance + - Scoring: Uses PostgreSQL's ts_rank function for relevance scoring + - Best for: Exact term matching, proper names, technical terms, Boolean-style queries + + 3. Hybrid Search + - How it works: + - Combines both vector and keyword search results + - Runs both searches independently, then merges results using configurable reranking + + - Two reranking strategies available: + - Reciprocal Rank Fusion (RRF) - (default: 60.0) + - Weighted Average - (default: 0.5) + + - Characteristics: + - Best of both worlds: semantic understanding + exact matching + - Documents appearing in both searches get boosted scores + - Configurable balance between semantic and lexical matching + - Best for: General-purpose search where you want both precision and recall + + 4.
Database Schema + The PGVector implementation stores data optimized for all three search types: + CREATE TABLE vector_store_xxx ( + id TEXT PRIMARY KEY, + document JSONB, -- Original document + embedding vector(dimension), -- For vector search + content_text TEXT, -- Raw text content + tokenized_content TSVECTOR -- For keyword search + ); + + -- Indexes for performance + CREATE INDEX content_gin_idx ON table USING GIN(tokenized_content); -- Keyword search + -- Vector index created automatically by pgvector + + ## Usage + + To use PGVector in your Llama Stack project, follow these steps: + + 1. Install the necessary dependencies. + 2. Configure your Llama Stack project to use pgvector (e.g. `remote::pgvector`). + 3. Start storing and querying vectors. + + ## This is an example of how you can set up your environment for using PGVector + + 1. Export env vars: + ```bash + export ENABLE_PGVECTOR=true + export PGVECTOR_HOST=localhost + export PGVECTOR_PORT=5432 + export PGVECTOR_DB=llamastack + export PGVECTOR_USER=llamastack + export PGVECTOR_PASSWORD=llamastack + ``` + + 2. Create DB: + ```bash + psql -h localhost -U postgres -c "CREATE ROLE llamastack LOGIN PASSWORD 'llamastack';" + psql -h localhost -U postgres -c "CREATE DATABASE llamastack OWNER llamastack;" + psql -h localhost -U llamastack -d llamastack -c "CREATE EXTENSION IF NOT EXISTS vector;" + ``` + + ## Installation + + You can install PGVector using Docker: + + ```bash + docker pull pgvector/pgvector:pg17 + ``` + ## Documentation + See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general. +sidebar_label: Remote - Pgvector +title: remote::pgvector +--- + # remote::pgvector ## Description @@ -18,7 +123,7 @@ There are three implementations of search for PGVectoIndex available: - How it works: - Uses PostgreSQL's vector extension (pgvector) to perform similarity search - Compares query embeddings against stored embeddings using Cosine distance or other distance metrics - - Eg. SQL query: SELECT document, embedding <=> %s::vector AS distance FROM table ORDER BY distance + - Eg. SQL query: SELECT document, embedding <=> %s::vector AS distance FROM table ORDER BY distance -Characteristics: - Semantic understanding - finds documents similar in meaning even if they don't share keywords @@ -126,6 +231,4 @@ password: ${env.PGVECTOR_PASSWORD} kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/pgvector_registry.db - ``` - diff --git a/docs/source/providers/vector_io/remote_qdrant.md b/docs/docs/providers/vector_io/remote_qdrant.mdx similarity index 88% rename from docs/source/providers/vector_io/remote_qdrant.md rename to docs/docs/providers/vector_io/remote_qdrant.mdx index 043141007..c44a2b937 100644 --- a/docs/source/providers/vector_io/remote_qdrant.md +++ b/docs/docs/providers/vector_io/remote_qdrant.mdx @@ -1,3 +1,9 @@ +--- +description: "Please refer to the inline provider documentation."
+sidebar_label: Remote - Qdrant +title: remote::qdrant +--- + # remote::qdrant ## Description @@ -29,6 +35,4 @@ api_key: ${env.QDRANT_API_KEY:=} kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/qdrant_registry.db - ``` - diff --git a/docs/source/providers/vector_io/remote_weaviate.md b/docs/docs/providers/vector_io/remote_weaviate.mdx similarity index 63% rename from docs/source/providers/vector_io/remote_weaviate.md rename to docs/docs/providers/vector_io/remote_weaviate.mdx index 8fb0f7c11..3f1e36422 100644 --- a/docs/source/providers/vector_io/remote_weaviate.md +++ b/docs/docs/providers/vector_io/remote_weaviate.mdx @@ -1,3 +1,38 @@ +--- +description: | + [Weaviate](https://weaviate.io/) is a vector database provider for Llama Stack. + It allows you to store and query vectors directly within a Weaviate database. + That means you're not limited to storing vectors in memory or in a separate service. + + ## Features + Weaviate supports: + - Store embeddings and their metadata + - Vector search + - Full-text search + - Hybrid search + - Document storage + - Metadata filtering + - Multi-modal retrieval + + + ## Usage + + To use Weaviate in your Llama Stack project, follow these steps: + + 1. Install the necessary dependencies. + 2. Configure your Llama Stack project to use Weaviate. + 3. Start storing and querying vectors. + + ## Installation + + To install Weaviate see the [Weaviate quickstart documentation](https://weaviate.io/developers/weaviate/quickstart). + + ## Documentation + See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more details about Weaviate in general. +sidebar_label: Remote - Weaviate +title: remote::weaviate +--- + # remote::weaviate ## Description @@ -50,6 +85,4 @@ weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080} kvstore: type: sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/weaviate_registry.db - ``` - diff --git a/docs/source/providers/datasetio/index.md b/docs/source/providers/datasetio/index.md deleted file mode 100644 index 94a97e2ed..000000000 --- a/docs/source/providers/datasetio/index.md +++ /dev/null @@ -1,15 +0,0 @@ -# Datasetio - -## Overview - -This section contains documentation for all available providers for the **datasetio** API. - -## Providers - -```{toctree} -:maxdepth: 1 - -inline_localfs -remote_huggingface -remote_nvidia -``` diff --git a/docs/source/providers/eval/index.md b/docs/source/providers/eval/index.md deleted file mode 100644 index a14fada1d..000000000 --- a/docs/source/providers/eval/index.md +++ /dev/null @@ -1,16 +0,0 @@ -# Eval - -## Overview - -Llama Stack Evaluation API for running evaluations on model and agent candidates. - -This section contains documentation for all available providers for the **eval** API. - -## Providers - -```{toctree} -:maxdepth: 1 - -inline_meta-reference -remote_nvidia -``` diff --git a/docs/source/providers/inference/index.md b/docs/source/providers/inference/index.md deleted file mode 100644 index c5720daef..000000000 --- a/docs/source/providers/inference/index.md +++ /dev/null @@ -1,42 +0,0 @@ -# Inference - -## Overview - -Llama Stack Inference API for generating completions, chat completions, and embeddings. - - This API provides the raw interface to the underlying models. Two kinds of models are supported: - - LLM models: these models generate "raw" and "chat" (conversational) completions. - - Embedding models: these models generate embeddings to be used for semantic search.
- -This section contains documentation for all available providers for the **inference** API. - -## Providers - -```{toctree} -:maxdepth: 1 - -inline_meta-reference -inline_sentence-transformers -remote_anthropic -remote_azure -remote_bedrock -remote_cerebras -remote_databricks -remote_fireworks -remote_gemini -remote_groq -remote_hf_endpoint -remote_hf_serverless -remote_llama-openai-compat -remote_nvidia -remote_ollama -remote_openai -remote_passthrough -remote_runpod -remote_sambanova -remote_tgi -remote_together -remote_vertexai -remote_vllm -remote_watsonx -``` diff --git a/docs/source/providers/inference/inline_sentence-transformers.md b/docs/source/providers/inference/inline_sentence-transformers.md deleted file mode 100644 index 57ec7f7d0..000000000 --- a/docs/source/providers/inference/inline_sentence-transformers.md +++ /dev/null @@ -1,13 +0,0 @@ -# inline::sentence-transformers - -## Description - -Sentence Transformers inference provider for text embeddings and similarity search. - -## Sample Configuration - -```yaml -{} - -``` - diff --git a/docs/source/providers/post_training/index.md b/docs/source/providers/post_training/index.md deleted file mode 100644 index e69f2a45a..000000000 --- a/docs/source/providers/post_training/index.md +++ /dev/null @@ -1,16 +0,0 @@ -# Post_Training - -## Overview - -This section contains documentation for all available providers for the **post_training** API. - -## Providers - -```{toctree} -:maxdepth: 1 - -inline_huggingface-gpu -inline_torchtune-cpu -inline_torchtune-gpu -remote_nvidia -``` diff --git a/docs/source/providers/safety/index.md b/docs/source/providers/safety/index.md deleted file mode 100644 index 5ddda2242..000000000 --- a/docs/source/providers/safety/index.md +++ /dev/null @@ -1,18 +0,0 @@ -# Safety - -## Overview - -This section contains documentation for all available providers for the **safety** API. - -## Providers - -```{toctree} -:maxdepth: 1 - -inline_code-scanner -inline_llama-guard -inline_prompt-guard -remote_bedrock -remote_nvidia -remote_sambanova -``` diff --git a/docs/source/providers/scoring/index.md b/docs/source/providers/scoring/index.md deleted file mode 100644 index f3bd48eb0..000000000 --- a/docs/source/providers/scoring/index.md +++ /dev/null @@ -1,15 +0,0 @@ -# Scoring - -## Overview - -This section contains documentation for all available providers for the **scoring** API. - -## Providers - -```{toctree} -:maxdepth: 1 - -inline_basic -inline_braintrust -inline_llm-as-judge -``` diff --git a/docs/source/providers/tool_runtime/index.md b/docs/source/providers/tool_runtime/index.md deleted file mode 100644 index 8d29aed43..000000000 --- a/docs/source/providers/tool_runtime/index.md +++ /dev/null @@ -1,18 +0,0 @@ -# Tool_Runtime - -## Overview - -This section contains documentation for all available providers for the **tool_runtime** API. - -## Providers - -```{toctree} -:maxdepth: 1 - -inline_rag-runtime -remote_bing-search -remote_brave-search -remote_model-context-protocol -remote_tavily-search -remote_wolfram-alpha -``` diff --git a/docs/source/providers/tool_runtime/remote_model-context-protocol.md b/docs/source/providers/tool_runtime/remote_model-context-protocol.md deleted file mode 100644 index cf9401c2c..000000000 --- a/docs/source/providers/tool_runtime/remote_model-context-protocol.md +++ /dev/null @@ -1,13 +0,0 @@ -# remote::model-context-protocol - -## Description - -Model Context Protocol (MCP) tool for standardized tool calling and context management. 
-
-## Sample Configuration
-
-```yaml
-{}
-
-```
-
diff --git a/docs/source/providers/vector_io/index.md b/docs/source/providers/vector_io/index.md
deleted file mode 100644
index 28ae523d7..000000000
--- a/docs/source/providers/vector_io/index.md
+++ /dev/null
@@ -1,24 +0,0 @@
-# Vector_Io
-
-## Overview
-
-This section contains documentation for all available providers for the **vector_io** API.
-
-## Providers
-
-```{toctree}
-:maxdepth: 1
-
-inline_chromadb
-inline_faiss
-inline_meta-reference
-inline_milvus
-inline_qdrant
-inline_sqlite-vec
-inline_sqlite_vec
-remote_chromadb
-remote_milvus
-remote_pgvector
-remote_qdrant
-remote_weaviate
-```
diff --git a/llama_stack/distributions/nvidia/doc_template.md b/llama_stack/distributions/nvidia/doc_template.md
index 56e99e523..fbee17ef8 100644
--- a/llama_stack/distributions/nvidia/doc_template.md
+++ b/llama_stack/distributions/nvidia/doc_template.md
@@ -49,22 +49,22 @@ The deployed platform includes the NIM Proxy microservice, which is the service
 ### Datasetio API: NeMo Data Store
 The NeMo Data Store microservice serves as the default file storage solution for the NeMo microservices platform. It exposes APIs compatible with the Hugging Face Hub client (`HfApi`), so you can use the client to interact with Data Store. The `NVIDIA_DATASETS_URL` environment variable should point to your NeMo Data Store endpoint.

-See the {repopath}`NVIDIA Datasetio docs::llama_stack/providers/remote/datasetio/nvidia/README.md` for supported features and example usage.
+See the [NVIDIA Datasetio docs](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/datasetio/nvidia/README.md) for supported features and example usage.

 ### Eval API: NeMo Evaluator
 The NeMo Evaluator microservice supports evaluation of LLMs. Launching an Evaluation job with NeMo Evaluator requires an Evaluation Config (an object that contains metadata needed by the job). A Llama Stack Benchmark maps to an Evaluation Config, so registering a Benchmark creates an Evaluation Config in NeMo Evaluator. The `NVIDIA_EVALUATOR_URL` environment variable should point to your NeMo Microservices endpoint.

-See the {repopath}`NVIDIA Eval docs::llama_stack/providers/remote/eval/nvidia/README.md` for supported features and example usage.
+See the [NVIDIA Eval docs](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/eval/nvidia/README.md) for supported features and example usage.

 ### Post-Training API: NeMo Customizer
-The NeMo Customizer microservice supports fine-tuning models. You can reference {repopath}`this list of supported models::llama_stack/providers/remote/post_training/nvidia/models.py` that can be fine-tuned using Llama Stack. The `NVIDIA_CUSTOMIZER_URL` environment variable should point to your NeMo Microservices endpoint.
+The NeMo Customizer microservice supports fine-tuning models. You can reference [this list of supported models](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/post_training/nvidia/models.py) that can be fine-tuned using Llama Stack. The `NVIDIA_CUSTOMIZER_URL` environment variable should point to your NeMo Microservices endpoint.

-See the {repopath}`NVIDIA Post-Training docs::llama_stack/providers/remote/post_training/nvidia/README.md` for supported features and example usage.
+See the [NVIDIA Post-Training docs](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/post_training/nvidia/README.md) for supported features and example usage.
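Because the Data Store speaks the Hugging Face Hub API, the standard `huggingface_hub` client can be pointed at it directly. A minimal sketch, assuming `NVIDIA_DATASETS_URL` points at a reachable Data Store; the repo name and file path below are illustrative placeholders:

```python
import os

from huggingface_hub import HfApi

# Point the standard Hugging Face client at the NeMo Data Store endpoint.
hf_api = HfApi(endpoint=os.environ["NVIDIA_DATASETS_URL"])

# Create a dataset repo and upload a local file, exactly as you would
# against the public Hugging Face Hub.
hf_api.create_repo(repo_id="default/sample-dataset", repo_type="dataset")
hf_api.upload_file(
    path_or_fileobj="./train.jsonl",
    path_in_repo="train.jsonl",
    repo_id="default/sample-dataset",
    repo_type="dataset",
)
```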
### Safety API: NeMo Guardrails The NeMo Guardrails microservice sits between your application and the LLM, and adds checks and content moderation to a model. The `GUARDRAILS_SERVICE_URL` environment variable should point to your NeMo Microservices endpoint. -See the {repopath}`NVIDIA Safety docs::llama_stack/providers/remote/safety/nvidia/README.md` for supported features and example usage. +See the [NVIDIA Safety docs](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/safety/nvidia/README.md) for supported features and example usage. ## Deploying models In order to use a registered model with the Llama Stack APIs, ensure the corresponding NIM is deployed to your environment. For example, you can use the NIM Proxy microservice to deploy `meta/llama-3.2-1b-instruct`. @@ -138,4 +138,4 @@ llama stack run ./run.yaml \ ``` ## Example Notebooks -For examples of how to use the NVIDIA Distribution to run inference, fine-tune, evaluate, and run safety checks on your LLMs, you can reference the example notebooks in {repopath}`docs/notebooks/nvidia`. +For examples of how to use the NVIDIA Distribution to run inference, fine-tune, evaluate, and run safety checks on your LLMs, you can reference the example notebooks in [docs/notebooks/nvidia](https://github.com/meta-llama/llama-stack/tree/main/docs/notebooks/nvidia). diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py index e8237bc62..9816838e7 100644 --- a/llama_stack/providers/registry/vector_io.py +++ b/llama_stack/providers/registry/vector_io.py @@ -410,7 +410,7 @@ There are three implementations of search for PGVectoIndex available: - How it works: - Uses PostgreSQL's vector extension (pgvector) to perform similarity search - Compares query embeddings against stored embeddings using Cosine distance or other distance metrics - - Eg. SQL query: SELECT document, embedding <=> %s::vector AS distance FROM table ORDER BY distance + - Eg. 
SQL query: SELECT document, embedding <=> %s::vector AS distance FROM table ORDER BY distance
+      - Eg. SQL query: SELECT document, embedding &lt;=&gt; %s::vector AS distance FROM table ORDER BY distance
 
     - Characteristics:
       - Semantic understanding - finds documents similar in meaning even if they don't share keywords
diff --git a/scripts/distro_codegen.py b/scripts/distro_codegen.py
index b6698ef9a..ff5025b78 100755
--- a/scripts/distro_codegen.py
+++ b/scripts/distro_codegen.py
@@ -56,7 +56,7 @@ def process_distro(distro_dir: Path, progress, change_tracker: ChangedPathTracke
         distro = template_func()
 
         yaml_output_dir = REPO_ROOT / "llama_stack" / "distributions" / distro.name
-        doc_output_dir = REPO_ROOT / "docs/source/distributions" / f"{distro.distro_type}_distro"
+        doc_output_dir = REPO_ROOT / "docs/docs/distributions" / f"{distro.distro_type}_distro"
         change_tracker.add_paths(yaml_output_dir, doc_output_dir)
         distro.save_distribution(
             yaml_output_dir=yaml_output_dir,
diff --git a/scripts/provider_codegen.py b/scripts/provider_codegen.py
index 17efa2138..207840d49 100755
--- a/scripts/provider_codegen.py
+++ b/scripts/provider_codegen.py
@@ -158,7 +158,7 @@ def get_config_class_info(config_class_path: str) -> dict[str, Any]:
 
 
 def generate_provider_docs(progress, provider_spec: Any, api_name: str) -> str:
-    """Generate markdown documentation for a provider."""
+    """Generate MDX documentation for a provider."""
     provider_type = provider_spec.provider_type
     config_class = provider_spec.config_class
 
@@ -166,10 +166,7 @@ def generate_provider_docs(progress, provider_spec: Any, api_name: str) -> str:
     if "error" in config_info:
         progress.print(config_info["error"])
 
-    md_lines = []
-    md_lines.append(f"# {provider_type}")
-    md_lines.append("")
-
+    # Extract description for frontmatter
     description = ""
     if hasattr(provider_spec, "description") and provider_spec.description:
         description = provider_spec.description
@@ -182,6 +179,37 @@ def generate_provider_docs(progress, provider_spec: Any, api_name: str) -> str:
     elif config_info.get("docstring"):
         description = config_info["docstring"]
 
+    # Create sidebar label (clean up provider_type for display)
+    sidebar_label = provider_type.replace("::", " - ").replace("_", " ")
+    if sidebar_label.startswith("inline - "):
+        sidebar_label = sidebar_label[9:].title()  # Remove "inline - " prefix and title case
+    else:
+        sidebar_label = sidebar_label.title()
+
+    md_lines = []
+
+    # Add YAML frontmatter
+    md_lines.append("---")
+    if description:
+        # Handle multi-line descriptions in YAML - keep it simple for single line
+        if "\n" in description.strip():
+            md_lines.append("description: |")
+            for line in description.strip().split("\n"):
+                # Avoid trailing whitespace by only adding spaces to non-empty lines
+                md_lines.append(f"  {line}" if line.strip() else "")
+        else:
+            # For single line descriptions, format properly for YAML
+            clean_desc = description.strip().replace('"', '\\"')
+            md_lines.append(f'description: "{clean_desc}"')
+    md_lines.append(f"sidebar_label: {sidebar_label}")
+    md_lines.append(f"title: {provider_type}")
+    md_lines.append("---")
+    md_lines.append("")
+
+    # Add main title
+    md_lines.append(f"# {provider_type}")
+    md_lines.append("")
+
     if description:
         md_lines.append("## Description")
         md_lines.append("")
@@ -198,16 +226,35 @@ def generate_provider_docs(progress, provider_spec: Any, api_name: str) -> str:
             field_type = field_info["type"].replace("|", "\\|")
             required = "Yes" if field_info["required"] else "No"
             default = str(field_info["default"]) if field_info["default"] is not None else ""
-            description = field_info["description"] or ""
-            md_lines.append(f"| `{field_name}` | `{field_type}` | {required} | {default} | {description} |")
+            # Escape characters MDX treats specially, then render embedded newlines as <br/>
+            if "\n" in default:
+                default = (
+                    default.replace("<", "&lt;")
+                    .replace(">", "&gt;")
+                    .replace("{", "&#123;")
+                    .replace("}", "&#125;")
+                    .replace("\n", "<br/>")
+                )
+            else:
+                default = (
+                    default.replace("<", "&lt;").replace(">", "&gt;").replace("{", "&#123;").replace("}", "&#125;")
+                )
+
+            description_text = field_info["description"] or ""
+            # Escape curly braces in description text for MDX compatibility
+            description_text = description_text.replace("{", "&#123;").replace("}", "&#125;")
+
+            md_lines.append(f"| `{field_name}` | `{field_type}` | {required} | {default} | {description_text} |")
             md_lines.append("")
 
         if config_info.get("accepts_extra_config"):
+            md_lines.append(":::note")
             md_lines.append(
-                "```{note}\n This configuration class accepts additional fields beyond those listed above. You can pass any additional configuration options that will be forwarded to the underlying provider.\n ```\n"
+                "This configuration class accepts additional fields beyond those listed above. You can pass any additional configuration options that will be forwarded to the underlying provider."
            )
+            md_lines.append(":::")
             md_lines.append("")
 
         if config_info.get("sample_config"):
@@ -240,24 +287,71 @@ def generate_provider_docs(progress, provider_spec: Any, api_name: str) -> str:
                     return obj
 
                 sample_config_dict = convert_pydantic_to_dict(sample_config)
-                md_lines.append(yaml.dump(sample_config_dict, default_flow_style=False, sort_keys=False))
+                # Strip trailing newlines from yaml.dump to prevent extra blank lines
+                yaml_output = yaml.dump(sample_config_dict, default_flow_style=False, sort_keys=False).rstrip()
+                md_lines.append(yaml_output)
             else:
                 md_lines.append("# No sample configuration available.")
         except Exception as e:
             md_lines.append(f"# Error generating sample config: {str(e)}")
 
         md_lines.append("```")
-        md_lines.append("")
 
     if hasattr(provider_spec, "deprecation_warning") and provider_spec.deprecation_warning:
         md_lines.append("## Deprecation Notice")
         md_lines.append("")
-        md_lines.append(f"```{{warning}}\n{provider_spec.deprecation_warning}\n```")
-        md_lines.append("")
+        md_lines.append(":::warning")
+        md_lines.append(provider_spec.deprecation_warning)
+        md_lines.append(":::")
 
     if hasattr(provider_spec, "deprecation_error") and provider_spec.deprecation_error:
         md_lines.append("## Deprecation Error")
         md_lines.append("")
-        md_lines.append(f"❌ **Error**: {provider_spec.deprecation_error}")
+        md_lines.append(":::danger")
+        md_lines.append(f"**Error**: {provider_spec.deprecation_error}")
+        md_lines.append(":::")
+
+    return "\n".join(md_lines) + "\n"
+
+
+def generate_index_docs(api_name: str, api_docstring: str | None, provider_entries: list) -> str:
+    """Generate MDX documentation for the index file."""
+    # Create sidebar label for the API
+    sidebar_label = api_name.replace("_", " ").title()
+
+    md_lines = []
+
+    # Add YAML frontmatter for index
+    md_lines.append("---")
+    if api_docstring:
+        clean_desc = api_docstring.strip().replace('"', '\\"')
+        md_lines.append(f'description: "{clean_desc}"')
+    md_lines.append(f"sidebar_label: {sidebar_label}")
+    md_lines.append(f"title: {api_name.title()}")
+    md_lines.append("---")
+    md_lines.append("")
+
+    # Add main content
+    md_lines.append(f"# {api_name.title()}")
+    md_lines.append("")
+    md_lines.append("## Overview")
+    md_lines.append("")
+
+    if api_docstring:
+        cleaned_docstring = api_docstring.strip()
+        md_lines.append(f"{cleaned_docstring}")
+        md_lines.append("")
+
+    md_lines.append(f"This section contains documentation for all available providers for the **{api_name}** API.")
+    md_lines.append("")
+
+    md_lines.append("## Providers")
+    md_lines.append("")
+
+    # For Docusaurus, create a simple list of links instead of toctree
+    for entry in provider_entries:
+        provider_name = entry["display_name"]
+        filename = entry["filename"]
+        md_lines.append(f"- [{provider_name}](./{filename})")
 
     return "\n".join(md_lines) + "\n"
 
@@ -272,41 +366,35 @@ def process_provider_registry(progress, change_tracker: ChangedPathTracker) -> N
 
     for api, providers in provider_registry.items():
         api_name = api.value
-        doc_output_dir = REPO_ROOT / "docs" / "source" / "providers" / api_name
+        doc_output_dir = REPO_ROOT / "docs" / "docs" / "providers" / api_name
         doc_output_dir.mkdir(parents=True, exist_ok=True)
         change_tracker.add_paths(doc_output_dir)
 
-        index_content = []
-        index_content.append(f"# {api_name.title()}\n")
-        index_content.append("## Overview\n")
-
         api_docstring = get_api_docstring(api_name)
-        if api_docstring:
-            cleaned_docstring = api_docstring.strip()
-            index_content.append(f"{cleaned_docstring}\n")
-
-        index_content.append(
-            f"This section contains documentation for all available providers for the **{api_name}** API.\n"
-        )
-
-        index_content.append("## Providers\n")
-
-        toctree_entries = []
+        provider_entries = []
 
         for provider_type, provider in sorted(providers.items()):
             filename = provider_type.replace("::", "_").replace(":", "_")
-            provider_doc_file = doc_output_dir / f"{filename}.md"
+            provider_doc_file = doc_output_dir / f"{filename}.mdx"
 
             provider_docs = generate_provider_docs(progress, provider, api_name)
 
             provider_doc_file.write_text(provider_docs)
             change_tracker.add_paths(provider_doc_file)
-            toctree_entries.append(f"{filename}")
 
-        index_content.append(f"```{{toctree}}\n:maxdepth: 1\n\n{'\n'.join(toctree_entries)}\n```\n")
+            # Create display name for the index
+            display_name = provider_type.replace("::", " - ").replace("_", " ")
+            if display_name.startswith("inline - "):
+                display_name = display_name[9:].title()
+            else:
+                display_name = display_name.title()
 
-        index_file = doc_output_dir / "index.md"
-        index_file.write_text("\n".join(index_content))
+            provider_entries.append({"filename": filename, "display_name": display_name})
+
+        # Generate index file with frontmatter
+        index_content = generate_index_docs(api_name, api_docstring, provider_entries)
+        index_file = doc_output_dir / "index.mdx"
+        index_file.write_text(index_content)
         change_tracker.add_paths(index_file)
 
     except Exception as e:
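The sidebar/display label rule that appears twice above, in `generate_provider_docs` and `process_provider_registry`, can be summarized as a small standalone sketch; the function name here is hypothetical, extracted only for illustration:

```python
def sidebar_label(provider_type: str) -> str:
    """Derive a human-friendly label from a provider type string."""
    label = provider_type.replace("::", " - ").replace("_", " ")
    if label.startswith("inline - "):
        # Inline providers drop the 9-character "inline - " prefix before title-casing
        return label[9:].title()
    return label.title()


# These match the frontmatter emitted for the provider pages renamed in this diff:
assert sidebar_label("remote::qdrant") == "Remote - Qdrant"
assert sidebar_label("remote::weaviate") == "Remote - Weaviate"
assert sidebar_label("inline::sentence-transformers") == "Sentence-Transformers"
```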