diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cc316541a..ebbadefa6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,7 @@ repos: - id: check-added-large-files args: ['--maxkb=1000'] - id: end-of-file-fixer - exclude: '^(.*\.svg)$' + exclude: '^(.*\.svg|.*\.md)$' - id: no-commit-to-branch - id: check-yaml args: ["--unsafe"] @@ -95,6 +95,15 @@ repos: pass_filenames: false require_serial: true files: ^llama_stack/templates/.*$|^llama_stack/providers/.*/inference/.*/models\.py$ + - id: provider-codegen + name: Provider Codegen + additional_dependencies: + - uv==0.7.8 + entry: uv run --group codegen ./scripts/provider_codegen.py + language: python + pass_filenames: false + require_serial: true + files: ^llama_stack/providers/.*$ - id: openapi-codegen name: API Spec Codegen additional_dependencies: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index caabf1af8..b9b25cedf 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -139,6 +139,8 @@ uv sync justification for bypassing the check. * Don't use unicode characters in the codebase. ASCII-only is preferred for compatibility or readability reasons. +* Providers configuration class should be Pydantic Field class. It should have a `description` field + that describes the configuration. These descriptions will be used to generate the provider documentation. ## Common Tasks @@ -157,10 +159,19 @@ cd llama-stack LLAMA_STACK_DIR=$(pwd) LLAMA_STACK_CLIENT_DIR=../llama-stack-client-python llama stack build --template <...> ``` +### Updating distribution configurations -### Updating Provider Configurations +If you have made changes to a provider's configuration in any form (introducing a new config key, or +changing models, etc.), you should run `./scripts/distro_codegen.py` to re-generate various YAML +files as well as the documentation. You should not change `docs/source/.../distributions/` files +manually as they are auto-generated. -If you have made changes to a provider's configuration in any form (introducing a new config key, or changing models, etc.), you should run `./scripts/distro_codegen.py` to re-generate various YAML files as well as the documentation. You should not change `docs/source/.../distributions/` files manually as they are auto-generated. +### Updating the provider documentation + +If you have made changes to a provider's configuration, you should run `./scripts/distro_codegen.py` +to re-generate the documentation. You should not change `docs/source/.../providers/` files manually +as they are auto-generated. +Note that the provider "description" field will be used to generate the provider documentation. ### Building the Documentation diff --git a/docs/source/getting_started/index.md b/docs/source/getting_started/index.md index ee7cdd4a9..8382758cc 100644 --- a/docs/source/getting_started/index.md +++ b/docs/source/getting_started/index.md @@ -6,7 +6,7 @@ Llama Stack is a stateful service with REST APIs to support the seamless transit environments. You can build and test using a local server first and deploy to a hosted endpoint for production. In this guide, we'll walk through how to build a RAG application locally using Llama Stack with [Ollama](https://ollama.com/) -as the inference [provider](../providers/index.md#inference) for a Llama Model. +as the inference [provider](../providers/inference/index) for a Llama Model. #### Step 1: Install and setup 1. 
Install [uv](https://docs.astral.sh/uv/) diff --git a/docs/source/providers/agents/index.md b/docs/source/providers/agents/index.md new file mode 100644 index 000000000..ebc134ce9 --- /dev/null +++ b/docs/source/providers/agents/index.md @@ -0,0 +1,5 @@ +# Agents Providers + +This section contains documentation for all available providers for the **agents** API. + +- [inline::meta-reference](inline_meta-reference.md) \ No newline at end of file diff --git a/docs/source/providers/agents/inline_meta-reference.md b/docs/source/providers/agents/inline_meta-reference.md new file mode 100644 index 000000000..cfc0c6881 --- /dev/null +++ b/docs/source/providers/agents/inline_meta-reference.md @@ -0,0 +1,26 @@ +# inline::meta-reference + +## Description + +Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `persistence_store` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | +| `responses_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | | + +## Sample Configuration + +```yaml +persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/agents_store.db +responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/responses_store.db + +``` + diff --git a/docs/source/providers/datasetio/index.md b/docs/source/providers/datasetio/index.md new file mode 100644 index 000000000..726bc75b8 --- /dev/null +++ b/docs/source/providers/datasetio/index.md @@ -0,0 +1,7 @@ +# Datasetio Providers + +This section contains documentation for all available providers for the **datasetio** API. + +- [inline::localfs](inline_localfs.md) +- [remote::huggingface](remote_huggingface.md) +- [remote::nvidia](remote_nvidia.md) \ No newline at end of file diff --git a/docs/source/providers/datasetio/inline_localfs.md b/docs/source/providers/datasetio/inline_localfs.md new file mode 100644 index 000000000..fbe4c40e3 --- /dev/null +++ b/docs/source/providers/datasetio/inline_localfs.md @@ -0,0 +1,22 @@ +# inline::localfs + +## Description + +Local filesystem-based dataset I/O provider for reading and writing datasets to local storage. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | + +## Sample Configuration + +```yaml +kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/localfs_datasetio.db + +``` + diff --git a/docs/source/providers/datasetio/remote_huggingface.md b/docs/source/providers/datasetio/remote_huggingface.md new file mode 100644 index 000000000..e2052602e --- /dev/null +++ b/docs/source/providers/datasetio/remote_huggingface.md @@ -0,0 +1,22 @@ +# remote::huggingface + +## Description + +HuggingFace datasets provider for accessing and managing datasets from the HuggingFace Hub. 
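+The Configuration table below is generated from this provider's Pydantic config class; each field's `description` becomes the table's Description column. A minimal sketch of that pattern (class and field names here are illustrative, not the real HuggingFace datasetio config):
+
+```python
+from pydantic import BaseModel, Field
+
+
+class ExampleDatasetIOConfig(BaseModel):
+    # The description below is what the docs generator surfaces in the table.
+    api_token: str | None = Field(
+        default=None,
+        description="Optional Hugging Face Hub token for private datasets.",
+    )
+
+
+# Inspect the metadata the generator reads (Pydantic v2).
+for name, field in ExampleDatasetIOConfig.model_fields.items():
+    print(name, field.annotation, field.is_required(), field.description)
+```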
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | + +## Sample Configuration + +```yaml +kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/huggingface_datasetio.db + +``` + diff --git a/docs/source/providers/datasetio/remote_nvidia.md b/docs/source/providers/datasetio/remote_nvidia.md new file mode 100644 index 000000000..b5a672a54 --- /dev/null +++ b/docs/source/providers/datasetio/remote_nvidia.md @@ -0,0 +1,25 @@ +# remote::nvidia + +## Description + +NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The NVIDIA API key. | +| `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. | +| `project_id` | `str \| None` | No | test-project | The NVIDIA project ID. | +| `datasets_url` | `` | No | http://nemo.test | Base URL for the NeMo Dataset API | + +## Sample Configuration + +```yaml +api_key: ${env.NVIDIA_API_KEY:+} +dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} +project_id: ${env.NVIDIA_PROJECT_ID:=test-project} +datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test} + +``` + diff --git a/docs/source/providers/eval/index.md b/docs/source/providers/eval/index.md new file mode 100644 index 000000000..330380670 --- /dev/null +++ b/docs/source/providers/eval/index.md @@ -0,0 +1,6 @@ +# Eval Providers + +This section contains documentation for all available providers for the **eval** API. + +- [inline::meta-reference](inline_meta-reference.md) +- [remote::nvidia](remote_nvidia.md) \ No newline at end of file diff --git a/docs/source/providers/eval/inline_meta-reference.md b/docs/source/providers/eval/inline_meta-reference.md new file mode 100644 index 000000000..704741b5a --- /dev/null +++ b/docs/source/providers/eval/inline_meta-reference.md @@ -0,0 +1,22 @@ +# inline::meta-reference + +## Description + +Meta's reference implementation of evaluation tasks with support for multiple languages and evaluation metrics. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | + +## Sample Configuration + +```yaml +kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/meta_reference_eval.db + +``` + diff --git a/docs/source/providers/eval/remote_nvidia.md b/docs/source/providers/eval/remote_nvidia.md new file mode 100644 index 000000000..cb764b511 --- /dev/null +++ b/docs/source/providers/eval/remote_nvidia.md @@ -0,0 +1,19 @@ +# remote::nvidia + +## Description + +NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform. 
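+The sample configuration below uses `${env.VAR:=default}` placeholders, which Llama Stack resolves against the environment when the run config is loaded. A rough, self-contained sketch of that substitution (not the actual resolver):
+
+```python
+import os
+import re
+
+
+def resolve_env(value: str) -> str:
+    # ${env.VAR:=default} -> the variable's value, or the default when unset.
+    pattern = re.compile(r"\$\{env\.([A-Za-z0-9_]+):=([^}]*)\}")
+    return pattern.sub(lambda m: os.environ.get(m.group(1), m.group(2)), value)
+
+
+print(resolve_env("${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}"))
+# -> http://localhost:7331 unless NVIDIA_EVALUATOR_URL is exported
+```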
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `evaluator_url` | `` | No | http://0.0.0.0:7331 | The url for accessing the evaluator service | + +## Sample Configuration + +```yaml +evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331} + +``` + diff --git a/docs/source/providers/files/index.md b/docs/source/providers/files/index.md new file mode 100644 index 000000000..25d9b05ba --- /dev/null +++ b/docs/source/providers/files/index.md @@ -0,0 +1,5 @@ +# Files Providers + +This section contains documentation for all available providers for the **files** API. + +- [inline::localfs](inline_localfs.md) \ No newline at end of file diff --git a/docs/source/providers/files/inline_localfs.md b/docs/source/providers/files/inline_localfs.md new file mode 100644 index 000000000..54c489c7d --- /dev/null +++ b/docs/source/providers/files/inline_localfs.md @@ -0,0 +1,24 @@ +# inline::localfs + +## Description + +Local filesystem-based file storage provider for managing files and documents locally. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `storage_dir` | `` | No | PydanticUndefined | Directory to store uploaded files | +| `metadata_store` | `utils.sqlstore.sqlstore.SqliteSqlStoreConfig \| utils.sqlstore.sqlstore.PostgresSqlStoreConfig` | No | sqlite | SQL store configuration for file metadata | +| `ttl_secs` | `` | No | 31536000 | | + +## Sample Configuration + +```yaml +storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/dummy/files} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/files_metadata.db + +``` + diff --git a/docs/source/providers/index.md b/docs/source/providers/index.md index 1f5026479..6689b58bc 100644 --- a/docs/source/providers/index.md +++ b/docs/source/providers/index.md @@ -18,60 +18,92 @@ Llama Stack supports external providers that live outside of the main codebase. ## Agents Run multi-step agentic workflows with LLMs with tool usage, memory (RAG), etc. +```{toctree} +:maxdepth: 1 + +agents/index +``` + ## DatasetIO Interfaces with datasets and data loaders. -## Eval -Generates outputs (via Inference or Agents) and perform scoring. - -## Inference -Runs inference with an LLM. - -## Post Training -Fine-tunes a model. - -#### Post Training Providers -The following providers are available for Post Training: - ```{toctree} :maxdepth: 1 -external -post_training/huggingface -post_training/torchtune -post_training/nvidia_nemo +datasetio/index +``` + +## Eval +Generates outputs (via Inference or Agents) and perform scoring. + +```{toctree} +:maxdepth: 1 + +eval/index +``` + +## Inference +Runs inference with an LLM. + +```{toctree} +:maxdepth: 1 + +inference/index +``` + +## Post Training +Fine-tunes a model. + +```{toctree} +:maxdepth: 1 + +post_training/index ``` ## Safety Applies safety policies to the output at a Systems (not only model) level. +```{toctree} +:maxdepth: 1 + +safety/index +``` + ## Scoring Evaluates the outputs of the system. +```{toctree} +:maxdepth: 1 + +scoring/index +``` + ## Telemetry Collects telemetry data from the system. +```{toctree} +:maxdepth: 1 + +telemetry/index +``` + ## Tool Runtime Is associated with the ToolGroup resouces. +```{toctree} +:maxdepth: 1 + +tool_runtime/index +``` + ## Vector IO Vector IO refers to operations on vector databases, such as adding documents, searching, and deleting documents. 
Vector IO plays a crucial role in [Retreival Augmented Generation (RAG)](../..//building_applications/rag), where the vector io and database are used to store and retrieve documents for retrieval. -#### Vector IO Providers -The following providers (i.e., databases) are available for Vector IO: - ```{toctree} :maxdepth: 1 -external -vector_io/faiss -vector_io/sqlite-vec -vector_io/chromadb -vector_io/pgvector -vector_io/qdrant -vector_io/milvus -vector_io/weaviate +vector_io/index ``` diff --git a/docs/source/providers/inference/index.md b/docs/source/providers/inference/index.md new file mode 100644 index 000000000..05773efce --- /dev/null +++ b/docs/source/providers/inference/index.md @@ -0,0 +1,32 @@ +# Inference Providers + +This section contains documentation for all available providers for the **inference** API. + +- [inline::meta-reference](inline_meta-reference.md) +- [inline::sentence-transformers](inline_sentence-transformers.md) +- [inline::vllm](inline_vllm.md) +- [remote::anthropic](remote_anthropic.md) +- [remote::bedrock](remote_bedrock.md) +- [remote::cerebras](remote_cerebras.md) +- [remote::cerebras-openai-compat](remote_cerebras-openai-compat.md) +- [remote::databricks](remote_databricks.md) +- [remote::fireworks](remote_fireworks.md) +- [remote::fireworks-openai-compat](remote_fireworks-openai-compat.md) +- [remote::gemini](remote_gemini.md) +- [remote::groq](remote_groq.md) +- [remote::groq-openai-compat](remote_groq-openai-compat.md) +- [remote::hf::endpoint](remote_hf_endpoint.md) +- [remote::hf::serverless](remote_hf_serverless.md) +- [remote::llama-openai-compat](remote_llama-openai-compat.md) +- [remote::nvidia](remote_nvidia.md) +- [remote::ollama](remote_ollama.md) +- [remote::openai](remote_openai.md) +- [remote::passthrough](remote_passthrough.md) +- [remote::runpod](remote_runpod.md) +- [remote::sambanova](remote_sambanova.md) +- [remote::sambanova-openai-compat](remote_sambanova-openai-compat.md) +- [remote::tgi](remote_tgi.md) +- [remote::together](remote_together.md) +- [remote::together-openai-compat](remote_together-openai-compat.md) +- [remote::vllm](remote_vllm.md) +- [remote::watsonx](remote_watsonx.md) \ No newline at end of file diff --git a/docs/source/providers/inference/inline_meta-reference.md b/docs/source/providers/inference/inline_meta-reference.md new file mode 100644 index 000000000..eca12a839 --- /dev/null +++ b/docs/source/providers/inference/inline_meta-reference.md @@ -0,0 +1,32 @@ +# inline::meta-reference + +## Description + +Meta's reference implementation of inference with support for various model formats and optimization techniques. 
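+Once a distribution built on this provider is running, inference goes through the standard client API. A minimal sketch, assuming the `llama_stack_client` package, a server on the default port 8321, and an illustrative model id (method and attribute names follow recent client releases and may differ in yours):
+
+```python
+from llama_stack_client import LlamaStackClient
+
+client = LlamaStackClient(base_url="http://localhost:8321")
+
+response = client.inference.chat_completion(
+    model_id="meta-llama/Llama-3.2-3B-Instruct",  # illustrative; use the model you configured
+    messages=[{"role": "user", "content": "Write a haiku about GPUs."}],
+)
+print(response.completion_message.content)
+```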
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `model` | `str \| None` | No | | | +| `torch_seed` | `int \| None` | No | | | +| `max_seq_len` | `` | No | 4096 | | +| `max_batch_size` | `` | No | 1 | | +| `model_parallel_size` | `int \| None` | No | | | +| `create_distributed_process_group` | `` | No | True | | +| `checkpoint_dir` | `str \| None` | No | | | +| `quantization` | `Bf16QuantizationConfig \| Fp8QuantizationConfig \| Int4QuantizationConfig, annotation=NoneType, required=True, discriminator='type'` | No | | | + +## Sample Configuration + +```yaml +model: Llama3.2-3B-Instruct +checkpoint_dir: ${env.CHECKPOINT_DIR:=null} +quantization: + type: ${env.QUANTIZATION_TYPE:=bf16} +model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0} +max_batch_size: ${env.MAX_BATCH_SIZE:=1} +max_seq_len: ${env.MAX_SEQ_LEN:=4096} + +``` + diff --git a/docs/source/providers/inference/inline_sentence-transformers.md b/docs/source/providers/inference/inline_sentence-transformers.md new file mode 100644 index 000000000..57ec7f7d0 --- /dev/null +++ b/docs/source/providers/inference/inline_sentence-transformers.md @@ -0,0 +1,13 @@ +# inline::sentence-transformers + +## Description + +Sentence Transformers inference provider for text embeddings and similarity search. + +## Sample Configuration + +```yaml +{} + +``` + diff --git a/docs/source/providers/inference/inline_vllm.md b/docs/source/providers/inference/inline_vllm.md new file mode 100644 index 000000000..6ea34acb8 --- /dev/null +++ b/docs/source/providers/inference/inline_vllm.md @@ -0,0 +1,29 @@ +# inline::vllm + +## Description + +vLLM inference provider for high-performance model serving with PagedAttention and continuous batching. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `tensor_parallel_size` | `` | No | 1 | Number of tensor parallel replicas (number of GPUs to use). | +| `max_tokens` | `` | No | 4096 | Maximum number of tokens to generate. | +| `max_model_len` | `` | No | 4096 | Maximum context length to use during serving. | +| `max_num_seqs` | `` | No | 4 | Maximum parallel batch size for generation. | +| `enforce_eager` | `` | No | False | Whether to use eager mode for inference (otherwise cuda graphs are used). | +| `gpu_memory_utilization` | `` | No | 0.3 | How much GPU memory will be allocated when this provider has finished loading, including memory that was already allocated before loading. | + +## Sample Configuration + +```yaml +tensor_parallel_size: ${env.TENSOR_PARALLEL_SIZE:=1} +max_tokens: ${env.MAX_TOKENS:=4096} +max_model_len: ${env.MAX_MODEL_LEN:=4096} +max_num_seqs: ${env.MAX_NUM_SEQS:=4} +enforce_eager: ${env.ENFORCE_EAGER:=False} +gpu_memory_utilization: ${env.GPU_MEMORY_UTILIZATION:=0.3} + +``` + diff --git a/docs/source/providers/inference/remote_anthropic.md b/docs/source/providers/inference/remote_anthropic.md new file mode 100644 index 000000000..79d5a3f6e --- /dev/null +++ b/docs/source/providers/inference/remote_anthropic.md @@ -0,0 +1,19 @@ +# remote::anthropic + +## Description + +Anthropic inference provider for accessing Claude models and Anthropic's AI services. 
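+The sample configuration below references `${env.ANTHROPIC_API_KEY}` with no default, so the variable must be set in the environment that launches the stack. A small pre-flight check you could run beforehand (illustrative helper, not part of Llama Stack):
+
+```python
+import os
+import sys
+
+required = ["ANTHROPIC_API_KEY"]  # keys this provider's sample config expects
+missing = [name for name in required if not os.environ.get(name)]
+if missing:
+    sys.exit(f"Missing environment variables: {', '.join(missing)}")
+print("All provider credentials are present.")
+```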
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | API key for Anthropic models | + +## Sample Configuration + +```yaml +api_key: ${env.ANTHROPIC_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_bedrock.md b/docs/source/providers/inference/remote_bedrock.md new file mode 100644 index 000000000..1454c54c2 --- /dev/null +++ b/docs/source/providers/inference/remote_bedrock.md @@ -0,0 +1,28 @@ +# remote::bedrock + +## Description + +AWS Bedrock inference provider for accessing various AI models through AWS's managed service. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID | +| `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY | +| `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN | +| `region_name` | `str \| None` | No | | The default AWS Region to use, for example, us-west-1 or us-west-2.Default use environment variable: AWS_DEFAULT_REGION | +| `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE | +| `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS | +| `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform.Default use environment variable: AWS_RETRY_MODE | +| `connect_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. | +| `read_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. | +| `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). | + +## Sample Configuration + +```yaml +{} + +``` + diff --git a/docs/source/providers/inference/remote_cerebras-openai-compat.md b/docs/source/providers/inference/remote_cerebras-openai-compat.md new file mode 100644 index 000000000..64b899246 --- /dev/null +++ b/docs/source/providers/inference/remote_cerebras-openai-compat.md @@ -0,0 +1,21 @@ +# remote::cerebras-openai-compat + +## Description + +Cerebras OpenAI-compatible provider for using Cerebras models with OpenAI API format. 
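+"OpenAI API format" means the provider sends standard OpenAI-style chat completion requests to the `openai_compat_api_base` shown below. For intuition, an equivalent direct call with the `openai` SDK against that base URL looks like this (the model name is an illustrative assumption; the provider issues similar requests on your behalf):
+
+```python
+import os
+
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="https://api.cerebras.ai/v1",
+    api_key=os.environ["CEREBRAS_API_KEY"],
+)
+resp = client.chat.completions.create(
+    model="llama3.1-8b",  # illustrative model name
+    messages=[{"role": "user", "content": "Say hello in one sentence."}],
+)
+print(resp.choices[0].message.content)
+```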
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The Cerebras API key | +| `openai_compat_api_base` | `` | No | https://api.cerebras.ai/v1 | The URL for the Cerebras API server | + +## Sample Configuration + +```yaml +openai_compat_api_base: https://api.cerebras.ai/v1 +api_key: ${env.CEREBRAS_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_cerebras.md b/docs/source/providers/inference/remote_cerebras.md new file mode 100644 index 000000000..c9793d7de --- /dev/null +++ b/docs/source/providers/inference/remote_cerebras.md @@ -0,0 +1,21 @@ +# remote::cerebras + +## Description + +Cerebras inference provider for running models on Cerebras Cloud platform. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `base_url` | `` | No | https://api.cerebras.ai | Base URL for the Cerebras API | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | Cerebras API Key | + +## Sample Configuration + +```yaml +base_url: https://api.cerebras.ai +api_key: ${env.CEREBRAS_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_databricks.md b/docs/source/providers/inference/remote_databricks.md new file mode 100644 index 000000000..c611d9414 --- /dev/null +++ b/docs/source/providers/inference/remote_databricks.md @@ -0,0 +1,21 @@ +# remote::databricks + +## Description + +Databricks inference provider for running models on Databricks' unified analytics platform. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `` | No | | The URL for the Databricks model serving endpoint | +| `api_token` | `` | No | | The Databricks API token | + +## Sample Configuration + +```yaml +url: ${env.DATABRICKS_URL} +api_token: ${env.DATABRICKS_API_TOKEN} + +``` + diff --git a/docs/source/providers/inference/remote_fireworks-openai-compat.md b/docs/source/providers/inference/remote_fireworks-openai-compat.md new file mode 100644 index 000000000..0a2bd0fe8 --- /dev/null +++ b/docs/source/providers/inference/remote_fireworks-openai-compat.md @@ -0,0 +1,21 @@ +# remote::fireworks-openai-compat + +## Description + +Fireworks AI OpenAI-compatible provider for using Fireworks models with OpenAI API format. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The Fireworks API key | +| `openai_compat_api_base` | `` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks API server | + +## Sample Configuration + +```yaml +openai_compat_api_base: https://api.fireworks.ai/inference/v1 +api_key: ${env.FIREWORKS_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_fireworks.md b/docs/source/providers/inference/remote_fireworks.md new file mode 100644 index 000000000..351586c34 --- /dev/null +++ b/docs/source/providers/inference/remote_fireworks.md @@ -0,0 +1,21 @@ +# remote::fireworks + +## Description + +Fireworks AI inference provider for Llama models and other AI models on the Fireworks platform. 
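+After the stack starts with this provider, you can confirm which models are registered and which provider serves them. A short sketch, assuming the `llama_stack_client` package and the default server address (attribute names reflect recent client versions and may differ in yours):
+
+```python
+from llama_stack_client import LlamaStackClient
+
+client = LlamaStackClient(base_url="http://localhost:8321")
+for model in client.models.list():
+    print(model.identifier, "->", model.provider_id)
+```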
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | The Fireworks.ai API Key | + +## Sample Configuration + +```yaml +url: https://api.fireworks.ai/inference/v1 +api_key: ${env.FIREWORKS_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_gemini.md b/docs/source/providers/inference/remote_gemini.md new file mode 100644 index 000000000..cafcd787d --- /dev/null +++ b/docs/source/providers/inference/remote_gemini.md @@ -0,0 +1,19 @@ +# remote::gemini + +## Description + +Google Gemini inference provider for accessing Gemini models and Google's AI services. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | API key for Gemini models | + +## Sample Configuration + +```yaml +api_key: ${env.GEMINI_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_groq-openai-compat.md b/docs/source/providers/inference/remote_groq-openai-compat.md new file mode 100644 index 000000000..e424bedd2 --- /dev/null +++ b/docs/source/providers/inference/remote_groq-openai-compat.md @@ -0,0 +1,21 @@ +# remote::groq-openai-compat + +## Description + +Groq OpenAI-compatible provider for using Groq models with OpenAI API format. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The Groq API key | +| `openai_compat_api_base` | `` | No | https://api.groq.com/openai/v1 | The URL for the Groq API server | + +## Sample Configuration + +```yaml +openai_compat_api_base: https://api.groq.com/openai/v1 +api_key: ${env.GROQ_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_groq.md b/docs/source/providers/inference/remote_groq.md new file mode 100644 index 000000000..4f734f263 --- /dev/null +++ b/docs/source/providers/inference/remote_groq.md @@ -0,0 +1,21 @@ +# remote::groq + +## Description + +Groq inference provider for ultra-fast inference using Groq's LPU technology. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The Groq API key | +| `url` | `` | No | https://api.groq.com | The URL for the Groq AI server | + +## Sample Configuration + +```yaml +url: https://api.groq.com +api_key: ${env.GROQ_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_hf_endpoint.md b/docs/source/providers/inference/remote_hf_endpoint.md new file mode 100644 index 000000000..f9ca6b538 --- /dev/null +++ b/docs/source/providers/inference/remote_hf_endpoint.md @@ -0,0 +1,21 @@ +# remote::hf::endpoint + +## Description + +HuggingFace Inference Endpoints provider for dedicated model serving. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `endpoint_name` | `` | No | PydanticUndefined | The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided. 
| +| `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | + +## Sample Configuration + +```yaml +endpoint_name: ${env.INFERENCE_ENDPOINT_NAME} +api_token: ${env.HF_API_TOKEN} + +``` + diff --git a/docs/source/providers/inference/remote_hf_serverless.md b/docs/source/providers/inference/remote_hf_serverless.md new file mode 100644 index 000000000..345af3e49 --- /dev/null +++ b/docs/source/providers/inference/remote_hf_serverless.md @@ -0,0 +1,21 @@ +# remote::hf::serverless + +## Description + +HuggingFace Inference API serverless provider for on-demand model inference. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `huggingface_repo` | `` | No | PydanticUndefined | The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct') | +| `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | + +## Sample Configuration + +```yaml +huggingface_repo: ${env.INFERENCE_MODEL} +api_token: ${env.HF_API_TOKEN} + +``` + diff --git a/docs/source/providers/inference/remote_llama-openai-compat.md b/docs/source/providers/inference/remote_llama-openai-compat.md new file mode 100644 index 000000000..5c97aebc3 --- /dev/null +++ b/docs/source/providers/inference/remote_llama-openai-compat.md @@ -0,0 +1,21 @@ +# remote::llama-openai-compat + +## Description + +Llama OpenAI-compatible provider for using Llama models with OpenAI API format. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The Llama API key | +| `openai_compat_api_base` | `` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server | + +## Sample Configuration + +```yaml +openai_compat_api_base: https://api.llama.com/compat/v1/ +api_key: ${env.LLAMA_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_nvidia.md b/docs/source/providers/inference/remote_nvidia.md new file mode 100644 index 000000000..eca2ec544 --- /dev/null +++ b/docs/source/providers/inference/remote_nvidia.md @@ -0,0 +1,24 @@ +# remote::nvidia + +## Description + +NVIDIA inference provider for accessing NVIDIA NIM models and AI services. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | The NVIDIA API key, only needed of using the hosted service | +| `timeout` | `` | No | 60 | Timeout for the HTTP requests | +| `append_api_version` | `` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. 
| + +## Sample Configuration + +```yaml +url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} +api_key: ${env.NVIDIA_API_KEY:+} +append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True} + +``` + diff --git a/docs/source/providers/inference/remote_ollama.md b/docs/source/providers/inference/remote_ollama.md new file mode 100644 index 000000000..7c5fc9437 --- /dev/null +++ b/docs/source/providers/inference/remote_ollama.md @@ -0,0 +1,21 @@ +# remote::ollama + +## Description + +Ollama inference provider for running local models through the Ollama runtime. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `` | No | http://localhost:11434 | | +| `raise_on_connect_error` | `` | No | True | | + +## Sample Configuration + +```yaml +url: ${env.OLLAMA_URL:=http://localhost:11434} +raise_on_connect_error: true + +``` + diff --git a/docs/source/providers/inference/remote_openai.md b/docs/source/providers/inference/remote_openai.md new file mode 100644 index 000000000..b4cfb5880 --- /dev/null +++ b/docs/source/providers/inference/remote_openai.md @@ -0,0 +1,19 @@ +# remote::openai + +## Description + +OpenAI inference provider for accessing GPT models and other OpenAI services. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | API key for OpenAI models | + +## Sample Configuration + +```yaml +api_key: ${env.OPENAI_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_passthrough.md b/docs/source/providers/inference/remote_passthrough.md new file mode 100644 index 000000000..9005e5339 --- /dev/null +++ b/docs/source/providers/inference/remote_passthrough.md @@ -0,0 +1,21 @@ +# remote::passthrough + +## Description + +Passthrough inference provider for connecting to any external inference service not directly supported. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `` | No | | The URL for the passthrough endpoint | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | API Key for the passthrouth endpoint | + +## Sample Configuration + +```yaml +url: ${env.PASSTHROUGH_URL} +api_key: ${env.PASSTHROUGH_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_runpod.md b/docs/source/providers/inference/remote_runpod.md new file mode 100644 index 000000000..b543606d5 --- /dev/null +++ b/docs/source/providers/inference/remote_runpod.md @@ -0,0 +1,21 @@ +# remote::runpod + +## Description + +RunPod inference provider for running models on RunPod's cloud GPU platform. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint | +| `api_token` | `str \| None` | No | | The API token | + +## Sample Configuration + +```yaml +url: ${env.RUNPOD_URL:+} +api_token: ${env.RUNPOD_API_TOKEN:+} + +``` + diff --git a/docs/source/providers/inference/remote_sambanova-openai-compat.md b/docs/source/providers/inference/remote_sambanova-openai-compat.md new file mode 100644 index 000000000..c213d962f --- /dev/null +++ b/docs/source/providers/inference/remote_sambanova-openai-compat.md @@ -0,0 +1,21 @@ +# remote::sambanova-openai-compat + +## Description + +SambaNova OpenAI-compatible provider for using SambaNova models with OpenAI API format. 
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The SambaNova API key | +| `openai_compat_api_base` | `` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova API server | + +## Sample Configuration + +```yaml +openai_compat_api_base: https://api.sambanova.ai/v1 +api_key: ${env.SAMBANOVA_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_sambanova.md b/docs/source/providers/inference/remote_sambanova.md new file mode 100644 index 000000000..006c41ac1 --- /dev/null +++ b/docs/source/providers/inference/remote_sambanova.md @@ -0,0 +1,21 @@ +# remote::sambanova + +## Description + +SambaNova inference provider for running models on SambaNova's dataflow architecture. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key | + +## Sample Configuration + +```yaml +url: https://api.sambanova.ai/v1 +api_key: ${env.SAMBANOVA_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_tgi.md b/docs/source/providers/inference/remote_tgi.md new file mode 100644 index 000000000..c4a749b0b --- /dev/null +++ b/docs/source/providers/inference/remote_tgi.md @@ -0,0 +1,19 @@ +# remote::tgi + +## Description + +Text Generation Inference (TGI) provider for HuggingFace model serving. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `` | No | PydanticUndefined | The URL for the TGI serving endpoint | + +## Sample Configuration + +```yaml +url: ${env.TGI_URL} + +``` + diff --git a/docs/source/providers/inference/remote_together-openai-compat.md b/docs/source/providers/inference/remote_together-openai-compat.md new file mode 100644 index 000000000..833fa8cb0 --- /dev/null +++ b/docs/source/providers/inference/remote_together-openai-compat.md @@ -0,0 +1,21 @@ +# remote::together-openai-compat + +## Description + +Together AI OpenAI-compatible provider for using Together models with OpenAI API format. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The Together API key | +| `openai_compat_api_base` | `` | No | https://api.together.xyz/v1 | The URL for the Together API server | + +## Sample Configuration + +```yaml +openai_compat_api_base: https://api.together.xyz/v1 +api_key: ${env.TOGETHER_API_KEY} + +``` + diff --git a/docs/source/providers/inference/remote_together.md b/docs/source/providers/inference/remote_together.md new file mode 100644 index 000000000..4e0e0a9ce --- /dev/null +++ b/docs/source/providers/inference/remote_together.md @@ -0,0 +1,21 @@ +# remote::together + +## Description + +Together AI inference provider for open-source models and collaborative AI development. 
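+Note that `api_key` is typed as `pydantic.types.SecretStr`, so the key stays masked if the resolved configuration is ever printed or logged. A small illustration of that behavior with a generic Pydantic model (not the actual Together config class):
+
+```python
+from pydantic import BaseModel, SecretStr
+
+
+class ExampleProviderConfig(BaseModel):
+    api_key: SecretStr | None = None
+
+
+cfg = ExampleProviderConfig(api_key="together-demo-key")
+print(cfg)                             # api_key is rendered as '**********'
+print(cfg.api_key.get_secret_value())  # explicit opt-in to read the raw value
+```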
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `` | No | https://api.together.xyz/v1 | The URL for the Together AI server | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | The Together AI API Key | + +## Sample Configuration + +```yaml +url: https://api.together.xyz/v1 +api_key: ${env.TOGETHER_API_KEY:+} + +``` + diff --git a/docs/source/providers/inference/remote_vllm.md b/docs/source/providers/inference/remote_vllm.md new file mode 100644 index 000000000..6c725fb41 --- /dev/null +++ b/docs/source/providers/inference/remote_vllm.md @@ -0,0 +1,25 @@ +# remote::vllm + +## Description + +Remote vLLM inference provider for connecting to vLLM servers. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint | +| `max_tokens` | `` | No | 4096 | Maximum number of tokens to generate. | +| `api_token` | `str \| None` | No | fake | The API token | +| `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. | + +## Sample Configuration + +```yaml +url: ${env.VLLM_URL} +max_tokens: ${env.VLLM_MAX_TOKENS:=4096} +api_token: ${env.VLLM_API_TOKEN:=fake} +tls_verify: ${env.VLLM_TLS_VERIFY:=true} + +``` + diff --git a/docs/source/providers/inference/remote_watsonx.md b/docs/source/providers/inference/remote_watsonx.md new file mode 100644 index 000000000..fce0373fa --- /dev/null +++ b/docs/source/providers/inference/remote_watsonx.md @@ -0,0 +1,24 @@ +# remote::watsonx + +## Description + +IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | The watsonx API key, only needed of using the hosted service | +| `project_id` | `str \| None` | No | | The Project ID key, only needed of using the hosted service | +| `timeout` | `` | No | 60 | Timeout for the HTTP requests | + +## Sample Configuration + +```yaml +url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} +api_key: ${env.WATSONX_API_KEY:+} +project_id: ${env.WATSONX_PROJECT_ID:+} + +``` + diff --git a/docs/source/providers/post_training/index.md b/docs/source/providers/post_training/index.md new file mode 100644 index 000000000..35d10d14b --- /dev/null +++ b/docs/source/providers/post_training/index.md @@ -0,0 +1,7 @@ +# Post_Training Providers + +This section contains documentation for all available providers for the **post_training** API. + +- [inline::huggingface](inline_huggingface.md) +- [inline::torchtune](inline_torchtune.md) +- [remote::nvidia](remote_nvidia.md) \ No newline at end of file diff --git a/docs/source/providers/post_training/inline_huggingface.md b/docs/source/providers/post_training/inline_huggingface.md new file mode 100644 index 000000000..82b08bf7a --- /dev/null +++ b/docs/source/providers/post_training/inline_huggingface.md @@ -0,0 +1,36 @@ +# inline::huggingface + +## Description + +HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem. 
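+The default `chat_template` shown in the table below wraps each training pair in `<|user|>` / `<|assistant|>` markers before fine-tuning. Purely as an illustration of that formatting (the provider applies it internally):
+
+```python
+chat_template = "<|user|>\n{input}\n<|assistant|>\n{output}"
+
+example = {"input": "What is the capital of France?", "output": "Paris."}
+print(chat_template.format(**example))
+```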
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `device` | `` | No | cuda | | +| `distributed_backend` | `Literal['fsdp', 'deepspeed'` | No | | | +| `checkpoint_format` | `Literal['full_state', 'huggingface'` | No | huggingface | | +| `chat_template` | `` | No | <|user|> +{input} +<|assistant|> +{output} | | +| `model_specific_config` | `` | No | {'trust_remote_code': True, 'attn_implementation': 'sdpa'} | | +| `max_seq_length` | `` | No | 2048 | | +| `gradient_checkpointing` | `` | No | False | | +| `save_total_limit` | `` | No | 3 | | +| `logging_steps` | `` | No | 10 | | +| `warmup_ratio` | `` | No | 0.1 | | +| `weight_decay` | `` | No | 0.01 | | +| `dataloader_num_workers` | `` | No | 4 | | +| `dataloader_pin_memory` | `` | No | True | | + +## Sample Configuration + +```yaml +checkpoint_format: huggingface +distributed_backend: null +device: cpu + +``` + diff --git a/docs/source/providers/post_training/inline_torchtune.md b/docs/source/providers/post_training/inline_torchtune.md new file mode 100644 index 000000000..82730e54b --- /dev/null +++ b/docs/source/providers/post_training/inline_torchtune.md @@ -0,0 +1,20 @@ +# inline::torchtune + +## Description + +TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `torch_seed` | `int \| None` | No | | | +| `checkpoint_format` | `Literal['meta', 'huggingface'` | No | meta | | + +## Sample Configuration + +```yaml +checkpoint_format: meta + +``` + diff --git a/docs/source/providers/post_training/remote_nvidia.md b/docs/source/providers/post_training/remote_nvidia.md new file mode 100644 index 000000000..050afb763 --- /dev/null +++ b/docs/source/providers/post_training/remote_nvidia.md @@ -0,0 +1,28 @@ +# remote::nvidia + +## Description + +NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The NVIDIA API key. | +| `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. | +| `project_id` | `str \| None` | No | test-example-model@v1 | The NVIDIA project ID. | +| `customizer_url` | `str \| None` | No | | Base URL for the NeMo Customizer API | +| `timeout` | `` | No | 300 | Timeout for the NVIDIA Post Training API | +| `max_retries` | `` | No | 3 | Maximum number of retries for the NVIDIA Post Training API | +| `output_model_dir` | `` | No | test-example-model@v1 | Directory to save the output model | + +## Sample Configuration + +```yaml +api_key: ${env.NVIDIA_API_KEY:+} +dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default} +project_id: ${env.NVIDIA_PROJECT_ID:=test-project} +customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test} + +``` + diff --git a/docs/source/providers/safety/index.md b/docs/source/providers/safety/index.md new file mode 100644 index 000000000..1a245c13d --- /dev/null +++ b/docs/source/providers/safety/index.md @@ -0,0 +1,10 @@ +# Safety Providers + +This section contains documentation for all available providers for the **safety** API. 
+ +- [inline::code-scanner](inline_code-scanner.md) +- [inline::llama-guard](inline_llama-guard.md) +- [inline::prompt-guard](inline_prompt-guard.md) +- [remote::bedrock](remote_bedrock.md) +- [remote::nvidia](remote_nvidia.md) +- [remote::sambanova](remote_sambanova.md) \ No newline at end of file diff --git a/docs/source/providers/safety/inline_code-scanner.md b/docs/source/providers/safety/inline_code-scanner.md new file mode 100644 index 000000000..3a3e90b3d --- /dev/null +++ b/docs/source/providers/safety/inline_code-scanner.md @@ -0,0 +1,13 @@ +# inline::code-scanner + +## Description + +Code Scanner safety provider for detecting security vulnerabilities and unsafe code patterns. + +## Sample Configuration + +```yaml +{} + +``` + diff --git a/docs/source/providers/safety/inline_llama-guard.md b/docs/source/providers/safety/inline_llama-guard.md new file mode 100644 index 000000000..4f57898ec --- /dev/null +++ b/docs/source/providers/safety/inline_llama-guard.md @@ -0,0 +1,19 @@ +# inline::llama-guard + +## Description + +Llama Guard safety provider for content moderation and safety filtering using Meta's Llama Guard model. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `excluded_categories` | `list[str` | No | [] | | + +## Sample Configuration + +```yaml +excluded_categories: [] + +``` + diff --git a/docs/source/providers/safety/inline_prompt-guard.md b/docs/source/providers/safety/inline_prompt-guard.md new file mode 100644 index 000000000..10a6b8d3f --- /dev/null +++ b/docs/source/providers/safety/inline_prompt-guard.md @@ -0,0 +1,19 @@ +# inline::prompt-guard + +## Description + +Prompt Guard safety provider for detecting and filtering unsafe prompts and content. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `guard_type` | `` | No | injection | | + +## Sample Configuration + +```yaml +guard_type: injection + +``` + diff --git a/docs/source/providers/safety/remote_bedrock.md b/docs/source/providers/safety/remote_bedrock.md new file mode 100644 index 000000000..3c1d6bcb0 --- /dev/null +++ b/docs/source/providers/safety/remote_bedrock.md @@ -0,0 +1,28 @@ +# remote::bedrock + +## Description + +AWS Bedrock safety provider for content moderation using AWS's safety services. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID | +| `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY | +| `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN | +| `region_name` | `str \| None` | No | | The default AWS Region to use, for example, us-west-1 or us-west-2.Default use environment variable: AWS_DEFAULT_REGION | +| `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE | +| `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. 
Default use environment variable: AWS_MAX_ATTEMPTS | +| `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform.Default use environment variable: AWS_RETRY_MODE | +| `connect_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. | +| `read_timeout` | `float \| None` | No | 60 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. | +| `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). | + +## Sample Configuration + +```yaml +{} + +``` + diff --git a/docs/source/providers/safety/remote_nvidia.md b/docs/source/providers/safety/remote_nvidia.md new file mode 100644 index 000000000..40ae744a4 --- /dev/null +++ b/docs/source/providers/safety/remote_nvidia.md @@ -0,0 +1,21 @@ +# remote::nvidia + +## Description + +NVIDIA's safety provider for content moderation and safety filtering. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `guardrails_service_url` | `` | No | http://0.0.0.0:7331 | The url for accessing the Guardrails service | +| `config_id` | `str \| None` | No | self-check | Guardrails configuration ID to use from the Guardrails configuration store | + +## Sample Configuration + +```yaml +guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331} +config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check} + +``` + diff --git a/docs/source/providers/safety/remote_sambanova.md b/docs/source/providers/safety/remote_sambanova.md new file mode 100644 index 000000000..c680f9764 --- /dev/null +++ b/docs/source/providers/safety/remote_sambanova.md @@ -0,0 +1,21 @@ +# remote::sambanova + +## Description + +SambaNova's safety provider for content moderation and safety filtering. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key | + +## Sample Configuration + +```yaml +url: https://api.sambanova.ai/v1 +api_key: ${env.SAMBANOVA_API_KEY} + +``` + diff --git a/docs/source/providers/scoring/index.md b/docs/source/providers/scoring/index.md new file mode 100644 index 000000000..3cf7af537 --- /dev/null +++ b/docs/source/providers/scoring/index.md @@ -0,0 +1,7 @@ +# Scoring Providers + +This section contains documentation for all available providers for the **scoring** API. + +- [inline::basic](inline_basic.md) +- [inline::braintrust](inline_braintrust.md) +- [inline::llm-as-judge](inline_llm-as-judge.md) \ No newline at end of file diff --git a/docs/source/providers/scoring/inline_basic.md b/docs/source/providers/scoring/inline_basic.md new file mode 100644 index 000000000..e9e50cff4 --- /dev/null +++ b/docs/source/providers/scoring/inline_basic.md @@ -0,0 +1,13 @@ +# inline::basic + +## Description + +Basic scoring provider for simple evaluation metrics and scoring functions. 
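+As a mental model, the scoring functions exposed here compute simple row-level metrics, such as exact match, and then aggregate them over a dataset. A toy sketch of that idea (not the provider's actual code):
+
+```python
+def exact_match(expected: str, generated: str) -> float:
+    return 1.0 if expected.strip() == generated.strip() else 0.0
+
+
+rows = [("Paris", "Paris"), ("4", "four"), ("42", "42")]
+scores = [exact_match(expected, generated) for expected, generated in rows]
+print({"accuracy": sum(scores) / len(scores)})  # -> {'accuracy': 0.666...}
+```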
+ +## Sample Configuration + +```yaml +{} + +``` + diff --git a/docs/source/providers/scoring/inline_braintrust.md b/docs/source/providers/scoring/inline_braintrust.md new file mode 100644 index 000000000..dae0f246e --- /dev/null +++ b/docs/source/providers/scoring/inline_braintrust.md @@ -0,0 +1,19 @@ +# inline::braintrust + +## Description + +Braintrust scoring provider for evaluation and scoring using the Braintrust platform. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `openai_api_key` | `str \| None` | No | | The OpenAI API Key | + +## Sample Configuration + +```yaml +openai_api_key: ${env.OPENAI_API_KEY:+} + +``` + diff --git a/docs/source/providers/scoring/inline_llm-as-judge.md b/docs/source/providers/scoring/inline_llm-as-judge.md new file mode 100644 index 000000000..971e02897 --- /dev/null +++ b/docs/source/providers/scoring/inline_llm-as-judge.md @@ -0,0 +1,13 @@ +# inline::llm-as-judge + +## Description + +LLM-as-judge scoring provider that uses language models to evaluate and score responses. + +## Sample Configuration + +```yaml +{} + +``` + diff --git a/docs/source/providers/telemetry/index.md b/docs/source/providers/telemetry/index.md new file mode 100644 index 000000000..e2b221b50 --- /dev/null +++ b/docs/source/providers/telemetry/index.md @@ -0,0 +1,5 @@ +# Telemetry Providers + +This section contains documentation for all available providers for the **telemetry** API. + +- [inline::meta-reference](inline_meta-reference.md) \ No newline at end of file diff --git a/docs/source/providers/telemetry/inline_meta-reference.md b/docs/source/providers/telemetry/inline_meta-reference.md new file mode 100644 index 000000000..775dba86d --- /dev/null +++ b/docs/source/providers/telemetry/inline_meta-reference.md @@ -0,0 +1,25 @@ +# inline::meta-reference + +## Description + +Meta's reference implementation of telemetry and observability using OpenTelemetry. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `otel_trace_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL for traces | +| `otel_metric_endpoint` | `str \| None` | No | | The OpenTelemetry collector endpoint URL for metrics | +| `service_name` | `` | No | ​ | The service name to use for telemetry | +| `sinks` | `list[inline.telemetry.meta_reference.config.TelemetrySink` | No | [, ] | List of telemetry sinks to enable (possible values: otel, sqlite, console) | +| `sqlite_db_path` | `` | No | ~/.llama/runtime/trace_store.db | The path to the SQLite database to use for storing traces | + +## Sample Configuration + +```yaml +service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" +sinks: ${env.TELEMETRY_SINKS:=console,sqlite} +sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/trace_store.db + +``` + diff --git a/docs/source/providers/tool_runtime/index.md b/docs/source/providers/tool_runtime/index.md new file mode 100644 index 000000000..f162c4f9c --- /dev/null +++ b/docs/source/providers/tool_runtime/index.md @@ -0,0 +1,10 @@ +# Tool_Runtime Providers + +This section contains documentation for all available providers for the **tool_runtime** API. 
+ +- [inline::rag-runtime](inline_rag-runtime.md) +- [remote::bing-search](remote_bing-search.md) +- [remote::brave-search](remote_brave-search.md) +- [remote::model-context-protocol](remote_model-context-protocol.md) +- [remote::tavily-search](remote_tavily-search.md) +- [remote::wolfram-alpha](remote_wolfram-alpha.md) \ No newline at end of file diff --git a/docs/source/providers/tool_runtime/inline_rag-runtime.md b/docs/source/providers/tool_runtime/inline_rag-runtime.md new file mode 100644 index 000000000..784b4fdad --- /dev/null +++ b/docs/source/providers/tool_runtime/inline_rag-runtime.md @@ -0,0 +1,13 @@ +# inline::rag-runtime + +## Description + +RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search. + +## Sample Configuration + +```yaml +{} + +``` + diff --git a/docs/source/providers/tool_runtime/remote_bing-search.md b/docs/source/providers/tool_runtime/remote_bing-search.md new file mode 100644 index 000000000..0d5df7679 --- /dev/null +++ b/docs/source/providers/tool_runtime/remote_bing-search.md @@ -0,0 +1,20 @@ +# remote::bing-search + +## Description + +Bing Search tool for web search capabilities using Microsoft's search engine. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | | +| `top_k` | `` | No | 3 | | + +## Sample Configuration + +```yaml +api_key: ${env.BING_API_KEY:} + +``` + diff --git a/docs/source/providers/tool_runtime/remote_brave-search.md b/docs/source/providers/tool_runtime/remote_brave-search.md new file mode 100644 index 000000000..db10fdc4f --- /dev/null +++ b/docs/source/providers/tool_runtime/remote_brave-search.md @@ -0,0 +1,21 @@ +# remote::brave-search + +## Description + +Brave Search tool for web search capabilities with privacy-focused results. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The Brave Search API Key | +| `max_results` | `` | No | 3 | The maximum number of results to return | + +## Sample Configuration + +```yaml +api_key: ${env.BRAVE_SEARCH_API_KEY:+} +max_results: 3 + +``` + diff --git a/docs/source/providers/tool_runtime/remote_model-context-protocol.md b/docs/source/providers/tool_runtime/remote_model-context-protocol.md new file mode 100644 index 000000000..cf9401c2c --- /dev/null +++ b/docs/source/providers/tool_runtime/remote_model-context-protocol.md @@ -0,0 +1,13 @@ +# remote::model-context-protocol + +## Description + +Model Context Protocol (MCP) tool for standardized tool calling and context management. + +## Sample Configuration + +```yaml +{} + +``` + diff --git a/docs/source/providers/tool_runtime/remote_tavily-search.md b/docs/source/providers/tool_runtime/remote_tavily-search.md new file mode 100644 index 000000000..7d1c7fd7f --- /dev/null +++ b/docs/source/providers/tool_runtime/remote_tavily-search.md @@ -0,0 +1,21 @@ +# remote::tavily-search + +## Description + +Tavily Search tool for AI-optimized web search with structured results. 
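+Once configured, the tool can be exercised directly through the tool-runtime API. The sketch below assumes the `llama_stack_client` package and that your client version exposes `invoke_tool(tool_name=..., kwargs=...)` with a `web_search` tool; treat those names as assumptions and check your client's reference:
+
+```python
+from llama_stack_client import LlamaStackClient
+
+client = LlamaStackClient(base_url="http://localhost:8321")
+# Tool and argument names are assumptions; adjust them to your deployment.
+result = client.tool_runtime.invoke_tool(
+    tool_name="web_search",
+    kwargs={"query": "Llama Stack documentation"},
+)
+print(result.content)
+```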
+ +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | The Tavily Search API Key | +| `max_results` | `` | No | 3 | The maximum number of results to return | + +## Sample Configuration + +```yaml +api_key: ${env.TAVILY_SEARCH_API_KEY:+} +max_results: 3 + +``` + diff --git a/docs/source/providers/tool_runtime/remote_wolfram-alpha.md b/docs/source/providers/tool_runtime/remote_wolfram-alpha.md new file mode 100644 index 000000000..d44c93f72 --- /dev/null +++ b/docs/source/providers/tool_runtime/remote_wolfram-alpha.md @@ -0,0 +1,19 @@ +# remote::wolfram-alpha + +## Description + +Wolfram Alpha tool for computational knowledge and mathematical calculations. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `api_key` | `str \| None` | No | | | + +## Sample Configuration + +```yaml +api_key: ${env.WOLFRAM_ALPHA_API_KEY:+} + +``` + diff --git a/docs/source/providers/vector_io/index.md b/docs/source/providers/vector_io/index.md new file mode 100644 index 000000000..870d04401 --- /dev/null +++ b/docs/source/providers/vector_io/index.md @@ -0,0 +1,16 @@ +# Vector_Io Providers + +This section contains documentation for all available providers for the **vector_io** API. + +- [inline::chromadb](inline_chromadb.md) +- [inline::faiss](inline_faiss.md) +- [inline::meta-reference](inline_meta-reference.md) +- [inline::milvus](inline_milvus.md) +- [inline::qdrant](inline_qdrant.md) +- [inline::sqlite-vec](inline_sqlite-vec.md) +- [inline::sqlite_vec](inline_sqlite_vec.md) +- [remote::chromadb](remote_chromadb.md) +- [remote::milvus](remote_milvus.md) +- [remote::pgvector](remote_pgvector.md) +- [remote::qdrant](remote_qdrant.md) +- [remote::weaviate](remote_weaviate.md) \ No newline at end of file diff --git a/docs/source/providers/vector_io/inline_chromadb.md b/docs/source/providers/vector_io/inline_chromadb.md new file mode 100644 index 000000000..172215414 --- /dev/null +++ b/docs/source/providers/vector_io/inline_chromadb.md @@ -0,0 +1,52 @@ +# inline::chromadb + +## Description + + +[Chroma](https://www.trychroma.com/) is an inline and remote vector +database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database. +That means you're not limited to storing vectors in memory or in a separate service. + +## Features +Chroma supports: +- Store embeddings and their metadata +- Vector search +- Full-text search +- Document storage +- Metadata filtering +- Multi-modal retrieval + +## Usage + +To use Chroma in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use Chroma. +3. Start storing and querying vectors. + +## Installation + +You can install Chroma using pip: + +```bash +pip install chromadb +``` + +## Documentation +See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general.
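As a quick orientation, here is a minimal, illustrative sketch of querying a Chroma-backed vector DB through the Llama Stack `vector_io` API. It follows the same `query_chunks` pattern shown in the sqlite-vec provider documentation; the `vector_db_id` and parameters are placeholders.

```python
# Illustrative only: assumes a vector DB has already been registered
# against this provider. The id and params below are placeholders.
response = await vector_io.query_chunks(
    vector_db_id="my_chroma_db",
    query="your query here",
    params={"max_chunks": 3, "score_threshold": 0.7},
)

# Each returned chunk carries its content and a similarity score.
for chunk, score in zip(response.chunks, response.scores):
    print(score, chunk.content)
```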
+ + + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `db_path` | `` | No | PydanticUndefined | | + +## Sample Configuration + +```yaml +db_path: ${env.CHROMADB_PATH} + +``` + diff --git a/docs/source/providers/vector_io/faiss.md b/docs/source/providers/vector_io/inline_faiss.md similarity index 60% rename from docs/source/providers/vector_io/faiss.md rename to docs/source/providers/vector_io/inline_faiss.md index c8a2efbe4..2dcf4625b 100644 --- a/docs/source/providers/vector_io/faiss.md +++ b/docs/source/providers/vector_io/inline_faiss.md @@ -1,7 +1,7 @@ ---- -orphan: true ---- -# Faiss +# inline::faiss + +## Description + [Faiss](https://github.com/facebookresearch/faiss) is an inline vector database provider for Llama Stack. It allows you to store and query vectors directly in memory. @@ -31,3 +31,21 @@ pip install faiss-cpu ## Documentation See [Faiss' documentation](https://faiss.ai/) or the [Faiss Wiki](https://github.com/facebookresearch/faiss/wiki) for more details about Faiss in general. + + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | + +## Sample Configuration + +```yaml +kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db + +``` + diff --git a/docs/source/providers/vector_io/inline_meta-reference.md b/docs/source/providers/vector_io/inline_meta-reference.md new file mode 100644 index 000000000..c9ca12ff2 --- /dev/null +++ b/docs/source/providers/vector_io/inline_meta-reference.md @@ -0,0 +1,26 @@ +# inline::meta-reference + +## Description + +Meta's reference implementation of a vector database. + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | + +## Sample Configuration + +```yaml +kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/faiss_store.db + +``` + +## Deprecation Notice + +⚠️ **Warning**: Please use the `inline::faiss` provider instead. + diff --git a/docs/source/providers/vector_io/inline_milvus.md b/docs/source/providers/vector_io/inline_milvus.md new file mode 100644 index 000000000..1e9bbfc79 --- /dev/null +++ b/docs/source/providers/vector_io/inline_milvus.md @@ -0,0 +1,26 @@ +# inline::milvus + +## Description + + +Please refer to the remote provider documentation. 
+ + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `db_path` | `` | No | PydanticUndefined | | +| `kvstore` | `utils.kvstore.config.RedisKVStoreConfig \| utils.kvstore.config.SqliteKVStoreConfig \| utils.kvstore.config.PostgresKVStoreConfig \| utils.kvstore.config.MongoDBKVStoreConfig` | No | sqlite | | + +## Sample Configuration + +```yaml +db_path: ${env.MILVUS_DB_PATH:=~/.llama/dummy/milvus.db} +kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/${env.MILVUS_KVSTORE_DB_PATH:=~/.llama/dummy/milvus_registry.db} + +``` + diff --git a/docs/source/providers/vector_io/qdrant.md b/docs/source/providers/vector_io/inline_qdrant.md similarity index 83% rename from docs/source/providers/vector_io/qdrant.md rename to docs/source/providers/vector_io/inline_qdrant.md index 8b0cbeef8..63e2d81d8 100644 --- a/docs/source/providers/vector_io/qdrant.md +++ b/docs/source/providers/vector_io/inline_qdrant.md @@ -1,7 +1,7 @@ ---- -orphan: true ---- -# Qdrant +# inline::qdrant + +## Description + [Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It allows you to store and query vectors directly in memory. @@ -44,3 +44,18 @@ docker pull qdrant/qdrant ``` ## Documentation See the [Qdrant documentation](https://qdrant.tech/documentation/) for more details about Qdrant in general. + + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `path` | `` | No | PydanticUndefined | | + +## Sample Configuration + +```yaml +path: ${env.QDRANT_PATH:=~/.llama/~/.llama/dummy}/qdrant.db + +``` + diff --git a/docs/source/providers/vector_io/sqlite-vec.md b/docs/source/providers/vector_io/inline_sqlite-vec.md similarity index 95% rename from docs/source/providers/vector_io/sqlite-vec.md rename to docs/source/providers/vector_io/inline_sqlite-vec.md index 3c7c4cbee..fd3ec1dc4 100644 --- a/docs/source/providers/vector_io/sqlite-vec.md +++ b/docs/source/providers/vector_io/inline_sqlite-vec.md @@ -1,7 +1,7 @@ ---- -orphan: true ---- -# SQLite-Vec +# inline::sqlite-vec + +## Description + [SQLite-Vec](https://github.com/asg017/sqlite-vec) is an inline vector database provider for Llama Stack. It allows you to store and query vectors directly within an SQLite database. @@ -199,3 +199,18 @@ pip install sqlite-vec See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) for more details about sqlite-vec in general. [^1]: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). [Reciprocal rank fusion outperforms condorcet and individual rank learning methods](https://dl.acm.org/doi/10.1145/1571941.1572114). In Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval (pp. 758-759). 
+ + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `db_path` | `` | No | PydanticUndefined | | + +## Sample Configuration + +```yaml +db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db + +``` + diff --git a/docs/source/providers/vector_io/inline_sqlite_vec.md b/docs/source/providers/vector_io/inline_sqlite_vec.md new file mode 100644 index 000000000..e4b69c9ab --- /dev/null +++ b/docs/source/providers/vector_io/inline_sqlite_vec.md @@ -0,0 +1,25 @@ +# inline::sqlite_vec + +## Description + + +Please refer to the sqlite-vec provider documentation. + + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `db_path` | `` | No | PydanticUndefined | | + +## Sample Configuration + +```yaml +db_path: ${env.SQLITE_STORE_DIR:=~/.llama/dummy}/sqlite_vec.db + +``` + +## Deprecation Notice + +⚠️ **Warning**: Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead. + diff --git a/docs/source/providers/vector_io/chromadb.md b/docs/source/providers/vector_io/remote_chromadb.md similarity index 75% rename from docs/source/providers/vector_io/chromadb.md rename to docs/source/providers/vector_io/remote_chromadb.md index 3f0c56f61..cc1dcc4d1 100644 --- a/docs/source/providers/vector_io/chromadb.md +++ b/docs/source/providers/vector_io/remote_chromadb.md @@ -1,7 +1,7 @@ ---- -orphan: true ---- -# Chroma +# remote::chromadb + +## Description + [Chroma](https://www.trychroma.com/) is an inline and remote vector database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database. @@ -34,3 +34,18 @@ pip install chromadb ## Documentation See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general. + + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `url` | `str \| None` | No | PydanticUndefined | | + +## Sample Configuration + +```yaml +url: ${env.CHROMADB_URL} + +``` + diff --git a/docs/source/providers/vector_io/milvus.md b/docs/source/providers/vector_io/remote_milvus.md similarity index 74% rename from docs/source/providers/vector_io/milvus.md rename to docs/source/providers/vector_io/remote_milvus.md index e030c85f8..9017f0e22 100644 --- a/docs/source/providers/vector_io/milvus.md +++ b/docs/source/providers/vector_io/remote_milvus.md @@ -1,7 +1,7 @@ ---- -orphan: true ---- -# Milvus +# remote::milvus + +## Description + [Milvus](https://milvus.io/) is an inline and remote vector database provider for Llama Stack. It allows you to store and query vectors directly within a Milvus database. @@ -96,7 +96,7 @@ vector_io: #### Key Parameters for TLS Configuration - **`secure`**: Enables TLS encryption when set to `true`. Defaults to `false`. -- **`server_pem_path`**: Path to the **server certificate** for verifying the server’s identity (used in one-way TLS). +- **`server_pem_path`**: Path to the **server certificate** for verifying the server's identity (used in one-way TLS). - **`ca_pem_path`**: Path to the **Certificate Authority (CA) certificate** for validating the server certificate (required in mTLS). - **`client_pem_path`**: Path to the **client certificate** file (required for mTLS). - **`client_key_path`**: Path to the **client private key** file (required for mTLS). 
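To make the mapping concrete, here is a minimal sketch of how the TLS parameters above correspond to a direct `pymilvus` connection (assuming the `pymilvus` `connections.connect` API; the URI, token, and certificate paths are placeholders):

```python
# Illustrative sketch: one-way TLS connection with pymilvus.
# For mTLS, also pass ca_pem_path, client_pem_path, and client_key_path,
# mirroring the mTLS configuration described above. All values are placeholders.
from pymilvus import connections

connections.connect(
    alias="default",
    uri="https://<host>:<port>",
    token="<user>:<password>",
    secure=True,
    server_pem_path="/path/to/server.pem",
)
```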
@@ -105,3 +105,24 @@ vector_io: See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general. For more details on TLS configuration, refer to the [TLS setup guide](https://milvus.io/docs/tls.md). + + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `uri` | `` | No | PydanticUndefined | The URI of the Milvus server | +| `token` | `str \| None` | No | PydanticUndefined | The token of the Milvus server | +| `consistency_level` | `` | No | Strong | The consistency level of the Milvus server | +| `config` | `dict` | No | {} | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. | + +> **Note**: This configuration class accepts additional fields beyond those listed above. You can pass any additional configuration options that will be forwarded to the underlying provider. + +## Sample Configuration + +```yaml +uri: ${env.MILVUS_ENDPOINT} +token: ${env.MILVUS_TOKEN} + +``` + diff --git a/docs/source/providers/vector_io/pgvector.md b/docs/source/providers/vector_io/remote_pgvector.md similarity index 55% rename from docs/source/providers/vector_io/pgvector.md rename to docs/source/providers/vector_io/remote_pgvector.md index 070e2c16d..685b98f37 100644 --- a/docs/source/providers/vector_io/pgvector.md +++ b/docs/source/providers/vector_io/remote_pgvector.md @@ -1,7 +1,7 @@ ---- -orphan: true ---- -# Postgres PGVector +# remote::pgvector + +## Description + [PGVector](https://github.com/pgvector/pgvector) is a remote vector database provider for Llama Stack. It allows you to store and query vectors directly in memory. @@ -29,3 +29,26 @@ docker pull pgvector/pgvector:pg17 ``` ## Documentation See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general. + + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `host` | `str \| None` | No | localhost | | +| `port` | `int \| None` | No | 5432 | | +| `db` | `str \| None` | No | postgres | | +| `user` | `str \| None` | No | postgres | | +| `password` | `str \| None` | No | mysecretpassword | | + +## Sample Configuration + +```yaml +host: ${env.PGVECTOR_HOST:=localhost} +port: ${env.PGVECTOR_PORT:=5432} +db: ${env.PGVECTOR_DB} +user: ${env.PGVECTOR_USER} +password: ${env.PGVECTOR_PASSWORD} + +``` + diff --git a/docs/source/providers/vector_io/remote_qdrant.md b/docs/source/providers/vector_io/remote_qdrant.md new file mode 100644 index 000000000..14c821f35 --- /dev/null +++ b/docs/source/providers/vector_io/remote_qdrant.md @@ -0,0 +1,30 @@ +# remote::qdrant + +## Description + + +Please refer to the inline provider documentation. 
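The configuration fields listed below largely mirror the constructor arguments of the `qdrant-client` package, to which they are presumably passed through. A minimal sketch of the equivalent direct connection (all values are placeholders):

```python
# Illustrative sketch: the remote Qdrant settings map onto
# qdrant-client constructor arguments. All values are placeholders.
from qdrant_client import QdrantClient

client = QdrantClient(
    url="https://my-qdrant.example.com",
    port=6333,
    grpc_port=6334,
    prefer_grpc=False,
    api_key="<QDRANT_API_KEY>",
    timeout=30,
)
```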
+ + +## Configuration + +| Field | Type | Required | Default | Description | +|-------|------|----------|---------|-------------| +| `location` | `str \| None` | No | | | +| `url` | `str \| None` | No | | | +| `port` | `int \| None` | No | 6333 | | +| `grpc_port` | `` | No | 6334 | | +| `prefer_grpc` | `` | No | False | | +| `https` | `bool \| None` | No | | | +| `api_key` | `str \| None` | No | | | +| `prefix` | `str \| None` | No | | | +| `timeout` | `int \| None` | No | | | +| `host` | `str \| None` | No | | | + +## Sample Configuration + +```yaml +api_key: ${env.QDRANT_API_KEY} + +``` + diff --git a/docs/source/providers/vector_io/weaviate.md b/docs/source/providers/vector_io/remote_weaviate.md similarity index 91% rename from docs/source/providers/vector_io/weaviate.md rename to docs/source/providers/vector_io/remote_weaviate.md index 78c0ddb5b..b7f811c35 100644 --- a/docs/source/providers/vector_io/weaviate.md +++ b/docs/source/providers/vector_io/remote_weaviate.md @@ -1,7 +1,7 @@ ---- -orphan: true ---- -# Weaviate +# remote::weaviate + +## Description + [Weaviate](https://weaviate.io/) is a vector database provider for Llama Stack. It allows you to store and query vectors directly within a Weaviate database. @@ -31,3 +31,12 @@ To install Weaviate see the [Weaviate quickstart documentation](https://weaviate ## Documentation See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more details about Weaviate in general. + + +## Sample Configuration + +```yaml +{} + +``` + diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py index 221ed9027..efe8a98fe 100644 --- a/llama_stack/providers/datatypes.py +++ b/llama_stack/providers/datatypes.py @@ -141,6 +141,12 @@ Fully-qualified name of the module to import. The module is expected to have: provider_data_validator: str | None = Field( default=None, ) + description: str | None = Field( + default=None, + description=""" +A description of the provider. This is used to display in the documentation. +""", + ) @json_schema_type @@ -167,6 +173,12 @@ Fully-qualified name of the module to import. The module is expected to have: provider_data_validator: str | None = Field( default=None, ) + description: str | None = Field( + default=None, + description=""" +A description of the provider. This is used to display in the documentation. 
+""", + ) class RemoteProviderConfig(BaseModel): diff --git a/llama_stack/providers/inline/telemetry/meta_reference/config.py b/llama_stack/providers/inline/telemetry/meta_reference/config.py index 50dd8a788..1e4b0c070 100644 --- a/llama_stack/providers/inline/telemetry/meta_reference/config.py +++ b/llama_stack/providers/inline/telemetry/meta_reference/config.py @@ -38,7 +38,7 @@ class TelemetryConfig(BaseModel): description="List of telemetry sinks to enable (possible values: otel, sqlite, console)", ) sqlite_db_path: str = Field( - default=(RUNTIME_BASE_DIR / "trace_store.db").as_posix(), + default_factory=lambda: (RUNTIME_BASE_DIR / "trace_store.db").as_posix(), description="The path to the SQLite database to use for storing traces", ) diff --git a/llama_stack/providers/registry/agents.py b/llama_stack/providers/registry/agents.py index e47f84c65..834e81b96 100644 --- a/llama_stack/providers/registry/agents.py +++ b/llama_stack/providers/registry/agents.py @@ -35,5 +35,6 @@ def available_providers() -> list[ProviderSpec]: Api.tool_runtime, Api.tool_groups, ], + description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.", ), ] diff --git a/llama_stack/providers/registry/datasetio.py b/llama_stack/providers/registry/datasetio.py index 152cc9cb9..43cde83fb 100644 --- a/llama_stack/providers/registry/datasetio.py +++ b/llama_stack/providers/registry/datasetio.py @@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.inline.datasetio.localfs", config_class="llama_stack.providers.inline.datasetio.localfs.LocalFSDatasetIOConfig", api_dependencies=[], + description="Local filesystem-based dataset I/O provider for reading and writing datasets to local storage.", ), remote_provider_spec( api=Api.datasetio, @@ -33,6 +34,7 @@ def available_providers() -> list[ProviderSpec]: ], module="llama_stack.providers.remote.datasetio.huggingface", config_class="llama_stack.providers.remote.datasetio.huggingface.HuggingfaceDatasetIOConfig", + description="HuggingFace datasets provider for accessing and managing datasets from the HuggingFace Hub.", ), ), remote_provider_spec( @@ -44,6 +46,7 @@ def available_providers() -> list[ProviderSpec]: ], module="llama_stack.providers.remote.datasetio.nvidia", config_class="llama_stack.providers.remote.datasetio.nvidia.NvidiaDatasetIOConfig", + description="NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform.", ), ), ] diff --git a/llama_stack/providers/registry/eval.py b/llama_stack/providers/registry/eval.py index c9c29bbe0..9f0d17916 100644 --- a/llama_stack/providers/registry/eval.py +++ b/llama_stack/providers/registry/eval.py @@ -23,6 +23,7 @@ def available_providers() -> list[ProviderSpec]: Api.inference, Api.agents, ], + description="Meta's reference implementation of evaluation tasks with support for multiple languages and evaluation metrics.", ), remote_provider_spec( api=Api.eval, @@ -33,6 +34,7 @@ def available_providers() -> list[ProviderSpec]: ], module="llama_stack.providers.remote.eval.nvidia", config_class="llama_stack.providers.remote.eval.nvidia.NVIDIAEvalConfig", + description="NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform.", ), api_dependencies=[ Api.datasetio, diff --git a/llama_stack/providers/registry/files.py b/llama_stack/providers/registry/files.py index dc5443c3a..e894debaf 100644 --- a/llama_stack/providers/registry/files.py +++ 
b/llama_stack/providers/registry/files.py @@ -21,5 +21,6 @@ def available_providers() -> list[ProviderSpec]: pip_packages=sql_store_pip_packages, module="llama_stack.providers.inline.files.localfs", config_class="llama_stack.providers.inline.files.localfs.config.LocalfsFilesImplConfig", + description="Local filesystem-based file storage provider for managing files and documents locally.", ), ] diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index 47be57eee..217870ec9 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -35,6 +35,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=META_REFERENCE_DEPS, module="llama_stack.providers.inline.inference.meta_reference", config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig", + description="Meta's reference implementation of inference with support for various model formats and optimization techniques.", ), InlineProviderSpec( api=Api.inference, @@ -44,6 +45,7 @@ def available_providers() -> list[ProviderSpec]: ], module="llama_stack.providers.inline.inference.vllm", config_class="llama_stack.providers.inline.inference.vllm.VLLMConfig", + description="vLLM inference provider for high-performance model serving with PagedAttention and continuous batching.", ), InlineProviderSpec( api=Api.inference, @@ -54,6 +56,7 @@ def available_providers() -> list[ProviderSpec]: ], module="llama_stack.providers.inline.inference.sentence_transformers", config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig", + description="Sentence Transformers inference provider for text embeddings and similarity search.", ), remote_provider_spec( api=Api.inference, @@ -64,6 +67,7 @@ def available_providers() -> list[ProviderSpec]: ], module="llama_stack.providers.remote.inference.cerebras", config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig", + description="Cerebras inference provider for running models on Cerebras Cloud platform.", ), ), remote_provider_spec( @@ -73,6 +77,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["ollama", "aiohttp", "h11>=0.16.0"], config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig", module="llama_stack.providers.remote.inference.ollama", + description="Ollama inference provider for running local models through the Ollama runtime.", ), ), remote_provider_spec( @@ -82,6 +87,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["openai"], module="llama_stack.providers.remote.inference.vllm", config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig", + description="Remote vLLM inference provider for connecting to vLLM servers.", ), ), remote_provider_spec( @@ -91,6 +97,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["huggingface_hub", "aiohttp"], module="llama_stack.providers.remote.inference.tgi", config_class="llama_stack.providers.remote.inference.tgi.TGIImplConfig", + description="Text Generation Inference (TGI) provider for HuggingFace model serving.", ), ), remote_provider_spec( @@ -100,6 +107,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["huggingface_hub", "aiohttp"], module="llama_stack.providers.remote.inference.tgi", config_class="llama_stack.providers.remote.inference.tgi.InferenceAPIImplConfig", + description="HuggingFace Inference API serverless provider for 
on-demand model inference.", ), ), remote_provider_spec( @@ -109,6 +117,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["huggingface_hub", "aiohttp"], module="llama_stack.providers.remote.inference.tgi", config_class="llama_stack.providers.remote.inference.tgi.InferenceEndpointImplConfig", + description="HuggingFace Inference Endpoints provider for dedicated model serving.", ), ), remote_provider_spec( @@ -121,6 +130,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.fireworks", config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig", provider_data_validator="llama_stack.providers.remote.inference.fireworks.FireworksProviderDataValidator", + description="Fireworks AI inference provider for Llama models and other AI models on the Fireworks platform.", ), ), remote_provider_spec( @@ -133,6 +143,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.together", config_class="llama_stack.providers.remote.inference.together.TogetherImplConfig", provider_data_validator="llama_stack.providers.remote.inference.together.TogetherProviderDataValidator", + description="Together AI inference provider for open-source models and collaborative AI development.", ), ), remote_provider_spec( @@ -142,6 +153,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["boto3"], module="llama_stack.providers.remote.inference.bedrock", config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig", + description="AWS Bedrock inference provider for accessing various AI models through AWS's managed service.", ), ), remote_provider_spec( @@ -153,6 +165,7 @@ def available_providers() -> list[ProviderSpec]: ], module="llama_stack.providers.remote.inference.databricks", config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig", + description="Databricks inference provider for running models on Databricks' unified analytics platform.", ), ), remote_provider_spec( @@ -164,6 +177,7 @@ def available_providers() -> list[ProviderSpec]: ], module="llama_stack.providers.remote.inference.nvidia", config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig", + description="NVIDIA inference provider for accessing NVIDIA NIM models and AI services.", ), ), remote_provider_spec( @@ -173,6 +187,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["openai"], module="llama_stack.providers.remote.inference.runpod", config_class="llama_stack.providers.remote.inference.runpod.RunpodImplConfig", + description="RunPod inference provider for running models on RunPod's cloud GPU platform.", ), ), remote_provider_spec( @@ -183,6 +198,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.openai", config_class="llama_stack.providers.remote.inference.openai.OpenAIConfig", provider_data_validator="llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator", + description="OpenAI inference provider for accessing GPT models and other OpenAI services.", ), ), remote_provider_spec( @@ -193,6 +209,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.anthropic", config_class="llama_stack.providers.remote.inference.anthropic.AnthropicConfig", provider_data_validator="llama_stack.providers.remote.inference.anthropic.config.AnthropicProviderDataValidator", + description="Anthropic inference provider for accessing Claude 
models and Anthropic's AI services.", ), ), remote_provider_spec( @@ -203,6 +220,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.gemini", config_class="llama_stack.providers.remote.inference.gemini.GeminiConfig", provider_data_validator="llama_stack.providers.remote.inference.gemini.config.GeminiProviderDataValidator", + description="Google Gemini inference provider for accessing Gemini models and Google's AI services.", ), ), remote_provider_spec( @@ -213,6 +231,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.groq", config_class="llama_stack.providers.remote.inference.groq.GroqConfig", provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator", + description="Groq inference provider for ultra-fast inference using Groq's LPU technology.", ), ), remote_provider_spec( @@ -223,6 +242,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.fireworks_openai_compat", config_class="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksCompatConfig", provider_data_validator="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksProviderDataValidator", + description="Fireworks AI OpenAI-compatible provider for using Fireworks models with OpenAI API format.", ), ), remote_provider_spec( @@ -233,6 +253,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.llama_openai_compat", config_class="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaCompatConfig", provider_data_validator="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator", + description="Llama OpenAI-compatible provider for using Llama models with OpenAI API format.", ), ), remote_provider_spec( @@ -243,6 +264,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.together_openai_compat", config_class="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherCompatConfig", provider_data_validator="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherProviderDataValidator", + description="Together AI OpenAI-compatible provider for using Together models with OpenAI API format.", ), ), remote_provider_spec( @@ -253,6 +275,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.groq_openai_compat", config_class="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqCompatConfig", provider_data_validator="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqProviderDataValidator", + description="Groq OpenAI-compatible provider for using Groq models with OpenAI API format.", ), ), remote_provider_spec( @@ -263,6 +286,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.sambanova_openai_compat", config_class="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaCompatConfig", provider_data_validator="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaProviderDataValidator", + description="SambaNova OpenAI-compatible provider for using SambaNova models with OpenAI API format.", ), ), remote_provider_spec( @@ -273,6 +297,7 @@ def available_providers() -> list[ProviderSpec]: 
module="llama_stack.providers.remote.inference.cerebras_openai_compat", config_class="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasCompatConfig", provider_data_validator="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasProviderDataValidator", + description="Cerebras OpenAI-compatible provider for using Cerebras models with OpenAI API format.", ), ), remote_provider_spec( @@ -283,6 +308,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.sambanova", config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig", provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator", + description="SambaNova inference provider for running models on SambaNova's dataflow architecture.", ), ), remote_provider_spec( @@ -293,6 +319,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.passthrough", config_class="llama_stack.providers.remote.inference.passthrough.PassthroughImplConfig", provider_data_validator="llama_stack.providers.remote.inference.passthrough.PassthroughProviderDataValidator", + description="Passthrough inference provider for connecting to any external inference service not directly supported.", ), ), remote_provider_spec( @@ -303,6 +330,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.inference.watsonx", config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig", provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator", + description="IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform.", ), ), ] diff --git a/llama_stack/providers/registry/post_training.py b/llama_stack/providers/registry/post_training.py index d752b8819..ffd64ef7c 100644 --- a/llama_stack/providers/registry/post_training.py +++ b/llama_stack/providers/registry/post_training.py @@ -20,6 +20,7 @@ def available_providers() -> list[ProviderSpec]: Api.datasetio, Api.datasets, ], + description="TorchTune-based post-training provider for fine-tuning and optimizing models using Meta's TorchTune framework.", ), InlineProviderSpec( api=Api.post_training, @@ -31,6 +32,7 @@ def available_providers() -> list[ProviderSpec]: Api.datasetio, Api.datasets, ], + description="HuggingFace-based post-training provider for fine-tuning models using the HuggingFace ecosystem.", ), remote_provider_spec( api=Api.post_training, @@ -39,6 +41,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["requests", "aiohttp"], module="llama_stack.providers.remote.post_training.nvidia", config_class="llama_stack.providers.remote.post_training.nvidia.NvidiaPostTrainingConfig", + description="NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform.", ), ), ] diff --git a/llama_stack/providers/registry/safety.py b/llama_stack/providers/registry/safety.py index f0fe1e9f5..9dd791bd8 100644 --- a/llama_stack/providers/registry/safety.py +++ b/llama_stack/providers/registry/safety.py @@ -25,6 +25,7 @@ def available_providers() -> list[ProviderSpec]: ], module="llama_stack.providers.inline.safety.prompt_guard", config_class="llama_stack.providers.inline.safety.prompt_guard.PromptGuardConfig", + description="Prompt Guard safety provider for detecting and filtering unsafe prompts and content.", ), InlineProviderSpec( api=Api.safety, @@ -35,6 +36,7 @@ def available_providers() -> 
list[ProviderSpec]: api_dependencies=[ Api.inference, ], + description="Llama Guard safety provider for content moderation and safety filtering using Meta's Llama Guard model.", ), InlineProviderSpec( api=Api.safety, @@ -44,6 +46,7 @@ def available_providers() -> list[ProviderSpec]: ], module="llama_stack.providers.inline.safety.code_scanner", config_class="llama_stack.providers.inline.safety.code_scanner.CodeScannerConfig", + description="Code Scanner safety provider for detecting security vulnerabilities and unsafe code patterns.", ), remote_provider_spec( api=Api.safety, @@ -52,6 +55,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["boto3"], module="llama_stack.providers.remote.safety.bedrock", config_class="llama_stack.providers.remote.safety.bedrock.BedrockSafetyConfig", + description="AWS Bedrock safety provider for content moderation using AWS's safety services.", ), ), remote_provider_spec( @@ -61,6 +65,7 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["requests"], module="llama_stack.providers.remote.safety.nvidia", config_class="llama_stack.providers.remote.safety.nvidia.NVIDIASafetyConfig", + description="NVIDIA's safety provider for content moderation and safety filtering.", ), ), remote_provider_spec( @@ -71,6 +76,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.safety.sambanova", config_class="llama_stack.providers.remote.safety.sambanova.SambaNovaSafetyConfig", provider_data_validator="llama_stack.providers.remote.safety.sambanova.config.SambaNovaProviderDataValidator", + description="SambaNova's safety provider for content moderation and safety filtering.", ), ), ] diff --git a/llama_stack/providers/registry/scoring.py b/llama_stack/providers/registry/scoring.py index 244b06842..79293d888 100644 --- a/llama_stack/providers/registry/scoring.py +++ b/llama_stack/providers/registry/scoring.py @@ -20,6 +20,7 @@ def available_providers() -> list[ProviderSpec]: Api.datasetio, Api.datasets, ], + description="Basic scoring provider for simple evaluation metrics and scoring functions.", ), InlineProviderSpec( api=Api.scoring, @@ -32,6 +33,7 @@ def available_providers() -> list[ProviderSpec]: Api.datasets, Api.inference, ], + description="LLM-as-judge scoring provider that uses language models to evaluate and score responses.", ), InlineProviderSpec( api=Api.scoring, @@ -44,5 +46,6 @@ def available_providers() -> list[ProviderSpec]: Api.datasets, ], provider_data_validator="llama_stack.providers.inline.scoring.braintrust.BraintrustProviderDataValidator", + description="Braintrust scoring provider for evaluation and scoring using the Braintrust platform.", ), ] diff --git a/llama_stack/providers/registry/telemetry.py b/llama_stack/providers/registry/telemetry.py index 14da06126..b50b422c1 100644 --- a/llama_stack/providers/registry/telemetry.py +++ b/llama_stack/providers/registry/telemetry.py @@ -24,5 +24,6 @@ def available_providers() -> list[ProviderSpec]: optional_api_dependencies=[Api.datasetio], module="llama_stack.providers.inline.telemetry.meta_reference", config_class="llama_stack.providers.inline.telemetry.meta_reference.config.TelemetryConfig", + description="Meta's reference implementation of telemetry and observability using OpenTelemetry.", ), ] diff --git a/llama_stack/providers/registry/tool_runtime.py b/llama_stack/providers/registry/tool_runtime.py index fa359f6b5..0dc880408 100644 --- a/llama_stack/providers/registry/tool_runtime.py +++ b/llama_stack/providers/registry/tool_runtime.py @@ 
-33,6 +33,7 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.inline.tool_runtime.rag", config_class="llama_stack.providers.inline.tool_runtime.rag.config.RagToolRuntimeConfig", api_dependencies=[Api.vector_io, Api.inference], + description="RAG (Retrieval-Augmented Generation) tool runtime for document ingestion, chunking, and semantic search.", ), remote_provider_spec( api=Api.tool_runtime, @@ -42,6 +43,7 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.remote.tool_runtime.brave_search.config.BraveSearchToolConfig", pip_packages=["requests"], provider_data_validator="llama_stack.providers.remote.tool_runtime.brave_search.BraveSearchToolProviderDataValidator", + description="Brave Search tool for web search capabilities with privacy-focused results.", ), ), remote_provider_spec( @@ -52,6 +54,7 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.remote.tool_runtime.bing_search.config.BingSearchToolConfig", pip_packages=["requests"], provider_data_validator="llama_stack.providers.remote.tool_runtime.bing_search.BingSearchToolProviderDataValidator", + description="Bing Search tool for web search capabilities using Microsoft's search engine.", ), ), remote_provider_spec( @@ -62,6 +65,7 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.remote.tool_runtime.tavily_search.config.TavilySearchToolConfig", pip_packages=["requests"], provider_data_validator="llama_stack.providers.remote.tool_runtime.tavily_search.TavilySearchToolProviderDataValidator", + description="Tavily Search tool for AI-optimized web search with structured results.", ), ), remote_provider_spec( @@ -72,6 +76,7 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.remote.tool_runtime.wolfram_alpha.config.WolframAlphaToolConfig", pip_packages=["requests"], provider_data_validator="llama_stack.providers.remote.tool_runtime.wolfram_alpha.WolframAlphaToolProviderDataValidator", + description="Wolfram Alpha tool for computational knowledge and mathematical calculations.", ), ), remote_provider_spec( @@ -82,6 +87,7 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.remote.tool_runtime.model_context_protocol.config.MCPProviderConfig", pip_packages=["mcp"], provider_data_validator="llama_stack.providers.remote.tool_runtime.model_context_protocol.config.MCPProviderDataValidator", + description="Model Context Protocol (MCP) tool for standardized tool calling and context management.", ), ), ] diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py index 6f4366142..5e233b94c 100644 --- a/llama_stack/providers/registry/vector_io.py +++ b/llama_stack/providers/registry/vector_io.py @@ -25,6 +25,7 @@ def available_providers() -> list[ProviderSpec]: deprecation_warning="Please use the `inline::faiss` provider instead.", api_dependencies=[Api.inference], optional_api_dependencies=[Api.files], + description="Meta's reference implementation of a vector database.", ), InlineProviderSpec( api=Api.vector_io, @@ -34,6 +35,36 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.inline.vector_io.faiss.FaissVectorIOConfig", api_dependencies=[Api.inference], optional_api_dependencies=[Api.files], + description=""" +[Faiss](https://github.com/facebookresearch/faiss) is an inline vector database provider for Llama Stack. 
It +allows you to store and query vectors directly in memory. +That means you'll get fast and efficient vector retrieval. + +## Features + +- Lightweight and easy to use +- Fully integrated with Llama Stack +- GPU support + +## Usage + +To use Faiss in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use Faiss. +3. Start storing and querying vectors. + +## Installation + +You can install Faiss using pip: + +```bash +pip install faiss-cpu +``` +## Documentation +See [Faiss' documentation](https://faiss.ai/) or the [Faiss Wiki](https://github.com/facebookresearch/faiss/wiki) for +more details about Faiss in general. +""", ), # NOTE: sqlite-vec cannot be bundled into the container image because it does not have a # source distribution and the wheels are not available for all platforms. @@ -45,6 +76,204 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.inline.vector_io.sqlite_vec.SQLiteVectorIOConfig", api_dependencies=[Api.inference], optional_api_dependencies=[Api.files], + description=""" +[SQLite-Vec](https://github.com/asg017/sqlite-vec) is an inline vector database provider for Llama Stack. It +allows you to store and query vectors directly within an SQLite database. +That means you're not limited to storing vectors in memory or in a separate service. + +## Features + +- Lightweight and easy to use +- Fully integrated with Llama Stack +- Uses disk-based storage for persistence, allowing for larger vector storage + +### Comparison to Faiss + +The choice between Faiss and sqlite-vec should be made based on the needs of your application, +as they have different strengths. + +#### Choosing the Right Provider + +Scenario | Recommended Tool | Reason +-- |-----------------| -- +Online Analytical Processing (OLAP) | Faiss | Fast, in-memory searches +Online Transaction Processing (OLTP) | sqlite-vec | Frequent writes and reads +Frequent writes | sqlite-vec | Efficient disk-based storage and incremental indexing +Large datasets | sqlite-vec | Disk-based storage for larger vector storage +Datasets that can fit in memory, frequent reads | Faiss | Optimized for speed, indexing, and GPU acceleration + +#### Empirical Example + +Consider the histogram below in which 10,000 randomly generated strings were inserted +in batches of 100 into both Faiss and sqlite-vec using `client.tool_runtime.rag_tool.insert()`. + +```{image} ../../../../_static/providers/vector_io/write_time_comparison_sqlite-vec-faiss.png +:alt: Comparison of SQLite-Vec and Faiss write times +:width: 400px +``` + +You will notice that the average write time for `sqlite-vec` was 788ms, compared to +47,640ms for Faiss. While the number is jarring, if you look at the distribution, you can see that it is rather +uniformly spread across the [1500, 100000] interval. + +Looking at each individual write in the order that the documents are inserted you'll see the increase in +write time as Faiss reindexes the vectors after each write. +```{image} ../../../../_static/providers/vector_io/write_time_sequence_sqlite-vec-faiss.png +:alt: Comparison of SQLite-Vec and Faiss write times +:width: 400px +``` + +In comparison, the read times for Faiss were on average 10% faster than sqlite-vec. +The modes of the two distributions highlight the difference further: Faiss +will likely yield faster read performance.
+ +```{image} ../../../../_static/providers/vector_io/read_time_comparison_sqlite-vec-faiss.png +:alt: Comparison of SQLite-Vec and Faiss read times +:width: 400px +``` + +## Usage + +To use sqlite-vec in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use SQLite-Vec. +3. Start storing and querying vectors. + +The SQLite-vec provider supports three search modes: + +1. **Vector Search** (`mode="vector"`): Performs pure vector similarity search using the embeddings. +2. **Keyword Search** (`mode="keyword"`): Performs full-text search using SQLite's FTS5. +3. **Hybrid Search** (`mode="hybrid"`): Combines both vector and keyword search for better results. First performs keyword search to get candidate matches, then applies vector similarity search on those candidates. + +Example with hybrid search: +```python +response = await vector_io.query_chunks( + vector_db_id="my_db", + query="your query here", + params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7}, +) + +# Using RRF ranker +response = await vector_io.query_chunks( + vector_db_id="my_db", + query="your query here", + params={ + "mode": "hybrid", + "max_chunks": 3, + "score_threshold": 0.7, + "ranker": {"type": "rrf", "impact_factor": 60.0}, + }, +) + +# Using weighted ranker +response = await vector_io.query_chunks( + vector_db_id="my_db", + query="your query here", + params={ + "mode": "hybrid", + "max_chunks": 3, + "score_threshold": 0.7, + "ranker": {"type": "weighted", "alpha": 0.7}, # 70% vector, 30% keyword + }, +) +``` + +Example with explicit vector search: +```python +response = await vector_io.query_chunks( + vector_db_id="my_db", + query="your query here", + params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7}, +) +``` + +Example with keyword search: +```python +response = await vector_io.query_chunks( + vector_db_id="my_db", + query="your query here", + params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7}, +) +``` + +## Supported Search Modes + +The SQLite vector store supports three search modes: + +1. **Vector Search** (`mode="vector"`): Uses vector similarity to find relevant chunks +2. **Keyword Search** (`mode="keyword"`): Uses keyword matching to find relevant chunks +3. **Hybrid Search** (`mode="hybrid"`): Combines both vector and keyword scores using a ranker + +### Hybrid Search + +Hybrid search combines the strengths of both vector and keyword search by: +- Computing vector similarity scores +- Computing keyword match scores +- Using a ranker to combine these scores + +Two ranker types are supported: + +1. **RRF (Reciprocal Rank Fusion)**: + - Combines ranks from both vector and keyword results + - Uses an impact factor (default: 60.0) to control the weight of higher-ranked results + - Good for balancing between vector and keyword results + - The default impact factor of 60.0 comes from the original RRF paper by Cormack et al. (2009) [^1], which found this value to provide optimal performance across various retrieval tasks + +2. 
**Weighted**: + - Linearly combines normalized vector and keyword scores + - Uses an alpha parameter (0-1) to control the blend: + - alpha=0: Only use keyword scores + - alpha=1: Only use vector scores + - alpha=0.5: Equal weight to both (default) + +Example using RAGQueryConfig with different search modes: + +```python +from llama_stack.apis.tools import RAGQueryConfig, RRFRanker, WeightedRanker + +# Vector search +config = RAGQueryConfig(mode="vector", max_chunks=5) + +# Keyword search +config = RAGQueryConfig(mode="keyword", max_chunks=5) + +# Hybrid search with custom RRF ranker +config = RAGQueryConfig( + mode="hybrid", + max_chunks=5, + ranker=RRFRanker(impact_factor=50.0), # Custom impact factor +) + +# Hybrid search with weighted ranker +config = RAGQueryConfig( + mode="hybrid", + max_chunks=5, + ranker=WeightedRanker(alpha=0.7), # 70% vector, 30% keyword +) + +# Hybrid search with default RRF ranker +config = RAGQueryConfig( + mode="hybrid", max_chunks=5 +) # Will use RRF with impact_factor=60.0 +``` + +Note: The ranker configuration is only used in hybrid mode. For vector or keyword modes, the ranker parameter is ignored. + +## Installation + +You can install SQLite-Vec using pip: + +```bash +pip install sqlite-vec +``` + +## Documentation + +See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) for more details about sqlite-vec in general. + +[^1]: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). [Reciprocal rank fusion outperforms condorcet and individual rank learning methods](https://dl.acm.org/doi/10.1145/1571941.1572114). In Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval (pp. 758-759). +""", ), InlineProviderSpec( api=Api.vector_io, @@ -55,6 +284,9 @@ def available_providers() -> list[ProviderSpec]: deprecation_warning="Please use the `inline::sqlite-vec` provider (notice the hyphen instead of underscore) instead.", api_dependencies=[Api.inference], optional_api_dependencies=[Api.files], + description=""" +Please refer to the sqlite-vec provider documentation. +""", ), remote_provider_spec( Api.vector_io, @@ -63,6 +295,39 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["chromadb-client"], module="llama_stack.providers.remote.vector_io.chroma", config_class="llama_stack.providers.remote.vector_io.chroma.ChromaVectorIOConfig", + description=""" +[Chroma](https://www.trychroma.com/) is an inline and remote vector +database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database. +That means you're not limited to storing vectors in memory or in a separate service. + +## Features +Chroma supports: +- Store embeddings and their metadata +- Vector search +- Full-text search +- Document storage +- Metadata filtering +- Multi-modal retrieval + +## Usage + +To use Chrome in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use chroma. +3. Start storing and querying vectors. + +## Installation + +You can install chroma using pip: + +```bash +pip install chromadb +``` + +## Documentation +See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general. 
+""", ), api_dependencies=[Api.inference], ), @@ -73,6 +338,40 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.inline.vector_io.chroma", config_class="llama_stack.providers.inline.vector_io.chroma.ChromaVectorIOConfig", api_dependencies=[Api.inference], + description=""" +[Chroma](https://www.trychroma.com/) is an inline and remote vector +database provider for Llama Stack. It allows you to store and query vectors directly within a Chroma database. +That means you're not limited to storing vectors in memory or in a separate service. + +## Features +Chroma supports: +- Store embeddings and their metadata +- Vector search +- Full-text search +- Document storage +- Metadata filtering +- Multi-modal retrieval + +## Usage + +To use Chrome in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use chroma. +3. Start storing and querying vectors. + +## Installation + +You can install chroma using pip: + +```bash +pip install chromadb +``` + +## Documentation +See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introduction) for more details about Chroma in general. + +""", ), remote_provider_spec( Api.vector_io, @@ -81,6 +380,34 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["psycopg2-binary"], module="llama_stack.providers.remote.vector_io.pgvector", config_class="llama_stack.providers.remote.vector_io.pgvector.PGVectorVectorIOConfig", + description=""" +[PGVector](https://github.com/pgvector/pgvector) is a remote vector database provider for Llama Stack. It +allows you to store and query vectors directly in memory. +That means you'll get fast and efficient vector retrieval. + +## Features + +- Easy to use +- Fully integrated with Llama Stack + +## Usage + +To use PGVector in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use Faiss. +3. Start storing and querying vectors. + +## Installation + +You can install PGVector using docker: + +```bash +docker pull pgvector/pgvector:pg17 +``` +## Documentation +See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general. +""", ), api_dependencies=[Api.inference], ), @@ -92,6 +419,36 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.remote.vector_io.weaviate", config_class="llama_stack.providers.remote.vector_io.weaviate.WeaviateVectorIOConfig", provider_data_validator="llama_stack.providers.remote.vector_io.weaviate.WeaviateRequestProviderData", + description=""" +[Weaviate](https://weaviate.io/) is a vector database provider for Llama Stack. +It allows you to store and query vectors directly within a Weaviate database. +That means you're not limited to storing vectors in memory or in a separate service. + +## Features +Weaviate supports: +- Store embeddings and their metadata +- Vector search +- Full-text search +- Hybrid search +- Document storage +- Metadata filtering +- Multi-modal retrieval + +## Usage + +To use Weaviate in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use chroma. +3. Start storing and querying vectors. + +## Installation + +To install Weaviate see the [Weaviate quickstart documentation](https://weaviate.io/developers/weaviate/quickstart). 
+ +## Documentation +See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more details about Weaviate in general. +""", ), api_dependencies=[Api.inference], ), @@ -102,6 +459,49 @@ def available_providers() -> list[ProviderSpec]: module="llama_stack.providers.inline.vector_io.qdrant", config_class="llama_stack.providers.inline.vector_io.qdrant.QdrantVectorIOConfig", api_dependencies=[Api.inference], + description=r""" +[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It +allows you to store and query vectors directly in memory. +That means you'll get fast and efficient vector retrieval. + +> By default, Qdrant stores vectors in RAM, delivering incredibly fast access for datasets that fit comfortably in +> memory. But when your dataset exceeds RAM capacity, Qdrant offers Memmap as an alternative. +> +> \[[An Introduction to Vector Databases](https://qdrant.tech/articles/what-is-a-vector-database/)\] + + + +## Features + +- Lightweight and easy to use +- Fully integrated with Llama Stack +- Apache 2.0 license terms +- Store embeddings and their metadata +- Supports search by + [Keyword](https://qdrant.tech/articles/qdrant-introduces-full-text-filters-and-indexes/) + and [Hybrid](https://qdrant.tech/articles/hybrid-search/#building-a-hybrid-search-system-in-qdrant) search +- [Multilingual and Multimodal retrieval](https://qdrant.tech/documentation/multimodal-search/) +- [Metadata filtering](https://qdrant.tech/articles/vector-search-filtering/) +- [GPU support](https://qdrant.tech/documentation/guides/running-with-gpu/) + +## Usage + +To use Qdrant in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use Qdrant. +3. Start storing and querying vectors. + +## Installation + +You can install Qdrant using docker: + +```bash +docker pull qdrant/qdrant +``` +## Documentation +See the [Qdrant documentation](https://qdrant.tech/documentation/) for more details about Qdrant in general. +""", ), remote_provider_spec( Api.vector_io, @@ -110,6 +510,9 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["qdrant-client"], module="llama_stack.providers.remote.vector_io.qdrant", config_class="llama_stack.providers.remote.vector_io.qdrant.QdrantVectorIOConfig", + description=""" +Please refer to the inline provider documentation. +""", ), api_dependencies=[Api.inference], ), @@ -120,6 +523,110 @@ def available_providers() -> list[ProviderSpec]: pip_packages=["pymilvus"], module="llama_stack.providers.remote.vector_io.milvus", config_class="llama_stack.providers.remote.vector_io.milvus.MilvusVectorIOConfig", + description=""" +[Milvus](https://milvus.io/) is an inline and remote vector database provider for Llama Stack. It +allows you to store and query vectors directly within a Milvus database. +That means you're not limited to storing vectors in memory or in a separate service. + +## Features + +- Easy to use +- Fully integrated with Llama Stack + +## Usage + +To use Milvus in your Llama Stack project, follow these steps: + +1. Install the necessary dependencies. +2. Configure your Llama Stack project to use Milvus. +3. Start storing and querying vectors.
+ +## Installation + +You can install Milvus using pymilvus: + +```bash +pip install pymilvus +``` + +## Configuration + +In Llama Stack, Milvus can be configured in two ways: +- **Inline (Local) Configuration** - Uses Milvus-Lite for local storage +- **Remote Configuration** - Connects to a remote Milvus server + +### Inline (Local) Configuration + +The simplest method is local configuration, which requires setting `db_path`, a path for locally storing Milvus-Lite files: + +```yaml +vector_io: + - provider_id: milvus + provider_type: inline::milvus + config: + db_path: ~/.llama/distributions/together/milvus_store.db +``` + +### Remote Configuration + +Remote configuration is suitable for larger data storage requirements: + +#### Standard Remote Connection + +```yaml +vector_io: + - provider_id: milvus + provider_type: remote::milvus + config: + uri: "http://<host>:<port>" + token: "<user>:<password>" +``` + +#### TLS-Enabled Remote Connection (One-way TLS) + +For connections to Milvus instances with one-way TLS enabled: + +```yaml +vector_io: + - provider_id: milvus + provider_type: remote::milvus + config: + uri: "https://<host>:<port>" + token: "<user>:<password>" + secure: True + server_pem_path: "/path/to/server.pem" +``` + +#### Mutual TLS (mTLS) Remote Connection + +For connections to Milvus instances with mutual TLS (mTLS) enabled: + +```yaml +vector_io: + - provider_id: milvus + provider_type: remote::milvus + config: + uri: "https://<host>:<port>" + token: "<user>:<password>" + secure: True + ca_pem_path: "/path/to/ca.pem" + client_pem_path: "/path/to/client.pem" + client_key_path: "/path/to/client.key" +``` + +#### Key Parameters for TLS Configuration + +- **`secure`**: Enables TLS encryption when set to `true`. Defaults to `false`. +- **`server_pem_path`**: Path to the **server certificate** for verifying the server's identity (used in one-way TLS). +- **`ca_pem_path`**: Path to the **Certificate Authority (CA) certificate** for validating the server certificate (required in mTLS). +- **`client_pem_path`**: Path to the **client certificate** file (required for mTLS). +- **`client_key_path`**: Path to the **client private key** file (required for mTLS). + +## Documentation +See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general. + +For more details on TLS configuration, refer to the [TLS setup guide](https://milvus.io/docs/tls.md). +""", ), api_dependencies=[Api.inference], ), @@ -131,5 +638,8 @@ def available_providers() -> list[ProviderSpec]: config_class="llama_stack.providers.inline.vector_io.milvus.MilvusVectorIOConfig", api_dependencies=[Api.inference], optional_api_dependencies=[Api.files], + description=""" +Please refer to the remote provider documentation.
+""", ), ] diff --git a/llama_stack/providers/remote/vector_io/milvus/config.py b/llama_stack/providers/remote/vector_io/milvus/config.py index 9bdc7ed5c..b42233d6d 100644 --- a/llama_stack/providers/remote/vector_io/milvus/config.py +++ b/llama_stack/providers/remote/vector_io/milvus/config.py @@ -6,17 +6,19 @@ from typing import Any -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, Field from llama_stack.schema_utils import json_schema_type @json_schema_type class MilvusVectorIOConfig(BaseModel): - uri: str - token: str | None = None - consistency_level: str = "Strong" + uri: str = Field(description="The URI of the Milvus server") + token: str | None = Field(description="The token of the Milvus server") + consistency_level: str = Field(description="The consistency level of the Milvus server", default="Strong") + # This configuration allows additional fields to be passed through to the underlying Milvus client. + # See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. model_config = ConfigDict(extra="allow") @classmethod diff --git a/scripts/provider_codegen.py b/scripts/provider_codegen.py new file mode 100755 index 000000000..eff04a40f --- /dev/null +++ b/scripts/provider_codegen.py @@ -0,0 +1,332 @@ +#!/usr/bin/env python +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import subprocess +import sys +from pathlib import Path +from typing import Any + +from rich.progress import Progress, SpinnerColumn, TextColumn + +from llama_stack.distribution.distribution import get_provider_registry + +REPO_ROOT = Path(__file__).parent.parent + + +class ChangedPathTracker: + """Track a list of paths we may have changed.""" + + def __init__(self): + self._changed_paths = [] + + def add_paths(self, *paths): + for path in paths: + path = str(path) + if path not in self._changed_paths: + self._changed_paths.append(path) + + def changed_paths(self): + return self._changed_paths + + +def get_config_class_info(config_class_path: str) -> dict[str, Any]: + """Extract configuration information from a config class.""" + try: + module_path, class_name = config_class_path.rsplit(".", 1) + module = __import__(module_path, fromlist=[class_name]) + config_class = getattr(module, class_name) + + docstring = config_class.__doc__ or "" + + accepts_extra_config = False + try: + schema = config_class.model_json_schema() + if schema.get("additionalProperties") is True: + accepts_extra_config = True + except Exception: + if hasattr(config_class, "model_config"): + model_config = config_class.model_config + if hasattr(model_config, "extra") and model_config.extra == "allow": + accepts_extra_config = True + elif isinstance(model_config, dict) and model_config.get("extra") == "allow": + accepts_extra_config = True + + fields_info = {} + if hasattr(config_class, "model_fields"): + for field_name, field in config_class.model_fields.items(): + field_type = str(field.annotation) if field.annotation else "Any" + field_type = field_type.replace("typing.", "").replace("Optional[", "").replace("]", "") + field_type = field_type.replace("Annotated[", "").replace("FieldInfo(", "").replace(")", "") + field_type = field_type.replace("llama_stack.apis.inference.inference.", "") + field_type = field_type.replace("llama_stack.providers.", "") + + default_value = field.default + if 
field.default_factory is not None: + try: + default_value = field.default_factory() + # HACK ALERT: + # If the default value contains a path that looks like it came from RUNTIME_BASE_DIR, + # replace it with a generic ~/.llama/ path for documentation + if isinstance(default_value, str) and "/.llama/" in default_value: + if ".llama/" in default_value: + path_part = default_value.split(".llama/")[-1] + default_value = f"~/.llama/{path_part}" + except Exception: + default_value = "" + elif field.default is None: + default_value = "" + + field_info = { + "type": field_type, + "description": field.description or "", + "default": default_value, + "required": field.default is None and not field.is_required, + } + fields_info[field_name] = field_info + + if accepts_extra_config: + config_description = "Additional configuration options that will be forwarded to the underlying provider" + try: + import inspect + + source = inspect.getsource(config_class) + lines = source.split("\n") + + for i, line in enumerate(lines): + if "model_config" in line and "ConfigDict" in line and 'extra="allow"' in line: + comments = [] + for j in range(i - 1, -1, -1): + stripped = lines[j].strip() + if stripped.startswith("#"): + comments.append(stripped[1:].strip()) + elif stripped == "": + continue + else: + break + + if comments: + config_description = " ".join(reversed(comments)) + break + except Exception: + pass + + fields_info["config"] = { + "type": "dict", + "description": config_description, + "default": "{}", + "required": False, + } + + return { + "docstring": docstring, + "fields": fields_info, + "sample_config": getattr(config_class, "sample_run_config", None), + "accepts_extra_config": accepts_extra_config, + } + except Exception as e: + return { + "error": f"Failed to load config class {config_class_path}: {str(e)}", + "docstring": "", + "fields": {}, + "sample_config": None, + "accepts_extra_config": False, + } + + +def generate_provider_docs(provider_spec: Any, api_name: str) -> str: + """Generate markdown documentation for a provider.""" + provider_type = provider_spec.provider_type + config_class = provider_spec.config_class + + config_info = get_config_class_info(config_class) + + md_lines = [] + md_lines.append(f"# {provider_type}") + md_lines.append("") + + description = "" + if hasattr(provider_spec, "description") and provider_spec.description: + description = provider_spec.description + elif ( + hasattr(provider_spec, "adapter") + and hasattr(provider_spec.adapter, "description") + and provider_spec.adapter.description + ): + description = provider_spec.adapter.description + elif config_info.get("docstring"): + description = config_info["docstring"] + + if description: + md_lines.append("## Description") + md_lines.append("") + md_lines.append(description) + md_lines.append("") + + if config_info.get("fields"): + md_lines.append("## Configuration") + md_lines.append("") + md_lines.append("| Field | Type | Required | Default | Description |") + md_lines.append("|-------|------|----------|---------|-------------|") + + for field_name, field_info in config_info["fields"].items(): + field_type = field_info["type"].replace("|", "\\|") + required = "Yes" if field_info["required"] else "No" + default = str(field_info["default"]) if field_info["default"] is not None else "" + description = field_info["description"] or "" + + md_lines.append(f"| `{field_name}` | `{field_type}` | {required} | {default} | {description} |") + + md_lines.append("") + + if config_info.get("accepts_extra_config"): + 
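+        # For config classes that allow extra fields, add a note that unknown options are forwarded to the underlying provider.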
md_lines.append( + "> **Note**: This configuration class accepts additional fields beyond those listed above. You can pass any additional configuration options that will be forwarded to the underlying provider." + ) + md_lines.append("") + + if config_info.get("sample_config"): + md_lines.append("## Sample Configuration") + md_lines.append("") + md_lines.append("```yaml") + try: + sample_config_func = config_info["sample_config"] + import inspect + + import yaml + + if sample_config_func is not None: + sig = inspect.signature(sample_config_func) + if "__distro_dir__" in sig.parameters: + sample_config = sample_config_func(__distro_dir__="~/.llama/dummy") + else: + sample_config = sample_config_func() + + def convert_pydantic_to_dict(obj): + if hasattr(obj, "model_dump"): + return obj.model_dump() + elif hasattr(obj, "dict"): + return obj.dict() + elif isinstance(obj, dict): + return {k: convert_pydantic_to_dict(v) for k, v in obj.items()} + elif isinstance(obj, list): + return [convert_pydantic_to_dict(item) for item in obj] + else: + return obj + + sample_config_dict = convert_pydantic_to_dict(sample_config) + md_lines.append(yaml.dump(sample_config_dict, default_flow_style=False, sort_keys=False)) + else: + md_lines.append("# No sample configuration available.") + except Exception as e: + md_lines.append(f"# Error generating sample config: {str(e)}") + md_lines.append("```") + md_lines.append("") + + if hasattr(provider_spec, "deprecation_warning") and provider_spec.deprecation_warning: + md_lines.append("## Deprecation Notice") + md_lines.append("") + md_lines.append(f"⚠️ **Warning**: {provider_spec.deprecation_warning}") + md_lines.append("") + + if hasattr(provider_spec, "deprecation_error") and provider_spec.deprecation_error: + md_lines.append("## Deprecation Error") + md_lines.append("") + md_lines.append(f"❌ **Error**: {provider_spec.deprecation_error}") + + return "\n".join(md_lines) + "\n" + + +def process_provider_registry(progress, change_tracker: ChangedPathTracker) -> None: + """Process the complete provider registry.""" + progress.print("Processing provider registry") + + try: + provider_registry = get_provider_registry() + + for api, providers in provider_registry.items(): + api_name = api.value + + doc_output_dir = REPO_ROOT / "docs" / "source" / "providers" / api_name + doc_output_dir.mkdir(parents=True, exist_ok=True) + change_tracker.add_paths(doc_output_dir) + + index_content = [] + index_content.append(f"# {api_name.title()} Providers") + index_content.append("") + index_content.append( + f"This section contains documentation for all available providers for the **{api_name}** API." 
+ ) + index_content.append("") + + for provider_type, provider in sorted(providers.items()): + provider_doc_file = doc_output_dir / f"{provider_type.replace('::', '_').replace(':', '_')}.md" + + provider_docs = generate_provider_docs(provider, api_name) + + provider_doc_file.write_text(provider_docs) + change_tracker.add_paths(provider_doc_file) + + index_content.append(f"- [{provider_type}]({provider_doc_file.name})") + + index_file = doc_output_dir / "index.md" + index_file.write_text("\n".join(index_content)) + change_tracker.add_paths(index_file) + + except Exception as e: + progress.print(f"[red]Error processing provider registry: {str(e)}") + raise e + + +def check_for_changes(change_tracker: ChangedPathTracker) -> bool: + """Check if there are any uncommitted changes, including new files.""" + has_changes = False + for path in change_tracker.changed_paths(): + result = subprocess.run( + ["git", "diff", "--exit-code", path], + cwd=REPO_ROOT, + capture_output=True, + ) + if result.returncode != 0: + print(f"Change detected in '{path}'.", file=sys.stderr) + has_changes = True + status_result = subprocess.run( + ["git", "status", "--porcelain", path], + cwd=REPO_ROOT, + capture_output=True, + text=True, + ) + for line in status_result.stdout.splitlines(): + if line.startswith("??"): + print(f"New file detected: '{path}'.", file=sys.stderr) + has_changes = True + return has_changes + + +def main(): + change_tracker = ChangedPathTracker() + + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + ) as progress: + task = progress.add_task("Processing provider registry...", total=1) + + process_provider_registry(progress, change_tracker) + progress.update(task, advance=1) + + if check_for_changes(change_tracker): + print( + "Provider documentation changes detected. Please commit the changes.", + file=sys.stderr, + ) + sys.exit(1) + + sys.exit(0) + + +if __name__ == "__main__": + main()
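For reference, a minimal sketch of the convention this script consumes (a hypothetical provider config, assuming Pydantic v2): the `description` passed to each `Field` is what ends up in the Description column of the generated configuration table, and the script builds each row by walking `model_fields` much like this:

```python
# Hypothetical provider config -- the names below are illustrative, not part of Llama Stack.
from pydantic import BaseModel, Field


class ExampleVectorIOConfig(BaseModel):
    uri: str = Field(description="The URI of the example server")
    api_key: str | None = Field(default=None, description="Optional API key for the example server")


# One table row per field: name, description, and default value.
for name, field in ExampleVectorIOConfig.model_fields.items():
    print(f"| `{name}` | {field.description} | default={field.default!r} |")
```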