diff --git a/docs/source/concepts/apis.md b/docs/source/concepts/apis.md index 5a10d6498..f8f73a928 100644 --- a/docs/source/concepts/apis.md +++ b/docs/source/concepts/apis.md @@ -18,3 +18,4 @@ We are working on adding a few more APIs to complete the application lifecycle. - **Batch Inference**: run inference on a dataset of inputs - **Batch Agents**: run agents on a dataset of inputs - **Synthetic Data Generation**: generate synthetic data for model development +- **Batches**: OpenAI-compatible batch management for inference diff --git a/docs/source/providers/agents/index.md b/docs/source/providers/agents/index.md index 92bf9edc0..a2c48d4b9 100644 --- a/docs/source/providers/agents/index.md +++ b/docs/source/providers/agents/index.md @@ -2,6 +2,15 @@ ## Overview +Agents API for creating and interacting with agentic systems. + + Main functionalities provided by this API: + - Create agents with specific instructions and ability to use tools. + - Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn". + - Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details). + - Agents can be provided with various shields (see the Safety API for more details). + - Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details. + This section contains documentation for all available providers for the **agents** API. ## Providers diff --git a/docs/source/providers/batches/index.md b/docs/source/providers/batches/index.md index d2405ecf7..2a39a626c 100644 --- a/docs/source/providers/batches/index.md +++ b/docs/source/providers/batches/index.md @@ -2,6 +2,14 @@ ## Overview +Protocol for batch processing API operations. + + The Batches API enables efficient processing of multiple requests in a single operation, + particularly useful for processing large datasets, batch evaluation workflows, and + cost-effective inference at scale.
+ + Note: This API is currently under active development and may undergo changes. + This section contains documentation for all available providers for the **batches** API. ## Providers diff --git a/docs/source/providers/eval/index.md b/docs/source/providers/eval/index.md index d180d256c..a14fada1d 100644 --- a/docs/source/providers/eval/index.md +++ b/docs/source/providers/eval/index.md @@ -2,6 +2,8 @@ ## Overview +Llama Stack Evaluation API for running evaluations on model and agent candidates. + This section contains documentation for all available providers for the **eval** API. ## Providers diff --git a/docs/source/providers/inference/index.md b/docs/source/providers/inference/index.md index 1c7bc86b9..cdde3a18a 100644 --- a/docs/source/providers/inference/index.md +++ b/docs/source/providers/inference/index.md @@ -2,6 +2,12 @@ ## Overview +Llama Stack Inference API for generating completions, chat completions, and embeddings. + + This API provides the raw interface to the underlying models. Two kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. + This section contains documentation for all available providers for the **inference** API. ## Providers diff --git a/llama_stack/apis/batches/batches.py b/llama_stack/apis/batches/batches.py index 72742d4fa..81ab44ccd 100644 --- a/llama_stack/apis/batches/batches.py +++ b/llama_stack/apis/batches/batches.py @@ -39,7 +39,14 @@ class ListBatchesResponse(BaseModel): @runtime_checkable class Batches(Protocol): - """Protocol for batch processing API operations.""" + """Protocol for batch processing API operations. + + The Batches API enables efficient processing of multiple requests in a single operation, + particularly useful for processing large datasets, batch evaluation workflows, and + cost-effective inference at scale.
+ + Note: This API is currently under active development and may undergo changes. + """ @webmethod(route="/openai/v1/batches", method="POST") async def create_batch( diff --git a/scripts/provider_codegen.py b/scripts/provider_codegen.py index 84c45fe27..beaeeae38 100755 --- a/scripts/provider_codegen.py +++ b/scripts/provider_codegen.py @@ -18,6 +18,23 @@ from llama_stack.core.distribution import get_provider_registry REPO_ROOT = Path(__file__).parent.parent +def get_api_docstring(api_name: str) -> str | None: + """Extract docstring from the API protocol class.""" + try: + # Import the API module dynamically + api_module = __import__(f"llama_stack.apis.{api_name}", fromlist=[api_name.title()]) + + # Get the main protocol class (usually capitalized API name) + protocol_class_name = api_name.title() + if hasattr(api_module, protocol_class_name): + protocol_class = getattr(api_module, protocol_class_name) + return protocol_class.__doc__ + except (ImportError, AttributeError): + pass + + return None + + class ChangedPathTracker: """Track a list of paths we may have changed.""" @@ -261,6 +278,11 @@ def process_provider_registry(progress, change_tracker: ChangedPathTracker) -> N index_content.append(f"# {api_name.title()}\n") index_content.append("## Overview\n") + api_docstring = get_api_docstring(api_name) + if api_docstring: + cleaned_docstring = api_docstring.strip() + index_content.append(f"{cleaned_docstring}\n") + index_content.append( f"This section contains documentation for all available providers for the **{api_name}** API.\n" )