Update API docs

2025-12-23 05:22:26 +00:00 · 2025-07-02 11:34:34 -07:00 · 2025-07-02 11:34:34 -07:00 · 8f96b61c43
commit 8f96b61c43
parent 4d0d2d685f
26 changed files with 1397 additions and 32 deletions
--- a/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
+++ b/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
@@ -14,7 +14,15 @@ from llama_stack.schema_utils import json_schema_type, webmethod


 class FilteringFunction(Enum):
-    """The type of filtering function."""
+    """The type of filtering function.
+
+    :cvar none: No filtering applied, accept all generated synthetic data
+    :cvar random: Random sampling of generated data points
+    :cvar top_k: Keep only the top-k highest scoring synthetic data samples
+    :cvar top_p: Nucleus-style filtering, keep samples exceeding cumulative score threshold
+    :cvar top_k_top_p: Combined top-k and top-p filtering strategy
+    :cvar sigmoid: Apply sigmoid function for probability-based filtering
+    """

    none = "none"
    random = "random"
@@ -26,7 +34,12 @@ class FilteringFunction(Enum):

 @json_schema_type
 class SyntheticDataGenerationRequest(BaseModel):
-    """Request to generate synthetic data. A small batch of prompts and a filtering function"""
+    """Request to generate synthetic data. A small batch of prompts and a filtering function
+
+    :param dialogs: List of conversation messages to use as input for synthetic data generation
+    :param filtering_function: Type of filtering to apply to generated synthetic data samples
+    :param model: (Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint
+    """

    dialogs: list[Message]
    filtering_function: FilteringFunction = FilteringFunction.none
@@ -35,7 +48,11 @@ class SyntheticDataGenerationRequest(BaseModel):

 @json_schema_type
 class SyntheticDataGenerationResponse(BaseModel):
-    """Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."""
+    """Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.
+
+    :param synthetic_data: List of generated synthetic data samples that passed the filtering criteria
+    :param statistics: (Optional) Statistical information about the generation process and filtering results
+    """

    synthetic_data: list[dict[str, Any]]
    statistics: dict[str, Any] | None = None
@@ -48,4 +65,12 @@ class SyntheticDataGeneration(Protocol):
        dialogs: list[Message],
        filtering_function: FilteringFunction = FilteringFunction.none,
        model: str | None = None,
-    ) -> SyntheticDataGenerationResponse: ...
+    ) -> SyntheticDataGenerationResponse:
+        """Generate synthetic data based on input dialogs and apply filtering.
+
+        :param dialogs: List of conversation messages to use as input for synthetic data generation
+        :param filtering_function: Type of filtering to apply to generated synthetic data samples
+        :param model: (Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint
+        :returns: Response containing filtered synthetic data samples and optional statistics
+        """
+        ...