a

2025-07-29 15:23:51 +00:00 · 2025-07-01 17:22:24 -07:00 · 2025-07-01 17:22:24 -07:00 · 2788761f6e
commit 2788761f6e
parent bcdb6fcc15
5 changed files with 73 additions and 26 deletions
--- a/api_update_plan.md
+++ b/api_update_plan.md
@@ -235,9 +235,9 @@ Before finalizing documentation, verify:
 [x] 14. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring/scoring.py` - Scoring system
 [x] 15. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring_functions/scoring_functions.py` - Scoring function definitions
 [x] 16. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/benchmarks/benchmarks.py` - Benchmarking framework
-17. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/shields/shields.py` - Safety shields
-18. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/batch_inference/batch_inference.py` - Batch inference operations
-19. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py` - Data generation
+[x] 17. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/shields/shields.py` - Safety shields
+[x] 18. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/batch_inference/batch_inference.py` - Batch inference operations
+[x] 19. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py` - Data generation
 20. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/telemetry/telemetry.py` - Telemetry and monitoring
 21. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/providers/providers.py` - Provider management
 22. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/inspect/inspect.py` - System inspection
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -4912,7 +4912,7 @@
            "post": {
                "responses": {
                    "200": {
-                        "description": "OK",
+                        "description": "Response containing filtered synthetic data samples and optional statistics",
                        "content": {
                            "application/json": {
                                "schema": {
@@ -4937,7 +4937,7 @@
                "tags": [
                    "SyntheticDataGeneration (Coming Soon)"
                ],
-                "description": "",
+                "description": "Generate synthetic data based on input dialogs and apply filtering.",
                "parameters": [],
                "requestBody": {
                    "content": {
@@ -10888,9 +10888,9 @@
                            "tool",
                            "tool_group"
                        ],
-                        "title": "ResourceType",
                        "const": "shield",
-                        "default": "shield"
+                        "default": "shield",
+                        "description": "The resource type, always shield"
                    },
                    "params": {
                        "type": "object",
@@ -10915,7 +10915,8 @@
                                    "type": "object"
                                }
                            ]
-                        }
+                        },
+                        "description": "(Optional) Configuration parameters for the shield"
                    }
                },
                "additionalProperties": false,
@@ -10925,7 +10926,7 @@
                    "type"
                ],
                "title": "Shield",
-                "description": "A safety shield resource that can be used to check content"
+                "description": "A safety shield resource that can be used to check content."
            },
            "Span": {
                "type": "object",
@@ -16334,7 +16335,8 @@
                        "type": "array",
                        "items": {
                            "$ref": "#/components/schemas/Message"
-                        }
+                        },
+                        "description": "List of conversation messages to use as input for synthetic data generation"
                    },
                    "filtering_function": {
                        "type": "string",
@@ -16346,11 +16348,11 @@
                            "top_k_top_p",
                            "sigmoid"
                        ],
-                        "title": "FilteringFunction",
-                        "description": "The type of filtering function."
+                        "description": "Type of filtering to apply to generated synthetic data samples"
                    },
                    "model": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "(Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint"
                    }
                },
                "additionalProperties": false,
@@ -16389,7 +16391,8 @@
                                    }
                                ]
                            }
-                        }
+                        },
+                        "description": "List of generated synthetic data samples that passed the filtering criteria"
                    },
                    "statistics": {
                        "type": "object",
@@ -16414,7 +16417,8 @@
                                    "type": "object"
                                }
                            ]
-                        }
+                        },
+                        "description": "(Optional) Statistical information about the generation process and filtering results"
                    }
                },
                "additionalProperties": false,
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -3475,7 +3475,8 @@ paths:
    post:
      responses:
        '200':
-          description: OK
+          description: >-
+            Response containing filtered synthetic data samples and optional statistics
          content:
            application/json:
              schema:
@@ -3492,7 +3493,8 @@ paths:
          $ref: '#/components/responses/DefaultError'
      tags:
        - SyntheticDataGeneration (Coming Soon)
-      description: ''
+      description: >-
+        Generate synthetic data based on input dialogs and apply filtering.
      parameters: []
      requestBody:
        content:
@@ -7851,9 +7853,9 @@ components:
            - benchmark
            - tool
            - tool_group
-          title: ResourceType
          const: shield
          default: shield
+          description: The resource type, always shield
        params:
          type: object
          additionalProperties:
@@ -7864,6 +7866,8 @@ components:
              - type: string
              - type: array
              - type: object
+          description: >-
+            (Optional) Configuration parameters for the shield
      additionalProperties: false
      required:
        - identifier
@@ -7871,7 +7875,7 @@ components:
        - type
      title: Shield
      description: >-
-        A safety shield resource that can be used to check content
+        A safety shield resource that can be used to check content.
    Span:
      type: object
      properties:
@@ -11777,6 +11781,8 @@ components:
          type: array
          items:
            $ref: '#/components/schemas/Message'
+          description: >-
+            List of conversation messages to use as input for synthetic data generation
        filtering_function:
          type: string
          enum:
@@ -11786,10 +11792,13 @@ components:
            - top_p
            - top_k_top_p
            - sigmoid
-          title: FilteringFunction
-          description: The type of filtering function.
+          description: >-
+            Type of filtering to apply to generated synthetic data samples
        model:
          type: string
+          description: >-
+            (Optional) The identifier of the model to use. The model must be registered
+            with Llama Stack and available via the /models endpoint
      additionalProperties: false
      required:
        - dialogs
@@ -11810,6 +11819,8 @@ components:
                - type: string
                - type: array
                - type: object
+          description: >-
+            List of generated synthetic data samples that passed the filtering criteria
        statistics:
          type: object
          additionalProperties:
@@ -11820,6 +11831,9 @@ components:
              - type: string
              - type: array
              - type: object
+          description: >-
+            (Optional) Statistical information about the generation process and filtering
+            results
      additionalProperties: false
      required:
        - synthetic_data
--- a/llama_stack/apis/shields/shields.py
+++ b/llama_stack/apis/shields/shields.py
@@ -19,7 +19,11 @@ class CommonShieldFields(BaseModel):

 @json_schema_type
 class Shield(CommonShieldFields, Resource):
-    """A safety shield resource that can be used to check content"""
+    """A safety shield resource that can be used to check content.
+
+    :param params: (Optional) Configuration parameters for the shield
+    :param type: The resource type, always shield
+    """

    type: Literal[ResourceType.shield] = ResourceType.shield

--- a/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
+++ b/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py
@@ -14,7 +14,15 @@ from llama_stack.schema_utils import json_schema_type, webmethod


 class FilteringFunction(Enum):
-    """The type of filtering function."""
+    """The type of filtering function.
+
+    :cvar none: No filtering applied, accept all generated synthetic data
+    :cvar random: Random sampling of generated data points
+    :cvar top_k: Keep only the top-k highest scoring synthetic data samples
+    :cvar top_p: Nucleus-style filtering, keep samples exceeding cumulative score threshold
+    :cvar top_k_top_p: Combined top-k and top-p filtering strategy
+    :cvar sigmoid: Apply sigmoid function for probability-based filtering
+    """

    none = "none"
    random = "random"
@@ -26,7 +34,12 @@ class FilteringFunction(Enum):

 @json_schema_type
 class SyntheticDataGenerationRequest(BaseModel):
-    """Request to generate synthetic data. A small batch of prompts and a filtering function"""
+    """Request to generate synthetic data. A small batch of prompts and a filtering function
+
+    :param dialogs: List of conversation messages to use as input for synthetic data generation
+    :param filtering_function: Type of filtering to apply to generated synthetic data samples
+    :param model: (Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint
+    """

    dialogs: list[Message]
    filtering_function: FilteringFunction = FilteringFunction.none
@@ -35,7 +48,11 @@ class SyntheticDataGenerationRequest(BaseModel):

 @json_schema_type
 class SyntheticDataGenerationResponse(BaseModel):
-    """Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."""
+    """Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.
+
+    :param synthetic_data: List of generated synthetic data samples that passed the filtering criteria
+    :param statistics: (Optional) Statistical information about the generation process and filtering results
+    """

    synthetic_data: list[dict[str, Any]]
    statistics: dict[str, Any] | None = None
@@ -48,4 +65,12 @@ class SyntheticDataGeneration(Protocol):
        dialogs: list[Message],
        filtering_function: FilteringFunction = FilteringFunction.none,
        model: str | None = None,
-    ) -> SyntheticDataGenerationResponse: ...
+    ) -> SyntheticDataGenerationResponse:
+        """Generate synthetic data based on input dialogs and apply filtering.
+
+        :param dialogs: List of conversation messages to use as input for synthetic data generation
+        :param filtering_function: Type of filtering to apply to generated synthetic data samples
+        :param model: (Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint
+        :returns: Response containing filtered synthetic data samples and optional statistics
+        """
+        ...