diff --git a/api_update_plan.md b/api_update_plan.md index ec93f1440..026156bbd 100644 --- a/api_update_plan.md +++ b/api_update_plan.md @@ -235,9 +235,9 @@ Before finalizing documentation, verify: [x] 14. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring/scoring.py` - Scoring system [x] 15. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring_functions/scoring_functions.py` - Scoring function definitions [x] 16. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/benchmarks/benchmarks.py` - Benchmarking framework -17. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/shields/shields.py` - Safety shields -18. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/batch_inference/batch_inference.py` - Batch inference operations -19. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py` - Data generation +[x] 17. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/shields/shields.py` - Safety shields +[x] 18. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/batch_inference/batch_inference.py` - Batch inference operations +[x] 19. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py` - Data generation 20. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/telemetry/telemetry.py` - Telemetry and monitoring 21. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/providers/providers.py` - Provider management 22. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/inspect/inspect.py` - System inspection diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index c28135dd2..6b59a3e25 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -4912,7 +4912,7 @@ "post": { "responses": { "200": { - "description": "OK", + "description": "Response containing filtered synthetic data samples and optional statistics", "content": { "application/json": { "schema": { @@ -4937,7 +4937,7 @@ "tags": [ "SyntheticDataGeneration (Coming Soon)" ], - "description": "", + "description": "Generate synthetic data based on input dialogs and apply filtering.", "parameters": [], "requestBody": { "content": { @@ -10888,9 +10888,9 @@ "tool", "tool_group" ], - "title": "ResourceType", "const": "shield", - "default": "shield" + "default": "shield", + "description": "The resource type, always shield" }, "params": { "type": "object", @@ -10915,7 +10915,8 @@ "type": "object" } ] - } + }, + "description": "(Optional) Configuration parameters for the shield" } }, "additionalProperties": false, @@ -10925,7 +10926,7 @@ "type" ], "title": "Shield", - "description": "A safety shield resource that can be used to check content" + "description": "A safety shield resource that can be used to check content." }, "Span": { "type": "object", @@ -16334,7 +16335,8 @@ "type": "array", "items": { "$ref": "#/components/schemas/Message" - } + }, + "description": "List of conversation messages to use as input for synthetic data generation" }, "filtering_function": { "type": "string", @@ -16346,11 +16348,11 @@ "top_k_top_p", "sigmoid" ], - "title": "FilteringFunction", - "description": "The type of filtering function." + "description": "Type of filtering to apply to generated synthetic data samples" }, "model": { - "type": "string" + "type": "string", + "description": "(Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint" } }, "additionalProperties": false, @@ -16389,7 +16391,8 @@ } ] } - } + }, + "description": "List of generated synthetic data samples that passed the filtering criteria" }, "statistics": { "type": "object", @@ -16414,7 +16417,8 @@ "type": "object" } ] - } + }, + "description": "(Optional) Statistical information about the generation process and filtering results" } }, "additionalProperties": false, diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 995ff8601..0c576b7c9 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -3475,7 +3475,8 @@ paths: post: responses: '200': - description: OK + description: >- + Response containing filtered synthetic data samples and optional statistics content: application/json: schema: @@ -3492,7 +3493,8 @@ paths: $ref: '#/components/responses/DefaultError' tags: - SyntheticDataGeneration (Coming Soon) - description: '' + description: >- + Generate synthetic data based on input dialogs and apply filtering. parameters: [] requestBody: content: @@ -7851,9 +7853,9 @@ components: - benchmark - tool - tool_group - title: ResourceType const: shield default: shield + description: The resource type, always shield params: type: object additionalProperties: @@ -7864,6 +7866,8 @@ components: - type: string - type: array - type: object + description: >- + (Optional) Configuration parameters for the shield additionalProperties: false required: - identifier @@ -7871,7 +7875,7 @@ components: - type title: Shield description: >- - A safety shield resource that can be used to check content + A safety shield resource that can be used to check content. Span: type: object properties: @@ -11777,6 +11781,8 @@ components: type: array items: $ref: '#/components/schemas/Message' + description: >- + List of conversation messages to use as input for synthetic data generation filtering_function: type: string enum: @@ -11786,10 +11792,13 @@ components: - top_p - top_k_top_p - sigmoid - title: FilteringFunction - description: The type of filtering function. + description: >- + Type of filtering to apply to generated synthetic data samples model: type: string + description: >- + (Optional) The identifier of the model to use. The model must be registered + with Llama Stack and available via the /models endpoint additionalProperties: false required: - dialogs @@ -11810,6 +11819,8 @@ components: - type: string - type: array - type: object + description: >- + List of generated synthetic data samples that passed the filtering criteria statistics: type: object additionalProperties: @@ -11820,6 +11831,9 @@ components: - type: string - type: array - type: object + description: >- + (Optional) Statistical information about the generation process and filtering + results additionalProperties: false required: - synthetic_data diff --git a/llama_stack/apis/shields/shields.py b/llama_stack/apis/shields/shields.py index ce1f73d8e..5d3e55c55 100644 --- a/llama_stack/apis/shields/shields.py +++ b/llama_stack/apis/shields/shields.py @@ -19,7 +19,11 @@ class CommonShieldFields(BaseModel): @json_schema_type class Shield(CommonShieldFields, Resource): - """A safety shield resource that can be used to check content""" + """A safety shield resource that can be used to check content. + + :param params: (Optional) Configuration parameters for the shield + :param type: The resource type, always shield + """ type: Literal[ResourceType.shield] = ResourceType.shield diff --git a/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py b/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py index 91e550da9..a7af44b28 100644 --- a/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py +++ b/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py @@ -14,7 +14,15 @@ from llama_stack.schema_utils import json_schema_type, webmethod class FilteringFunction(Enum): - """The type of filtering function.""" + """The type of filtering function. + + :cvar none: No filtering applied, accept all generated synthetic data + :cvar random: Random sampling of generated data points + :cvar top_k: Keep only the top-k highest scoring synthetic data samples + :cvar top_p: Nucleus-style filtering, keep samples exceeding cumulative score threshold + :cvar top_k_top_p: Combined top-k and top-p filtering strategy + :cvar sigmoid: Apply sigmoid function for probability-based filtering + """ none = "none" random = "random" @@ -26,7 +34,12 @@ class FilteringFunction(Enum): @json_schema_type class SyntheticDataGenerationRequest(BaseModel): - """Request to generate synthetic data. A small batch of prompts and a filtering function""" + """Request to generate synthetic data. A small batch of prompts and a filtering function + + :param dialogs: List of conversation messages to use as input for synthetic data generation + :param filtering_function: Type of filtering to apply to generated synthetic data samples + :param model: (Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint + """ dialogs: list[Message] filtering_function: FilteringFunction = FilteringFunction.none @@ -35,7 +48,11 @@ class SyntheticDataGenerationRequest(BaseModel): @json_schema_type class SyntheticDataGenerationResponse(BaseModel): - """Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.""" + """Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold. + + :param synthetic_data: List of generated synthetic data samples that passed the filtering criteria + :param statistics: (Optional) Statistical information about the generation process and filtering results + """ synthetic_data: list[dict[str, Any]] statistics: dict[str, Any] | None = None @@ -48,4 +65,12 @@ class SyntheticDataGeneration(Protocol): dialogs: list[Message], filtering_function: FilteringFunction = FilteringFunction.none, model: str | None = None, - ) -> SyntheticDataGenerationResponse: ... + ) -> SyntheticDataGenerationResponse: + """Generate synthetic data based on input dialogs and apply filtering. + + :param dialogs: List of conversation messages to use as input for synthetic data generation + :param filtering_function: Type of filtering to apply to generated synthetic data samples + :param model: (Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint + :returns: Response containing filtered synthetic data samples and optional statistics + """ + ...