This commit is contained in:
Sai Soundararaj 2025-07-01 17:22:24 -07:00
parent bcdb6fcc15
commit 2788761f6e
5 changed files with 73 additions and 26 deletions

View file

@ -235,9 +235,9 @@ Before finalizing documentation, verify:
[x] 14. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring/scoring.py` - Scoring system
[x] 15. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/scoring_functions/scoring_functions.py` - Scoring function definitions
[x] 16. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/benchmarks/benchmarks.py` - Benchmarking framework
17. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/shields/shields.py` - Safety shields
18. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/batch_inference/batch_inference.py` - Batch inference operations
19. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py` - Data generation
[x] 17. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/shields/shields.py` - Safety shields
[x] 18. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/batch_inference/batch_inference.py` - Batch inference operations
[x] 19. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py` - Data generation
20. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/telemetry/telemetry.py` - Telemetry and monitoring
21. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/providers/providers.py` - Provider management
22. `/Users/saip/Documents/GitHub/llama-stack/llama_stack/apis/inspect/inspect.py` - System inspection

View file

@ -4912,7 +4912,7 @@
"post": {
"responses": {
"200": {
"description": "OK",
"description": "Response containing filtered synthetic data samples and optional statistics",
"content": {
"application/json": {
"schema": {
@ -4937,7 +4937,7 @@
"tags": [
"SyntheticDataGeneration (Coming Soon)"
],
"description": "",
"description": "Generate synthetic data based on input dialogs and apply filtering.",
"parameters": [],
"requestBody": {
"content": {
@ -10888,9 +10888,9 @@
"tool",
"tool_group"
],
"title": "ResourceType",
"const": "shield",
"default": "shield"
"default": "shield",
"description": "The resource type, always shield"
},
"params": {
"type": "object",
@ -10915,7 +10915,8 @@
"type": "object"
}
]
}
},
"description": "(Optional) Configuration parameters for the shield"
}
},
"additionalProperties": false,
@ -10925,7 +10926,7 @@
"type"
],
"title": "Shield",
"description": "A safety shield resource that can be used to check content"
"description": "A safety shield resource that can be used to check content."
},
"Span": {
"type": "object",
@ -16334,7 +16335,8 @@
"type": "array",
"items": {
"$ref": "#/components/schemas/Message"
}
},
"description": "List of conversation messages to use as input for synthetic data generation"
},
"filtering_function": {
"type": "string",
@ -16346,11 +16348,11 @@
"top_k_top_p",
"sigmoid"
],
"title": "FilteringFunction",
"description": "The type of filtering function."
"description": "Type of filtering to apply to generated synthetic data samples"
},
"model": {
"type": "string"
"type": "string",
"description": "(Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint"
}
},
"additionalProperties": false,
@ -16389,7 +16391,8 @@
}
]
}
}
},
"description": "List of generated synthetic data samples that passed the filtering criteria"
},
"statistics": {
"type": "object",
@ -16414,7 +16417,8 @@
"type": "object"
}
]
}
},
"description": "(Optional) Statistical information about the generation process and filtering results"
}
},
"additionalProperties": false,

View file

@ -3475,7 +3475,8 @@ paths:
post:
responses:
'200':
description: OK
description: >-
Response containing filtered synthetic data samples and optional statistics
content:
application/json:
schema:
@ -3492,7 +3493,8 @@ paths:
$ref: '#/components/responses/DefaultError'
tags:
- SyntheticDataGeneration (Coming Soon)
description: ''
description: >-
Generate synthetic data based on input dialogs and apply filtering.
parameters: []
requestBody:
content:
@ -7851,9 +7853,9 @@ components:
- benchmark
- tool
- tool_group
title: ResourceType
const: shield
default: shield
description: The resource type, always shield
params:
type: object
additionalProperties:
@ -7864,6 +7866,8 @@ components:
- type: string
- type: array
- type: object
description: >-
(Optional) Configuration parameters for the shield
additionalProperties: false
required:
- identifier
@ -7871,7 +7875,7 @@ components:
- type
title: Shield
description: >-
A safety shield resource that can be used to check content
A safety shield resource that can be used to check content.
Span:
type: object
properties:
@ -11777,6 +11781,8 @@ components:
type: array
items:
$ref: '#/components/schemas/Message'
description: >-
List of conversation messages to use as input for synthetic data generation
filtering_function:
type: string
enum:
@ -11786,10 +11792,13 @@ components:
- top_p
- top_k_top_p
- sigmoid
title: FilteringFunction
description: The type of filtering function.
description: >-
Type of filtering to apply to generated synthetic data samples
model:
type: string
description: >-
(Optional) The identifier of the model to use. The model must be registered
with Llama Stack and available via the /models endpoint
additionalProperties: false
required:
- dialogs
@ -11810,6 +11819,8 @@ components:
- type: string
- type: array
- type: object
description: >-
List of generated synthetic data samples that passed the filtering criteria
statistics:
type: object
additionalProperties:
@ -11820,6 +11831,9 @@ components:
- type: string
- type: array
- type: object
description: >-
(Optional) Statistical information about the generation process and filtering
results
additionalProperties: false
required:
- synthetic_data

View file

@ -19,7 +19,11 @@ class CommonShieldFields(BaseModel):
@json_schema_type
class Shield(CommonShieldFields, Resource):
"""A safety shield resource that can be used to check content"""
"""A safety shield resource that can be used to check content.
:param params: (Optional) Configuration parameters for the shield
:param type: The resource type, always shield
"""
type: Literal[ResourceType.shield] = ResourceType.shield

View file

@ -14,7 +14,15 @@ from llama_stack.schema_utils import json_schema_type, webmethod
class FilteringFunction(Enum):
"""The type of filtering function."""
"""The type of filtering function.
:cvar none: No filtering applied; accept all generated synthetic data
:cvar random: Random sampling of generated data points
:cvar top_k: Keep only the top-k highest scoring synthetic data samples
:cvar top_p: Nucleus-style filtering; keep samples exceeding cumulative score threshold
:cvar top_k_top_p: Combined top-k and top-p filtering strategy
:cvar sigmoid: Apply sigmoid function for probability-based filtering
"""
none = "none"
random = "random"
@ -26,7 +34,12 @@ class FilteringFunction(Enum):
@json_schema_type
class SyntheticDataGenerationRequest(BaseModel):
"""Request to generate synthetic data. A small batch of prompts and a filtering function"""
"""Request to generate synthetic data. A small batch of prompts and a filtering function.
:param dialogs: List of conversation messages to use as input for synthetic data generation
:param filtering_function: Type of filtering to apply to generated synthetic data samples
:param model: (Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint
"""
dialogs: list[Message]
filtering_function: FilteringFunction = FilteringFunction.none
@ -35,7 +48,11 @@ class SyntheticDataGenerationRequest(BaseModel):
@json_schema_type
class SyntheticDataGenerationResponse(BaseModel):
"""Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."""
"""Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.
:param synthetic_data: List of generated synthetic data samples that passed the filtering criteria
:param statistics: (Optional) Statistical information about the generation process and filtering results
"""
synthetic_data: list[dict[str, Any]]
statistics: dict[str, Any] | None = None
@ -48,4 +65,12 @@ class SyntheticDataGeneration(Protocol):
dialogs: list[Message],
filtering_function: FilteringFunction = FilteringFunction.none,
model: str | None = None,
) -> SyntheticDataGenerationResponse: ...
) -> SyntheticDataGenerationResponse:
"""Generate synthetic data based on input dialogs and apply filtering.
:param dialogs: List of conversation messages to use as input for synthetic data generation
:param filtering_function: Type of filtering to apply to generated synthetic data samples
:param model: (Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint
:returns: Response containing filtered synthetic data samples and optional statistics
"""
...