mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-11 13:44:38 +00:00
fixes to reward stuff
This commit is contained in:
parent
eb12bfbef0
commit
956f07b04c
3 changed files with 461 additions and 11 deletions
|
@ -750,6 +750,70 @@ components:
|
|||
- tool_calls
|
||||
- tool_responses
|
||||
type: object
|
||||
RewardScoringRequest:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
model:
|
||||
type: string
|
||||
prompt_generations:
|
||||
items:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
generation:
|
||||
$ref: '#/components/schemas/Message'
|
||||
message_history:
|
||||
items:
|
||||
$ref: '#/components/schemas/Message'
|
||||
type: array
|
||||
prompt:
|
||||
$ref: '#/components/schemas/Message'
|
||||
required:
|
||||
- prompt
|
||||
- message_history
|
||||
- generation
|
||||
type: object
|
||||
type: array
|
||||
required:
|
||||
- prompt_generations
|
||||
- model
|
||||
title: Request to score a reward function. A list of prompts and a list of responses
|
||||
per prompt.
|
||||
type: object
|
||||
RewardScoringResponse:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
scored_generations:
|
||||
items:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
prompt_generation:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
generation:
|
||||
$ref: '#/components/schemas/Message'
|
||||
message_history:
|
||||
items:
|
||||
$ref: '#/components/schemas/Message'
|
||||
type: array
|
||||
prompt:
|
||||
$ref: '#/components/schemas/Message'
|
||||
required:
|
||||
- prompt
|
||||
- message_history
|
||||
- generation
|
||||
type: object
|
||||
score:
|
||||
type: number
|
||||
required:
|
||||
- prompt_generation
|
||||
- score
|
||||
type: object
|
||||
type: array
|
||||
required:
|
||||
- scored_generations
|
||||
title: Response from the reward scoring. Batch of (prompt, response, score)
|
||||
tuples that pass the threshold.
|
||||
type: object
|
||||
ShieldConfig:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
|
@ -774,6 +838,53 @@ components:
|
|||
- shield_type
|
||||
- params
|
||||
type: object
|
||||
SyntheticDataGenerationRequest:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
filtering_function:
|
||||
default: none
|
||||
enum:
|
||||
- none
|
||||
- random
|
||||
- top_k
|
||||
- top_p
|
||||
- top_k_top_p
|
||||
- sigmoid
|
||||
title: The type of filtering function.
|
||||
type: string
|
||||
prompts:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
required:
|
||||
- prompts
|
||||
- filtering_function
|
||||
title: Request to generate synthetic data. A small batch of prompts and a filtering
|
||||
function
|
||||
type: object
|
||||
SyntheticDataGenerationResponse:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
statistics:
|
||||
additionalProperties:
|
||||
type: number
|
||||
type: object
|
||||
synthetic_data:
|
||||
items:
|
||||
maxItems: 3
|
||||
minItems: 3
|
||||
prefixItems:
|
||||
- type: string
|
||||
- type: string
|
||||
- type: number
|
||||
type: array
|
||||
type: array
|
||||
required:
|
||||
- synthetic_data
|
||||
- statistics
|
||||
title: Response from the synthetic data generation. Batch of (prompt, response,
|
||||
score) tuples that pass the threshold.
|
||||
type: object
|
||||
URL:
|
||||
format: uri
|
||||
pattern: ^(https?://|file://|data:)
|
||||
|
@ -878,13 +989,51 @@ paths:
|
|||
description: Normal completion response. **OR** streamed completion response.
|
||||
tags:
|
||||
- Inference
|
||||
/reward_scoring/score:
|
||||
post:
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RewardScoringRequest'
|
||||
required: true
|
||||
responses:
|
||||
'200':
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RewardScoringResponse'
|
||||
description: OK
|
||||
tags:
|
||||
- RewardScoring
|
||||
/synthetic_data_generation/generate:
|
||||
post:
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/SyntheticDataGenerationRequest'
|
||||
required: true
|
||||
responses:
|
||||
'200':
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/SyntheticDataGenerationResponse'
|
||||
description: OK
|
||||
tags:
|
||||
- SyntheticDataGeneration
|
||||
security:
|
||||
- Default: []
|
||||
servers:
|
||||
- url: http://llama.meta.com
|
||||
tags:
|
||||
- name: AgenticSystem
|
||||
- name: RewardScoring
|
||||
- name: Inference
|
||||
- name: SyntheticDataGeneration
|
||||
- name: AgenticSystem
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/ShieldConfig" />
|
||||
name: ShieldConfig
|
||||
- description: <SchemaDefinition schemaRef="#/components/schemas/AgenticSystemCreateRequest"
|
||||
|
@ -952,11 +1101,39 @@ tags:
|
|||
<SchemaDefinition schemaRef="#/components/schemas/CompletionResponseStreamChunk"
|
||||
/>'
|
||||
name: CompletionResponseStreamChunk
|
||||
- description: 'Request to generate synthetic data. A small batch of prompts and a
|
||||
filtering function
|
||||
|
||||
|
||||
<SchemaDefinition schemaRef="#/components/schemas/SyntheticDataGenerationRequest"
|
||||
/>'
|
||||
name: SyntheticDataGenerationRequest
|
||||
- description: 'Response from the synthetic data generation. Batch of (prompt, response,
|
||||
score) tuples that pass the threshold.
|
||||
|
||||
|
||||
<SchemaDefinition schemaRef="#/components/schemas/SyntheticDataGenerationResponse"
|
||||
/>'
|
||||
name: SyntheticDataGenerationResponse
|
||||
- description: 'Request to score a reward function. A list of prompts and a list of
|
||||
responses per prompt.
|
||||
|
||||
|
||||
<SchemaDefinition schemaRef="#/components/schemas/RewardScoringRequest" />'
|
||||
name: RewardScoringRequest
|
||||
- description: 'Response from the reward scoring. Batch of (prompt, response, score)
|
||||
tuples that pass the threshold.
|
||||
|
||||
|
||||
<SchemaDefinition schemaRef="#/components/schemas/RewardScoringResponse" />'
|
||||
name: RewardScoringResponse
|
||||
x-tagGroups:
|
||||
- name: Operations
|
||||
tags:
|
||||
- AgenticSystem
|
||||
- Inference
|
||||
- RewardScoring
|
||||
- SyntheticDataGeneration
|
||||
- name: Types
|
||||
tags:
|
||||
- AgenticSystemCreateRequest
|
||||
|
@ -973,5 +1150,9 @@ x-tagGroups:
|
|||
- CompletionResponse
|
||||
- CompletionResponseStreamChunk
|
||||
- Message
|
||||
- RewardScoringRequest
|
||||
- RewardScoringResponse
|
||||
- ShieldConfig
|
||||
- SyntheticDataGenerationRequest
|
||||
- SyntheticDataGenerationResponse
|
||||
- URL
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue