update reward model scoring apis

This commit is contained in:
Hardik Shah 2024-06-26 16:25:42 -07:00
parent 04f89ad315
commit c9a75c4628
3 changed files with 182 additions and 127 deletions

View file

@ -1,108 +1,13 @@
openapi: 3.0.0 openapi: 3.0.0
info: info:
title: Reward Model Service title: Reward Model Service API
version: 1.0.0 version: 0.0.1
components:
schemas:
Message:
# TODO: Keep in sync with /chat_completion
type: object
properties:
role:
type: string
description: Role of the entity in the message.
text:
type: string
description: Text content of the message.
attachments:
type: array
items:
$ref: '#/components/schemas/Attachment'
Completion:
# TODO: Keep in sync with /chat_completion
type: object
properties:
id:
type: string
description: Unique identifier for the completion.
role:
type: string
text:
type: string
attachments:
type: array
items:
$ref: '#/components/schemas/Attachment'
tokens:
type: array
items:
type: integer
logprobs:
type: array
items:
type: number
finish_reason:
type: string
# Not Used in API but this json encoded version will be used in batch callsite
PromptResponsePair:
type: object
properties:
id:
type: string
description: Unique identifier for the prompt-response pair.
prompt:
type: array
items:
$ref: '#/components/schemas/Message'
response:
$ref: '#/components/schemas/Completion'
# Not Used in API but this json encoded version will be used in batch callsite
PromptResponseScore:
type: object
properties:
id:
type: string
description: Identifier carried over from the user provided id.
raw_score:
type: number
format: float
description: Raw score before any scoring function is applied.
score:
type: number
format: float
description: Final score after applying the scoring function.
BatchRewardModelScoring:
type: object
properties:
job_id:
type: string
description: Unique identifier for the job.
created:
type: string
format: date-time
description: Timestamp when the job was created.
status:
type: string
enum: [started, completed, running]
description: Current status of the job.
input_path:
type: string
description: User provided input path.
success_file_path:
type: string
description: Path to the success file containing scores.
error_file_path:
type: string
description: Path to the error file.
metadata:
type: object
additionalProperties: true
description: User provided metadata carried forward in the response.
paths: paths:
/reward_model_scoring/: /reward_model_scoring/:
post: post:
summary: Score a pair of prompt and response using a reward model summary: Score a prompt-response pair using a reward model
description: Take a pair of prompt and responses and score them using a reward model description: |
This endpoint scores a given prompt-response pair using a specified reward model and scoring function.
requestBody: requestBody:
required: true required: true
content: content:
@ -110,19 +15,18 @@ paths:
schema: schema:
type: object type: object
properties: properties:
reward_model: prompt_with_response:
type: string
description: Unique identifier for the reward model
messages:
type: array type: array
items: items:
$ref: '#/components/schemas/Message' $ref: '#/components/schemas/Message'
description: List[Message] same as what is used in /chat_completion api description: "Prompt and response joined as a list of messages."
response: reward_model:
$ref: '#/components/schemas/Completion'
description: Completion same as what is used in /chat_completion api
scoring_function:
type: string type: string
description: "Identifier for the reward model to be used."
scoring_function:
$ref: '#/components/schemas/ScoringFunction'
options:
$ref: '#/components/schemas/Options'
responses: responses:
'200': '200':
description: Scoring completed successfully description: Scoring completed successfully
@ -133,18 +37,18 @@ paths:
properties: properties:
id: id:
type: string type: string
description: Unique identifier for the scoring request logprob:
raw_score:
type: number type: number
format: float format: float
score: score:
type: number type: number
format: float format: float
/batch_reward_model_scoring/: /reward_model_scoring/submit_job/:
post: post:
summary: Batch score multiple pairs of prompts and responses summary: Batch scoring using reward models
description: Take a pair of prompt and responses and score them using a reward model description: |
Submit a batch job for scoring multiple prompt-response pairs using a reward model.
requestBody: requestBody:
required: true required: true
content: content:
@ -154,24 +58,105 @@ paths:
properties: properties:
reward_model: reward_model:
type: string type: string
description: Unique identifier for the reward model description: "Identifier for the reward model to be used."
# TODO: Maybe take local path and api first uploads and generates handle prompt_with_response_path:
# which is returned in the response. $ref: '#/components/schemas/Path'
prompt_response_path: description: "Path to a JSONL file where each line is a List[Message] and custom_id."
type: string
description: File path for a JSONL where each line is a JSON encoded `PromptResponsePair` object
scoring_function: scoring_function:
type: string $ref: '#/components/schemas/ScoringFunction'
description: String used to resolve to the appropriate scoring function from a registry
metadata: metadata:
type: object type: object
additionalProperties: true additionalProperties: true
description: Metadata to carry forward in the response description: "Metadata to carry forward in the response."
responses: responses:
'200': '200':
description: Batch scoring job submitted successfully description: Batch scoring job successfully submitted
content: content:
application/json: application/json:
schema: schema:
$ref: '#/components/schemas/BatchRewardModelScoring' $ref: '#/components/schemas/BatchRewardModelScoringJob'
/reward_model_scoring/submit_job/job_status:
get:
summary: Get status for an already submitted job
description: |
Retrieve the status and details of a previously submitted batch scoring job using its unique job ID.
parameters:
- in: query
name: job_id
schema:
type: string
required: true
description: "Unique identifier for the batch scoring job."
responses:
'200':
description: Batch scoring job status retrieved successfully
content:
application/json:
schema:
$ref: '#/components/schemas/BatchRewardModelScoringJob'
components:
schemas:
Message:
# reuse from /chat_completion
Options:
type: object
properties:
logprobs:
type: boolean
max_tokens:
type: integer
temperature:
type: number
top_p:
type: number
# TODO: Add/Remove more reward model specific params
ScoringFunction:
type: object
properties:
name:
type: string
params:
type: object
additionalProperties: true
Path:
type: object
properties:
value:
type: string
type:
type: string
enum:
- raw_bytes
- filepath
- uri
BatchRewardModelScoringJob:
type: object
properties:
job_id:
type: string
created:
type: string
format: date-time
status:
type: string
enum:
- validating
- running
- completed
- failed
input_file_path:
$ref: '#/components/schemas/Path'
success_file_path:
$ref: '#/components/schemas/Path'
error_file_path:
$ref: '#/components/schemas/Path'
metadata:
type: object
additionalProperties: true
description: "Metadata carried forward from the job submission."

View file

@ -0,0 +1,70 @@
# Reward Model Service
== Schema ==
Message:
# Same as /chat_completion
Options:
logprobs: bool
max_tokens: int
temperature: float
top_p: float
# TODO: Figure out which other reward-model-specific params are needed
ScoringFunction:
name: str
params: json
BatchRewardModelScoringJob:
job_id: str
created: str # format date-time
status: str # enum (validating, running, completed, failed)
input_file_path: Path
success_file_path: Path # jsonl where each row has {custom_id: <from input>, logprob: <float>, score: float}
error_file_path: Path # jsonl where each row has {custom_id: <from input>, error: <error_info>}
metadata: json # carry forward from job submission api
== Callsites ==
callsite:
reward_model_scoring/
request_type:
post
description:
Score a prompt-response pair using a reward model
request:
prompt_with_response: List[Message] # prompt and response joined as a List[Message]
reward_model: str
scoring_function: ScoringFunction
options: Options
response:
id: str
logprob: float
score: float
callsite:
reward_model_scoring/submit_job/
request_type:
post
description:
Batch scoring using reward models
request:
reward_model: str
prompt_with_response_path: Path # jsonl file where each line is a List[Message] and custom_id
scoring_function: ScoringFunction
metadata: json # anything to carry forward in the response
response:
batch_reward_model_scoring_job: BatchRewardModelScoringJob
callsite:
/reward_model_scoring/submit_job/job_status
request_type:
get
description:
Get status for an already submitted job
request:
job_id: str # unique identifier for the job
response:
batch_reward_model_scoring_job: BatchRewardModelScoringJob

View file

@ -20,7 +20,7 @@ paths:
description: Model identifier for batch inference. description: Model identifier for batch inference.
prompts_path: prompts_path:
type: string type: string
description: Path to prompts, JSONL where each row is formatted for batch inference. description: Path to prompts, JSONL for batch inference
batch_size: batch_size:
type: integer type: integer
description: Number of prompts to process in each batch. description: Number of prompts to process in each batch.