mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-20 11:47:00 +00:00
fix: DPOAlignmentConfig schema to use correct DPO parameters (#2804)
Some checks failed
Coverage Badge / unit-tests (push) Failing after 1s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 6s
Integration Tests / discover-tests (push) Successful in 4s
Test Llama Stack Build / generate-matrix (push) Successful in 9s
Test Llama Stack Build / build-single-provider (push) Failing after 8s
Vector IO Integration Tests / test-matrix (3.13, inline::milvus) (push) Failing after 11s
Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 13s
Unit Tests / unit-tests (3.12) (push) Failing after 9s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 13s
Update ReadTheDocs / update-readthedocs (push) Failing after 13s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 22s
Python Package Build Test / build (3.12) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.12, inline::milvus) (push) Failing after 18s
Test External Providers / test-external-providers (venv) (push) Failing after 14s
Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 18s
Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 20s
Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 17s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 17s
Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 19s
Unit Tests / unit-tests (3.13) (push) Failing after 19s
Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 21s
Integration Tests / test-matrix (push) Failing after 19s
Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 17s
Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 22s
Test Llama Stack Build / build (push) Failing after 15s
Python Package Build Test / build (3.13) (push) Failing after 1m50s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 2m5s
Pre-commit / pre-commit (push) Successful in 3m20s
Some checks failed
Coverage Badge / unit-tests (push) Failing after 1s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 6s
Integration Tests / discover-tests (push) Successful in 4s
Test Llama Stack Build / generate-matrix (push) Successful in 9s
Test Llama Stack Build / build-single-provider (push) Failing after 8s
Vector IO Integration Tests / test-matrix (3.13, inline::milvus) (push) Failing after 11s
Vector IO Integration Tests / test-matrix (3.12, inline::faiss) (push) Failing after 13s
Unit Tests / unit-tests (3.12) (push) Failing after 9s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 13s
Update ReadTheDocs / update-readthedocs (push) Failing after 13s
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 22s
Python Package Build Test / build (3.12) (push) Failing after 15s
Vector IO Integration Tests / test-matrix (3.12, inline::milvus) (push) Failing after 18s
Test External Providers / test-external-providers (venv) (push) Failing after 14s
Vector IO Integration Tests / test-matrix (3.12, remote::pgvector) (push) Failing after 18s
Vector IO Integration Tests / test-matrix (3.12, remote::chromadb) (push) Failing after 20s
Vector IO Integration Tests / test-matrix (3.13, remote::chromadb) (push) Failing after 17s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 17s
Vector IO Integration Tests / test-matrix (3.13, inline::faiss) (push) Failing after 19s
Unit Tests / unit-tests (3.13) (push) Failing after 19s
Vector IO Integration Tests / test-matrix (3.13, inline::sqlite-vec) (push) Failing after 21s
Integration Tests / test-matrix (push) Failing after 19s
Vector IO Integration Tests / test-matrix (3.12, inline::sqlite-vec) (push) Failing after 17s
Vector IO Integration Tests / test-matrix (3.13, remote::pgvector) (push) Failing after 22s
Test Llama Stack Build / build (push) Failing after 15s
Python Package Build Test / build (3.13) (push) Failing after 1m50s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 2m5s
Pre-commit / pre-commit (push) Successful in 3m20s
# What does this PR do?

This PR fixes the `DPOAlignmentConfig` schema to use the correct Direct Preference Optimization (DPO) parameters.

The current schema incorrectly uses PPO-inspired parameters (`reward_scale`, `reward_clip`, `epsilon`, `gamma`) that are not part of the DPO algorithm. This PR updates it to use the standard DPO parameters:

- `beta`: The KL divergence coefficient that controls deviation from the reference model
- `loss_type`: The type of DPO loss function (`sigmoid`, `hinge`, `ipo`, `kto_pair`)

These parameters align with standard DPO implementations like HuggingFace's TRL library.

---------

Co-authored-by: Ubuntu <ubuntu@ip-172-31-43-83.ec2.internal>
This commit is contained in:
parent
d994305f0a
commit
874b1cb00f
3 changed files with 40 additions and 28 deletions
29
docs/_static/llama-stack-spec.html
vendored
29
docs/_static/llama-stack-spec.html
vendored
|
@ -14470,28 +14470,31 @@
|
||||||
"DPOAlignmentConfig": {
|
"DPOAlignmentConfig": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"reward_scale": {
|
"beta": {
|
||||||
"type": "number"
|
"type": "number"
|
||||||
},
|
},
|
||||||
"reward_clip": {
|
"loss_type": {
|
||||||
"type": "number"
|
"$ref": "#/components/schemas/DPOLossType",
|
||||||
},
|
"default": "sigmoid"
|
||||||
"epsilon": {
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
"gamma": {
|
|
||||||
"type": "number"
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"reward_scale",
|
"beta",
|
||||||
"reward_clip",
|
"loss_type"
|
||||||
"epsilon",
|
|
||||||
"gamma"
|
|
||||||
],
|
],
|
||||||
"title": "DPOAlignmentConfig"
|
"title": "DPOAlignmentConfig"
|
||||||
},
|
},
|
||||||
|
"DPOLossType": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"sigmoid",
|
||||||
|
"hinge",
|
||||||
|
"ipo",
|
||||||
|
"kto_pair"
|
||||||
|
],
|
||||||
|
"title": "DPOLossType"
|
||||||
|
},
|
||||||
"DataConfig": {
|
"DataConfig": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
|
25
docs/_static/llama-stack-spec.yaml
vendored
25
docs/_static/llama-stack-spec.yaml
vendored
|
@ -10111,21 +10111,24 @@ components:
|
||||||
DPOAlignmentConfig:
|
DPOAlignmentConfig:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
reward_scale:
|
beta:
|
||||||
type: number
|
|
||||||
reward_clip:
|
|
||||||
type: number
|
|
||||||
epsilon:
|
|
||||||
type: number
|
|
||||||
gamma:
|
|
||||||
type: number
|
type: number
|
||||||
|
loss_type:
|
||||||
|
$ref: '#/components/schemas/DPOLossType'
|
||||||
|
default: sigmoid
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- reward_scale
|
- beta
|
||||||
- reward_clip
|
- loss_type
|
||||||
- epsilon
|
|
||||||
- gamma
|
|
||||||
title: DPOAlignmentConfig
|
title: DPOAlignmentConfig
|
||||||
|
DPOLossType:
|
||||||
|
type: string
|
||||||
|
enum:
|
||||||
|
- sigmoid
|
||||||
|
- hinge
|
||||||
|
- ipo
|
||||||
|
- kto_pair
|
||||||
|
title: DPOLossType
|
||||||
DataConfig:
|
DataConfig:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
|
@ -104,12 +104,18 @@ class RLHFAlgorithm(Enum):
|
||||||
dpo = "dpo"
|
dpo = "dpo"
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
|
||||||
|
class DPOLossType(Enum):
|
||||||
|
sigmoid = "sigmoid"
|
||||||
|
hinge = "hinge"
|
||||||
|
ipo = "ipo"
|
||||||
|
kto_pair = "kto_pair"
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class DPOAlignmentConfig(BaseModel):
|
class DPOAlignmentConfig(BaseModel):
|
||||||
reward_scale: float
|
beta: float
|
||||||
reward_clip: float
|
loss_type: DPOLossType = DPOLossType.sigmoid
|
||||||
epsilon: float
|
|
||||||
gamma: float
|
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue