mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-22 22:42:25 +00:00
Remove unused DPO parameters from schema and tests
This commit is contained in:
parent
5c33bc1353
commit
b7901156f5
4 changed files with 0 additions and 50 deletions
20
docs/_static/llama-stack-spec.html
vendored
20
docs/_static/llama-stack-spec.html
vendored
|
|
@@ -15078,22 +15078,6 @@
|
|||
"DPOAlignmentConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"reward_scale": {
|
||||
"type": "number",
|
||||
"description": "Scaling factor for the reward signal"
|
||||
},
|
||||
"reward_clip": {
|
||||
"type": "number",
|
||||
"description": "Maximum absolute value for reward clipping"
|
||||
},
|
||||
"epsilon": {
|
||||
"type": "number",
|
||||
"description": "Small value added for numerical stability"
|
||||
},
|
||||
"gamma": {
|
||||
"type": "number",
|
||||
"description": "Discount factor for future rewards"
|
||||
},
|
||||
"beta": {
|
||||
"type": "number",
|
||||
"description": "Temperature parameter for the DPO loss"
|
||||
|
|
@@ -15106,10 +15090,6 @@
|
|||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"reward_scale",
|
||||
"reward_clip",
|
||||
"epsilon",
|
||||
"gamma",
|
||||
"beta",
|
||||
"loss_type"
|
||||
],
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue