Add reward scoring and synthetic data generation endpoints to the OpenAPI spec

2025-12-04 02:03:44 +00:00 · 2024-07-10 19:22:33 -07:00 · 2024-07-10 19:22:33 -07:00 · 956f07b04c
commit 956f07b04c
parent eb12bfbef0
3 changed files with 461 additions and 11 deletions
--- a/source/openapi.html
+++ b/source/openapi.html
@@ -191,6 +191,66 @@
                    "required": true
                }
            }
+        },
+        "/synthetic_data_generation/generate": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/SyntheticDataGenerationResponse"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "SyntheticDataGeneration"
+                ],
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/SyntheticDataGenerationRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
+        },
+        "/reward_scoring/score": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "OK",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/RewardScoringResponse"
+                                }
+                            }
+                        }
+                    }
+                },
+                "tags": [
+                    "RewardScoring"
+                ],
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/RewardScoringRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                }
+            }
        }
    },
    "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema",
@@ -1451,6 +1511,161 @@
                    "text_delta"
                ],
                "title": "streamed completion response."
+            },
+            "SyntheticDataGenerationRequest": {
+                "type": "object",
+                "properties": {
+                    "prompts": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        }
+                    },
+                    "filtering_function": {
+                        "type": "string",
+                        "enum": [
+                            "none",
+                            "random",
+                            "top_k",
+                            "top_p",
+                            "top_k_top_p",
+                            "sigmoid"
+                        ],
+                        "title": "The type of filtering function.",
+                        "default": "none"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "prompts",
+                    "filtering_function"
+                ],
+                "title": "Request to generate synthetic data. A small batch of prompts and a filtering function."
+            },
+            "SyntheticDataGenerationResponse": {
+                "type": "object",
+                "properties": {
+                    "synthetic_data": {
+                        "type": "array",
+                        "items": {
+                            "type": "array",
+                            "minItems": 3,
+                            "maxItems": 3,
+                            "prefixItems": [
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "string"
+                                },
+                                {
+                                    "type": "number"
+                                }
+                            ]
+                        }
+                    },
+                    "statistics": {
+                        "type": "object",
+                        "additionalProperties": {
+                            "type": "number"
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "synthetic_data",
+                    "statistics"
+                ],
+                "title": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
+            },
+            "RewardScoringRequest": {
+                "type": "object",
+                "properties": {
+                    "prompt_generations": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "prompt": {
+                                    "$ref": "#/components/schemas/Message"
+                                },
+                                "message_history": {
+                                    "type": "array",
+                                    "items": {
+                                        "$ref": "#/components/schemas/Message"
+                                    }
+                                },
+                                "generation": {
+                                    "$ref": "#/components/schemas/Message"
+                                }
+                            },
+                            "additionalProperties": false,
+                            "required": [
+                                "prompt",
+                                "message_history",
+                                "generation"
+                            ]
+                        }
+                    },
+                    "model": {
+                        "type": "string"
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "prompt_generations",
+                    "model"
+                ],
+                "title": "Request to score generations with a reward function. A list of prompt generations (prompt, message history, and generation) and the name of the model to use."
+            },
+            "RewardScoringResponse": {
+                "type": "object",
+                "properties": {
+                    "scored_generations": {
+                        "type": "array",
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "prompt_generation": {
+                                    "type": "object",
+                                    "properties": {
+                                        "prompt": {
+                                            "$ref": "#/components/schemas/Message"
+                                        },
+                                        "message_history": {
+                                            "type": "array",
+                                            "items": {
+                                                "$ref": "#/components/schemas/Message"
+                                            }
+                                        },
+                                        "generation": {
+                                            "$ref": "#/components/schemas/Message"
+                                        }
+                                    },
+                                    "additionalProperties": false,
+                                    "required": [
+                                        "prompt",
+                                        "message_history",
+                                        "generation"
+                                    ]
+                                },
+                                "score": {
+                                    "type": "number"
+                                }
+                            },
+                            "additionalProperties": false,
+                            "required": [
+                                "prompt_generation",
+                                "score"
+                            ]
+                        }
+                    }
+                },
+                "additionalProperties": false,
+                "required": [
+                    "scored_generations"
+                ],
+                "title": "Response from the reward scoring. A list of scored generations, each pairing a prompt generation with its score."
            }
        },
        "responses": {}
@@ -1462,11 +1677,17 @@
    ],
    "tags": [
        {
-            "name": "AgenticSystem"
+            "name": "RewardScoring"
        },
        {
            "name": "Inference"
        },
+        {
+            "name": "SyntheticDataGeneration"
+        },
+        {
+            "name": "AgenticSystem"
+        },
        {
            "name": "ShieldConfig",
            "description": "<SchemaDefinition schemaRef=\"#/components/schemas/ShieldConfig\" />"
@@ -1530,6 +1751,22 @@
        {
            "name": "CompletionResponseStreamChunk",
            "description": "streamed completion response.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/CompletionResponseStreamChunk\" />"
+        },
+        {
+            "name": "SyntheticDataGenerationRequest",
+            "description": "Request to generate synthetic data. A small batch of prompts and a filtering function.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/SyntheticDataGenerationRequest\" />"
+        },
+        {
+            "name": "SyntheticDataGenerationResponse",
+            "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/SyntheticDataGenerationResponse\" />"
+        },
+        {
+            "name": "RewardScoringRequest",
+            "description": "Request to score generations with a reward function. A list of prompt generations (prompt, message history, and generation) and the name of the model to use.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/RewardScoringRequest\" />"
+        },
+        {
+            "name": "RewardScoringResponse",
+            "description": "Response from the reward scoring. A list of scored generations, each pairing a prompt generation with its score.\n\n<SchemaDefinition schemaRef=\"#/components/schemas/RewardScoringResponse\" />"
        }
    ],
    "x-tagGroups": [
@@ -1537,7 +1774,9 @@
            "name": "Operations",
            "tags": [
                "AgenticSystem",
-                "Inference"
+                "Inference",
+                "RewardScoring",
+                "SyntheticDataGeneration"
            ]
        },
        {
@@ -1557,7 +1796,11 @@
                "CompletionResponse",
                "CompletionResponseStreamChunk",
                "Message",
+                "RewardScoringRequest",
+                "RewardScoringResponse",
                "ShieldConfig",
+                "SyntheticDataGenerationRequest",
+                "SyntheticDataGenerationResponse",
                "URL"
            ]
        }