diff --git a/llama_toolchain/evaluations/api/endpoints.py b/llama_toolchain/evaluations/api/endpoints.py index 39b9a28e0..30e5689e3 100644 --- a/llama_toolchain/evaluations/api/endpoints.py +++ b/llama_toolchain/evaluations/api/endpoints.py @@ -60,19 +60,19 @@ class EvaluationJobArtifactsResponse(BaseModel): class Evaluations(Protocol): @webmethod(route="/evaluate/text_generation/") - def post_evaluate_text_generation( + def evaluate_text_generation( self, request: EvaluateTextGenerationRequest, ) -> EvaluationJob: ... @webmethod(route="/evaluate/question_answering/") - def post_evaluate_question_answering( + def evaluate_question_answering( self, request: EvaluateQuestionAnsweringRequest, ) -> EvaluationJob: ... @webmethod(route="/evaluate/summarization/") - def post_evaluate_summarization( + def evaluate_summarization( self, request: EvaluateSummarizationRequest, ) -> EvaluationJob: ... diff --git a/llama_toolchain/memory/api/endpoints.py b/llama_toolchain/memory/api/endpoints.py index 4261afa89..d8ac0e90c 100644 --- a/llama_toolchain/memory/api/endpoints.py +++ b/llama_toolchain/memory/api/endpoints.py @@ -13,7 +13,7 @@ from .datatypes import * # noqa: F403 class MemoryBanks(Protocol): @webmethod(route="/memory_banks/create") - def post_create_memory_bank( + def create_memory_bank( self, bank_id: str, bank_name: str, @@ -33,14 +33,14 @@ class MemoryBanks(Protocol): ) -> str: ... @webmethod(route="/memory_bank/insert") - def post_insert_memory_documents( + def insert_memory_documents( self, bank_id: str, documents: List[MemoryBankDocument], ) -> None: ... @webmethod(route="/memory_bank/update") - def post_update_memory_documents( + def update_memory_documents( self, bank_id: str, documents: List[MemoryBankDocument], diff --git a/llama_toolchain/post_training/api/endpoints.py b/llama_toolchain/post_training/api/endpoints.py index 0512003d3..542aff8cd 100644 --- a/llama_toolchain/post_training/api/endpoints.py +++ b/llama_toolchain/post_training/api/endpoints.py @@ -95,13 +95,13 @@ class PostTrainingJobArtifactsResponse(BaseModel): class PostTraining(Protocol): @webmethod(route="/post_training/supervised_fine_tune") - def post_supervised_fine_tune( + def supervised_fine_tune( self, request: PostTrainingSFTRequest, ) -> PostTrainingJob: ... @webmethod(route="/post_training/preference_optimize") - def post_preference_optimize( + def preference_optimize( self, request: PostTrainingRLHFRequest, ) -> PostTrainingJob: ... diff --git a/llama_toolchain/reward_scoring/api/endpoints.py b/llama_toolchain/reward_scoring/api/endpoints.py index 0a7327a9b..657e7b325 100644 --- a/llama_toolchain/reward_scoring/api/endpoints.py +++ b/llama_toolchain/reward_scoring/api/endpoints.py @@ -27,7 +27,7 @@ class RewardScoringResponse(BaseModel): class RewardScoring(Protocol): @webmethod(route="/reward_scoring/score") - def post_score( + def reward_score( self, request: RewardScoringRequest, ) -> Union[RewardScoringResponse]: ... 
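The renames above drop the redundant `post_` prefix from the Protocol methods while leaving the `@webmethod` routes untouched. This works because the OpenAPI generator added later in this diff (`rfcs/openapi_generator/generate.py`) infers the HTTP verb from the method name and falls back to POST for anything that is not a `get_`/`delete_`-style name. Below is a minimal sketch of that inference rule; it mirrors `patched_get_endpoint_functions` from this diff, and the helper name `infer_http_verb` is illustrative only, not part of the patch.

```python
# Illustrative sketch: the verb-inference rule the generator applies to method names.
def infer_http_verb(operation_name: str, declared_method: str = "POST") -> str:
    if operation_name.startswith("get_") or operation_name.endswith("/get"):
        return "get"
    if (
        operation_name.startswith("delete_")
        or operation_name.startswith("remove_")
        or operation_name.endswith("/delete")
        or operation_name.endswith("/remove")
    ):
        return "delete"
    if declared_method == "GET":
        return "get"
    if declared_method == "DELETE":
        return "delete"
    return "post"  # by default everything else is a POST


# The renamed methods therefore still map to POST operations:
assert infer_http_verb("evaluate_text_generation") == "post"
assert infer_http_verb("create_memory_bank") == "post"
```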
diff --git a/llama_toolchain/synthetic_data_generation/api/endpoints.py b/llama_toolchain/synthetic_data_generation/api/endpoints.py index 8eada05cf..fa4867bcf 100644 --- a/llama_toolchain/synthetic_data_generation/api/endpoints.py +++ b/llama_toolchain/synthetic_data_generation/api/endpoints.py @@ -34,7 +34,7 @@ class SyntheticDataGenerationResponse(BaseModel): class SyntheticDataGeneration(Protocol): @webmethod(route="/synthetic_data_generation/generate") - def post_generate( + def synthetic_data_generate( self, request: SyntheticDataGenerationRequest, ) -> Union[SyntheticDataGenerationResponse]: ... diff --git a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html index 67756d4ad..f47c9ba4c 100644 --- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html +++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html @@ -21,7 +21,7 @@ "info": { "title": "[DRAFT] Llama Stack Specification", "version": "0.0.1", - "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-07-23 02:02:16.069876" + "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-08-15 13:41:52.916332" }, "servers": [ { @@ -349,6 +349,49 @@ } } }, + "/memory_banks/create": { + "post": { + "responses": { + "200": { + "description": "OK" + } + }, + "tags": [ + "MemoryBanks" + ], + "parameters": [ + { + "name": "bank_id", + "in": "query", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "bank_name", + "in": "query", + "required": true, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/MemoryBankDocument" + } + } + } + }, + "required": true + } + } + }, "/agentic_system/delete": { "delete": { "responses": { @@ -537,6 +580,96 @@ } } }, + "/evaluate/question_answering/": { + "post": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EvaluationJob" + } + } + } + } + }, + "tags": [ + "Evaluations" + ], + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EvaluateQuestionAnsweringRequest" + } + } + }, + "required": true + } + } + }, + "/evaluate/summarization/": { + "post": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EvaluationJob" + } + } + } + } + }, + "tags": [ + "Evaluations" + ], + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EvaluateSummarizationRequest" + } + } + }, + "required": true + } + } + }, + "/evaluate/text_generation/": { + "post": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EvaluationJob" + } + } + } + } + }, + "tags": [ + "Evaluations" + ], + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/EvaluateTextGenerationRequest" + } + } + }, + "required": true + } + } + }, "/agentic_system/session/get": { "post": { "responses": { @@ -1010,169 +1143,6 @@ "parameters": [] } }, - "/memory_banks/create": { - "post": { - "responses": { - "200": { - "description": "OK" - } - }, - "tags": [ - "MemoryBanks" - ], - "parameters": [ - { - "name": "bank_id", - "in": "query", - "required": true, - "schema": { - "type": "string" - } - }, - { - "name": "bank_name", - "in": "query", - "required": true, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/MemoryBankDocument" - } - } - } - }, - "required": true - } - } - }, - "/evaluate/question_answering/": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluationJob" - } - } - } - } - }, - "tags": [ - "Evaluations" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluateQuestionAnsweringRequest" - } - } - }, - "required": true - } - } - }, - "/evaluate/summarization/": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluationJob" - } - } - } - } - }, - "tags": [ - "Evaluations" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluateSummarizationRequest" - } - } - }, - "required": true - } - } - }, - "/evaluate/text_generation/": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluationJob" - } - } - } - } - }, - "tags": [ - "Evaluations" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/EvaluateTextGenerationRequest" - } - } - }, - "required": true - } - } - }, - "/synthetic_data_generation/generate": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SyntheticDataGenerationResponse" - } - } - } - } - }, - "tags": [ - "SyntheticDataGeneration" - ], - "parameters": [], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SyntheticDataGenerationRequest" - } - } - }, - "required": true - } - } - }, "/memory_bank/insert": { "post": { "responses": { @@ -1298,6 +1268,36 @@ } } }, + "/synthetic_data_generation/generate": { + "post": { + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SyntheticDataGenerationResponse" + } + } + } + } + }, + "tags": [ + "SyntheticDataGeneration" + ], + "parameters": [], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SyntheticDataGenerationRequest" + } + } + }, + "required": true + } + } + }, "/memory_bank/update": { "post": { "responses": { @@ -1357,7 +1357,7 @@ "type": "object", "properties": { "model": { - "$ref": "#/components/schemas/InstructModel" + "type": "string" }, "messages_batch": { "type": "array", @@ -1499,13 +1499,6 @@ "type" ] }, - "InstructModel": { - "type": "string", - "enum": [ - "llama3_8b_chat", - "llama3_70b_chat" - ] - }, 
"SamplingParams": { "type": "object", "properties": { @@ -1835,7 +1828,7 @@ "type": "object", "properties": { "model": { - "$ref": "#/components/schemas/PretrainedModel" + "type": "string" }, "content_batch": { "type": "array", @@ -1892,9 +1885,6 @@ "content_batch" ] }, - "PretrainedModel": { - "description": "The type of the model. This is used to determine the model family and SKU." - }, "BatchCompletionResponse": { "type": "object", "properties": { @@ -1914,7 +1904,7 @@ "type": "object", "properties": { "model": { - "$ref": "#/components/schemas/InstructModel" + "type": "string" }, "messages": { "type": "array", @@ -2078,7 +2068,7 @@ "type": "object", "properties": { "model": { - "$ref": "#/components/schemas/PretrainedModel" + "type": "string" }, "content": { "oneOf": [ @@ -2161,7 +2151,7 @@ "type": "object", "properties": { "model": { - "$ref": "#/components/schemas/InstructModel" + "type": "string" }, "instance_config": { "$ref": "#/components/schemas/AgenticSystemInstanceConfig" @@ -2525,39 +2515,6 @@ "json" ] }, - "InferenceStep": { - "type": "object", - "properties": { - "turn_id": { - "type": "string" - }, - "step_id": { - "type": "string" - }, - "started_at": { - "type": "string", - "format": "date-time" - }, - "completed_at": { - "type": "string", - "format": "date-time" - }, - "step_type": { - "type": "string", - "const": "inference" - }, - "model_response": { - "$ref": "#/components/schemas/CompletionMessage" - } - }, - "additionalProperties": false, - "required": [ - "turn_id", - "step_id", - "step_type", - "model_response" - ] - }, "MemoryBankDocument": { "type": "object", "properties": { @@ -2605,6 +2562,163 @@ "mime_type" ] }, + "Checkpoint": { + "description": "Checkpoint created during training runs" + }, + "EvaluateQuestionAnsweringRequest": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string" + }, + "dataset": { + "$ref": "#/components/schemas/TrainEvalDataset" + }, + "checkpoint": { + "$ref": "#/components/schemas/Checkpoint" + }, + "sampling_params": { + "$ref": "#/components/schemas/SamplingParams" + }, + "metrics": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "em", + "f1" + ] + } + } + }, + "additionalProperties": false, + "required": [ + "job_uuid", + "dataset", + "checkpoint", + "sampling_params", + "metrics" + ], + "title": "Request to evaluate question answering." + }, + "EvaluationJob": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "job_uuid" + ] + }, + "EvaluateSummarizationRequest": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string" + }, + "dataset": { + "$ref": "#/components/schemas/TrainEvalDataset" + }, + "checkpoint": { + "$ref": "#/components/schemas/Checkpoint" + }, + "sampling_params": { + "$ref": "#/components/schemas/SamplingParams" + }, + "metrics": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "rouge", + "bleu" + ] + } + } + }, + "additionalProperties": false, + "required": [ + "job_uuid", + "dataset", + "checkpoint", + "sampling_params", + "metrics" + ], + "title": "Request to evaluate summarization." 
+ }, + "EvaluateTextGenerationRequest": { + "type": "object", + "properties": { + "job_uuid": { + "type": "string" + }, + "dataset": { + "$ref": "#/components/schemas/TrainEvalDataset" + }, + "checkpoint": { + "$ref": "#/components/schemas/Checkpoint" + }, + "sampling_params": { + "$ref": "#/components/schemas/SamplingParams" + }, + "metrics": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "perplexity", + "rouge", + "bleu" + ] + } + } + }, + "additionalProperties": false, + "required": [ + "job_uuid", + "dataset", + "checkpoint", + "sampling_params", + "metrics" + ], + "title": "Request to evaluate text generation." + }, + "InferenceStep": { + "type": "object", + "properties": { + "turn_id": { + "type": "string" + }, + "step_id": { + "type": "string" + }, + "started_at": { + "type": "string", + "format": "date-time" + }, + "completed_at": { + "type": "string", + "format": "date-time" + }, + "step_type": { + "type": "string", + "const": "inference" + }, + "model_response": { + "$ref": "#/components/schemas/CompletionMessage" + } + }, + "additionalProperties": false, + "required": [ + "turn_id", + "step_id", + "step_type", + "model_response" + ] + }, "MemoryRetrievalStep": { "type": "object", "properties": { @@ -2962,18 +3076,6 @@ "job_uuid" ] }, - "EvaluationJob": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "job_uuid" - ] - }, "MemoryBank": { "type": "object", "properties": { @@ -2990,9 +3092,6 @@ "memory_bank_name" ] }, - "Checkpoint": { - "description": "Checkpoint created during training runs" - }, "PostTrainingJobArtifactsResponse": { "type": "object", "properties": { @@ -3115,272 +3214,6 @@ "job_uuid" ] }, - "EvaluateQuestionAnsweringRequest": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string" - }, - "dataset": { - "$ref": "#/components/schemas/TrainEvalDataset" - }, - "checkpoint": { - "$ref": "#/components/schemas/Checkpoint" - }, - "sampling_params": { - "$ref": "#/components/schemas/SamplingParams" - }, - "metrics": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "em", - "f1" - ] - } - } - }, - "additionalProperties": false, - "required": [ - "job_uuid", - "dataset", - "checkpoint", - "sampling_params", - "metrics" - ], - "title": "Request to evaluate question answering." - }, - "EvaluateSummarizationRequest": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string" - }, - "dataset": { - "$ref": "#/components/schemas/TrainEvalDataset" - }, - "checkpoint": { - "$ref": "#/components/schemas/Checkpoint" - }, - "sampling_params": { - "$ref": "#/components/schemas/SamplingParams" - }, - "metrics": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "rouge", - "bleu" - ] - } - } - }, - "additionalProperties": false, - "required": [ - "job_uuid", - "dataset", - "checkpoint", - "sampling_params", - "metrics" - ], - "title": "Request to evaluate summarization." 
- }, - "EvaluateTextGenerationRequest": { - "type": "object", - "properties": { - "job_uuid": { - "type": "string" - }, - "dataset": { - "$ref": "#/components/schemas/TrainEvalDataset" - }, - "checkpoint": { - "$ref": "#/components/schemas/Checkpoint" - }, - "sampling_params": { - "$ref": "#/components/schemas/SamplingParams" - }, - "metrics": { - "type": "array", - "items": { - "type": "string", - "enum": [ - "perplexity", - "rouge", - "bleu" - ] - } - } - }, - "additionalProperties": false, - "required": [ - "job_uuid", - "dataset", - "checkpoint", - "sampling_params", - "metrics" - ], - "title": "Request to evaluate text generation." - }, - "RewardModel": { - "type": "string", - "enum": [ - "llama3_70b_reward", - "llama3_405b_reward" - ] - }, - "SyntheticDataGenerationRequest": { - "type": "object", - "properties": { - "dialogs": { - "type": "array", - "items": { - "oneOf": [ - { - "$ref": "#/components/schemas/UserMessage" - }, - { - "$ref": "#/components/schemas/SystemMessage" - }, - { - "$ref": "#/components/schemas/ToolResponseMessage" - }, - { - "$ref": "#/components/schemas/CompletionMessage" - } - ] - } - }, - "filtering_function": { - "type": "string", - "enum": [ - "none", - "random", - "top_k", - "top_p", - "top_k_top_p", - "sigmoid" - ], - "title": "The type of filtering function." - }, - "model": { - "$ref": "#/components/schemas/RewardModel" - } - }, - "additionalProperties": false, - "required": [ - "dialogs", - "filtering_function" - ], - "title": "Request to generate synthetic data. A small batch of prompts and a filtering function" - }, - "ScoredDialogGenerations": { - "type": "object", - "properties": { - "dialog": { - "type": "array", - "items": { - "oneOf": [ - { - "$ref": "#/components/schemas/UserMessage" - }, - { - "$ref": "#/components/schemas/SystemMessage" - }, - { - "$ref": "#/components/schemas/ToolResponseMessage" - }, - { - "$ref": "#/components/schemas/CompletionMessage" - } - ] - } - }, - "scored_generations": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ScoredMessage" - } - } - }, - "additionalProperties": false, - "required": [ - "dialog", - "scored_generations" - ] - }, - "ScoredMessage": { - "type": "object", - "properties": { - "message": { - "oneOf": [ - { - "$ref": "#/components/schemas/UserMessage" - }, - { - "$ref": "#/components/schemas/SystemMessage" - }, - { - "$ref": "#/components/schemas/ToolResponseMessage" - }, - { - "$ref": "#/components/schemas/CompletionMessage" - } - ] - }, - "score": { - "type": "number" - } - }, - "additionalProperties": false, - "required": [ - "message", - "score" - ] - }, - "SyntheticDataGenerationResponse": { - "type": "object", - "properties": { - "synthetic_data": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ScoredDialogGenerations" - } - }, - "statistics": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "synthetic_data" - ], - "title": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold." - }, "DPOAlignmentConfig": { "type": "object", "properties": { @@ -3627,7 +3460,7 @@ } }, "model": { - "$ref": "#/components/schemas/RewardModel" + "type": "string" } }, "additionalProperties": false, @@ -3653,6 +3486,70 @@ ], "title": "Response from the reward scoring. 
Batch of (prompt, response, score) tuples that pass the threshold." }, + "ScoredDialogGenerations": { + "type": "object", + "properties": { + "dialog": { + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/SystemMessage" + }, + { + "$ref": "#/components/schemas/ToolResponseMessage" + }, + { + "$ref": "#/components/schemas/CompletionMessage" + } + ] + } + }, + "scored_generations": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ScoredMessage" + } + } + }, + "additionalProperties": false, + "required": [ + "dialog", + "scored_generations" + ] + }, + "ScoredMessage": { + "type": "object", + "properties": { + "message": { + "oneOf": [ + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/SystemMessage" + }, + { + "$ref": "#/components/schemas/ToolResponseMessage" + }, + { + "$ref": "#/components/schemas/CompletionMessage" + } + ] + }, + "score": { + "type": "number" + } + }, + "additionalProperties": false, + "required": [ + "message", + "score" + ] + }, "DoraFinetuningConfig": { "type": "object", "properties": { @@ -3731,7 +3628,7 @@ "type": "string" }, "model": { - "$ref": "#/components/schemas/PretrainedModel" + "type": "string" }, "dataset": { "$ref": "#/components/schemas/TrainEvalDataset" @@ -3857,6 +3754,92 @@ "rank", "alpha" ] + }, + "SyntheticDataGenerationRequest": { + "type": "object", + "properties": { + "dialogs": { + "type": "array", + "items": { + "oneOf": [ + { + "$ref": "#/components/schemas/UserMessage" + }, + { + "$ref": "#/components/schemas/SystemMessage" + }, + { + "$ref": "#/components/schemas/ToolResponseMessage" + }, + { + "$ref": "#/components/schemas/CompletionMessage" + } + ] + } + }, + "filtering_function": { + "type": "string", + "enum": [ + "none", + "random", + "top_k", + "top_p", + "top_k_top_p", + "sigmoid" + ], + "title": "The type of filtering function." + }, + "model": { + "type": "string" + } + }, + "additionalProperties": false, + "required": [ + "dialogs", + "filtering_function" + ], + "title": "Request to generate synthetic data. A small batch of prompts and a filtering function" + }, + "SyntheticDataGenerationResponse": { + "type": "object", + "properties": { + "synthetic_data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ScoredDialogGenerations" + } + }, + "statistics": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "synthetic_data" + ], + "title": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold." 
} }, "responses": {} @@ -3868,10 +3851,16 @@ ], "tags": [ { - "name": "PostTraining" + "name": "Evaluations" }, { - "name": "MemoryBanks" + "name": "Inference" + }, + { + "name": "SyntheticDataGeneration" + }, + { + "name": "AgenticSystem" }, { "name": "RewardScoring" @@ -3880,16 +3869,10 @@ "name": "Datasets" }, { - "name": "Evaluations" + "name": "PostTraining" }, { - "name": "AgenticSystem" - }, - { - "name": "Inference" - }, - { - "name": "SyntheticDataGeneration" + "name": "MemoryBanks" }, { "name": "Attachment", @@ -3915,10 +3898,6 @@ "name": "Fp8QuantizationConfig", "description": "" }, - { - "name": "InstructModel", - "description": "" - }, { "name": "SamplingParams", "description": "" @@ -3967,10 +3946,6 @@ "name": "BatchCompletionRequest", "description": "" }, - { - "name": "PretrainedModel", - "description": "The type of the model. This is used to determine the model family and SKU.\n\n" - }, { "name": "BatchCompletionResponse", "description": "" @@ -4075,14 +4050,34 @@ "name": "TrainEvalDatasetColumnType", "description": "" }, - { - "name": "InferenceStep", - "description": "" - }, { "name": "MemoryBankDocument", "description": "" }, + { + "name": "Checkpoint", + "description": "Checkpoint created during training runs\n\n" + }, + { + "name": "EvaluateQuestionAnsweringRequest", + "description": "Request to evaluate question answering.\n\n" + }, + { + "name": "EvaluationJob", + "description": "" + }, + { + "name": "EvaluateSummarizationRequest", + "description": "Request to evaluate summarization.\n\n" + }, + { + "name": "EvaluateTextGenerationRequest", + "description": "Request to evaluate text generation.\n\n" + }, + { + "name": "InferenceStep", + "description": "" + }, { "name": "MemoryRetrievalStep", "description": "" @@ -4127,18 +4122,10 @@ "name": "EvaluationJobStatusResponse", "description": "" }, - { - "name": "EvaluationJob", - "description": "" - }, { "name": "MemoryBank", "description": "" }, - { - "name": "Checkpoint", - "description": "Checkpoint created during training runs\n\n" - }, { "name": "PostTrainingJobArtifactsResponse", "description": "Artifacts of a finetuning job.\n\n" @@ -4159,38 +4146,6 @@ "name": "PostTrainingJob", "description": "" }, - { - "name": "EvaluateQuestionAnsweringRequest", - "description": "Request to evaluate question answering.\n\n" - }, - { - "name": "EvaluateSummarizationRequest", - "description": "Request to evaluate summarization.\n\n" - }, - { - "name": "EvaluateTextGenerationRequest", - "description": "Request to evaluate text generation.\n\n" - }, - { - "name": "RewardModel", - "description": "" - }, - { - "name": "SyntheticDataGenerationRequest", - "description": "Request to generate synthetic data. A small batch of prompts and a filtering function\n\n" - }, - { - "name": "ScoredDialogGenerations", - "description": "" - }, - { - "name": "ScoredMessage", - "description": "" - }, - { - "name": "SyntheticDataGenerationResponse", - "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.\n\n" - }, { "name": "DPOAlignmentConfig", "description": "" @@ -4223,6 +4178,14 @@ "name": "RewardScoringResponse", "description": "Response from the reward scoring. 
Batch of (prompt, response, score) tuples that pass the threshold.\n\n" }, + { + "name": "ScoredDialogGenerations", + "description": "" + }, + { + "name": "ScoredMessage", + "description": "" + }, { "name": "DoraFinetuningConfig", "description": "" @@ -4242,6 +4205,14 @@ { "name": "QLoraFinetuningConfig", "description": "" + }, + { + "name": "SyntheticDataGenerationRequest", + "description": "Request to generate synthetic data. A small batch of prompts and a filtering function\n\n" + }, + { + "name": "SyntheticDataGenerationResponse", + "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.\n\n" } ], "x-tagGroups": [ @@ -4300,7 +4271,6 @@ "FinetuningAlgorithm", "Fp8QuantizationConfig", "InferenceStep", - "InstructModel", "LoraFinetuningConfig", "MemoryBank", "MemoryBankDocument", @@ -4314,12 +4284,10 @@ "PostTrainingJobStatusResponse", "PostTrainingRLHFRequest", "PostTrainingSFTRequest", - "PretrainedModel", "QLoraFinetuningConfig", "RLHFAlgorithm", "RestAPIExecutionConfig", "RestAPIMethod", - "RewardModel", "RewardScoringRequest", "RewardScoringResponse", "SamplingParams", diff --git a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml index d85d2f99d..89500a09e 100644 --- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml +++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml @@ -7,7 +7,7 @@ components: instance_config: $ref: '#/components/schemas/AgenticSystemInstanceConfig' model: - $ref: '#/components/schemas/InstructModel' + type: string required: - model - instance_config @@ -170,7 +170,7 @@ components: type: array type: array model: - $ref: '#/components/schemas/InstructModel' + type: string quantization_config: oneOf: - $ref: '#/components/schemas/Bf16QuantizationConfig' @@ -212,7 +212,7 @@ components: type: integer type: object model: - $ref: '#/components/schemas/PretrainedModel' + type: string quantization_config: oneOf: - $ref: '#/components/schemas/Bf16QuantizationConfig' @@ -279,7 +279,7 @@ components: - $ref: '#/components/schemas/CompletionMessage' type: array model: - $ref: '#/components/schemas/InstructModel' + type: string quantization_config: oneOf: - $ref: '#/components/schemas/Bf16QuantizationConfig' @@ -375,7 +375,7 @@ components: type: integer type: object model: - $ref: '#/components/schemas/PretrainedModel' + type: string quantization_config: oneOf: - $ref: '#/components/schemas/Bf16QuantizationConfig' @@ -629,11 +629,6 @@ components: - step_type - model_response type: object - InstructModel: - enum: - - llama3_8b_chat - - llama3_70b_chat - type: string LoraFinetuningConfig: additionalProperties: false properties: @@ -922,7 +917,7 @@ components: - type: object type: object model: - $ref: '#/components/schemas/PretrainedModel' + type: string optimizer_config: $ref: '#/components/schemas/OptimizerConfig' training_config: @@ -942,9 +937,6 @@ components: - logger_config title: Request to finetune a model. type: object - PretrainedModel: - description: The type of the model. This is used to determine the model family - and SKU. 
QLoraFinetuningConfig: additionalProperties: false properties: @@ -1001,11 +993,6 @@ components: - PUT - DELETE type: string - RewardModel: - enum: - - llama3_70b_reward - - llama3_405b_reward - type: string RewardScoringRequest: additionalProperties: false properties: @@ -1014,7 +1001,7 @@ components: $ref: '#/components/schemas/DialogGenerations' type: array model: - $ref: '#/components/schemas/RewardModel' + type: string required: - dialog_generations - model @@ -1202,7 +1189,7 @@ components: title: The type of filtering function. type: string model: - $ref: '#/components/schemas/RewardModel' + type: string required: - dialogs - filtering_function @@ -1551,7 +1538,7 @@ info: description: "This is the specification of the llama stack that provides\n \ \ a set of endpoints and their corresponding interfaces that are tailored\ \ to\n best leverage Llama Models. The specification is still in\ - \ draft and subject to change.\n Generated at 2024-07-23 02:02:16.069876" + \ draft and subject to change.\n Generated at 2024-08-15 13:41:52.916332" title: '[DRAFT] Llama Stack Specification' version: 0.0.1 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema @@ -2338,14 +2325,14 @@ security: servers: - url: http://any-hosted-llama-stack.com tags: -- name: PostTraining -- name: MemoryBanks -- name: RewardScoring -- name: Datasets - name: Evaluations -- name: AgenticSystem - name: Inference - name: SyntheticDataGeneration +- name: AgenticSystem +- name: RewardScoring +- name: Datasets +- name: PostTraining +- name: MemoryBanks - description: name: Attachment - description: name: Fp8QuantizationConfig -- description: - name: InstructModel - description: name: SamplingParams - description: name: BatchCompletionRequest -- description: 'The type of the model. This is used to determine the model family - and SKU. - - - ' - name: PretrainedModel - description: name: BatchCompletionResponse @@ -2489,11 +2468,36 @@ tags: - description: name: TrainEvalDatasetColumnType -- description: - name: InferenceStep - description: name: MemoryBankDocument +- description: 'Checkpoint created during training runs + + + ' + name: Checkpoint +- description: 'Request to evaluate question answering. + + + ' + name: EvaluateQuestionAnsweringRequest +- description: + name: EvaluationJob +- description: 'Request to evaluate summarization. + + + ' + name: EvaluateSummarizationRequest +- description: 'Request to evaluate text generation. + + + ' + name: EvaluateTextGenerationRequest +- description: + name: InferenceStep - description: name: MemoryRetrievalStep @@ -2531,15 +2535,8 @@ tags: - description: name: EvaluationJobStatusResponse -- description: - name: EvaluationJob - description: name: MemoryBank -- description: 'Checkpoint created during training runs - - - ' - name: Checkpoint - description: 'Artifacts of a finetuning job. @@ -2563,45 +2560,6 @@ tags: - description: name: PostTrainingJob -- description: 'Request to evaluate question answering. - - - ' - name: EvaluateQuestionAnsweringRequest -- description: 'Request to evaluate summarization. - - - ' - name: EvaluateSummarizationRequest -- description: 'Request to evaluate text generation. - - - ' - name: EvaluateTextGenerationRequest -- description: - name: RewardModel -- description: 'Request to generate synthetic data. 
A small batch of prompts and a - filtering function - - - ' - name: SyntheticDataGenerationRequest -- description: - name: ScoredDialogGenerations -- description: - name: ScoredMessage -- description: 'Response from the synthetic data generation. Batch of (prompt, response, - score) tuples that pass the threshold. - - - ' - name: SyntheticDataGenerationResponse - description: name: DPOAlignmentConfig @@ -2632,6 +2590,11 @@ tags: ' name: RewardScoringResponse +- description: + name: ScoredDialogGenerations +- description: + name: ScoredMessage - description: name: DoraFinetuningConfig @@ -2649,6 +2612,20 @@ tags: - description: name: QLoraFinetuningConfig +- description: 'Request to generate synthetic data. A small batch of prompts and a + filtering function + + + ' + name: SyntheticDataGenerationRequest +- description: 'Response from the synthetic data generation. Batch of (prompt, response, + score) tuples that pass the threshold. + + + ' + name: SyntheticDataGenerationResponse x-tagGroups: - name: Operations tags: @@ -2701,7 +2678,6 @@ x-tagGroups: - FinetuningAlgorithm - Fp8QuantizationConfig - InferenceStep - - InstructModel - LoraFinetuningConfig - MemoryBank - MemoryBankDocument @@ -2715,12 +2691,10 @@ x-tagGroups: - PostTrainingJobStatusResponse - PostTrainingRLHFRequest - PostTrainingSFTRequest - - PretrainedModel - QLoraFinetuningConfig - RLHFAlgorithm - RestAPIExecutionConfig - RestAPIMethod - - RewardModel - RewardScoringRequest - RewardScoringResponse - SamplingParams diff --git a/rfcs/openapi_generator/README.md b/rfcs/openapi_generator/README.md new file mode 100644 index 000000000..023486534 --- /dev/null +++ b/rfcs/openapi_generator/README.md @@ -0,0 +1,9 @@ +The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_toolchain/[]/api/endpoints.py` using the `generate.py` utility. + +Please install the following packages before running the script: + +``` +pip install python-openapi json-strong-typing fire PyYAML llama-models +``` + +Then simply run `sh run_openapi_generator.sh ` diff --git a/rfcs/openapi_generator/generate.py b/rfcs/openapi_generator/generate.py new file mode 100644 index 000000000..95d5c3598 --- /dev/null +++ b/rfcs/openapi_generator/generate.py @@ -0,0 +1,130 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described found in the +# LICENSE file in the root directory of this source tree. + +import inspect + +from datetime import datetime +from pathlib import Path +from typing import Callable, Iterator, List, Tuple + +import fire +import yaml +from llama_models import schema_utils +from pyopenapi import Info, operations, Options, Server, Specification + +# We do a series of monkey-patching to ensure our definitions only use the minimal +# (json_schema_type, webmethod) definitions from the llama_models package. For +# generation though, we need the full definitions and implementations from the +# (python-openapi, json-strong-typing) packages. 
+ +from strong_typing.schema import json_schema_type +from termcolor import colored + + +# PATCH `json_schema_type` first +schema_utils.json_schema_type = json_schema_type + +from llama_models.llama3_1.api.datatypes import * # noqa: F403 +from llama_toolchain.agentic_system.api import * # noqa: F403 +from llama_toolchain.dataset.api import * # noqa: F403 +from llama_toolchain.evaluations.api import * # noqa: F403 +from llama_toolchain.inference.api import * # noqa: F403 +from llama_toolchain.memory.api import * # noqa: F403 +from llama_toolchain.post_training.api import * # noqa: F403 +from llama_toolchain.reward_scoring.api import * # noqa: F403 +from llama_toolchain.synthetic_data_generation.api import * # noqa: F403 + + +def patched_get_endpoint_functions( + endpoint: type, prefixes: List[str] +) -> Iterator[Tuple[str, str, str, Callable]]: + if not inspect.isclass(endpoint): + raise ValueError(f"object is not a class type: {endpoint}") + + functions = inspect.getmembers(endpoint, inspect.isfunction) + for func_name, func_ref in functions: + webmethod = getattr(func_ref, "__webmethod__", None) + if not webmethod: + continue + + print(f"Processing {colored(func_name, 'white')}...") + operation_name = func_name + if operation_name.startswith("get_") or operation_name.endswith("/get"): + prefix = "get" + elif ( + operation_name.startswith("delete_") + or operation_name.startswith("remove_") + or operation_name.endswith("/delete") + or operation_name.endswith("/remove") + ): + prefix = "delete" + else: + if webmethod.method == "GET": + prefix = "get" + elif webmethod.method == "DELETE": + prefix = "delete" + else: + # by default everything else is a POST + prefix = "post" + + yield prefix, operation_name, func_name, func_ref + + +operations._get_endpoint_functions = patched_get_endpoint_functions + + +class LlamaStackEndpoints( + Inference, + AgenticSystem, + RewardScoring, + SyntheticDataGeneration, + Datasets, + PostTraining, + MemoryBanks, + Evaluations, +): ... + + +def main(output_dir: str): + output_dir = Path(output_dir) + if not output_dir.exists(): + raise ValueError(f"Directory {output_dir} does not exist") + + now = str(datetime.now()) + print( + "Converting the spec to YAML (openapi.yaml) and HTML (openapi.html) at " + now + ) + print("") + spec = Specification( + LlamaStackEndpoints, + Options( + server=Server(url="http://any-hosted-llama-stack.com"), + info=Info( + title="[DRAFT] Llama Stack Specification", + version="0.0.1", + description="""This is the specification of the llama stack that provides + a set of endpoints and their corresponding interfaces that are tailored to + best leverage Llama Models. The specification is still in draft and subject to change. + Generated at """ + + now, + ), + ), + ) + with open(output_dir / "llama-stack-spec.yaml", "w", encoding="utf-8") as fp: + yaml.dump(spec.get_json(), fp, allow_unicode=True) + + with open(output_dir / "llama-stack-spec.html", "w") as fp: + spec.write_html(fp, pretty_print=True) + + +if __name__ == "__main__": + fire.Fire(main) diff --git a/rfcs/openapi_generator/run_openapi_generator.sh b/rfcs/openapi_generator/run_openapi_generator.sh new file mode 100644 index 000000000..49a93f362 --- /dev/null +++ b/rfcs/openapi_generator/run_openapi_generator.sh @@ -0,0 +1,33 @@ +#!/bin/bash + + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+
+PYTHONPATH=${PYTHONPATH:-}
+
+set -euo pipefail
+
+missing_packages=()
+
+check_package() {
+  if ! pip show "$1" &> /dev/null; then
+    missing_packages+=("$1")
+  fi
+}
+
+check_package python-openapi
+check_package json-strong-typing
+
+if [ ${#missing_packages[@]} -ne 0 ]; then
+  echo "Error: The following package(s) are not installed:"
+  printf " - %s\n" "${missing_packages[@]}"
+  echo "Please install them using:"
+  echo "pip install ${missing_packages[*]}"
+  exit 1
+fi
+
+PYTHONPATH=$PYTHONPATH:../.. python3 -m rfcs.openapi_generator.generate $*
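For reference, a usage sketch (the output path here is an assumption based on where the checked-in spec files live in this diff; the `PYTHONPATH=$PYTHONPATH:../..` line suggests the script is meant to be run from `rfcs/openapi_generator`, and its arguments are passed straight through to `generate.py`, whose `main` requires an existing output directory):

```
cd rfcs/openapi_generator
sh run_openapi_generator.sh ../RFC-0001-llama-stack-assets
```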