diff --git a/llama_toolchain/evaluations/api/endpoints.py b/llama_toolchain/evaluations/api/endpoints.py
index 39b9a28e0..30e5689e3 100644
--- a/llama_toolchain/evaluations/api/endpoints.py
+++ b/llama_toolchain/evaluations/api/endpoints.py
@@ -60,19 +60,19 @@ class EvaluationJobArtifactsResponse(BaseModel):
class Evaluations(Protocol):
@webmethod(route="/evaluate/text_generation/")
- def post_evaluate_text_generation(
+ def evaluate_text_generation(
self,
request: EvaluateTextGenerationRequest,
) -> EvaluationJob: ...
@webmethod(route="/evaluate/question_answering/")
- def post_evaluate_question_answering(
+ def evaluate_question_answering(
self,
request: EvaluateQuestionAnsweringRequest,
) -> EvaluationJob: ...
@webmethod(route="/evaluate/summarization/")
- def post_evaluate_summarization(
+ def evaluate_summarization(
self,
request: EvaluateSummarizationRequest,
) -> EvaluationJob: ...
diff --git a/llama_toolchain/memory/api/endpoints.py b/llama_toolchain/memory/api/endpoints.py
index 4261afa89..d8ac0e90c 100644
--- a/llama_toolchain/memory/api/endpoints.py
+++ b/llama_toolchain/memory/api/endpoints.py
@@ -13,7 +13,7 @@ from .datatypes import * # noqa: F403
class MemoryBanks(Protocol):
@webmethod(route="/memory_banks/create")
- def post_create_memory_bank(
+ def create_memory_bank(
self,
bank_id: str,
bank_name: str,
@@ -33,14 +33,14 @@ class MemoryBanks(Protocol):
) -> str: ...
@webmethod(route="/memory_bank/insert")
- def post_insert_memory_documents(
+ def insert_memory_documents(
self,
bank_id: str,
documents: List[MemoryBankDocument],
) -> None: ...
@webmethod(route="/memory_bank/update")
- def post_update_memory_documents(
+ def update_memory_documents(
self,
bank_id: str,
documents: List[MemoryBankDocument],
diff --git a/llama_toolchain/post_training/api/endpoints.py b/llama_toolchain/post_training/api/endpoints.py
index 0512003d3..542aff8cd 100644
--- a/llama_toolchain/post_training/api/endpoints.py
+++ b/llama_toolchain/post_training/api/endpoints.py
@@ -95,13 +95,13 @@ class PostTrainingJobArtifactsResponse(BaseModel):
class PostTraining(Protocol):
@webmethod(route="/post_training/supervised_fine_tune")
- def post_supervised_fine_tune(
+ def supervised_fine_tune(
self,
request: PostTrainingSFTRequest,
) -> PostTrainingJob: ...
@webmethod(route="/post_training/preference_optimize")
- def post_preference_optimize(
+ def preference_optimize(
self,
request: PostTrainingRLHFRequest,
) -> PostTrainingJob: ...
diff --git a/llama_toolchain/reward_scoring/api/endpoints.py b/llama_toolchain/reward_scoring/api/endpoints.py
index 0a7327a9b..657e7b325 100644
--- a/llama_toolchain/reward_scoring/api/endpoints.py
+++ b/llama_toolchain/reward_scoring/api/endpoints.py
@@ -27,7 +27,7 @@ class RewardScoringResponse(BaseModel):
class RewardScoring(Protocol):
@webmethod(route="/reward_scoring/score")
- def post_score(
+ def reward_score(
self,
request: RewardScoringRequest,
) -> Union[RewardScoringResponse]: ...
diff --git a/llama_toolchain/synthetic_data_generation/api/endpoints.py b/llama_toolchain/synthetic_data_generation/api/endpoints.py
index 8eada05cf..fa4867bcf 100644
--- a/llama_toolchain/synthetic_data_generation/api/endpoints.py
+++ b/llama_toolchain/synthetic_data_generation/api/endpoints.py
@@ -34,7 +34,7 @@ class SyntheticDataGenerationResponse(BaseModel):
class SyntheticDataGeneration(Protocol):
@webmethod(route="/synthetic_data_generation/generate")
- def post_generate(
+ def synthetic_data_generate(
self,
request: SyntheticDataGenerationRequest,
) -> Union[SyntheticDataGenerationResponse]: ...
diff --git a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html
index 67756d4ad..f47c9ba4c 100644
--- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html
+++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html
@@ -21,7 +21,7 @@
"info": {
"title": "[DRAFT] Llama Stack Specification",
"version": "0.0.1",
- "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-07-23 02:02:16.069876"
+ "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-08-15 13:41:52.916332"
},
"servers": [
{
@@ -349,6 +349,49 @@
}
}
},
+ "/memory_banks/create": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK"
+ }
+ },
+ "tags": [
+ "MemoryBanks"
+ ],
+ "parameters": [
+ {
+ "name": "bank_id",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ },
+ {
+ "name": "bank_name",
+ "in": "query",
+ "required": true,
+ "schema": {
+ "type": "string"
+ }
+ }
+ ],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/MemoryBankDocument"
+ }
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/agentic_system/delete": {
"delete": {
"responses": {
@@ -537,6 +580,96 @@
}
}
},
+ "/evaluate/question_answering/": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/EvaluationJob"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Evaluations"
+ ],
+ "parameters": [],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/EvaluateQuestionAnsweringRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
+ "/evaluate/summarization/": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/EvaluationJob"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Evaluations"
+ ],
+ "parameters": [],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/EvaluateSummarizationRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
+ "/evaluate/text_generation/": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/EvaluationJob"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "Evaluations"
+ ],
+ "parameters": [],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/EvaluateTextGenerationRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/agentic_system/session/get": {
"post": {
"responses": {
@@ -1010,169 +1143,6 @@
"parameters": []
}
},
- "/memory_banks/create": {
- "post": {
- "responses": {
- "200": {
- "description": "OK"
- }
- },
- "tags": [
- "MemoryBanks"
- ],
- "parameters": [
- {
- "name": "bank_id",
- "in": "query",
- "required": true,
- "schema": {
- "type": "string"
- }
- },
- {
- "name": "bank_name",
- "in": "query",
- "required": true,
- "schema": {
- "type": "string"
- }
- }
- ],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/MemoryBankDocument"
- }
- }
- }
- },
- "required": true
- }
- }
- },
- "/evaluate/question_answering/": {
- "post": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluationJob"
- }
- }
- }
- }
- },
- "tags": [
- "Evaluations"
- ],
- "parameters": [],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluateQuestionAnsweringRequest"
- }
- }
- },
- "required": true
- }
- }
- },
- "/evaluate/summarization/": {
- "post": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluationJob"
- }
- }
- }
- }
- },
- "tags": [
- "Evaluations"
- ],
- "parameters": [],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluateSummarizationRequest"
- }
- }
- },
- "required": true
- }
- }
- },
- "/evaluate/text_generation/": {
- "post": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluationJob"
- }
- }
- }
- }
- },
- "tags": [
- "Evaluations"
- ],
- "parameters": [],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/EvaluateTextGenerationRequest"
- }
- }
- },
- "required": true
- }
- }
- },
- "/synthetic_data_generation/generate": {
- "post": {
- "responses": {
- "200": {
- "description": "OK",
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/SyntheticDataGenerationResponse"
- }
- }
- }
- }
- },
- "tags": [
- "SyntheticDataGeneration"
- ],
- "parameters": [],
- "requestBody": {
- "content": {
- "application/json": {
- "schema": {
- "$ref": "#/components/schemas/SyntheticDataGenerationRequest"
- }
- }
- },
- "required": true
- }
- }
- },
"/memory_bank/insert": {
"post": {
"responses": {
@@ -1298,6 +1268,36 @@
}
}
},
+ "/synthetic_data_generation/generate": {
+ "post": {
+ "responses": {
+ "200": {
+ "description": "OK",
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/SyntheticDataGenerationResponse"
+ }
+ }
+ }
+ }
+ },
+ "tags": [
+ "SyntheticDataGeneration"
+ ],
+ "parameters": [],
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/SyntheticDataGenerationRequest"
+ }
+ }
+ },
+ "required": true
+ }
+ }
+ },
"/memory_bank/update": {
"post": {
"responses": {
@@ -1357,7 +1357,7 @@
"type": "object",
"properties": {
"model": {
- "$ref": "#/components/schemas/InstructModel"
+ "type": "string"
},
"messages_batch": {
"type": "array",
@@ -1499,13 +1499,6 @@
"type"
]
},
- "InstructModel": {
- "type": "string",
- "enum": [
- "llama3_8b_chat",
- "llama3_70b_chat"
- ]
- },
"SamplingParams": {
"type": "object",
"properties": {
@@ -1835,7 +1828,7 @@
"type": "object",
"properties": {
"model": {
- "$ref": "#/components/schemas/PretrainedModel"
+ "type": "string"
},
"content_batch": {
"type": "array",
@@ -1892,9 +1885,6 @@
"content_batch"
]
},
- "PretrainedModel": {
- "description": "The type of the model. This is used to determine the model family and SKU."
- },
"BatchCompletionResponse": {
"type": "object",
"properties": {
@@ -1914,7 +1904,7 @@
"type": "object",
"properties": {
"model": {
- "$ref": "#/components/schemas/InstructModel"
+ "type": "string"
},
"messages": {
"type": "array",
@@ -2078,7 +2068,7 @@
"type": "object",
"properties": {
"model": {
- "$ref": "#/components/schemas/PretrainedModel"
+ "type": "string"
},
"content": {
"oneOf": [
@@ -2161,7 +2151,7 @@
"type": "object",
"properties": {
"model": {
- "$ref": "#/components/schemas/InstructModel"
+ "type": "string"
},
"instance_config": {
"$ref": "#/components/schemas/AgenticSystemInstanceConfig"
@@ -2525,39 +2515,6 @@
"json"
]
},
- "InferenceStep": {
- "type": "object",
- "properties": {
- "turn_id": {
- "type": "string"
- },
- "step_id": {
- "type": "string"
- },
- "started_at": {
- "type": "string",
- "format": "date-time"
- },
- "completed_at": {
- "type": "string",
- "format": "date-time"
- },
- "step_type": {
- "type": "string",
- "const": "inference"
- },
- "model_response": {
- "$ref": "#/components/schemas/CompletionMessage"
- }
- },
- "additionalProperties": false,
- "required": [
- "turn_id",
- "step_id",
- "step_type",
- "model_response"
- ]
- },
"MemoryBankDocument": {
"type": "object",
"properties": {
@@ -2605,6 +2562,163 @@
"mime_type"
]
},
+ "Checkpoint": {
+ "description": "Checkpoint created during training runs"
+ },
+ "EvaluateQuestionAnsweringRequest": {
+ "type": "object",
+ "properties": {
+ "job_uuid": {
+ "type": "string"
+ },
+ "dataset": {
+ "$ref": "#/components/schemas/TrainEvalDataset"
+ },
+ "checkpoint": {
+ "$ref": "#/components/schemas/Checkpoint"
+ },
+ "sampling_params": {
+ "$ref": "#/components/schemas/SamplingParams"
+ },
+ "metrics": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "em",
+ "f1"
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "job_uuid",
+ "dataset",
+ "checkpoint",
+ "sampling_params",
+ "metrics"
+ ],
+ "title": "Request to evaluate question answering."
+ },
+ "EvaluationJob": {
+ "type": "object",
+ "properties": {
+ "job_uuid": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "job_uuid"
+ ]
+ },
+ "EvaluateSummarizationRequest": {
+ "type": "object",
+ "properties": {
+ "job_uuid": {
+ "type": "string"
+ },
+ "dataset": {
+ "$ref": "#/components/schemas/TrainEvalDataset"
+ },
+ "checkpoint": {
+ "$ref": "#/components/schemas/Checkpoint"
+ },
+ "sampling_params": {
+ "$ref": "#/components/schemas/SamplingParams"
+ },
+ "metrics": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "rouge",
+ "bleu"
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "job_uuid",
+ "dataset",
+ "checkpoint",
+ "sampling_params",
+ "metrics"
+ ],
+ "title": "Request to evaluate summarization."
+ },
+ "EvaluateTextGenerationRequest": {
+ "type": "object",
+ "properties": {
+ "job_uuid": {
+ "type": "string"
+ },
+ "dataset": {
+ "$ref": "#/components/schemas/TrainEvalDataset"
+ },
+ "checkpoint": {
+ "$ref": "#/components/schemas/Checkpoint"
+ },
+ "sampling_params": {
+ "$ref": "#/components/schemas/SamplingParams"
+ },
+ "metrics": {
+ "type": "array",
+ "items": {
+ "type": "string",
+ "enum": [
+ "perplexity",
+ "rouge",
+ "bleu"
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "job_uuid",
+ "dataset",
+ "checkpoint",
+ "sampling_params",
+ "metrics"
+ ],
+ "title": "Request to evaluate text generation."
+ },
+ "InferenceStep": {
+ "type": "object",
+ "properties": {
+ "turn_id": {
+ "type": "string"
+ },
+ "step_id": {
+ "type": "string"
+ },
+ "started_at": {
+ "type": "string",
+ "format": "date-time"
+ },
+ "completed_at": {
+ "type": "string",
+ "format": "date-time"
+ },
+ "step_type": {
+ "type": "string",
+ "const": "inference"
+ },
+ "model_response": {
+ "$ref": "#/components/schemas/CompletionMessage"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "turn_id",
+ "step_id",
+ "step_type",
+ "model_response"
+ ]
+ },
"MemoryRetrievalStep": {
"type": "object",
"properties": {
@@ -2962,18 +3076,6 @@
"job_uuid"
]
},
- "EvaluationJob": {
- "type": "object",
- "properties": {
- "job_uuid": {
- "type": "string"
- }
- },
- "additionalProperties": false,
- "required": [
- "job_uuid"
- ]
- },
"MemoryBank": {
"type": "object",
"properties": {
@@ -2990,9 +3092,6 @@
"memory_bank_name"
]
},
- "Checkpoint": {
- "description": "Checkpoint created during training runs"
- },
"PostTrainingJobArtifactsResponse": {
"type": "object",
"properties": {
@@ -3115,272 +3214,6 @@
"job_uuid"
]
},
- "EvaluateQuestionAnsweringRequest": {
- "type": "object",
- "properties": {
- "job_uuid": {
- "type": "string"
- },
- "dataset": {
- "$ref": "#/components/schemas/TrainEvalDataset"
- },
- "checkpoint": {
- "$ref": "#/components/schemas/Checkpoint"
- },
- "sampling_params": {
- "$ref": "#/components/schemas/SamplingParams"
- },
- "metrics": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "em",
- "f1"
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "job_uuid",
- "dataset",
- "checkpoint",
- "sampling_params",
- "metrics"
- ],
- "title": "Request to evaluate question answering."
- },
- "EvaluateSummarizationRequest": {
- "type": "object",
- "properties": {
- "job_uuid": {
- "type": "string"
- },
- "dataset": {
- "$ref": "#/components/schemas/TrainEvalDataset"
- },
- "checkpoint": {
- "$ref": "#/components/schemas/Checkpoint"
- },
- "sampling_params": {
- "$ref": "#/components/schemas/SamplingParams"
- },
- "metrics": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "rouge",
- "bleu"
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "job_uuid",
- "dataset",
- "checkpoint",
- "sampling_params",
- "metrics"
- ],
- "title": "Request to evaluate summarization."
- },
- "EvaluateTextGenerationRequest": {
- "type": "object",
- "properties": {
- "job_uuid": {
- "type": "string"
- },
- "dataset": {
- "$ref": "#/components/schemas/TrainEvalDataset"
- },
- "checkpoint": {
- "$ref": "#/components/schemas/Checkpoint"
- },
- "sampling_params": {
- "$ref": "#/components/schemas/SamplingParams"
- },
- "metrics": {
- "type": "array",
- "items": {
- "type": "string",
- "enum": [
- "perplexity",
- "rouge",
- "bleu"
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "job_uuid",
- "dataset",
- "checkpoint",
- "sampling_params",
- "metrics"
- ],
- "title": "Request to evaluate text generation."
- },
- "RewardModel": {
- "type": "string",
- "enum": [
- "llama3_70b_reward",
- "llama3_405b_reward"
- ]
- },
- "SyntheticDataGenerationRequest": {
- "type": "object",
- "properties": {
- "dialogs": {
- "type": "array",
- "items": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/UserMessage"
- },
- {
- "$ref": "#/components/schemas/SystemMessage"
- },
- {
- "$ref": "#/components/schemas/ToolResponseMessage"
- },
- {
- "$ref": "#/components/schemas/CompletionMessage"
- }
- ]
- }
- },
- "filtering_function": {
- "type": "string",
- "enum": [
- "none",
- "random",
- "top_k",
- "top_p",
- "top_k_top_p",
- "sigmoid"
- ],
- "title": "The type of filtering function."
- },
- "model": {
- "$ref": "#/components/schemas/RewardModel"
- }
- },
- "additionalProperties": false,
- "required": [
- "dialogs",
- "filtering_function"
- ],
- "title": "Request to generate synthetic data. A small batch of prompts and a filtering function"
- },
- "ScoredDialogGenerations": {
- "type": "object",
- "properties": {
- "dialog": {
- "type": "array",
- "items": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/UserMessage"
- },
- {
- "$ref": "#/components/schemas/SystemMessage"
- },
- {
- "$ref": "#/components/schemas/ToolResponseMessage"
- },
- {
- "$ref": "#/components/schemas/CompletionMessage"
- }
- ]
- }
- },
- "scored_generations": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/ScoredMessage"
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "dialog",
- "scored_generations"
- ]
- },
- "ScoredMessage": {
- "type": "object",
- "properties": {
- "message": {
- "oneOf": [
- {
- "$ref": "#/components/schemas/UserMessage"
- },
- {
- "$ref": "#/components/schemas/SystemMessage"
- },
- {
- "$ref": "#/components/schemas/ToolResponseMessage"
- },
- {
- "$ref": "#/components/schemas/CompletionMessage"
- }
- ]
- },
- "score": {
- "type": "number"
- }
- },
- "additionalProperties": false,
- "required": [
- "message",
- "score"
- ]
- },
- "SyntheticDataGenerationResponse": {
- "type": "object",
- "properties": {
- "synthetic_data": {
- "type": "array",
- "items": {
- "$ref": "#/components/schemas/ScoredDialogGenerations"
- }
- },
- "statistics": {
- "type": "object",
- "additionalProperties": {
- "oneOf": [
- {
- "type": "null"
- },
- {
- "type": "boolean"
- },
- {
- "type": "number"
- },
- {
- "type": "string"
- },
- {
- "type": "array"
- },
- {
- "type": "object"
- }
- ]
- }
- }
- },
- "additionalProperties": false,
- "required": [
- "synthetic_data"
- ],
- "title": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
- },
"DPOAlignmentConfig": {
"type": "object",
"properties": {
@@ -3627,7 +3460,7 @@
}
},
"model": {
- "$ref": "#/components/schemas/RewardModel"
+ "type": "string"
}
},
"additionalProperties": false,
@@ -3653,6 +3486,70 @@
],
"title": "Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold."
},
+ "ScoredDialogGenerations": {
+ "type": "object",
+ "properties": {
+ "dialog": {
+ "type": "array",
+ "items": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/UserMessage"
+ },
+ {
+ "$ref": "#/components/schemas/SystemMessage"
+ },
+ {
+ "$ref": "#/components/schemas/ToolResponseMessage"
+ },
+ {
+ "$ref": "#/components/schemas/CompletionMessage"
+ }
+ ]
+ }
+ },
+ "scored_generations": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ScoredMessage"
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "dialog",
+ "scored_generations"
+ ]
+ },
+ "ScoredMessage": {
+ "type": "object",
+ "properties": {
+ "message": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/UserMessage"
+ },
+ {
+ "$ref": "#/components/schemas/SystemMessage"
+ },
+ {
+ "$ref": "#/components/schemas/ToolResponseMessage"
+ },
+ {
+ "$ref": "#/components/schemas/CompletionMessage"
+ }
+ ]
+ },
+ "score": {
+ "type": "number"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "message",
+ "score"
+ ]
+ },
"DoraFinetuningConfig": {
"type": "object",
"properties": {
@@ -3731,7 +3628,7 @@
"type": "string"
},
"model": {
- "$ref": "#/components/schemas/PretrainedModel"
+ "type": "string"
},
"dataset": {
"$ref": "#/components/schemas/TrainEvalDataset"
@@ -3857,6 +3754,92 @@
"rank",
"alpha"
]
+ },
+ "SyntheticDataGenerationRequest": {
+ "type": "object",
+ "properties": {
+ "dialogs": {
+ "type": "array",
+ "items": {
+ "oneOf": [
+ {
+ "$ref": "#/components/schemas/UserMessage"
+ },
+ {
+ "$ref": "#/components/schemas/SystemMessage"
+ },
+ {
+ "$ref": "#/components/schemas/ToolResponseMessage"
+ },
+ {
+ "$ref": "#/components/schemas/CompletionMessage"
+ }
+ ]
+ }
+ },
+ "filtering_function": {
+ "type": "string",
+ "enum": [
+ "none",
+ "random",
+ "top_k",
+ "top_p",
+ "top_k_top_p",
+ "sigmoid"
+ ],
+ "title": "The type of filtering function."
+ },
+ "model": {
+ "type": "string"
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "dialogs",
+ "filtering_function"
+ ],
+ "title": "Request to generate synthetic data. A small batch of prompts and a filtering function"
+ },
+ "SyntheticDataGenerationResponse": {
+ "type": "object",
+ "properties": {
+ "synthetic_data": {
+ "type": "array",
+ "items": {
+ "$ref": "#/components/schemas/ScoredDialogGenerations"
+ }
+ },
+ "statistics": {
+ "type": "object",
+ "additionalProperties": {
+ "oneOf": [
+ {
+ "type": "null"
+ },
+ {
+ "type": "boolean"
+ },
+ {
+ "type": "number"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "array"
+ },
+ {
+ "type": "object"
+ }
+ ]
+ }
+ }
+ },
+ "additionalProperties": false,
+ "required": [
+ "synthetic_data"
+ ],
+ "title": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold."
}
},
"responses": {}
@@ -3868,10 +3851,16 @@
],
"tags": [
{
- "name": "PostTraining"
+ "name": "Evaluations"
},
{
- "name": "MemoryBanks"
+ "name": "Inference"
+ },
+ {
+ "name": "SyntheticDataGeneration"
+ },
+ {
+ "name": "AgenticSystem"
},
{
"name": "RewardScoring"
@@ -3880,16 +3869,10 @@
"name": "Datasets"
},
{
- "name": "Evaluations"
+ "name": "PostTraining"
},
{
- "name": "AgenticSystem"
- },
- {
- "name": "Inference"
- },
- {
- "name": "SyntheticDataGeneration"
+ "name": "MemoryBanks"
},
{
"name": "Attachment",
@@ -3915,10 +3898,6 @@
"name": "Fp8QuantizationConfig",
"description": ""
},
- {
- "name": "InstructModel",
- "description": ""
- },
{
"name": "SamplingParams",
"description": ""
@@ -3967,10 +3946,6 @@
"name": "BatchCompletionRequest",
"description": ""
},
- {
- "name": "PretrainedModel",
- "description": "The type of the model. This is used to determine the model family and SKU.\n\n"
- },
{
"name": "BatchCompletionResponse",
"description": ""
@@ -4075,14 +4050,34 @@
"name": "TrainEvalDatasetColumnType",
"description": ""
},
- {
- "name": "InferenceStep",
- "description": ""
- },
{
"name": "MemoryBankDocument",
"description": ""
},
+ {
+ "name": "Checkpoint",
+ "description": "Checkpoint created during training runs\n\n"
+ },
+ {
+ "name": "EvaluateQuestionAnsweringRequest",
+ "description": "Request to evaluate question answering.\n\n"
+ },
+ {
+ "name": "EvaluationJob",
+ "description": ""
+ },
+ {
+ "name": "EvaluateSummarizationRequest",
+ "description": "Request to evaluate summarization.\n\n"
+ },
+ {
+ "name": "EvaluateTextGenerationRequest",
+ "description": "Request to evaluate text generation.\n\n"
+ },
+ {
+ "name": "InferenceStep",
+ "description": ""
+ },
{
"name": "MemoryRetrievalStep",
"description": ""
@@ -4127,18 +4122,10 @@
"name": "EvaluationJobStatusResponse",
"description": ""
},
- {
- "name": "EvaluationJob",
- "description": ""
- },
{
"name": "MemoryBank",
"description": ""
},
- {
- "name": "Checkpoint",
- "description": "Checkpoint created during training runs\n\n"
- },
{
"name": "PostTrainingJobArtifactsResponse",
"description": "Artifacts of a finetuning job.\n\n"
@@ -4159,38 +4146,6 @@
"name": "PostTrainingJob",
"description": ""
},
- {
- "name": "EvaluateQuestionAnsweringRequest",
- "description": "Request to evaluate question answering.\n\n"
- },
- {
- "name": "EvaluateSummarizationRequest",
- "description": "Request to evaluate summarization.\n\n"
- },
- {
- "name": "EvaluateTextGenerationRequest",
- "description": "Request to evaluate text generation.\n\n"
- },
- {
- "name": "RewardModel",
- "description": ""
- },
- {
- "name": "SyntheticDataGenerationRequest",
- "description": "Request to generate synthetic data. A small batch of prompts and a filtering function\n\n"
- },
- {
- "name": "ScoredDialogGenerations",
- "description": ""
- },
- {
- "name": "ScoredMessage",
- "description": ""
- },
- {
- "name": "SyntheticDataGenerationResponse",
- "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.\n\n"
- },
{
"name": "DPOAlignmentConfig",
"description": ""
@@ -4223,6 +4178,14 @@
"name": "RewardScoringResponse",
"description": "Response from the reward scoring. Batch of (prompt, response, score) tuples that pass the threshold.\n\n"
},
+ {
+ "name": "ScoredDialogGenerations",
+ "description": ""
+ },
+ {
+ "name": "ScoredMessage",
+ "description": ""
+ },
{
"name": "DoraFinetuningConfig",
"description": ""
@@ -4242,6 +4205,14 @@
{
"name": "QLoraFinetuningConfig",
"description": ""
+ },
+ {
+ "name": "SyntheticDataGenerationRequest",
+ "description": "Request to generate synthetic data. A small batch of prompts and a filtering function\n\n"
+ },
+ {
+ "name": "SyntheticDataGenerationResponse",
+ "description": "Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.\n\n"
}
],
"x-tagGroups": [
@@ -4300,7 +4271,6 @@
"FinetuningAlgorithm",
"Fp8QuantizationConfig",
"InferenceStep",
- "InstructModel",
"LoraFinetuningConfig",
"MemoryBank",
"MemoryBankDocument",
@@ -4314,12 +4284,10 @@
"PostTrainingJobStatusResponse",
"PostTrainingRLHFRequest",
"PostTrainingSFTRequest",
- "PretrainedModel",
"QLoraFinetuningConfig",
"RLHFAlgorithm",
"RestAPIExecutionConfig",
"RestAPIMethod",
- "RewardModel",
"RewardScoringRequest",
"RewardScoringResponse",
"SamplingParams",
diff --git a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml
index d85d2f99d..89500a09e 100644
--- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml
+++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml
@@ -7,7 +7,7 @@ components:
instance_config:
$ref: '#/components/schemas/AgenticSystemInstanceConfig'
model:
- $ref: '#/components/schemas/InstructModel'
+ type: string
required:
- model
- instance_config
@@ -170,7 +170,7 @@ components:
type: array
type: array
model:
- $ref: '#/components/schemas/InstructModel'
+ type: string
quantization_config:
oneOf:
- $ref: '#/components/schemas/Bf16QuantizationConfig'
@@ -212,7 +212,7 @@ components:
type: integer
type: object
model:
- $ref: '#/components/schemas/PretrainedModel'
+ type: string
quantization_config:
oneOf:
- $ref: '#/components/schemas/Bf16QuantizationConfig'
@@ -279,7 +279,7 @@ components:
- $ref: '#/components/schemas/CompletionMessage'
type: array
model:
- $ref: '#/components/schemas/InstructModel'
+ type: string
quantization_config:
oneOf:
- $ref: '#/components/schemas/Bf16QuantizationConfig'
@@ -375,7 +375,7 @@ components:
type: integer
type: object
model:
- $ref: '#/components/schemas/PretrainedModel'
+ type: string
quantization_config:
oneOf:
- $ref: '#/components/schemas/Bf16QuantizationConfig'
@@ -629,11 +629,6 @@ components:
- step_type
- model_response
type: object
- InstructModel:
- enum:
- - llama3_8b_chat
- - llama3_70b_chat
- type: string
LoraFinetuningConfig:
additionalProperties: false
properties:
@@ -922,7 +917,7 @@ components:
- type: object
type: object
model:
- $ref: '#/components/schemas/PretrainedModel'
+ type: string
optimizer_config:
$ref: '#/components/schemas/OptimizerConfig'
training_config:
@@ -942,9 +937,6 @@ components:
- logger_config
title: Request to finetune a model.
type: object
- PretrainedModel:
- description: The type of the model. This is used to determine the model family
- and SKU.
QLoraFinetuningConfig:
additionalProperties: false
properties:
@@ -1001,11 +993,6 @@ components:
- PUT
- DELETE
type: string
- RewardModel:
- enum:
- - llama3_70b_reward
- - llama3_405b_reward
- type: string
RewardScoringRequest:
additionalProperties: false
properties:
@@ -1014,7 +1001,7 @@ components:
$ref: '#/components/schemas/DialogGenerations'
type: array
model:
- $ref: '#/components/schemas/RewardModel'
+ type: string
required:
- dialog_generations
- model
@@ -1202,7 +1189,7 @@ components:
title: The type of filtering function.
type: string
model:
- $ref: '#/components/schemas/RewardModel'
+ type: string
required:
- dialogs
- filtering_function
@@ -1551,7 +1538,7 @@ info:
description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\
- \ draft and subject to change.\n Generated at 2024-07-23 02:02:16.069876"
+ \ draft and subject to change.\n Generated at 2024-08-15 13:41:52.916332"
title: '[DRAFT] Llama Stack Specification'
version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -2338,14 +2325,14 @@ security:
servers:
- url: http://any-hosted-llama-stack.com
tags:
-- name: PostTraining
-- name: MemoryBanks
-- name: RewardScoring
-- name: Datasets
- name: Evaluations
-- name: AgenticSystem
- name: Inference
- name: SyntheticDataGeneration
+- name: AgenticSystem
+- name: RewardScoring
+- name: Datasets
+- name: PostTraining
+- name: MemoryBanks
- description:
name: Attachment
- description:
name: Fp8QuantizationConfig
-- description:
- name: InstructModel
- description:
name: SamplingParams
- description:
name: BatchCompletionRequest
-- description: 'The type of the model. This is used to determine the model family
- and SKU.
-
-
- '
- name: PretrainedModel
- description:
name: BatchCompletionResponse
@@ -2489,11 +2468,36 @@ tags:
- description:
name: TrainEvalDatasetColumnType
-- description:
- name: InferenceStep
- description:
name: MemoryBankDocument
+- description: 'Checkpoint created during training runs
+
+
+ '
+ name: Checkpoint
+- description: 'Request to evaluate question answering.
+
+
+ '
+ name: EvaluateQuestionAnsweringRequest
+- description:
+ name: EvaluationJob
+- description: 'Request to evaluate summarization.
+
+
+ '
+ name: EvaluateSummarizationRequest
+- description: 'Request to evaluate text generation.
+
+
+ '
+ name: EvaluateTextGenerationRequest
+- description:
+ name: InferenceStep
- description:
name: MemoryRetrievalStep
@@ -2531,15 +2535,8 @@ tags:
- description:
name: EvaluationJobStatusResponse
-- description:
- name: EvaluationJob
- description:
name: MemoryBank
-- description: 'Checkpoint created during training runs
-
-
- '
- name: Checkpoint
- description: 'Artifacts of a finetuning job.
@@ -2563,45 +2560,6 @@ tags:
- description:
name: PostTrainingJob
-- description: 'Request to evaluate question answering.
-
-
- '
- name: EvaluateQuestionAnsweringRequest
-- description: 'Request to evaluate summarization.
-
-
- '
- name: EvaluateSummarizationRequest
-- description: 'Request to evaluate text generation.
-
-
- '
- name: EvaluateTextGenerationRequest
-- description:
- name: RewardModel
-- description: 'Request to generate synthetic data. A small batch of prompts and a
- filtering function
-
-
- '
- name: SyntheticDataGenerationRequest
-- description:
- name: ScoredDialogGenerations
-- description:
- name: ScoredMessage
-- description: 'Response from the synthetic data generation. Batch of (prompt, response,
- score) tuples that pass the threshold.
-
-
- '
- name: SyntheticDataGenerationResponse
- description:
name: DPOAlignmentConfig
@@ -2632,6 +2590,11 @@ tags:
'
name: RewardScoringResponse
+- description:
+ name: ScoredDialogGenerations
+- description:
+ name: ScoredMessage
- description:
name: DoraFinetuningConfig
@@ -2649,6 +2612,20 @@ tags:
- description:
name: QLoraFinetuningConfig
+- description: 'Request to generate synthetic data. A small batch of prompts and a
+ filtering function
+
+
+ '
+ name: SyntheticDataGenerationRequest
+- description: 'Response from the synthetic data generation. Batch of (prompt, response,
+ score) tuples that pass the threshold.
+
+
+ '
+ name: SyntheticDataGenerationResponse
x-tagGroups:
- name: Operations
tags:
@@ -2701,7 +2678,6 @@ x-tagGroups:
- FinetuningAlgorithm
- Fp8QuantizationConfig
- InferenceStep
- - InstructModel
- LoraFinetuningConfig
- MemoryBank
- MemoryBankDocument
@@ -2715,12 +2691,10 @@ x-tagGroups:
- PostTrainingJobStatusResponse
- PostTrainingRLHFRequest
- PostTrainingSFTRequest
- - PretrainedModel
- QLoraFinetuningConfig
- RLHFAlgorithm
- RestAPIExecutionConfig
- RestAPIMethod
- - RewardModel
- RewardScoringRequest
- RewardScoringResponse
- SamplingParams
diff --git a/rfcs/openapi_generator/README.md b/rfcs/openapi_generator/README.md
new file mode 100644
index 000000000..023486534
--- /dev/null
+++ b/rfcs/openapi_generator/README.md
@@ -0,0 +1,9 @@
+The RFC Specification (OpenAPI format) is generated by the `generate.py` utility from the API endpoint definitions in the `llama_toolchain/[]/api/endpoints.py` modules.
+
+Please install the following packages before running the script:
+
+```
+pip install python-openapi json-strong-typing fire PyYAML llama-models
+```
+
+Then run `sh run_openapi_generator.sh <OUTPUT_DIR>`, where `<OUTPUT_DIR>` is the directory the generated spec files should be written to (it must already exist).
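+
+Each `endpoints.py` module declares its API as a `typing.Protocol` whose methods are annotated with `@webmethod(route=...)`; `generate.py` walks those methods and emits the corresponding OpenAPI paths and schemas. Below is a minimal, self-contained sketch of that pattern (the class and method names are illustrative, and the `webmethod` stub only stands in for the real decorator shipped with `llama-models`):
+
+```
+from typing import Protocol
+
+from pydantic import BaseModel
+
+
+def webmethod(route: str):
+    # Illustrative stub: the real decorator records the route (and more) so the
+    # generator can discover the method and build its OpenAPI path.
+    def wrap(fn):
+        fn.__webmethod__ = {"route": route}
+        return fn
+
+    return wrap
+
+
+class ExampleJob(BaseModel):
+    job_uuid: str
+
+
+class Example(Protocol):
+    @webmethod(route="/example/run")
+    def run_example(
+        self,
+        job_uuid: str,
+    ) -> ExampleJob: ...
+```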
diff --git a/rfcs/openapi_generator/generate.py b/rfcs/openapi_generator/generate.py
new file mode 100644
index 000000000..95d5c3598
--- /dev/null
+++ b/rfcs/openapi_generator/generate.py
@@ -0,0 +1,130 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import inspect
+
+from datetime import datetime
+from pathlib import Path
+from typing import Callable, Iterator, List, Tuple
+
+import fire
+import yaml
+from llama_models import schema_utils
+from pyopenapi import Info, operations, Options, Server, Specification
+
+# We do a series of monkey-patches to ensure our definitions only use the minimal
+# (json_schema_type, webmethod) definitions from the llama_models package. For
+# generation, though, we need the full definitions and implementations from the
+# (python-openapi, json-strong-typing) packages.
+
+from strong_typing.schema import json_schema_type
+from termcolor import colored
+
+
+# PATCH `json_schema_type` first
+schema_utils.json_schema_type = json_schema_type
+
+from llama_models.llama3_1.api.datatypes import * # noqa: F403
+from llama_toolchain.agentic_system.api import * # noqa: F403
+from llama_toolchain.dataset.api import * # noqa: F403
+from llama_toolchain.evaluations.api import * # noqa: F403
+from llama_toolchain.inference.api import * # noqa: F403
+from llama_toolchain.memory.api import * # noqa: F403
+from llama_toolchain.post_training.api import * # noqa: F403
+from llama_toolchain.reward_scoring.api import * # noqa: F403
+from llama_toolchain.synthetic_data_generation.api import * # noqa: F403
+
+
+def patched_get_endpoint_functions(
+ endpoint: type, prefixes: List[str]
+) -> Iterator[Tuple[str, str, str, Callable]]:
+ if not inspect.isclass(endpoint):
+ raise ValueError(f"object is not a class type: {endpoint}")
+
+ functions = inspect.getmembers(endpoint, inspect.isfunction)
+ for func_name, func_ref in functions:
+ webmethod = getattr(func_ref, "__webmethod__", None)
+ if not webmethod:
+ continue
+
+ print(f"Processing {colored(func_name, 'white')}...")
+ operation_name = func_name
+ if operation_name.startswith("get_") or operation_name.endswith("/get"):
+ prefix = "get"
+ elif (
+ operation_name.startswith("delete_")
+ or operation_name.startswith("remove_")
+ or operation_name.endswith("/delete")
+ or operation_name.endswith("/remove")
+ ):
+ prefix = "delete"
+ else:
+ if webmethod.method == "GET":
+ prefix = "get"
+ elif webmethod.method == "DELETE":
+ prefix = "delete"
+ else:
+ # by default everything else is a POST
+ prefix = "post"
+
+ yield prefix, operation_name, func_name, func_ref
+
+
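+# Route pyopenapi's endpoint discovery through the patched lookup above.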
+operations._get_endpoint_functions = patched_get_endpoint_functions
+
+
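+# One aggregate protocol so a single generated spec covers every Llama Stack API.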
+class LlamaStackEndpoints(
+ Inference,
+ AgenticSystem,
+ RewardScoring,
+ SyntheticDataGeneration,
+ Datasets,
+ PostTraining,
+ MemoryBanks,
+ Evaluations,
+): ...
+
+
+def main(output_dir: str):
+ output_dir = Path(output_dir)
+ if not output_dir.exists():
+ raise ValueError(f"Directory {output_dir} does not exist")
+
+ now = str(datetime.now())
+    print(
+        "Converting the spec to YAML (llama-stack-spec.yaml) and HTML (llama-stack-spec.html) at "
+        + now
+    )
+ print("")
+ spec = Specification(
+ LlamaStackEndpoints,
+ Options(
+ server=Server(url="http://any-hosted-llama-stack.com"),
+ info=Info(
+ title="[DRAFT] Llama Stack Specification",
+ version="0.0.1",
+ description="""This is the specification of the llama stack that provides
+ a set of endpoints and their corresponding interfaces that are tailored to
+ best leverage Llama Models. The specification is still in draft and subject to change.
+ Generated at """
+ + now,
+ ),
+ ),
+ )
+ with open(output_dir / "llama-stack-spec.yaml", "w", encoding="utf-8") as fp:
+ yaml.dump(spec.get_json(), fp, allow_unicode=True)
+
+ with open(output_dir / "llama-stack-spec.html", "w") as fp:
+ spec.write_html(fp, pretty_print=True)
+
+
+if __name__ == "__main__":
+ fire.Fire(main)
diff --git a/rfcs/openapi_generator/run_openapi_generator.sh b/rfcs/openapi_generator/run_openapi_generator.sh
new file mode 100644
index 000000000..49a93f362
--- /dev/null
+++ b/rfcs/openapi_generator/run_openapi_generator.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+PYTHONPATH=${PYTHONPATH:-}
+
+set -euo pipefail
+
+missing_packages=()
+
+check_package() {
+ if ! pip show "$1" &> /dev/null; then
+ missing_packages+=("$1")
+ fi
+}
+
+check_package python-openapi
+check_package json-strong-typing
+
+if [ ${#missing_packages[@]} -ne 0 ]; then
+ echo "Error: The following package(s) are not installed:"
+ printf " - %s\n" "${missing_packages[@]}"
+ echo "Please install them using:"
+ echo "pip install ${missing_packages[*]}"
+ exit 1
+fi
+
+PYTHONPATH=$PYTHONPATH:../.. python3 -m rfcs.openapi_generator.generate "$@"