Add OpenAPI generation utility, update SPEC to reflect latest types

2025-12-03 18:00:36 +00:00 · 2024-08-15 13:45:45 -07:00 · 2024-08-15 13:45:45 -07:00 · 1f5eb9ff96
commit 1f5eb9ff96
parent 417ba2aea0
10 changed files with 770 additions and 656 deletions
--- a/llama_toolchain/evaluations/api/endpoints.py
+++ b/llama_toolchain/evaluations/api/endpoints.py
@ -60,19 +60,19 @@ class EvaluationJobArtifactsResponse(BaseModel):
 class Evaluations(Protocol):
    @webmethod(route="/evaluate/text_generation/")
-    def post_evaluate_text_generation(
+    def evaluate_text_generation(
        self,
        request: EvaluateTextGenerationRequest,
    ) -> EvaluationJob: ...
    @webmethod(route="/evaluate/question_answering/")
-    def post_evaluate_question_answering(
+    def evaluate_question_answering(
        self,
        request: EvaluateQuestionAnsweringRequest,
    ) -> EvaluationJob: ...
    @webmethod(route="/evaluate/summarization/")
-    def post_evaluate_summarization(
+    def evaluate_summarization(
        self,
        request: EvaluateSummarizationRequest,
    ) -> EvaluationJob: ...
--- a/llama_toolchain/memory/api/endpoints.py
+++ b/llama_toolchain/memory/api/endpoints.py
@ -13,7 +13,7 @@ from .datatypes import *  # noqa: F403
 class MemoryBanks(Protocol):
    @webmethod(route="/memory_banks/create")
-    def post_create_memory_bank(
+    def create_memory_bank(
        self,
        bank_id: str,
        bank_name: str,
@ -33,14 +33,14 @@ class MemoryBanks(Protocol):
    ) -> str: ...
    @webmethod(route="/memory_bank/insert")
-    def post_insert_memory_documents(
+    def insert_memory_documents(
        self,
        bank_id: str,
        documents: List[MemoryBankDocument],
    ) -> None: ...
    @webmethod(route="/memory_bank/update")
-    def post_update_memory_documents(
+    def update_memory_documents(
        self,
        bank_id: str,
        documents: List[MemoryBankDocument],
--- a/llama_toolchain/post_training/api/endpoints.py
+++ b/llama_toolchain/post_training/api/endpoints.py
@ -95,13 +95,13 @@ class PostTrainingJobArtifactsResponse(BaseModel):
 class PostTraining(Protocol):
    @webmethod(route="/post_training/supervised_fine_tune")
-    def post_supervised_fine_tune(
+    def supervised_fine_tune(
        self,
        request: PostTrainingSFTRequest,
    ) -> PostTrainingJob: ...
    @webmethod(route="/post_training/preference_optimize")
-    def post_preference_optimize(
+    def preference_optimize(
        self,
        request: PostTrainingRLHFRequest,
    ) -> PostTrainingJob: ...
--- a/llama_toolchain/reward_scoring/api/endpoints.py
+++ b/llama_toolchain/reward_scoring/api/endpoints.py
@ -27,7 +27,7 @@ class RewardScoringResponse(BaseModel):
 class RewardScoring(Protocol):
    @webmethod(route="/reward_scoring/score")
-    def post_score(
+    def reward_score(
        self,
        request: RewardScoringRequest,
    ) -> Union[RewardScoringResponse]: ...
--- a/llama_toolchain/synthetic_data_generation/api/endpoints.py
+++ b/llama_toolchain/synthetic_data_generation/api/endpoints.py
@ -34,7 +34,7 @@ class SyntheticDataGenerationResponse(BaseModel):
 class SyntheticDataGeneration(Protocol):
    @webmethod(route="/synthetic_data_generation/generate")
-    def post_generate(
+    def synthetic_data_generate(
        self,
        request: SyntheticDataGenerationRequest,
    ) -> Union[SyntheticDataGenerationResponse]: ...
--- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html
+++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html
--- a/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml
+++ b/rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml
@ -7,7 +7,7 @@ components:
        instance_config:
          $ref: '#/components/schemas/AgenticSystemInstanceConfig'
        model:
-          $ref: '#/components/schemas/InstructModel'
+          type: string
      required:
      - model
      - instance_config
@ -170,7 +170,7 @@ components:
            type: array
          type: array
        model:
-          $ref: '#/components/schemas/InstructModel'
+          type: string
        quantization_config:
          oneOf:
          - $ref: '#/components/schemas/Bf16QuantizationConfig'
@ -212,7 +212,7 @@ components:
              type: integer
          type: object
        model:
-          $ref: '#/components/schemas/PretrainedModel'
+          type: string
        quantization_config:
          oneOf:
          - $ref: '#/components/schemas/Bf16QuantizationConfig'
@ -279,7 +279,7 @@ components:
            - $ref: '#/components/schemas/CompletionMessage'
          type: array
        model:
-          $ref: '#/components/schemas/InstructModel'
+          type: string
        quantization_config:
          oneOf:
          - $ref: '#/components/schemas/Bf16QuantizationConfig'
@ -375,7 +375,7 @@ components:
              type: integer
          type: object
        model:
-          $ref: '#/components/schemas/PretrainedModel'
+          type: string
        quantization_config:
          oneOf:
          - $ref: '#/components/schemas/Bf16QuantizationConfig'
@ -629,11 +629,6 @@ components:
      - step_type
      - model_response
      type: object
    InstructModel:
      enum:
      - llama3_8b_chat
      - llama3_70b_chat
      type: string
    LoraFinetuningConfig:
      additionalProperties: false
      properties:
@ -922,7 +917,7 @@ components:
            - type: object
          type: object
        model:
-          $ref: '#/components/schemas/PretrainedModel'
+          type: string
        optimizer_config:
          $ref: '#/components/schemas/OptimizerConfig'
        training_config:
@ -942,9 +937,6 @@ components:
      - logger_config
      title: Request to finetune a model.
      type: object
    PretrainedModel:
      description: The type of the model. This is used to determine the model family
        and SKU.
    QLoraFinetuningConfig:
      additionalProperties: false
      properties:
@ -1001,11 +993,6 @@ components:
      - PUT
      - DELETE
      type: string
    RewardModel:
      enum:
      - llama3_70b_reward
      - llama3_405b_reward
      type: string
    RewardScoringRequest:
      additionalProperties: false
      properties:
@ -1014,7 +1001,7 @@ components:
            $ref: '#/components/schemas/DialogGenerations'
          type: array
        model:
-          $ref: '#/components/schemas/RewardModel'
+          type: string
      required:
      - dialog_generations
      - model
@ -1202,7 +1189,7 @@ components:
          title: The type of filtering function.
          type: string
        model:
-          $ref: '#/components/schemas/RewardModel'
+          type: string
      required:
      - dialogs
      - filtering_function
@ -1551,7 +1538,7 @@ info:
  description: "This is the specification of the llama stack that provides\n     \
    \           a set of endpoints and their corresponding interfaces that are tailored\
    \ to\n                best leverage Llama Models. The specification is still in\
-    \ draft and subject to change.\n                Generated at 2024-07-23 02:02:16.069876"
+    \ draft and subject to change.\n                Generated at 2024-08-15 13:41:52.916332"
  title: '[DRAFT] Llama Stack Specification'
  version: 0.0.1
 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@ -2338,14 +2325,14 @@ security:
 servers:
 - url: http://any-hosted-llama-stack.com
 tags:
 - name: PostTraining
 - name: MemoryBanks
 - name: RewardScoring
 - name: Datasets
 - name: Evaluations
 - name: AgenticSystem
 - name: Inference
 - name: SyntheticDataGeneration
 - name: AgenticSystem
 - name: RewardScoring
 - name: Datasets
 - name: PostTraining
 - name: MemoryBanks
 - description: <SchemaDefinition schemaRef="#/components/schemas/Attachment" />
  name: Attachment
 - description: <SchemaDefinition schemaRef="#/components/schemas/BatchChatCompletionRequest"
@ -2362,8 +2349,6 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/Fp8QuantizationConfig"
    />
  name: Fp8QuantizationConfig
 - description: <SchemaDefinition schemaRef="#/components/schemas/InstructModel" />
  name: InstructModel
 - description: <SchemaDefinition schemaRef="#/components/schemas/SamplingParams" />
  name: SamplingParams
 - description: <SchemaDefinition schemaRef="#/components/schemas/SamplingStrategy"
@ -2393,12 +2378,6 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/BatchCompletionRequest"
    />
  name: BatchCompletionRequest
 - description: 'The type of the model. This is used to determine the model family
    and SKU.
    <SchemaDefinition schemaRef="#/components/schemas/PretrainedModel" />'
  name: PretrainedModel
 - description: <SchemaDefinition schemaRef="#/components/schemas/BatchCompletionResponse"
    />
  name: BatchCompletionResponse
@ -2489,11 +2468,36 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/TrainEvalDatasetColumnType"
    />
  name: TrainEvalDatasetColumnType
 - description: <SchemaDefinition schemaRef="#/components/schemas/InferenceStep" />
  name: InferenceStep
 - description: <SchemaDefinition schemaRef="#/components/schemas/MemoryBankDocument"
    />
  name: MemoryBankDocument
 - description: 'Checkpoint created during training runs
    <SchemaDefinition schemaRef="#/components/schemas/Checkpoint" />'
  name: Checkpoint
 - description: 'Request to evaluate question answering.
    <SchemaDefinition schemaRef="#/components/schemas/EvaluateQuestionAnsweringRequest"
    />'
  name: EvaluateQuestionAnsweringRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/EvaluationJob" />
  name: EvaluationJob
 - description: 'Request to evaluate summarization.
    <SchemaDefinition schemaRef="#/components/schemas/EvaluateSummarizationRequest"
    />'
  name: EvaluateSummarizationRequest
 - description: 'Request to evaluate text generation.
    <SchemaDefinition schemaRef="#/components/schemas/EvaluateTextGenerationRequest"
    />'
  name: EvaluateTextGenerationRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/InferenceStep" />
  name: InferenceStep
 - description: <SchemaDefinition schemaRef="#/components/schemas/MemoryRetrievalStep"
    />
  name: MemoryRetrievalStep
@ -2531,15 +2535,8 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/EvaluationJobStatusResponse"
    />
  name: EvaluationJobStatusResponse
 - description: <SchemaDefinition schemaRef="#/components/schemas/EvaluationJob" />
  name: EvaluationJob
 - description: <SchemaDefinition schemaRef="#/components/schemas/MemoryBank" />
  name: MemoryBank
 - description: 'Checkpoint created during training runs
    <SchemaDefinition schemaRef="#/components/schemas/Checkpoint" />'
  name: Checkpoint
 - description: 'Artifacts of a finetuning job.
@ -2563,45 +2560,6 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/PostTrainingJob"
    />
  name: PostTrainingJob
 - description: 'Request to evaluate question answering.
    <SchemaDefinition schemaRef="#/components/schemas/EvaluateQuestionAnsweringRequest"
    />'
  name: EvaluateQuestionAnsweringRequest
 - description: 'Request to evaluate summarization.
    <SchemaDefinition schemaRef="#/components/schemas/EvaluateSummarizationRequest"
    />'
  name: EvaluateSummarizationRequest
 - description: 'Request to evaluate text generation.
    <SchemaDefinition schemaRef="#/components/schemas/EvaluateTextGenerationRequest"
    />'
  name: EvaluateTextGenerationRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/RewardModel" />
  name: RewardModel
 - description: 'Request to generate synthetic data. A small batch of prompts and a
    filtering function
    <SchemaDefinition schemaRef="#/components/schemas/SyntheticDataGenerationRequest"
    />'
  name: SyntheticDataGenerationRequest
 - description: <SchemaDefinition schemaRef="#/components/schemas/ScoredDialogGenerations"
    />
  name: ScoredDialogGenerations
 - description: <SchemaDefinition schemaRef="#/components/schemas/ScoredMessage" />
  name: ScoredMessage
 - description: 'Response from the synthetic data generation. Batch of (prompt, response,
    score) tuples that pass the threshold.
    <SchemaDefinition schemaRef="#/components/schemas/SyntheticDataGenerationResponse"
    />'
  name: SyntheticDataGenerationResponse
 - description: <SchemaDefinition schemaRef="#/components/schemas/DPOAlignmentConfig"
    />
  name: DPOAlignmentConfig
@ -2632,6 +2590,11 @@ tags:
    <SchemaDefinition schemaRef="#/components/schemas/RewardScoringResponse" />'
  name: RewardScoringResponse
 - description: <SchemaDefinition schemaRef="#/components/schemas/ScoredDialogGenerations"
    />
  name: ScoredDialogGenerations
 - description: <SchemaDefinition schemaRef="#/components/schemas/ScoredMessage" />
  name: ScoredMessage
 - description: <SchemaDefinition schemaRef="#/components/schemas/DoraFinetuningConfig"
    />
  name: DoraFinetuningConfig
@ -2649,6 +2612,20 @@ tags:
 - description: <SchemaDefinition schemaRef="#/components/schemas/QLoraFinetuningConfig"
    />
  name: QLoraFinetuningConfig
 - description: 'Request to generate synthetic data. A small batch of prompts and a
    filtering function
    <SchemaDefinition schemaRef="#/components/schemas/SyntheticDataGenerationRequest"
    />'
  name: SyntheticDataGenerationRequest
 - description: 'Response from the synthetic data generation. Batch of (prompt, response,
    score) tuples that pass the threshold.
    <SchemaDefinition schemaRef="#/components/schemas/SyntheticDataGenerationResponse"
    />'
  name: SyntheticDataGenerationResponse
 x-tagGroups:
 - name: Operations
  tags:
@ -2701,7 +2678,6 @@ x-tagGroups:
  - FinetuningAlgorithm
  - Fp8QuantizationConfig
  - InferenceStep
  - InstructModel
  - LoraFinetuningConfig
  - MemoryBank
  - MemoryBankDocument
@ -2715,12 +2691,10 @@ x-tagGroups:
  - PostTrainingJobStatusResponse
  - PostTrainingRLHFRequest
  - PostTrainingSFTRequest
  - PretrainedModel
  - QLoraFinetuningConfig
  - RLHFAlgorithm
  - RestAPIExecutionConfig
  - RestAPIMethod
  - RewardModel
  - RewardScoringRequest
  - RewardScoringResponse
  - SamplingParams
--- a/rfcs/openapi_generator/README.md
+++ b/rfcs/openapi_generator/README.md
@ -0,0 +1,9 @@
 The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_toolchain/[<subdir>]/api/endpoints.py` using the `generate.py` utility.
 Please install the following packages before running the script:
 ```
 pip install python-openapi json-strong-typing fire PyYAML llama-models
 ```
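 Then, from the `rfcs/openapi_generator` directory, run `bash run_openapi_generator.sh <OUTPUT_DIR>` (the script relies on bash-only features and a relative `PYTHONPATH`, so it should not be run with a plain POSIX `sh`).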
--- a/rfcs/openapi_generator/generate.py
+++ b/rfcs/openapi_generator/generate.py
@ -0,0 +1,130 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the
 # LICENSE file in the root directory of this source tree.
 import inspect
 from datetime import datetime
 from pathlib import Path
 from typing import Callable, Iterator, List, Tuple
 import fire
 import yaml
 from llama_models import schema_utils
 from pyopenapi import Info, operations, Options, Server, Specification
 # We apply a series of monkey-patches so that our definitions only use the minimal
 # (json_schema_type, webmethod) definitions from the llama_models package. For
 # generation, though, we need the full definitions and implementations from the
 # (python-openapi, json-strong-typing) packages.
 from strong_typing.schema import json_schema_type
 from termcolor import colored
 # PATCH `json_schema_type` first
 schema_utils.json_schema_type = json_schema_type
 from llama_models.llama3_1.api.datatypes import *  # noqa: F403
 from llama_toolchain.agentic_system.api import *  # noqa: F403
 from llama_toolchain.dataset.api import *  # noqa: F403
 from llama_toolchain.evaluations.api import *  # noqa: F403
 from llama_toolchain.inference.api import *  # noqa: F403
 from llama_toolchain.memory.api import *  # noqa: F403
 from llama_toolchain.post_training.api import *  # noqa: F403
 from llama_toolchain.reward_scoring.api import *  # noqa: F403
 from llama_toolchain.synthetic_data_generation.api import *  # noqa: F403
 def patched_get_endpoint_functions(
    endpoint: type, prefixes: List[str]
 ) -> Iterator[Tuple[str, str, str, Callable]]:
    """Yield ``(prefix, operation_name, func_name, func_ref)`` for each web method.

    Only functions of ``endpoint`` that carry a truthy ``__webmethod__``
    attribute are reported. The operation name is the function name itself.

    The HTTP verb prefix is chosen from the function name first (``get_*`` /
    ``*/get`` -> ``"get"``; ``delete_*``, ``remove_*``, ``*/delete``,
    ``*/remove`` -> ``"delete"``) and otherwise from ``__webmethod__.method``,
    falling back to ``"post"``.

    Args:
        endpoint: Class whose functions are inspected.
        prefixes: Not used by this implementation; presumably kept so the
            signature matches the pyopenapi function this replaces.

    Raises:
        ValueError: If ``endpoint`` is not a class.
    """
    if not inspect.isclass(endpoint):
        raise ValueError(f"object is not a class type: {endpoint}")

    for func_name, func_ref in inspect.getmembers(endpoint, inspect.isfunction):
        web_meta = getattr(func_ref, "__webmethod__", None)
        if not web_meta:
            # Plain helper without @webmethod metadata: not an endpoint.
            continue

        print(f"Processing {colored(func_name, 'white')}...")

        # Name-based hints take precedence over the declared HTTP method.
        if func_name.startswith("get_") or func_name.endswith("/get"):
            prefix = "get"
        elif func_name.startswith(("delete_", "remove_")) or func_name.endswith(
            ("/delete", "/remove")
        ):
            prefix = "delete"
        elif web_meta.method == "GET":
            prefix = "get"
        elif web_meta.method == "DELETE":
            prefix = "delete"
        else:
            # by default everything else is a POST
            prefix = "post"

        yield prefix, func_name, func_name, func_ref
 # Swap pyopenapi's endpoint-discovery hook for patched_get_endpoint_functions.
 # This must run before `Specification(...)` is constructed in `main`.
 operations._get_endpoint_functions = patched_get_endpoint_functions
 # Aggregate type inheriting from every API endpoint class listed below.
 # `main` hands this single class to `Specification`, so the generated spec is
 # built from the combined set of endpoints. It adds no members of its own.
 class LlamaStackEndpoints(
    Inference,
    AgenticSystem,
    RewardScoring,
    SyntheticDataGeneration,
    Datasets,
    PostTraining,
    MemoryBanks,
    Evaluations,
 ): ...
 def main(output_dir: str):
    """Generate the Llama Stack OpenAPI specification as YAML and HTML.

    Writes ``llama-stack-spec.yaml`` and ``llama-stack-spec.html`` into
    ``output_dir``. The generation timestamp is embedded in the spec's
    description.

    Args:
        output_dir: Path of an existing directory that receives both files.

    Raises:
        ValueError: If ``output_dir`` is not an existing directory.
    """
    out_path = Path(output_dir)
    # is_dir() (rather than exists()) also rejects a regular file passed by
    # mistake, instead of failing later when the output files are opened.
    if not out_path.is_dir():
        raise ValueError(f"Directory {out_path} does not exist")

    yaml_path = out_path / "llama-stack-spec.yaml"
    html_path = out_path / "llama-stack-spec.html"

    now = str(datetime.now())
    # Report the file names that are actually written below.
    print(
        f"Converting the spec to YAML ({yaml_path.name}) and HTML ({html_path.name}) at "
        + now
    )
    print("")
    spec = Specification(
        LlamaStackEndpoints,
        Options(
            server=Server(url="http://any-hosted-llama-stack.com"),
            info=Info(
                title="[DRAFT] Llama Stack Specification",
                version="0.0.1",
                description="""This is the specification of the llama stack that provides
                a set of endpoints and their corresponding interfaces that are tailored to
                best leverage Llama Models. The specification is still in draft and subject to change.
                Generated at """
                + now,
            ),
        ),
    )
    with open(yaml_path, "w", encoding="utf-8") as fp:
        yaml.dump(spec.get_json(), fp, allow_unicode=True)
    # Explicit UTF-8 keeps the HTML output independent of the locale's default
    # encoding and consistent with the YAML file written above.
    with open(html_path, "w", encoding="utf-8") as fp:
        spec.write_html(fp, pretty_print=True)
 # Script entry point: python-fire turns `main` into a command-line interface,
 # so `output_dir` is supplied as a command-line argument.
 if __name__ == "__main__":
    fire.Fire(main)
--- a/rfcs/openapi_generator/run_openapi_generator.sh
+++ b/rfcs/openapi_generator/run_openapi_generator.sh
@ -0,0 +1,33 @@
 #!/bin/bash
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 # Default PYTHONPATH to empty so that referencing it later does not trip `set -u`.
 PYTHONPATH=${PYTHONPATH:-}
 # Abort on command failure, on use of unset variables, and on pipeline errors.
 set -euo pipefail
 # Names of required pip packages found to be missing (filled by check_package).
 missing_packages=()
 # Append pip package "$1" to missing_packages when `pip show` cannot find it.
 check_package() {
    local pkg="$1"
    # Discard both stdout and stderr; only the exit status matters here.
    pip show "$pkg" > /dev/null 2>&1 || missing_packages+=("$pkg")
 }
 # Verify the packages the generator needs before doing any work.
 check_package python-openapi
 check_package json-strong-typing
 if [ ${#missing_packages[@]} -ne 0 ]; then
    echo "Error: The following package(s) are not installed:"
    printf " - %s\n" "${missing_packages[@]}"
    echo "Please install them using:"
    echo "pip install ${missing_packages[*]}"
    exit 1
 fi
 # Forward the caller's arguments unchanged: "$@" (unlike an unquoted $*) keeps
 # each argument intact, so an output directory containing spaces still works.
 # The relative ../.. entry assumes the script is run from its own directory.
 PYTHONPATH=$PYTHONPATH:../.. python3 -m rfcs.openapi_generator.generate "$@"