From f6b2b2fb391f2784e91c495693af6152cddd7fe3 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 11 Jul 2024 10:04:56 -0700 Subject: [PATCH] cleanup --- README.md | 23 +- batch_inference.yaml | 167 ---- chat_completion.yaml | 140 --- fine_tuning.yaml | 266 ------ openapi/spec.yaml | 1018 --------------------- reward_model_scoring.yaml | 162 ---- simple_view/batch_inference.yml | 65 -- simple_view/chat_completion.yml | 72 -- simple_view/fine_tuning.yml | 134 --- simple_view/reward_model_scoring.yml | 70 -- simple_view/synthetic_data_generation.yml | 58 -- source/{run.sh => generate.sh} | 0 synthetic_data_generation.yaml | 131 --- 13 files changed, 20 insertions(+), 2286 deletions(-) delete mode 100644 batch_inference.yaml delete mode 100644 chat_completion.yaml delete mode 100644 fine_tuning.yaml delete mode 100644 openapi/spec.yaml delete mode 100644 reward_model_scoring.yaml delete mode 100644 simple_view/batch_inference.yml delete mode 100644 simple_view/chat_completion.yml delete mode 100644 simple_view/fine_tuning.yml delete mode 100644 simple_view/reward_model_scoring.yml delete mode 100644 simple_view/synthetic_data_generation.yml rename source/{run.sh => generate.sh} (100%) delete mode 100644 synthetic_data_generation.yaml diff --git a/README.md b/README.md index 9f067bb3e..b0b169564 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,26 @@ This repo contains the API specifications for various parts of the Llama Stack. The Stack consists of toolchain-apis and agentic-apis. The tool chain apis that are covered -- -- chat_completion -- batch inference -- fine tuning +- inference / batch inference +- post training - reward model scoring - synthetic data generation + +### Generate OpenAPI specs + +Set up virtual environment + +``` +python3.9 -m venv ~/.venv/toolchain/ +source ~/.venv/toolchain/bin/activate + +with-proxy pip3 install -r requirements.txt + +``` + +Run the generate.sh script + +``` +cd source && sh generate.sh +``` diff --git a/batch_inference.yaml b/batch_inference.yaml deleted file mode 100644 index 10637c52d..000000000 --- a/batch_inference.yaml +++ /dev/null @@ -1,167 +0,0 @@ -openapi: 3.0.0 -info: - title: Batch Inference API - version: 0.0.1 -paths: - /batch_inference/submit_job: - post: - summary: Submit a batch inference job - description: | - This endpoint allows clients to submit a batch inference job using a model and a prompt file. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - model: - type: string - description: "The model identifier to be used for inference." - prompt_file_path: - $ref: '#/components/schemas/Path' - description: "Path to a JSONL file where each line is a JSON-encoded list of messages." - options: - $ref: '#/components/schemas/Options' - num_generations: - type: integer - description: "Number of generations to produce." - responses: - '200': - description: Batch inference job successfully submitted - content: - application/json: - schema: - $ref: '#/components/schemas/BatchInference' - - /batch_inference/job_status: - get: - summary: Get status for an already submitted job - description: | - Retrieve the status and details of a previously submitted batch inference job using its unique job ID. - parameters: - - in: query - name: job_id - schema: - type: string - required: true - description: "Unique identifier for the batch inference job." - responses: - '200': - description: Batch inference job status retrieved successfully - content: - application/json: - schema: - $ref: '#/components/schemas/BatchInference' - -components: - schemas: - Message: - type: object - properties: - role: - type: string - text: - type: string - attachments: - type: array - items: - $ref: '#/components/schemas/MediaAttachment' - eot: - type: boolean - description: "End of transmission flag." - tool_call: - type: boolean - description: "Indicates if it's a tool call - builtin, custom, or ipython." - is_complete: - type: boolean - description: "For streaming, indicates if the message is complete." - is_header_complete: - type: boolean - description: "For streaming, indicates if the header of the message is complete." - metadata: - type: object - additionalProperties: true - description: "Additional metadata as JSON." - - MediaAttachment: - type: object - properties: - attachment_type: - $ref: '#/components/schemas/MediaAttachmentType' - data_type: - $ref: '#/components/schemas/MediaAttachmentDataType' - data: - type: string - - MediaAttachmentType: - type: string - enum: - - image - - video - - audio - - text - description: "Type of media attachment." - - MediaAttachmentDataType: - type: string - enum: - - raw_bytes - - filepath - - uri - description: "Data type of the media attachment." - - BatchInference: - type: object - properties: - job_id: - type: string - description: "ID provided by the API for the job." - created: - type: string - format: date-time - description: "Timestamp when the job was created." - status: - type: string - enum: - - validating - - running - - completed - - failed - description: "Current status of the job." - input_file_path: - $ref: '#/components/schemas/Path' - success_file_path: - $ref: '#/components/schemas/Path' - error_file_path: - $ref: '#/components/schemas/Path' - metadata: - type: object - additionalProperties: true - description: "Additional metadata related to the job." - - Options: - type: object - properties: - logprobs: - type: boolean - max_tokens: - type: integer - temperature: - type: number - top_p: - type: number - - Path: - type: object - properties: - value: - type: string - description: "The path value." - type: - type: string - enum: - - raw_bytes - - filepath - - uri - description: "Data Type of the path." diff --git a/chat_completion.yaml b/chat_completion.yaml deleted file mode 100644 index 9c82f1396..000000000 --- a/chat_completion.yaml +++ /dev/null @@ -1,140 +0,0 @@ -openapi: 3.0.0 -info: - title: Chat Completion API - version: 0.0.1 -paths: - /chat_completion/: - post: - summary: Submit a chat completion request - description: | - This endpoint allows clients to submit a chat completion request. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - messages: - type: array - items: - $ref: '#/components/schemas/Message' - model: - type: string - options: - $ref: '#/components/schemas/Options' - n_completions: - type: integer - responses: - '200': - description: Successful response - content: - application/json: - schema: - type: object - properties: - id: - type: string - candidates: - type: array - items: - $ref: '#/components/schemas/Completion' - model_called: - type: string - usage: - $ref: '#/components/schemas/TokenUsage' -components: - schemas: - Message: - type: object - properties: - role: - type: string - text: - type: string - attachments: - type: array - items: - $ref: '#/components/schemas/MediaAttachment' - eot: - type: boolean - description: "End of transmission flag." - tool_call: - type: boolean - description: "Indicates if it's a tool call - builtin, custom, or ipython." - is_complete: - type: boolean - description: "For streaming, indicates if the message is complete." - is_header_complete: - type: boolean - description: "For streaming, indicates if the header of the message is complete." - metadata: - type: object - additionalProperties: true - description: "Additional metadata as JSON." - MediaAttachment: - type: object - properties: - attachment_type: - $ref: '#/components/schemas/MediaAttachmentType' - data_type: - $ref: '#/components/schemas/MediaAttachmentDataType' - data: - type: string - MediaAttachmentType: - type: string - enum: - - image - - video - - audio - - text - description: "Type of media attachment." - MediaAttachmentDataType: - type: string - enum: - - raw_bytes - - filepath - - uri - description: "Data type of the media attachment." - Completion: - type: object - properties: - id: - type: string - message: - $ref: '#/components/schemas/Message' - tokens: - type: array - items: - type: integer - logprobs: - type: array - items: - type: number - finish_reason: - type: string - enum: - - stop - - safety - - max-length - description: "Reason for completion termination." - Options: - type: object - properties: - logprobs: - type: boolean - max_tokens: - type: integer - temperature: - type: number - top_p: - type: number - TokenUsage: - type: object - properties: - input_tokens: - type: integer - output_tokens: - type: integer - total_tokens: - type: integer diff --git a/fine_tuning.yaml b/fine_tuning.yaml deleted file mode 100644 index 9d2c3ccc5..000000000 --- a/fine_tuning.yaml +++ /dev/null @@ -1,266 +0,0 @@ -openapi: 3.0.0 -info: - title: Fine Tuning API - version: 0.0.1 - description: API for managing fine tuning jobs for machine learning models. - -paths: - /fine_tuning/jobs/submit: - post: - summary: Submit a fine tuning job - description: Submit a fine tuning job with the specified configuration. - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/Config' - responses: - 200: - description: Successfully submitted the fine tuning job. - content: - application/json: - schema: - $ref: '#/components/schemas/FineTuningJob' - - /fine_tuning/jobs/status: - get: - summary: Gets last N fine tuning jobs - description: Retrieve the status of the last N fine tuning jobs based on the provided job ID. - parameters: - - in: query - name: job_id - schema: - type: string - required: true - description: The ID of the job to retrieve status for. - responses: - 200: - description: Successfully retrieved the job status. - content: - application/json: - schema: - $ref: '#/components/schemas/FineTuningJob' - - /fine_tuning/jobs/cancel: - post: - summary: Cancel provided job - description: Cancel the fine tuning job with the specified job ID. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - job_id: - type: string - responses: - 200: - description: Successfully cancelled the fine tuning job. - content: - application/json: - schema: - $ref: '#/components/schemas/FineTuningJob' - - /fine_tuning/jobs/tail: - get: - summary: Tail logs of a particular job - description: Stream the logs of a particular job in real-time. This endpoint supports streaming responses. - parameters: - - in: query - name: job_id - schema: - type: string - required: true - description: The ID of the job to tail logs for. - responses: - 200: - description: Streaming logs in real-time. - content: - application/x-ndjson: - schema: - type: object - properties: - logs: - type: array - items: - $ref: '#/components/schemas/Log' - headers: - Content-Type: - schema: - type: string - default: 'application/x-ndjson' - Transfer-Encoding: - schema: - type: string - default: 'chunked' - -components: - schemas: - Message: - # keep in sync with /chat_completion - - TrainingDataItem: - type: object - properties: - dialog: - type: array - items: - $ref: '#/components/schemas/Message' - keep_loss: - type: array - items: - type: boolean - - WandBLogger: - type: object - properties: - project: - type: string - description: The project name in WandB where logs will be stored. - - DiskLogger: - type: object - properties: - filename: - type: string - description: The filename where logs will be stored on disk. - - FullFineTuneOptions: - type: object - properties: - enable_activation_checkpointing: - type: boolean - default: true - memory_efficient_fsdp_wrap: - type: boolean - default: true - fsdp_cpu_offload: - type: boolean - default: true - - LoraFineTuneOptions: - type: object - properties: - lora_attn_modules: - type: array - items: - type: string - apply_lora_to_mlp: - type: boolean - default: false - apply_lora_to_output: - type: boolean - default: false - lora_rank: - type: integer - lora_alpha: - type: integer - - FineTuningOptions: - type: object - properties: - n_epochs: - type: integer - batch_size: - type: integer - lr: - type: number - format: float - gradient_accumulation_steps: - type: integer - seed: - type: integer - shuffle: - type: boolean - custom_training_options: - oneOf: - - $ref: '#/components/schemas/FullFineTuneOptions' - - $ref: '#/components/schemas/LoraFineTuneOptions' - discriminator: - propertyName: finetuning_type - extras: - # json to put other config overrides that are required by torchtune - type: object - additionalProperties: true - - Config: - type: object - properties: - model: - type: string - description: The model identifier that you want to fine tune. - data: - type: string - format: uri - description: Path to the JSONL file with each row representing a TrainingDataItem. - validation_data: - type: string - format: uri - description: Path to the JSONL file used for validation metrics. - fine_tuning_options: - $ref: '#/components/schemas/FineTuningOptions' - logger: - oneOf: - - $ref: '#/components/schemas/DiskLogger' - - $ref: '#/components/schemas/WandBLogger' - discriminator: - propertyName: log_type - overrides: - # eg. --nproc_per_node 4 instead of default that we need to pass through to torchrun - # when running locally - type: string - description: Custom override options for the fine tuning process. - metadata: - type: object - additionalProperties: true - - FineTuningJob: - type: object - properties: - job_id: - type: string - description: Unique identifier for the fine tuning job. - created: - type: string - format: date-time - description: The creation date and time of the job. - finished_at: - type: string - format: date-time - description: The completion date and time of the job. - status: - type: string - enum: [validation, queued, running, failed, success, cancelled] - description: The current status of the job. - error_path: - type: string - format: uri - description: Path to the error log file. - checkpoints: - type: array - items: - type: string - format: uri - description: List of paths to checkpoint files for various epochs. - logs: - type: string - format: uri - description: Path to the logs, either local or a WandB URI. - input_config: - $ref: '#/components/schemas/Config' - metadata: - type: object - additionalProperties: true - - Log: - type: object - properties: - message: - type: string - description: The log message. - timestamp: - type: string - format: date-time - description: The timestamp of the log message. diff --git a/openapi/spec.yaml b/openapi/spec.yaml deleted file mode 100644 index 63474ccaa..000000000 --- a/openapi/spec.yaml +++ /dev/null @@ -1,1018 +0,0 @@ -openapi: 3.0.0 -info: - title: Llama Stack API - version: 0.0.1 -paths: -# quantization API - /quantize: -# inference APIs - /inference: - post: - summary: Submit an inference request - description: | - This endpoint allows clients to submit a chat completion request. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - messages: - type: array - items: - $ref: '#/components/schemas/Message' - model: - type: string - options: - $ref: '#/components/schemas/Options' - n_completions: - type: integer - responses: - '200': - description: Successful response - content: - application/json: - schema: - type: object - properties: - id: - type: string - candidates: - type: array - items: - $ref: '#/components/schemas/Completion' - model_called: - type: string - usage: - $ref: '#/components/schemas/TokenUsage' - /batch_inference/jobs/submit: - post: - summary: Submit a batch inference job - description: | - This endpoint allows clients to submit a batch inference job using a model and a prompt file. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - model: - type: string - description: "The model identifier to be used for inference." - prompt_file_path: - $ref: '#/components/schemas/Path' - description: "Path to a JSONL file where each line is a JSON-encoded list of messages." - options: - $ref: '#/components/schemas/Options' - num_generations: - type: integer - description: "Number of generations to produce." - responses: - '200': - description: Batch inference job successfully submitted - content: - application/json: - schema: - $ref: '#/components/schemas/BatchInferenceJob' - /batch_inference/jobs/status: - get: - summary: Get status for an already submitted job - description: | - Retrieve the status and details of a previously submitted batch inference job using its unique job ID. - parameters: - - in: query - name: job_id - schema: - type: string - required: true - description: "Unique identifier for the batch inference job." - responses: - '200': - description: Batch inference job status retrieved successfully - content: - application/json: - schema: - $ref: '#/components/schemas/BatchInferenceJob' - /batch_inference/jobs/cancel: - post: - summary: Cancel provided job - description: Cancel the batch inference job with the specified job ID. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - job_id: - type: string - responses: - 200: - description: Successfully cancelled the fine tuning job. - content: - application/json: - schema: - $ref: '#/components/schemas/BatchInferenceJob' - -# finetuning APIs - /fine_tuning/jobs/submit: - post: - summary: Submit a fine tuning job - description: Submit a fine tuning job with the specified configuration. - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/Config' - responses: - 200: - description: Successfully submitted the fine tuning job. - content: - application/json: - schema: - $ref: '#/components/schemas/FineTuningJob' - /fine_tuning/jobs/status: - get: - summary: Gets last N fine tuning jobs - description: Retrieve the status of the last N fine tuning jobs based on the provided job ID. - parameters: - - in: query - name: job_id - schema: - type: string - required: true - description: The ID of the job to retrieve status for. - responses: - 200: - description: Successfully retrieved the job status. - content: - application/json: - schema: - $ref: '#/components/schemas/FineTuningJob' - /fine_tuning/jobs/cancel: - post: - summary: Cancel provided job - description: Cancel the fine tuning job with the specified job ID. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - job_id: - type: string - responses: - 200: - description: Successfully cancelled the fine tuning job. - content: - application/json: - schema: - $ref: '#/components/schemas/FineTuningJob' - /fine_tuning/jobs/tail: - get: - summary: Tail logs of a particular job - description: Stream the logs of a particular job in real-time. This endpoint supports streaming responses. - parameters: - - in: query - name: job_id - schema: - type: string - required: true - description: The ID of the job to tail logs for. - responses: - 200: - description: Streaming logs in real-time. - content: - application/x-ndjson: - schema: - type: object - properties: - logs: - type: array - items: - $ref: '#/components/schemas/Log' - headers: - Content-Type: - schema: - type: string - default: 'application/x-ndjson' - Transfer-Encoding: - schema: - type: string - default: 'chunked' -# reward scoring APIs - /reward_scoring: - post: - summary: Score a prompt-response pair using a reward model - description: | - This endpoint scores a given prompt-response pair using a specified reward model and scoring function. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - prompt_with_response: - type: array - items: - $ref: '#/components/schemas/Message' - description: "Prompt and response joined as a list of messages." - reward: - type: string - description: "Identifier for the reward model to be used." - scoring_function: - $ref: '#/components/schemas/ScoringFunction' - options: - $ref: '#/components/schemas/Options' - responses: - '200': - description: Scoring completed successfully - content: - application/json: - schema: - type: object - properties: - id: - type: string - logprob: - type: number - format: float - score: - type: number - format: float - /batch_reward_scoring/jobs/submit: - post: - summary: Batch scoring using reward models - description: | - Submit a batch job for scoring multiple prompt-response pairs using a reward model. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - reward_model: - type: string - description: "Identifier for the reward model to be used." - prompt_with_response_path: - $ref: '#/components/schemas/Path' - description: "Path to a JSONL file where each line is a List[Message] and custom_id." - scoring_function: - $ref: '#/components/schemas/ScoringFunction' - metadata: - type: object - additionalProperties: true - description: "Metadata to carry forward in the response." - responses: - '200': - description: Batch scoring job successfully submitted - content: - application/json: - schema: - $ref: '#/components/schemas/BatchRewardScoringJob' - /batch_reward_scoring/jobs/status: - get: - summary: Get status for an already submitted job - description: | - Retrieve the status and details of a previously submitted batch scoring job using its unique job ID. - parameters: - - in: query - name: job_id - schema: - type: string - required: true - description: "Unique identifier for the batch scoring job." - responses: - '200': - description: Batch scoring job status retrieved successfully - content: - application/json: - schema: - $ref: '#/components/schemas/BatchRewardScoringJob' - /batch_reward_scoring/jobs/cancel: - post: - summary: Cancel provided job - description: Cancel the batch reward scoring job with the specified job ID. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - job_id: - type: string - responses: - 200: - description: Successfully cancelled the batch reward scoring job. - content: - application/json: - schema: - $ref: '#/components/schemas/BatchRewardScoringJob' -# synthetic data generation APIs - /synthetic_data_gen/jobs/submit: - post: - summary: Submit a job to generate synthetic data - description: Submit a job to generate synthetic data using llm + reward model scoring + filtering - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - model: - type: string - description: Model used for batch inference - prompt_file_path: - type: string - format: path - description: Path to the JSONL file containing message_lists and custom IDs - options: - $ref: '#/components/schemas/Options' - num_generations: - type: integer - description: Number of generations to produce - reward_model: - type: string - description: Model used for scoring - scoring_function: - $ref: '#/components/schemas/ScoringFunction' - filtering_function: - $ref: '#/components/schemas/FilteringFunction' - metadata: - type: object - additionalProperties: true - description: Additional metadata for the job - responses: - '200': - description: Job successfully submitted - content: - application/json: - schema: - $ref: '#/components/schemas/SyntheticDataGenJob' - /synthetic_data_gen/jobs/status: - get: - summary: Get job status - description: Get status for an already submitted job - parameters: - - in: query - name: job_id - schema: - type: string - required: true - description: Unique identifier for the job - responses: - '200': - description: Job status retrieved successfully - content: - application/json: - schema: - $ref: '#/components/schemas/SyntheticDataGenJob' - /synthetic_data_gen/jobs/cancel: - post: - summary: Cancel provided job - description: Cancel the synthetic data gen job with the specified job ID. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - job_id: - type: string - responses: - 200: - description: Successfully cancelled the synthetic data gen job. - content: - application/json: - schema: - $ref: '#/components/schemas/SyntheticDataGenJob' -# RAG APIs - /vector_store: - /vector_store/create: - /vector_store/drop: - /vector_store/insert/jobs/submit: - /vector_store/insert/jobs/status: - /vector_store/insert/jobs/cancel: - /vector_store/delete/jobs/submit: - /vector_store/delete/jobs/status: - /vector_store/delete/jobs/cancel: - /vector_store/insert: # a index single document - /vector_store/retrieve: # a single retrieval query -# Agentic APIs - /agents/execute: - post: - summary: Execute an agent with the provided turn history and user step - description: > - This endpoint allows for the execution of a specified agent, taking into account the historical steps and a new user step to generate a response turn. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - agent: - $ref: '#/components/schemas/Agent' - turnHistory: - type: array - items: - $ref: '#/components/schemas/Step' - userStep: - $ref: '#/components/schemas/Step' - responses: - '200': - description: Successfully executed the agent and returned the resulting turn. - content: - application/json: - schema: - $ref: '#/components/schemas/Turn' - get: - summary: Fetch memories from multiple memory banks - description: Embeds input, queries for nuggets across specified memory banks. - parameters: - - in: query - name: bankIds - description: A list of memory bank IDs to fetch memories from. - required: true - schema: - type: array - items: - type: string - responses: - '200': - description: Successfully fetched memories from the specified memory banks. - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/MemoryNugget' - -components: - schemas: - Attachment: - type: object - description: Represents a file (text or media). - properites: - uri: - type: string - description: The path at which the attachment resides. - mime-type: - type: string - description: The attachment's MIME type. - ModelInputMessage: - type: object - properties: - role: - type: string - enum: [user, tool] - text: - type: string - attachments: - type: array - items: - $ref: '#/components/schemas/Attachment' - metadata: - type: object - additionalProperties: true - description: "Additional metadata as JSON." - ModelOutputMessage: - type: object - properties: - id: - type: string - text: - type: string - tokens: - type: array - items: - type: integer - tool_call: - type: object - $ref: '#/components/schemas/ToolCall' - # optional - eot: - type: boolean - description: "End of turn flag." - is_complete: - type: boolean - description: "For streaming, indicates if the message is complete." - is_header_complete: - type: boolean - description: "For streaming, indicates if the header of the message is complete." - log_probs: - type: array - items: - type: number - finish_reason: - type: string - enum: - - stop - - safety - - max-length - description: "Reason for completion termination." - Options: - type: object - properties: - logprobs: - type: boolean - max_tokens: - type: integer - temperature: - type: number - top_p: - type: number - TokenUsage: - type: object - properties: - input_tokens: - type: integer - output_tokens: - type: integer - total_tokens: - type: integer - Path: - type: object - properties: - value: - type: string - description: "The path value." - type: - type: string - enum: - - raw_bytes - - filepath - - uri - description: "Data Type of the path." - BatchInferenceJob: - type: object - properties: - job_id: - type: string - description: "ID provided by the API for the job." - created: - type: string - format: date-time - description: "Timestamp when the job was created." - status: - type: string - enum: - - validating - - running - - completed - - failed - description: "Current status of the job." - input_file_path: - $ref: '#/components/schemas/Path' - success_file_path: - $ref: '#/components/schemas/Path' - error_file_path: - $ref: '#/components/schemas/Path' - metadata: - type: object - additionalProperties: true - description: "Additional metadata related to the job." - TrainingDataItem: - type: object - properties: - dialog: - type: array - items: - $ref: '#/components/schemas/Message' - keep_loss: - type: array - items: - type: boolean - WandBLogger: - type: object - properties: - project: - type: string - description: The project name in WandB where logs will be stored. - DiskLogger: - type: object - properties: - filename: - type: string - description: The filename where logs will be stored on disk. - FullFineTuningOptions: - type: object - properties: - enable_activation_checkpointing: - type: boolean - default: true - memory_efficient_fsdp_wrap: - type: boolean - default: true - fsdp_cpu_offload: - type: boolean - default: true - LoraFineTuningOptions: - type: object - properties: - lora_attn_modules: - type: array - items: - type: string - apply_lora_to_mlp: - type: boolean - default: false - apply_lora_to_output: - type: boolean - default: false - lora_rank: - type: integer - lora_alpha: - type: integer - FineTuningOptions: - type: object - properties: - n_epochs: - type: integer - batch_size: - type: integer - lr: - type: number - format: float - gradient_accumulation_steps: - type: integer - seed: - type: integer - shuffle: - type: boolean - custom_training_options: - oneOf: - - $ref: '#/components/schemas/FullFineTuningOptions' - - $ref: '#/components/schemas/LoraFineTuningOptions' - discriminator: - propertyName: finetuning_type - extras: - # json to put other config overrides that are required by torchtune - type: object - additionalProperties: true - Config: - type: object - properties: - model: - type: string - description: The model identifier that you want to fine tune. - data: - type: string - format: uri - description: Path to the JSONL file with each row representing a TrainingDataItem. - validation_data: - type: string - format: uri - description: Path to the JSONL file used for validation metrics. - fine_tuning_options: - $ref: '#/components/schemas/FineTuningOptions' - logger: - oneOf: - - $ref: '#/components/schemas/DiskLogger' - - $ref: '#/components/schemas/WandBLogger' - discriminator: - propertyName: log_type - overrides: - # eg. --nproc_per_node 4 instead of default that we need to pass through to torchrun - # when running locally - type: string - description: Custom override options for the fine tuning process. - metadata: - type: object - additionalProperties: true - FineTuningJob: - type: object - properties: - job_id: - type: string - description: Unique identifier for the fine tuning job. - created: - type: string - format: date-time - description: The creation date and time of the job. - finished_at: - type: string - format: date-time - description: The completion date and time of the job. - status: - type: string - enum: [validation, queued, running, failed, success, cancelled] - description: The current status of the job. - error_path: - type: string - format: uri - description: Path to the error log file. - checkpoints: - type: array - items: - type: string - format: uri - description: List of paths to checkpoint files for various epochs. - logs: - type: string - format: uri - description: Path to the logs, either local or a WandB URI. - input_config: - $ref: '#/components/schemas/Config' - metadata: - type: object - additionalProperties: true - Log: - type: object - properties: - message: - type: string - description: The log message. - timestamp: - type: string - format: date-time - description: The timestamp of the log message. - ScoringFunction: - type: object - properties: - name: - type: string - params: - type: object - additionalProperties: true - BatchRewardScoringJob: - type: object - properties: - job_id: - type: string - created: - type: string - format: date-time - status: - type: string - enum: - - validating - - running - - completed - - failed - input_file_path: - $ref: '#/components/schemas/Path' - success_file_path: - $ref: '#/components/schemas/Path' - error_file_path: - $ref: '#/components/schemas/Path' - metadata: - type: object - additionalProperties: true - description: "Metadata carried forward from the job submission." - FilteringFunction: - type: object - properties: - name: - type: string - description: Name of the filtering function - params: - type: object - additionalProperties: true - description: JSON object containing parameters for the filtering function - SyntheticDataPoint: - type: object - properties: - custom_id: - type: string - description: Custom identifier for the data point - index: - type: integer - description: Index of the data point - prompt: - type: array - items: - $ref: '#/components/schemas/Message' - description: List of messages used as prompt - response: - $ref: '#/components/schemas/Message' - logprob: - type: number - format: float - description: Log probability of the response - score: - type: number - format: float - description: Score of the response based on the reward model - SyntheticDataGenJob: - type: object - properties: - job_id: - type: string - description: ID provided by the API - created: - type: string - format: date-time - description: Timestamp when the job was created - status: - type: string - enum: [validating, running, completed, failed] - description: Current status of the job - input_file_path: - type: string - format: path - description: Path to the input JSONL file - success_file_path: - type: string - format: path - description: Path to the JSONL file containing successful results - error_file_path: - type: string - format: path - description: Path to the JSONL file containing errors - metadata: - type: object - additionalProperties: true - description: Additional metadata about the job - Agent: - type: object - description: Represents an AI agent with specific tools and a model configuration. - properties: - name: - type: string - description: The name of the agent. - description: - type: string - description: A brief description of the agent's purpose and capabilities. - tools: - type: array - description: A collection of tools that the agent can utilize. - items: - $ref: '#/components/schemas/Tool' - model: - type: string - enum: [llama31_405, llama3_70, llama3_8] - description: The model identifier that the agent uses for processing. - ToolDefinition: - type: object - description: A tool that can be used by an agent to perform specific tasks. - properties: - name: - type: string - description: The name of the tool. - tool_type: - type: string - enum: [builtin, zeroshot] - description: - type: string - description: A brief description of what the tool does and how it should be used. - parameters: - type: array - description: The parameters that the tool requires to function properly. - items: - $ref: '#/components/schemas/ToolParameter' - returnValue: - $ref: '#/components/schemas/ToolReturnValue' - ToolCall: - type: object - description: A tool that can be used by an agent to perform specific tasks. - properties: - name: - type: string - description: The name of the tool. - arguments: - type: array - description: The parameters that the tool requires to function properly. - items: - type: object - returnValue: - type: object - ToolParameter: - type: object - description: Defines a parameter that a tool requires to operate. - properties: - type: - type: string - enum: [string, int, float, list, bool] - description: The data type of the parameter. - itemType: - type: string - description: The type of items in the parameter if it is a list. - description: - type: string - description: Details about what the parameter is used for and any constraints. - ToolReturnValue: - type: object - description: Describes the return value of a tool after execution. - properties: - type: - type: object - description: - type: string - description: Documentation of the return value - Step: - type: object - description: Represents a step in the interaction with an agent, such as a user query or an agent response. Each step captures a discrete part of the conversation, including user inputs, agent responses, or interactions with tools. - properties: - id: - type: string - description: A unique identifier for the step, facilitating tracking and referencing within the interaction flow. - role: - type: string - enum: [assistant, user] - description: The role of the actor in this step, indicating whether the step originated from the user or the assistant. - stepType: - type: string - enum: [user_response, assistant_response, tool_request, tool_response] - description: The type of step, categorizing the nature of the interaction such as a user response, an assistant response, a request to a tool, or a response from a tool. - messages: - type: array - description: All messages corresponding to the step - items: - $ref: '#/components/schemas/Message' - timestamp: - type: string - format: date-time - description: The timestamp when the step occurred, providing a temporal context to the interaction. - metadata: - type: object - additionalProperties: true - description: A flexible structure to store additional metadata about the step, such as contextual information, execution details, or any other relevant data that supports the interaction process. - Turn: - type: object - description: Represents a complete turn in the interaction between the user and the agent. A turn consists of one or more steps that capture the sequence of interactions, including user inputs, agent responses, and any tool interactions that occur within a single conversational exchange. - properties: - id: - type: string - description: A unique identifier for the turn, which helps in tracking and referencing specific turns within a session. - steps: - type: array - description: An ordered list of steps that occurred during this turn. Each step can be a user query, an agent response, or a tool interaction. - items: - $ref: '#/components/schemas/Step' - startTime: - type: string - format: date-time - description: The timestamp marking the start of the turn. This helps in analyzing the timing and duration of interactions. - endTime: - type: string - format: date-time - description: The timestamp marking the end of the turn. This is useful for performance metrics and understanding user-agent interaction patterns. - status: - type: string - enum: [completed, failed] - description: The status of the turn, indicating whether the turn was completed successfully or failed due to an error. - metadata: - type: object - additionalProperties: true - description: A flexible structure to store additional metadata about the turn, such as such as contextual information, execution details, or any other relevant data that needs to be persisted or passed along with the turn. - VectorStore: - type: object - description: Store for RAG - properties: - id: - type: string - description: The unique identifier of the memory bank. - name: - type: string - description: The name of the memory bank. - description: - type: string - description: A brief description of the memory bank's purpose and contents. - VectorStoreInsertJob: - type: object - description: Represents a job that is uploading a file into a vector store - properties: - id: - type: string - description: The unique identifier of the insert job - content: - type: string - description: The embedded content of the memory nugget. - VectorStoreDeleteJob: - type: object - description: Represents a job that is removing contents of a file from a vector store - properties: - id: - type: string - description: The unique identifier of the delete job - - # deployment configs - AgenticSystemDeploymentConfig: - type: object - description: Holds global deployment configuration needed to make different API calls across the stack. - properties: - braveSearchKey: - type: string - description: The API key to use for agent-invoked Brave search. - wolframAlphaKey: - type: string - description: The API key to use for agent-invoked Wolfram search. - ToolChainDeploymentConfig: - type: object - description: Holds deployment configuration for different parts of the toolchain - properties: - inferenceProvider: - type: string - description: The URI for the inference provider - batchInferenceProvider: - metricLoggerProvider: WandB diff --git a/reward_model_scoring.yaml b/reward_model_scoring.yaml deleted file mode 100644 index 29229afdc..000000000 --- a/reward_model_scoring.yaml +++ /dev/null @@ -1,162 +0,0 @@ -openapi: 3.0.0 -info: - title: Reward Model Service API - version: 0.0.1 -paths: - /reward_model_scoring/: - post: - summary: Score a prompt-response pair using a reward model - description: | - This endpoint scores a given prompt-response pair using a specified reward model and scoring function. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - prompt_with_response: - type: array - items: - $ref: '#/components/schemas/Message' - description: "Prompt and response joined as a list of messages." - reward_model: - type: string - description: "Identifier for the reward model to be used." - scoring_function: - $ref: '#/components/schemas/ScoringFunction' - options: - $ref: '#/components/schemas/Options' - responses: - '200': - description: Scoring completed successfully - content: - application/json: - schema: - type: object - properties: - id: - type: string - logprob: - type: number - format: float - score: - type: number - format: float - - /reward_model_scoring/submit_job/: - post: - summary: Batch scoring using reward models - description: | - Submit a batch job for scoring multiple prompt-response pairs using a reward model. - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - reward_model: - type: string - description: "Identifier for the reward model to be used." - prompt_with_response_path: - $ref: '#/components/schemas/Path' - description: "Path to a JSONL file where each line is a List[Message] and custom_id." - scoring_function: - $ref: '#/components/schemas/ScoringFunction' - metadata: - type: object - additionalProperties: true - description: "Metadata to carry forward in the response." - responses: - '200': - description: Batch scoring job successfully submitted - content: - application/json: - schema: - $ref: '#/components/schemas/BatchRewardModelScoringJob' - - /reward_model_scoring/submit_job/job_status: - get: - summary: Get status for an already submitted job - description: | - Retrieve the status and details of a previously submitted batch scoring job using its unique job ID. - parameters: - - in: query - name: job_id - schema: - type: string - required: true - description: "Unique identifier for the batch scoring job." - responses: - '200': - description: Batch scoring job status retrieved successfully - content: - application/json: - schema: - $ref: '#/components/schemas/BatchRewardModelScoringJob' - -components: - schemas: - Message: - # reuse from /chat_completion - - Options: - type: object - properties: - logprobs: - type: boolean - max_tokens: - type: integer - temperature: - type: number - top_p: - type: number - # TODO: Add/Remove more reward model specific params - - ScoringFunction: - type: object - properties: - name: - type: string - params: - type: object - additionalProperties: true - - Path: - type: object - properties: - value: - type: string - type: - type: string - enum: - - raw_bytes - - filepath - - uri - - BatchRewardModelScoringJob: - type: object - properties: - job_id: - type: string - created: - type: string - format: date-time - status: - type: string - enum: - - validating - - running - - completed - - failed - input_file_path: - $ref: '#/components/schemas/Path' - success_file_path: - $ref: '#/components/schemas/Path' - error_file_path: - $ref: '#/components/schemas/Path' - metadata: - type: object - additionalProperties: true - description: "Metadata carried forward from the job submission." diff --git a/simple_view/batch_inference.yml b/simple_view/batch_inference.yml deleted file mode 100644 index 2f5285270..000000000 --- a/simple_view/batch_inference.yml +++ /dev/null @@ -1,65 +0,0 @@ -== Schema == -Message: - role: str - text: str - attachements: List[MediaAttachment] - eot: bool - tool_call: bool # if it's a tool call - builtin or custom or ipython - # for streaming - is_complete: bool - is_header_complete: bool - metadata: json - -MediaAttachment: - attachement_type: MediaAttachmentType - data_type: MediaAttachmentDataType - data: str - -MediaAttachmentType: # enum [image, video, audio, text(or file)] -MediaAttachmentDataType: # enum [raw_bytes, filepath, uri] - -BatchInference: - job_id: str # id provided by the api - created: string # format - date-time - status: string # enum (validating, running, completed, failed) - input_file_path: Path # jsonl style file where each - success_file_path: Path - error_file_path: Path - metadata: json - -Options: - logprobs: bool - max_tokens: int - temperature: float - top_p: float - -Path: - value: string - type: string # enum [raw_bytes, filepath, uri] - -== Callsites == - -callsite: - /batch_inference/submit_job -request_type: - post -description: - Submit a batch inference job -request: - model: str - prompt_file_path: Path # jsonl style file where each line is a json encoded List[Message] - options: Options - num_generations: int -response: - batch_inference_job: BatchInference - -callsite: - /batch_inference/job_status -request_type: - get -description: - Get status for an already submitted job -request: - job_id: str # unique identifier for the job -response: - batch_inference_job: BatchInference diff --git a/simple_view/chat_completion.yml b/simple_view/chat_completion.yml deleted file mode 100644 index fe9c0357c..000000000 --- a/simple_view/chat_completion.yml +++ /dev/null @@ -1,72 +0,0 @@ -# Simple bullet form for ease of read and iteration -# Use LLMs to translate this to a OpenAPI spec. - -== Schema == - -Message: - role: str - text: str - attachements: List[MediaAttachment] - eot: bool - tool_call: bool # if it's a tool call - builtin or custom or ipython - # for streaming - is_complete: bool - is_header_complete: bool - metadata: json - -MediaAttachment: - attachement_type: MediaAttachmentType - data_type: MediaAttachmentDataType - data: str - -MediaAttachmentType: # enum [image, video, audio, text(or file)] -MediaAttachmentDataType: # enum [raw_bytes, filepath, uri] - -Completion: - id: str - message: Message - tokens: List[int] - logprobs: List[floats] - finish_reason: str # Enum (stop, safety, max-length, etc) - -Options: - logprobs: bool - max_tokens: int - temperature: float - top_p: float - #TODO: Get more options from metagen - -TokenUsage: - input_tokens: int - output_tokens: int - total_tokens: int - -== Callsite == - -callsite: - chat_completion/ -request_type: - post -description: - submit a chat completion request -request: - messages: List[Message] - model: str - options: Options - n_complections: int - # TODO: how to handle tooling control if any ? - # Add `tools` and `tool_choice` -- - # for eg. "auto": use model's guess - # how to force to use particular tool - # how to disbale inbuilt tools - # tools: List[Tool] - # tool_choice: Any -response: - id: str - candidates: List[Completion] # a list to account for when n_completions > 1 - model_called: str # info on that model that produced this result - usage: TokenUsage - -# TODO -# callsite: -# chat_completion_stream/ diff --git a/simple_view/fine_tuning.yml b/simple_view/fine_tuning.yml deleted file mode 100644 index 8680b3fab..000000000 --- a/simple_view/fine_tuning.yml +++ /dev/null @@ -1,134 +0,0 @@ -# Fine Tuning APIs -== Schema == - -TrainingDataItem: - dialog: List[Message] - keep_loss: List[bool] - - -WandBLogger: - project: str - -DiskLogger: - # log_dir will be pre-configured in environment - filename: str - -FullFineTuneOptions: - enable_activation_checkpointing: True - memory_efficient_fsdp_wrap: True - fsdp_cpu_offload: True - -LoraFineTuneOptions: - lora_attn_modules: ['q_proj', 'v_proj'] - apply_lora_to_mlp: False - apply_lora_to_output: False - lora_rank: 8 - lora_alpha: 16 - -FineTuningOptions: - n_epochs: int - batch_size: int - lr: float - gradient_accumulation_steps: int - seed: int - shuffle: bool - - # Unions in OpenAPI with a reference field that can help disambiguate - custom_training_options: - discriminator: - propertyName: fine_tuning_type - mapping: - fft: FullFineTuneOptions - lora: LoraFineTuneOptions - - # other options that can be passed in - extras: json - -Config: - model: str # model that you want to fine tune - data: Path # jsonl with each row representing a TrainingDataItem - validation_data: Path # same as data but to get validation metrics on - - # fine tuning args - fine_tuning_options: FineTuningOptions - - # metric logging - logger: - discriminator: - propertyName: log_type - mapping: - disk: DiskLogger - wandb: WandBLogger - - # Override options - # eg. --nproc_per_node 4 insted of defaults, - # this might be impl specific and can allow for various customizations - overrides: str - metadata: json # to carry over to job details - -FineTuningJob: - job_id: str - created: str # format date-time - finished_at: str # format date-time - status: str # enum - validation, queued, running, failed, success, cancelled - error_path: Path # error logging - checkpoints: List[Path] # checkpoints for various epochs - logs: Path # local path / wandb uri - input_config: Config # config used to submit this job - metadata: json # carried over rom user provided input - -Log: - message: string # The log message. - timestamp: string # format: date-time - -== Callsites == - -callsite: - /fine_tuning/jobs/submit -request_type: - post -description: - Submit a fine tuning job -request: - config: Config -response: - fine_tuning_job: FineTuningJob - - -callsite: - /fine_tuning/jobs/status -request_type: - get -description: - Gets last N fine tuning jobs -request: - job_id: str -response: - fine_tuning_job: FineTuningJob - - -callsite: - /fine_tuning/jobs/cancel -request_type: - post -description: - Cancel provided job -request: - job_id: str -response: - fine_tuning_job: FineTuningJob - - -callsite: - /fine_tuning/jobs/tail -request_type: - get -description: - Tail logs of a particular job -request: - job_id: str -response: - logs: List[Log] - streaming: - enabled: True - chunkSize: 1024 diff --git a/simple_view/reward_model_scoring.yml b/simple_view/reward_model_scoring.yml deleted file mode 100644 index 676b1d835..000000000 --- a/simple_view/reward_model_scoring.yml +++ /dev/null @@ -1,70 +0,0 @@ -# Reward Model Service -== Schema == -Message: - # Same as /chat_completion - -Options: - logprobs: bool - max_tokens: int - temperature: float - top_p: float - #TODO: Figure out what other reward model specific params - -ScoringFunction: - name: str - params: json - -BatchRewardModelScoringJob: - job_id: str - created: str # format date-time - status: string # enum (validating, running, completed, failed) - input_file_path: Path - success_file_path: Path # jsonl where each row has {custom_id: , logprob: , score: float} - error_file_path: Path # jsonl where each row has {custom_id: , error: } - metadata: json # carry forward from job submission api - - -== Callsites == - -callsite: - reward_model_scoring/ -request_type: - post -description: - Score a prompt-response pair using a reward model -request: - prompt_with_response: List[Message] # prompt and response joined as a List[Message] - reward_model: str - scoring_function: ScoringFunction - options: Options -response: - id: str - logprob: float - score: float - - -callsite: - reward_model_scoring/submit_job/ -request_type: - post -description: - Batch scoring using reward models -request: - reward_model: str - prompt_with_response_path: Path # jsonl file where each line is a List[Message] and custom_id - scoring_function: ScoringFunction - metadata: json # anything to carry forward over in the response -response: - batch_reward_model_scoring_job: BatchRewardModelScoringJob - - -callsite: - /reward_model_scoring/submit_job/job_status -request_type: - get -description: - Get status for an already submitted job -request: - job_id: str # unique identifier for the job -response: - batch_reward_model_scoring_job: BatchRewardModelScoringJob diff --git a/simple_view/synthetic_data_generation.yml b/simple_view/synthetic_data_generation.yml deleted file mode 100644 index 27c942c24..000000000 --- a/simple_view/synthetic_data_generation.yml +++ /dev/null @@ -1,58 +0,0 @@ -# Synthetic Data Generation API -== Schema == - -FilteringFunction: - name: str - params: json - -SyntheticDataPoint: - custom_id: str - index: int - prompt: List[Message] - response: Message - logprob: float - score: float - -SyntheticDataGenerationJob: - job_id: str # id provided by the api - created: string # format - date-time - status: string # enum (validating, running, completed, failed) - input_file_path: Path # jsonl style file where each row contains custom_id and message_list - success_file_path: Path # jsonl each line is SyntheticDataPoint - error_file_path: Path # custom_ids where we failed with some info - metadata: json - -== Callsites == - -callsite: - /synthetic_data_gen/submit_job -request_type: - post -description: - Submit a job to generate synthetic data using llm + reward model scoring + filtering -request: - # batch inference params - model: str - prompt_file_path: Path # jsonl style file where each line is a json encoded List[Message] + custom_id - options: Options - num_generations: int - # reward model scoring params - reward_model: str - scoring_function: ScoringFunction - # filtering params - filtering_function: FilteringFunction - metadata: json - -response: - synth_data_gen_job: SyntheticDataGenerationJob - -callsite: - /synthetic_data_gen/job_status -request_type: - get -description: - Get status for an already submitted job -request: - job_id: str # unique identifier for the job -response: - synth_data_gen_job: SyntheticDataGenerationJob diff --git a/source/run.sh b/source/generate.sh similarity index 100% rename from source/run.sh rename to source/generate.sh diff --git a/synthetic_data_generation.yaml b/synthetic_data_generation.yaml deleted file mode 100644 index f11786f47..000000000 --- a/synthetic_data_generation.yaml +++ /dev/null @@ -1,131 +0,0 @@ -openapi: 3.0.0 -info: - title: Synthetic Data Generation API - version: 0.0.1 -paths: - /synthetic_data_gen/submit_job: - post: - summary: Submit a job to generate synthetic data - description: Submit a job to generate synthetic data using llm + reward model scoring + filtering - requestBody: - required: true - content: - application/json: - schema: - type: object - properties: - model: - type: string - description: Model used for batch inference - prompt_file_path: - type: string - format: path - description: Path to the JSONL file containing message_lists and custom IDs - options: - $ref: '#/components/schemas/Options' - num_generations: - type: integer - description: Number of generations to produce - reward_model: - type: string - description: Model used for scoring - scoring_function: - $ref: '#/components/schemas/ScoringFunction' - filtering_function: - $ref: '#/components/schemas/FilteringFunction' - metadata: - type: object - additionalProperties: true - description: Additional metadata for the job - responses: - '200': - description: Job successfully submitted - content: - application/json: - schema: - $ref: '#/components/schemas/SyntheticDataGenerationJob' - - /synthetic_data_gen/job_status: - get: - summary: Get job status - description: Get status for an already submitted job - parameters: - - in: query - name: job_id - schema: - type: string - required: true - description: Unique identifier for the job - responses: - '200': - description: Job status retrieved successfully - content: - application/json: - schema: - $ref: '#/components/schemas/SyntheticDataGenerationJob' -components: - schemas: - FilteringFunction: - type: object - properties: - name: - type: string - description: Name of the filtering function - params: - type: object - additionalProperties: true - description: JSON object containing parameters for the filtering function - SyntheticDataPoint: - type: object - properties: - custom_id: - type: string - description: Custom identifier for the data point - index: - type: integer - description: Index of the data point - prompt: - type: array - items: - $ref: '#/components/schemas/Message' - description: List of messages used as prompt - response: - $ref: '#/components/schemas/Message' - logprob: - type: number - format: float - description: Log probability of the response - score: - type: number - format: float - description: Score of the response based on the reward model - SyntheticDataGenerationJob: - type: object - properties: - job_id: - type: string - description: ID provided by the API - created: - type: string - format: date-time - description: Timestamp when the job was created - status: - type: string - enum: [validating, running, completed, failed] - description: Current status of the job - input_file_path: - type: string - format: path - description: Path to the input JSONL file - success_file_path: - type: string - format: path - description: Path to the JSONL file containing successful results - error_file_path: - type: string - format: path - description: Path to the JSONL file containing errors - metadata: - type: object - additionalProperties: true - description: Additional metadata about the job