Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-27 18:50:41 +00:00)

Commit 2232bfa8b5 (parent 57881c08c1)

RFC-0001-The-Llama-Stack (#8)

* RFC-0001-The-Llama-Stack
* Add OpenAPI generation utility, update SPEC to reflect latest types
* First cut at an observability API
* llama3_1 -> llama3

Co-authored-by: Ashwin Bharambe <ashwin.bharambe@gmail.com>

19 changed files with 9177 additions and 10 deletions
@@ -60,19 +60,19 @@ class EvaluationJobArtifactsResponse(BaseModel):
 
 class Evaluations(Protocol):
     @webmethod(route="/evaluate/text_generation/")
-    def post_evaluate_text_generation(
+    def evaluate_text_generation(
         self,
         request: EvaluateTextGenerationRequest,
     ) -> EvaluationJob: ...
 
     @webmethod(route="/evaluate/question_answering/")
-    def post_evaluate_question_answering(
+    def evaluate_question_answering(
         self,
         request: EvaluateQuestionAnsweringRequest,
     ) -> EvaluationJob: ...
 
     @webmethod(route="/evaluate/summarization/")
-    def post_evaluate_summarization(
+    def evaluate_summarization(
         self,
         request: EvaluateSummarizationRequest,
     ) -> EvaluationJob: ...
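The renames above drop the `post_` prefix from the method names, presumably because the OpenAPI generator now derives the HTTP method itself (unprefixed methods default to POST; see the patched prefix logic in `rfcs/openapi_generator/generate.py` later in this diff). As a minimal sketch, an implementation of the renamed protocol might look like the following; the `EvaluationJob` constructor field is an assumption, since its definition is not visible in this hunk.

```python
# Sketch of an implementation of the renamed protocol (not in this commit).
class InProcessEvaluations(Evaluations):
    def evaluate_text_generation(
        self,
        request: EvaluateTextGenerationRequest,
    ) -> EvaluationJob:
        # enqueue the evaluation and return a job handle immediately;
        # the job_uuid field name is assumed, not shown in this hunk
        return EvaluationJob(job_uuid="eval-job-0")
```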
@@ -13,7 +13,7 @@ from .datatypes import *  # noqa: F403
 
 class MemoryBanks(Protocol):
     @webmethod(route="/memory_banks/create")
-    def post_create_memory_bank(
+    def create_memory_bank(
         self,
         bank_id: str,
         bank_name: str,
@@ -33,14 +33,14 @@ class MemoryBanks(Protocol):
     ) -> str: ...
 
     @webmethod(route="/memory_bank/insert")
-    def post_insert_memory_documents(
+    def insert_memory_documents(
         self,
         bank_id: str,
         documents: List[MemoryBankDocument],
     ) -> None: ...
 
     @webmethod(route="/memory_bank/update")
-    def post_update_memory_documents(
+    def update_memory_documents(
         self,
         bank_id: str,
         documents: List[MemoryBankDocument],
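A hedged usage sketch of the renamed MemoryBanks methods follows; the hunks elide some `create_memory_bank` parameters, so only the visible ones appear, and all values are illustrative.

```python
# Illustrative calls against a MemoryBanks implementation (values made up);
# parameters of create_memory_bank that these hunks do not show are elided.
banks: MemoryBanks = ...  # some concrete implementation

new_bank_id = banks.create_memory_bank(
    bank_id="bank-0",
    bank_name="support-docs",
    # ...remaining parameters not visible in this hunk
)
banks.insert_memory_documents(bank_id=new_bank_id, documents=[])
banks.update_memory_documents(bank_id=new_bank_id, documents=[])
```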
llama_toolchain/observability/__init__.py  (new file, 5 lines)

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
llama_toolchain/observability/api/__init__.py  (new file, 8 lines)

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .datatypes import *  # noqa: F401 F403
from .endpoints import *  # noqa: F401 F403
llama_toolchain/observability/api/datatypes.py  (new file, 80 lines)

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from datetime import datetime
from enum import Enum

from typing import Any, Dict, Optional, Union

from llama_models.schema_utils import json_schema_type

from pydantic import BaseModel


@json_schema_type
class ExperimentStatus(Enum):
    NOT_STARTED = "not_started"
    RUNNING = "running"
    COMPLETED = "completed"
    FAILED = "failed"


@json_schema_type
class Experiment(BaseModel):
    id: str
    name: str
    status: ExperimentStatus
    created_at: datetime
    updated_at: datetime
    metadata: Dict[str, Any]


@json_schema_type
class Run(BaseModel):
    id: str
    experiment_id: str
    status: str
    started_at: datetime
    ended_at: Optional[datetime]
    metadata: Dict[str, Any]


@json_schema_type
class Metric(BaseModel):
    name: str
    value: Union[float, int, str, bool]
    timestamp: datetime
    run_id: str


@json_schema_type
class Log(BaseModel):
    message: str
    level: str
    timestamp: datetime
    additional_info: Dict[str, Any]


@json_schema_type
class ArtifactType(Enum):
    MODEL = "model"
    DATASET = "dataset"
    CHECKPOINT = "checkpoint"
    PLOT = "plot"
    METRIC = "metric"
    CONFIG = "config"
    CODE = "code"
    OTHER = "other"


@json_schema_type
class Artifact(BaseModel):
    id: str
    name: str
    type: ArtifactType
    size: int
    created_at: datetime
    metadata: Dict[str, Any]
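To make the schema concrete, here is an illustrative construction of these datatypes; it is not part of the commit, and the values are made up.

```python
# Illustrative instances of the observability datatypes above (values made up).
from datetime import datetime

experiment = Experiment(
    id="exp-001",
    name="llama3-sft-ablation",  # hypothetical experiment name
    status=ExperimentStatus.RUNNING,
    created_at=datetime.now(),
    updated_at=datetime.now(),
    metadata={"owner": "ml-team"},
)

artifact = Artifact(
    id="art-001",
    name="adapter-weights",
    type=ArtifactType.CHECKPOINT,
    size=1024,  # size in bytes, presumably; units are not stated in the commit
    created_at=datetime.now(),
    metadata={},
)
```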
llama_toolchain/observability/api/endpoints.py  (new file, 108 lines)

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from datetime import datetime
from typing import Any, Dict, List, Optional, Protocol

from llama_models.schema_utils import json_schema_type, webmethod
from pydantic import BaseModel
from llama_models.llama3.api.datatypes import *  # noqa: F403
from .datatypes import *  # noqa: F403


@json_schema_type
class CreateExperimentRequest(BaseModel):
    name: str
    metadata: Optional[Dict[str, Any]] = None


@json_schema_type
class UpdateExperimentRequest(BaseModel):
    experiment_id: str
    status: Optional[ExperimentStatus] = None
    metadata: Optional[Dict[str, Any]] = None


@json_schema_type
class CreateRunRequest(BaseModel):
    experiment_id: str
    metadata: Optional[Dict[str, Any]] = None


@json_schema_type
class UpdateRunRequest(BaseModel):
    run_id: str
    status: Optional[str] = None
    ended_at: Optional[datetime] = None
    metadata: Optional[Dict[str, Any]] = None


@json_schema_type
class LogMetricsRequest(BaseModel):
    run_id: str
    metrics: List[Metric]


@json_schema_type
class LogMessagesRequest(BaseModel):
    logs: List[Log]
    run_id: Optional[str] = None


@json_schema_type
class UploadArtifactRequest(BaseModel):
    experiment_id: str
    name: str
    artifact_type: str
    content: bytes
    metadata: Optional[Dict[str, Any]] = None


@json_schema_type
class LogSearchRequest(BaseModel):
    query: str
    filters: Optional[Dict[str, Any]] = None


class Observability(Protocol):
    @webmethod(route="/experiments/create")
    def create_experiment(self, request: CreateExperimentRequest) -> Experiment: ...

    @webmethod(route="/experiments/list")
    def list_experiments(self) -> List[Experiment]: ...

    @webmethod(route="/experiments/get")
    def get_experiment(self, experiment_id: str) -> Experiment: ...

    @webmethod(route="/experiments/update")
    def update_experiment(self, request: UpdateExperimentRequest) -> Experiment: ...

    @webmethod(route="/experiments/create_run")
    def create_run(self, request: CreateRunRequest) -> Run: ...

    @webmethod(route="/runs/update")
    def update_run(self, request: UpdateRunRequest) -> Run: ...

    @webmethod(route="/runs/log_metrics")
    def log_metrics(self, request: LogMetricsRequest) -> None: ...

    @webmethod(route="/runs/metrics", method="GET")
    def get_metrics(self, run_id: str) -> List[Metric]: ...

    @webmethod(route="/logging/log_messages")
    def log_messages(self, request: LogMessagesRequest) -> None: ...

    @webmethod(route="/logging/get_logs")
    def get_logs(self, request: LogSearchRequest) -> List[Log]: ...

    @webmethod(route="/experiments/artifacts/upload")
    def upload_artifact(self, request: UploadArtifactRequest) -> Artifact: ...

    @webmethod(route="/experiments/artifacts/get")
    def list_artifacts(self, experiment_id: str) -> List[Artifact]: ...

    @webmethod(route="/artifacts/get")
    def get_artifact(self, artifact_id: str) -> Artifact: ...
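The intended call sequence (experiment, then run, then metrics) follows from the request types above. A minimal in-memory sketch, not part of the commit, with assumed status strings and storage choices:

```python
# Toy implementation of part of the Observability protocol above, to show the
# call sequence; the status values and in-memory storage are assumptions.
from datetime import datetime
import uuid


class InMemoryObservability:
    def __init__(self):
        self.metrics: list = []  # logged metrics accumulate here

    def create_experiment(self, request: CreateExperimentRequest) -> Experiment:
        now = datetime.now()
        return Experiment(
            id=str(uuid.uuid4()), name=request.name,
            status=ExperimentStatus.NOT_STARTED,
            created_at=now, updated_at=now, metadata=request.metadata or {},
        )

    def create_run(self, request: CreateRunRequest) -> Run:
        return Run(
            id=str(uuid.uuid4()), experiment_id=request.experiment_id,
            status="running", started_at=datetime.now(), ended_at=None,
            metadata=request.metadata or {},
        )

    def log_metrics(self, request: LogMetricsRequest) -> None:
        self.metrics.extend(request.metrics)


obs = InMemoryObservability()
exp = obs.create_experiment(CreateExperimentRequest(name="demo"))
run = obs.create_run(CreateRunRequest(experiment_id=exp.id))
obs.log_metrics(LogMetricsRequest(
    run_id=run.id,
    metrics=[Metric(name="accuracy", value=0.91,
                    timestamp=datetime.now(), run_id=run.id)],
))
```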
@@ -95,13 +95,13 @@ class PostTrainingJobArtifactsResponse(BaseModel):
 
 class PostTraining(Protocol):
     @webmethod(route="/post_training/supervised_fine_tune")
-    def post_supervised_fine_tune(
+    def supervised_fine_tune(
         self,
         request: PostTrainingSFTRequest,
     ) -> PostTrainingJob: ...
 
     @webmethod(route="/post_training/preference_optimize")
-    def post_preference_optimize(
+    def preference_optimize(
         self,
         request: PostTrainingRLHFRequest,
     ) -> PostTrainingJob: ...
@@ -27,7 +27,7 @@ class RewardScoringResponse(BaseModel):
 
 class RewardScoring(Protocol):
     @webmethod(route="/reward_scoring/score")
-    def post_score(
+    def reward_score(
         self,
         request: RewardScoringRequest,
     ) -> Union[RewardScoringResponse]: ...
llama_toolchain/stack.py  (new file, 30 lines)

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_models.llama3.api.datatypes import *  # noqa: F403
from llama_toolchain.agentic_system.api import *  # noqa: F403
from llama_toolchain.dataset.api import *  # noqa: F403
from llama_toolchain.evaluations.api import *  # noqa: F403
from llama_toolchain.inference.api import *  # noqa: F403
from llama_toolchain.memory.api import *  # noqa: F403
from llama_toolchain.observability.api import *  # noqa: F403
from llama_toolchain.post_training.api import *  # noqa: F403
from llama_toolchain.reward_scoring.api import *  # noqa: F403
from llama_toolchain.synthetic_data_generation.api import *  # noqa: F403


class LlamaStack(
    Inference,
    AgenticSystem,
    RewardScoring,
    SyntheticDataGeneration,
    Datasets,
    Observability,
    PostTraining,
    MemoryBanks,
    Evaluations,
):
    pass
@@ -34,7 +34,7 @@ class SyntheticDataGenerationResponse(BaseModel):
 
 class SyntheticDataGeneration(Protocol):
     @webmethod(route="/synthetic_data_generation/generate")
-    def post_generate(
+    def synthetic_data_generate(
         self,
         request: SyntheticDataGenerationRequest,
     ) -> Union[SyntheticDataGenerationResponse]: ...
BIN   rfcs/RFC-0001-llama-stack-assets/agentic-system.png  (new binary file, 128 KiB; not shown)
5358  rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.html  (new file; diff suppressed because it is too large)
3339  rfcs/RFC-0001-llama-stack-assets/llama-stack-spec.yaml  (new file; diff suppressed because it is too large)
BIN   rfcs/RFC-0001-llama-stack-assets/llama-stack.png  (new binary file, 71 KiB; not shown)
BIN   rfcs/RFC-0001-llama-stack-assets/model-lifecycle.png  (new binary file, 17 KiB; not shown)
rfcs/RFC-0001-llama-stack.md  (new file, 86 lines)

# The Llama Stack API

**Authors:**

* Meta: @raghotham, @ashwinb, @hjshah, @jspisak

## Summary

As part of the Llama 3.1 release, Meta is releasing an RFC for 'Llama Stack', a comprehensive set of interfaces / API for ML developers building on top of Llama foundation models. We are looking for feedback on where the API can be improved, any corner cases we may have missed, and your general thoughts on how useful this will be. Ultimately, our hope is to create a standard for working with Llama models in order to simplify the developer experience and foster innovation across the Llama ecosystem.

## Motivation

Llama models were always intended to work as part of an overall system that can orchestrate several components, including calling external tools. Our vision is to go beyond the foundation models and give developers access to a broader system that gives them the flexibility to design and create custom offerings that align with their vision. This thinking started last year when we first introduced a system-level safety model. Meta has continued to release new components for orchestration at the system level; most recently, with Llama 3.1, we introduced Llama Guard 3, a multilingual safety model; Prompt Guard, a prompt-injection filter; and a refreshed v3 of our CyberSec Evals. We are also releasing a reference implementation of an agentic system to demonstrate how all the pieces fit together.

While building the reference implementation, we realized that having a clean and consistent way to interface between components could be valuable not only for us but for anyone leveraging Llama models and other components as part of their system. We have also heard from the community that they face a similar challenge: components exist with overlapping functionality and incompatible interfaces, and yet they do not cover the end-to-end model life cycle.

With these motivations, we engaged folks in industry, startups, and the broader developer community to help better define the interfaces of these components. We're releasing this Llama Stack RFC as a set of standardized and opinionated interfaces for how to surface canonical toolchain components (like inference, fine-tuning, evals, synthetic data generation) and agentic applications to ML developers. Our hope is to have these become well adopted across the ecosystem, which should help with easier interoperability. We would like builders of multiple components to provide implementations of these standard APIs so that there can be vertically integrated "distributions" of the Llama Stack that work out of the box easily.

We welcome feedback and ways to improve the proposal. We're excited to grow the ecosystem around Llama and lower barriers for both developers and platform providers.

## Design decisions

Meta releases weights of both the pretrained and instruction fine-tuned Llama models to support several use cases. These weights can be improved (fine-tuned and aligned) with curated datasets and then deployed for inference to support specific applications. The curated datasets can be produced manually by humans, synthetically by other models, or by leveraging human feedback collected from usage data of the application itself. This results in a continuous improvement cycle where the model gets better over time. This is the model life cycle.

### Model Lifecycle

![Figure 1: Model Life Cycle](RFC-0001-llama-stack-assets/model-lifecycle.png)

For each of the operations that need to be performed during the model life cycle (e.g. fine-tuning, inference, evals), we identified the toolchain APIs that are needed. Some of these capabilities are primitive operations, like inference, while others, like synthetic data generation, are composed of other capabilities. The list of APIs we have identified to support the lifecycle of Llama models is below; a hedged client sketch follows the list.

- /datasets - to support creating training and evaluation data sets
- /post_training - to support creating and managing supervised fine-tuning (SFT) or preference optimization jobs
- /evaluations - to support creating and managing evaluations for capabilities like question answering, summarization, or text generation
- /synthetic_data_generation - to support generating synthetic data using a data generation model and a reward model
- /reward_scoring - to support synthetic data generation
- /inference - to support serving the models for applications
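As an illustration of how a client might drive this lifecycle over HTTP, here is a sketch that is not part of the RFC text; the base URL and the `{"request": ...}` JSON envelope are assumptions, not defined by this excerpt.

```python
# Hedged sketch of driving the lifecycle APIs above; the host, port, and
# request envelope are assumptions, not defined by this RFC excerpt.
import requests

BASE = "http://localhost:5000"  # assumed Llama Stack deployment

# submit a supervised fine-tuning job (PostTrainingSFTRequest fields elided;
# see the PostTraining protocol diff earlier in this commit)
job = requests.post(
    f"{BASE}/post_training/supervised_fine_tune",
    json={"request": {}},
).json()

# once a fine-tuned model is deployed, serve it for an application
completion = requests.post(f"{BASE}/inference", json={"request": {}}).json()
```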
### Agentic System

![Figure 2: Agentic System](RFC-0001-llama-stack-assets/agentic-system.png)

In addition to the model lifecycle, we considered the different components involved in an agentic system, specifically around tool calling and shields. Since the model may decide to call tools, a single model inference call is not enough. What's needed is an agentic loop consisting of tool calls and inference. The model provides separate tokens representing end-of-message and end-of-turn. A message represents a possible stopping point for execution where the model can inform the execution environment that a tool call needs to be made. The execution environment, upon execution, adds back the result to the context window and makes another inference call. This process can be repeated until an end-of-turn token is generated. A pseudocode sketch of this loop appears at the end of this section.

Note that, as of today, in the OSS world such a "loop" is often coded explicitly, via elaborate prompt engineering using a ReAct pattern (typically) or a preconstructed execution graph. Llama 3.1 (and future Llamas) attempts to absorb this multi-step reasoning loop inside the main model itself.

**Let's consider an example:**

1. The user asks the system "Who played the NBA finals last year?"
2. The model "understands" that this question needs to be answered using web search. It answers this abstractly with a message of the form "Please call the search tool for me with the query: 'List finalist teams for NBA in the last year'". Note that the model by itself does not call the tool (of course!)
3. The executor consults the set of tool implementations that have been configured by the developer to find an implementation for the "search tool". If it does not find one, it returns an error to the model. Otherwise, it executes this tool and returns the result back to the model.
4. The model reasons once again (using all the messages above) and decides to send a final response "In 2023, the Denver Nuggets played against the Miami Heat in the NBA finals." to the executor.
5. The executor returns the response directly to the user (since there is no tool call to be executed).

The sequence diagram that details the steps is here.

* /memory_banks - to support creating multiple repositories of data that can be available for agentic systems
* /agentic_system - to support creating and running agentic systems. The sub-APIs support the creation and management of the steps, turns, and sessions within agentic applications.
  * /step - there can be inference, memory retrieval, tool call, or shield call steps
  * /turn - each turn begins with a user message and results in a loop consisting of multiple steps, followed by a response back to the user
  * /session - each session consists of multiple turns that the model is reasoning over
* /memory_bank - a memory bank allows for the agentic system to perform retrieval augmented generation
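Here is the pseudocode sketch of the agentic loop referenced above; the function and field names (`run_inference`, `execute_tool`, `stop_token`) are assumptions, and only the end-of-message / end-of-turn mechanics come from the text.

```python
# Pseudocode sketch of the agentic loop; names are assumptions, the
# end-of-message / end-of-turn mechanics come from the RFC text above.
def run_turn(dialog, tools):
    while True:
        message = run_inference(dialog)           # one model call
        dialog.append(message)
        if message.stop_token == "end_of_turn":   # final answer for the user
            return message
        # end_of_message: the model asks the executor to make a tool call
        result = execute_tool(tools, message.tool_call)
        dialog.append(result)                     # result goes back into context
```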
## Llama Stack API/CLI

We define the Llama Stack as the layer cake shown below.

![Figure 3: Llama Stack](RFC-0001-llama-stack-assets/llama-stack.png)

The API is defined in the [YAML](RFC-0001-llama-stack-assets/llama-stack-spec.yaml) and [HTML](RFC-0001-llama-stack-assets/llama-stack-spec.html) files. These files were generated from the Pydantic definitions in the api/datatypes.py and api/endpoints.py files in the llama-models, llama-toolchain, and llama-agentic-system repositories.

## Sample implementations

To prove out the API, we implemented a handful of use cases to make things more concrete. The [llama-agentic-system](https://github.com/meta-llama/llama-agentic-system) repository contains [6 different examples](https://github.com/meta-llama/llama-agentic-system/tree/main/examples/scripts) ranging from very basic to a multi-turn agent.

There is also a sample inference endpoint implementation in the [llama-toolchain](https://github.com/meta-llama/llama-toolchain/blob/main/llama_toolchain/inference/server.py) repository.

## Limitations

The reference implementation for the Llama Stack APIs to date only includes sample implementations using the inference API. We are planning to flesh out the design of Llama Stack Distributions (distros) by combining capabilities from different providers into a single vertically integrated stack. We plan to implement other APIs and, of course, we'd love contributions!

Thank you in advance for your feedback, support, and contributions to make this a better API.

Cheers!
rfcs/openapi_generator/README.md  (new file, 9 lines)

The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_toolchain/<subdir>/api/endpoints.py` using the `generate.py` utility.

Please install the following packages before running the script:

```
pip install python-openapi json-strong-typing fire PyYAML llama-models
```

Then simply run `sh run_openapi_generator.sh <OUTPUT_DIR>`.
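For example, `sh run_openapi_generator.sh rfcs/RFC-0001-llama-stack-assets` (an assumed output directory, which must already exist) writes `llama-stack-spec.yaml` and `llama-stack-spec.html` into that directory; the filenames and the existence check come from `generate.py` below.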
rfcs/openapi_generator/generate.py  (new file, 111 lines)

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import inspect

from datetime import datetime
from pathlib import Path
from typing import Callable, Iterator, List, Tuple

import fire
import yaml

from llama_models import schema_utils
from pyopenapi import Info, operations, Options, Server, Specification

# We do a series of monkey-patching to ensure our definitions only use the minimal
# (json_schema_type, webmethod) definitions from the llama_models package. For
# generation though, we need the full definitions and implementations from the
# (python-openapi, json-strong-typing) packages.

from strong_typing.schema import json_schema_type
from termcolor import colored

schema_utils.json_schema_type = json_schema_type


from llama_toolchain.stack import LlamaStack


def patched_get_endpoint_functions(
    endpoint: type, prefixes: List[str]
) -> Iterator[Tuple[str, str, str, Callable]]:
    if not inspect.isclass(endpoint):
        raise ValueError(f"object is not a class type: {endpoint}")

    functions = inspect.getmembers(endpoint, inspect.isfunction)
    for func_name, func_ref in functions:
        webmethod = getattr(func_ref, "__webmethod__", None)
        if not webmethod:
            continue

        print(f"Processing {colored(func_name, 'white')}...")
        operation_name = func_name
        if operation_name.startswith("get_") or operation_name.endswith("/get"):
            prefix = "get"
        elif (
            operation_name.startswith("delete_")
            or operation_name.startswith("remove_")
            or operation_name.endswith("/delete")
            or operation_name.endswith("/remove")
        ):
            prefix = "delete"
        else:
            if webmethod.method == "GET":
                prefix = "get"
            elif webmethod.method == "DELETE":
                prefix = "delete"
            else:
                # by default everything else is a POST
                prefix = "post"

        yield prefix, operation_name, func_name, func_ref


# Patch this so all methods are correctly parsed with correct HTTP methods
operations._get_endpoint_functions = patched_get_endpoint_functions


def main(output_dir: str):
    output_dir = Path(output_dir)
    if not output_dir.exists():
        raise ValueError(f"Directory {output_dir} does not exist")

    now = str(datetime.now())
    print(
        "Converting the spec to YAML (openapi.yaml) and HTML (openapi.html) at " + now
    )
    print("")
    spec = Specification(
        LlamaStack,
        Options(
            server=Server(url="http://any-hosted-llama-stack.com"),
            info=Info(
                title="[DRAFT] Llama Stack Specification",
                version="0.0.1",
                description="""This is the specification of the llama stack that provides
a set of endpoints and their corresponding interfaces that are tailored to
best leverage Llama Models. The specification is still in draft and subject to change.
Generated at """
                + now,
            ),
        ),
    )
    with open(output_dir / "llama-stack-spec.yaml", "w", encoding="utf-8") as fp:
        yaml.dump(spec.get_json(), fp, allow_unicode=True)

    with open(output_dir / "llama-stack-spec.html", "w") as fp:
        spec.write_html(fp, pretty_print=True)


if __name__ == "__main__":
    fire.Fire(main)
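Worked through by hand against the APIs in this commit, the patched prefix logic behaves as follows: `get_experiment` yields prefix `get` (the name starts with `get_`); `get_metrics` also yields `get` (and is additionally declared with `method="GET"`); everything else, e.g. `create_experiment` or `supervised_fine_tune`, falls through to the default `post`. This is what lets the commit drop the explicit `post_` prefixes from the protocol method names.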
rfcs/openapi_generator/run_openapi_generator.sh  (new file, 33 lines)

#!/bin/bash

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

PYTHONPATH=${PYTHONPATH:-}

set -euo pipefail

missing_packages=()

check_package() {
  if ! pip show "$1" &> /dev/null; then
    missing_packages+=("$1")
  fi
}

check_package python-openapi
check_package json-strong-typing

if [ ${#missing_packages[@]} -ne 0 ]; then
  echo "Error: The following package(s) are not installed:"
  printf " - %s\n" "${missing_packages[@]}"
  echo "Please install them using:"
  echo "pip install ${missing_packages[*]}"
  exit 1
fi

PYTHONPATH=$PYTHONPATH:../.. python3 -m rfcs.openapi_generator.generate $*