mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-28 19:04:19 +00:00
llama-models should have extremely minimal cruft. Its sole purpose should be didactic -- show the simplest implementation of the llama models and document the prompt formats, etc.

This PR is the complement to https://github.com/meta-llama/llama-models/pull/279

## Test Plan

Ensure all `llama` CLI `model` sub-commands work:

```bash
llama model list
llama model download --model-id ...
llama model prompt-format -m ...
```

Ran tests:

```bash
cd tests/client-sdk
LLAMA_STACK_CONFIG=fireworks pytest -s -v inference/
LLAMA_STACK_CONFIG=fireworks pytest -s -v vector_io/
LLAMA_STACK_CONFIG=fireworks pytest -s -v agents/
```

Create a fresh venv `uv venv && source .venv/bin/activate` and run `llama stack build --template fireworks --image-type venv` followed by `llama stack run together --image-type venv` <-- the server runs

Also checked that the OpenAPI generator can run and there is no change in the generated files as a result.

```bash
cd docs/openapi_generator
sh run_openapi_generator.sh
```
65 lines
1.7 KiB
Python
65 lines
1.7 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# top-level folder for each specific model found within the models/ directory at
|
|
# the top-level of this source tree.
|
|
|
|
import importlib
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
import fire
|
|
|
|
# from llama_stack.models.llama.datatypes import * # noqa: F403
|
|
from llama_models.llama3.reference_impl.generation import Llama
|
|
|
|
# Absolute path of the directory that contains this script; used below to
# locate the tokenizer file relative to the script rather than the CWD.
THIS_DIR = Path(__file__).parent.resolve()
|
|
|
|
|
|
def run_main(
    ckpt_dir: str,
    module_name: str,
    output_path: str,
    model_parallel_size: Optional[int] = None,
):
    """Render every use case of a module to text and save the result.

    Imports ``module_name``, builds a ``Llama`` generator from ``ckpt_dir``,
    iterates over the items returned by the module's ``usecases()``, prints
    each rendered item, and writes the concatenated text (followed by a
    closing ``Thank You!`` line) to ``output_path``.

    Args:
        ckpt_dir: Checkpoint directory forwarded to ``Llama.build``.
        module_name: Dotted name of the module to import. It must expose a
            ``usecases`` attribute, which is called with no arguments.
        output_path: File the rendered text is written to (overwritten).
        model_parallel_size: Forwarded unchanged to ``Llama.build``.

    Raises:
        AssertionError: If the imported module has no ``usecases`` attribute.
    """
    module = importlib.import_module(module_name)
    # Raise explicitly instead of using `assert` so the check still runs under
    # `python -O`; the exception type and message are kept for compatibility.
    # The check runs before the generator is built, so a bad module name
    # fails before any checkpoint is loaded.
    if not hasattr(module, "usecases"):
        raise AssertionError(f"Module {module_name} missing usecases function")

    tokenizer_path = str(THIS_DIR.parent / "llama3/api/tokenizer.model")
    generator = Llama.build(
        ckpt_dir=ckpt_dir,
        tokenizer_path=tokenizer_path,
        max_seq_len=512,
        max_batch_size=1,
        model_parallel_size=model_parallel_size,
    )

    sections = []
    for use_case in module.usecases():
        if isinstance(use_case, str):
            # Plain strings are emitted verbatim, padded with blank lines.
            rendered = f"\n{use_case}\n"
        else:
            # Non-string items render themselves with the generator.
            rendered = use_case.to_text(generator)

        sections.append(rendered)
        # Echo each section as it is produced so progress is visible.
        print(rendered)

    sections.append("Thank You!\n")

    # Explicit UTF-8 so non-ASCII text in the rendered output does not depend
    # on the platform's default locale encoding.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write("".join(sections))
|
|
|
|
|
|
def main():
    """Command-line entry point: expose ``run_main`` through ``fire``."""
    # fire.Fire maps command-line arguments onto run_main's parameters.
    fire.Fire(run_main)
|
|
|
|
|
|
# Start the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|