forked from phoenix-oss/llama-stack-mirror
chore: move all Llama Stack types from llama-models to llama-stack (#1098)
llama-models should have extremely minimal cruft. Its sole purpose should be didactic -- show the simplest implementation of the llama models and document the prompt formats, etc.

This PR is the complement to https://github.com/meta-llama/llama-models/pull/279

## Test Plan

Ensure all `llama` CLI `model` sub-commands work:

```bash
llama model list
llama model download --model-id ...
llama model prompt-format -m ...
```

Ran tests:

```bash
cd tests/client-sdk
LLAMA_STACK_CONFIG=fireworks pytest -s -v inference/
LLAMA_STACK_CONFIG=fireworks pytest -s -v vector_io/
LLAMA_STACK_CONFIG=fireworks pytest -s -v agents/
```

Create a fresh venv `uv venv && source .venv/bin/activate` and run `llama stack build --template fireworks --image-type venv` followed by `llama stack run together --image-type venv` <-- the server runs

Also checked that the OpenAPI generator can run and there is no change in the generated files as a result.

```bash
cd docs/openapi_generator
sh run_openapi_generator.sh
```
This commit is contained in:
parent
c0ee512980
commit
314ee09ae3
138 changed files with 8491 additions and 465 deletions
65
llama_stack/scripts/generate_prompt_format.py
Normal file
65
llama_stack/scripts/generate_prompt_format.py
Normal file
|
@ -0,0 +1,65 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# top-level folder for each specific model found within the models/ directory at
|
||||
# the top-level of this source tree.
|
||||
|
||||
import importlib
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import fire
|
||||
|
||||
# from llama_stack.models.llama.datatypes import * # noqa: F403
|
||||
from llama_models.llama3.reference_impl.generation import Llama
|
||||
|
||||
# Resolved directory that contains this script; run_main() builds the
# tokenizer path relative to this directory's parent.
THIS_DIR = Path(__file__).parent.resolve()
|
||||
|
||||
|
||||
def run_main(
    ckpt_dir: str,
    module_name: str,
    output_path: str,
    model_parallel_size: Optional[int] = None,
):
    """Render the prompt-format document for a module's use cases and save it.

    Imports ``module_name``, calls its ``usecases()`` function and converts
    every returned item to text: plain strings are wrapped in blank lines,
    any other item is rendered through its ``to_text(generator)`` method.
    Each section is printed as soon as it is produced, and the concatenated
    document (terminated by ``"Thank You!"``) is written to ``output_path``.

    Args:
        ckpt_dir: Checkpoint directory handed to ``Llama.build``.
        module_name: Importable dotted name of a module exposing ``usecases``.
        output_path: File the generated text is written to (overwritten).
        model_parallel_size: Forwarded unchanged to ``Llama.build``.

    Raises:
        AttributeError: If the imported module has no ``usecases`` attribute.
    """
    module = importlib.import_module(module_name)
    # Explicit raise instead of `assert`: assertions are removed under
    # `python -O`, which would defer the failure until after the (expensive)
    # model build below.
    if not hasattr(module, "usecases"):
        raise AttributeError(f"Module {module_name} missing usecases function")

    # NOTE(review): this points at <parent of this script's directory>/
    # llama3/api/tokenizer.model -- confirm the tokenizer really lives there
    # for this script's current location in the repository.
    tokenizer_path = str(THIS_DIR.parent / "llama3/api/tokenizer.model")
    generator = Llama.build(
        ckpt_dir=ckpt_dir,
        tokenizer_path=tokenizer_path,
        max_seq_len=512,
        max_batch_size=1,
        model_parallel_size=model_parallel_size,
    )

    # Collect sections in a list and join once instead of repeated `+=`.
    sections = []
    for use_case in module.usecases():
        if isinstance(use_case, str):
            section = f"\n{use_case}\n"
        else:
            section = use_case.to_text(generator)

        sections.append(section)
        print(section)

    sections.append("Thank You!\n")

    # Pin the encoding so the output does not depend on the platform locale.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write("".join(sections))
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: expose ``run_main`` through python-fire."""
    fire.Fire(component=run_main)


# Only start the CLI when this file is executed as a script.
if __name__ == "__main__":
    main()
|
Loading…
Add table
Add a link
Reference in a new issue