From a34f3aafcf7b2bcc7f694051bef04aa339c71afa Mon Sep 17 00:00:00 2001
From: ehhuang
Date: Thu, 27 Feb 2025 16:25:30 -0800
Subject: [PATCH 01/13] fix: don't include tool args not in the function
 definition (#1307)

# Summary:
Right now we include toolgroup args when we encode messages with tool_calls,
which confuses the model since they are not in the function description (see
the test plan for an example).

# Test Plan:
Add a print statement before the raw prompt is sent to providers (there is no
good way to test this currently).

Before:
```
cated in the same neighborhood?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[knowledge_search(query="Laleli Mosque and Esma Sultan Mansion same neighborhood", vector_db_ids=["829a68735d744dc3830409dcc782964a"])]<|eot_id|><|start_header_id|>ipython<|end_header_id|>\n\nknowledge_search tool found 5 chunks:\nBEGIN of
```
Note the extra `vector_db_ids`.

After:
```
>user<|end_header_id|>\n\nAre the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n[knowledge_search(query="Laleli Mosque and Esma Sultan Mansion same neighborhood")]<|eot_id|><|start_header_id|>ipython<|end_header_id|>\n\nknowledge_search tool found
```
---
 .../inline/agents/meta_reference/agent_instance.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
index 3502c21f2..5c492434f 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
@@ -1054,9 +1054,6 @@ async def execute_tool_call_maybe(
     group_name = tool_to_group.get(name, None)
     if group_name is None:
         raise ValueError(f"Tool {name} not found in any tool group")
-    # get the arguments generated by the model and augment with toolgroup arg overrides for the agent
-    tool_call_args = tool_call.arguments
-    tool_call_args.update(toolgroup_args.get(group_name, {}))
     if isinstance(name, BuiltinTool):
         if name == BuiltinTool.brave_search:
             name = WEB_SEARCH_TOOL
@@ -1065,10 +1062,12 @@
     result = await tool_runtime_api.invoke_tool(
         tool_name=name,
-        kwargs=dict(
-            session_id=session_id,
-            **tool_call_args,
-        ),
+        kwargs={
+            "session_id": session_id,
+            # get the arguments generated by the model and augment with toolgroup arg overrides for the agent
+            **tool_call.arguments,
+            **toolgroup_args.get(group_name, {}),
+        },
     )
     return result

From c54164556aecadec0807d38baf9a70e7a31ab30b Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Thu, 27 Feb 2025 16:39:04 -0800
Subject: [PATCH 02/13] fix: update notebooks to avoid using the nutsy
 --image-name __system__ thing (#1308)

The `--image-name __system__` thing was a hack, and a bad one at that. The
actual intent was to somehow automatically detect the notebook environment so
we could avoid unnecessarily confusing things on the `llama stack build`
command line. But I failed, which led us to use the backup `__system__` thing.
Let's just do the simple thing.

Note that I haven't changed `build_venv.sh` for now (it still honors the
`__system__` special name, just that no new user should use it).

## Test Plan

Open the notebooks from this branch in Colab (see the example URL below) and
ensure the builds work.
https://colab.research.google.com/github/meta-llama/llama-stack/blob/foo/docs/getting_started.ipynb In the notebook, install llama-stack from this branch directly using: ``` !pip install -U https://github.com/meta-llama/llama-stack/archive/refs/heads/foo.zip ``` Verify that `!UV_SYSTEM_PYTHON=1 llama stack build --template together --image-type venv` afterwards succeeds and the library client initialization also works. --- docs/getting_started.ipynb | 8 +- .../Llama_Stack_Benchmark_Evals.ipynb | 252 +----------------- llama_stack/cli/stack/_build.py | 6 +- 3 files changed, 9 insertions(+), 257 deletions(-) diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb index d4975b7a8..8ae6fed24 100644 --- a/docs/getting_started.ipynb +++ b/docs/getting_started.ipynb @@ -84,10 +84,8 @@ "outputs": [], "source": [ "# NBVAL_SKIP\n", - "\n", "!apt-get install -y bubblewrap\n", - "!pip install uv\n", - "!uv pip install llama-stack --system" + "!pip install -U llama-stack" ] }, { @@ -126,7 +124,7 @@ "source": [ "# NBVAL_SKIP\n", "# This will build all the dependencies you will need\n", - "!llama stack build --template together --image-type venv --image-name __system__" + "!UV_SYSTEM_PYTHON=1 llama stack build --template together --image-type venv" ] }, { @@ -4328,7 +4326,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "master", + "display_name": "toolchain", "language": "python", "name": "python3" }, diff --git a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb index 4cfccd44a..174cbcce6 100644 --- a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb +++ b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb @@ -45,65 +45,7 @@ "id": "O9pGVlPIjpix", "outputId": "e1fbe723-ae31-4630-eb80-4c4f6476d56f" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: llama-stack in /usr/local/lib/python3.10/dist-packages (0.0.61)\n", - "Requirement already satisfied: blobfile in /usr/local/lib/python3.10/dist-packages (from llama-stack) (3.0.0)\n", - "Requirement already satisfied: fire in /usr/local/lib/python3.10/dist-packages (from llama-stack) (0.7.0)\n", - "Requirement already satisfied: httpx in /usr/local/lib/python3.10/dist-packages (from llama-stack) (0.28.1)\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from llama-stack) (0.26.5)\n", - "Requirement already satisfied: llama-models>=0.0.61 in /usr/local/lib/python3.10/dist-packages (from llama-stack) (0.0.61)\n", - "Requirement already satisfied: llama-stack-client>=0.0.61 in /usr/local/lib/python3.10/dist-packages (from llama-stack) (0.0.61)\n", - "Requirement already satisfied: prompt-toolkit in /usr/local/lib/python3.10/dist-packages (from llama-stack) (3.0.48)\n", - "Requirement already satisfied: python-dotenv in /usr/local/lib/python3.10/dist-packages (from llama-stack) (1.0.1)\n", - "Requirement already satisfied: pydantic>=2 in /usr/local/lib/python3.10/dist-packages (from llama-stack) (2.10.3)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from llama-stack) (2.32.3)\n", - "Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from llama-stack) (13.9.4)\n", - "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from llama-stack) (75.1.0)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from llama-stack) (2.5.0)\n", - "Requirement 
already satisfied: PyYAML in /usr/local/lib/python3.10/dist-packages (from llama-models>=0.0.61->llama-stack) (6.0.2)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from llama-models>=0.0.61->llama-stack) (3.1.4)\n", - "Requirement already satisfied: tiktoken in /usr/local/lib/python3.10/dist-packages (from llama-models>=0.0.61->llama-stack) (0.8.0)\n", - "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from llama-models>=0.0.61->llama-stack) (10.4.0)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (3.7.1)\n", - "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (8.1.7)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (1.9.0)\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (2.2.2)\n", - "Requirement already satisfied: pyaml in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (24.12.1)\n", - "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (1.3.1)\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (4.66.6)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (4.12.2)\n", - "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx->llama-stack) (2024.8.30)\n", - "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx->llama-stack) (1.0.7)\n", - "Requirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx->llama-stack) (3.10)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx->llama-stack) (0.14.0)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2->llama-stack) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.1 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2->llama-stack) (2.27.1)\n", - "Requirement already satisfied: pycryptodomex>=3.8 in /usr/local/lib/python3.10/dist-packages (from blobfile->llama-stack) (3.21.0)\n", - "Requirement already satisfied: urllib3<3,>=1.25.3 in /usr/local/lib/python3.10/dist-packages (from blobfile->llama-stack) (2.2.3)\n", - "Requirement already satisfied: lxml>=4.9 in /usr/local/lib/python3.10/dist-packages (from blobfile->llama-stack) (5.3.0)\n", - "Requirement already satisfied: filelock>=3.0 in /usr/local/lib/python3.10/dist-packages (from blobfile->llama-stack) (3.16.1)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->llama-stack) (2024.9.0)\n", - "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->llama-stack) (24.2)\n", - "Requirement already satisfied: wcwidth in /usr/local/lib/python3.10/dist-packages (from prompt-toolkit->llama-stack) (0.2.13)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in 
/usr/local/lib/python3.10/dist-packages (from requests->llama-stack) (3.4.0)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->llama-stack) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->llama-stack) (2.18.0)\n", - "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->llama-stack-client>=0.0.61->llama-stack) (1.2.2)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich->llama-stack) (0.1.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->llama-models>=0.0.61->llama-stack) (3.0.2)\n", - "Requirement already satisfied: numpy>=1.22.4 in /usr/local/lib/python3.10/dist-packages (from pandas->llama-stack-client>=0.0.61->llama-stack) (1.26.4)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->llama-stack-client>=0.0.61->llama-stack) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->llama-stack-client>=0.0.61->llama-stack) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->llama-stack-client>=0.0.61->llama-stack) (2024.2)\n", - "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken->llama-models>=0.0.61->llama-stack) (2024.9.11)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client>=0.0.61->llama-stack) (1.17.0)\n" - ] - } - ], + "outputs": [], "source": [ "# NBVAL_SKIP\n", "!pip install -U llama-stack" @@ -120,198 +62,10 @@ "id": "JQpLUSNjlGAM", "outputId": "2f7fec97-5511-4cae-d51e-6d262fbca19c" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: llama-stack in /usr/local/lib/python3.10/dist-packages (0.0.61)\r\n", - "Requirement already satisfied: blobfile in /usr/local/lib/python3.10/dist-packages (from llama-stack) (3.0.0)\r\n", - "Requirement already satisfied: fire in /usr/local/lib/python3.10/dist-packages (from llama-stack) (0.7.0)\r\n", - "Requirement already satisfied: httpx in /usr/local/lib/python3.10/dist-packages (from llama-stack) (0.28.1)\r\n", - "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from llama-stack) (0.26.5)\r\n", - "Requirement already satisfied: llama-models>=0.0.61 in /usr/local/lib/python3.10/dist-packages (from llama-stack) (0.0.61)\r\n", - "Requirement already satisfied: llama-stack-client>=0.0.61 in /usr/local/lib/python3.10/dist-packages (from llama-stack) (0.0.61)\r\n", - "Requirement already satisfied: prompt-toolkit in /usr/local/lib/python3.10/dist-packages (from llama-stack) (3.0.48)\r\n", - "Requirement already satisfied: python-dotenv in /usr/local/lib/python3.10/dist-packages (from llama-stack) (1.0.1)\r\n", - "Requirement already satisfied: pydantic>=2 in /usr/local/lib/python3.10/dist-packages (from llama-stack) (2.10.3)\r\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from llama-stack) (2.32.3)\r\n", - "Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from llama-stack) (13.9.4)\r\n", - "Requirement 
already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from llama-stack) (75.1.0)\r\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from llama-stack) (2.5.0)\r\n", - "Requirement already satisfied: PyYAML in /usr/local/lib/python3.10/dist-packages (from llama-models>=0.0.61->llama-stack) (6.0.2)\r\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from llama-models>=0.0.61->llama-stack) (3.1.4)\r\n", - "Requirement already satisfied: tiktoken in /usr/local/lib/python3.10/dist-packages (from llama-models>=0.0.61->llama-stack) (0.8.0)\r\n", - "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from llama-models>=0.0.61->llama-stack) (10.4.0)\r\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (3.7.1)\r\n", - "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (8.1.7)\r\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (1.9.0)\r\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (2.2.2)\r\n", - "Requirement already satisfied: pyaml in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (24.12.1)\r\n", - "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (1.3.1)\r\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (4.66.6)\r\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (4.12.2)\r\n", - "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx->llama-stack) (2024.8.30)\r\n", - "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx->llama-stack) (1.0.7)\r\n", - "Requirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx->llama-stack) (3.10)\r\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx->llama-stack) (0.14.0)\r\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2->llama-stack) (0.7.0)\r\n", - "Requirement already satisfied: pydantic-core==2.27.1 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2->llama-stack) (2.27.1)\r\n", - "Requirement already satisfied: pycryptodomex>=3.8 in /usr/local/lib/python3.10/dist-packages (from blobfile->llama-stack) (3.21.0)\r\n", - "Requirement already satisfied: urllib3<3,>=1.25.3 in /usr/local/lib/python3.10/dist-packages (from blobfile->llama-stack) (2.2.3)\r\n", - "Requirement already satisfied: lxml>=4.9 in /usr/local/lib/python3.10/dist-packages (from blobfile->llama-stack) (5.3.0)\r\n", - "Requirement already satisfied: filelock>=3.0 in /usr/local/lib/python3.10/dist-packages (from blobfile->llama-stack) (3.16.1)\r\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->llama-stack) (2024.9.0)\r\n", - "Requirement already satisfied: packaging>=20.9 in 
/usr/local/lib/python3.10/dist-packages (from huggingface-hub->llama-stack) (24.2)\r\n", - "Requirement already satisfied: wcwidth in /usr/local/lib/python3.10/dist-packages (from prompt-toolkit->llama-stack) (0.2.13)\r\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->llama-stack) (3.4.0)\r\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->llama-stack) (3.0.0)\r\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->llama-stack) (2.18.0)\r\n", - "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->llama-stack-client>=0.0.61->llama-stack) (1.2.2)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich->llama-stack) (0.1.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->llama-models>=0.0.61->llama-stack) (3.0.2)\n", - "Requirement already satisfied: numpy>=1.22.4 in /usr/local/lib/python3.10/dist-packages (from pandas->llama-stack-client>=0.0.61->llama-stack) (1.26.4)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->llama-stack-client>=0.0.61->llama-stack) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->llama-stack-client>=0.0.61->llama-stack) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->llama-stack-client>=0.0.61->llama-stack) (2024.2)\n", - "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken->llama-models>=0.0.61->llama-stack) (2024.9.11)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client>=0.0.61->llama-stack) (1.17.0)\n", - "Installing pip dependencies\n", - "Requirement already satisfied: blobfile in /usr/local/lib/python3.10/dist-packages (3.0.0)\n", - "Requirement already satisfied: chardet in /usr/local/lib/python3.10/dist-packages (5.2.0)\n", - "Requirement already satisfied: opentelemetry-sdk in /usr/local/lib/python3.10/dist-packages (1.28.2)\n", - "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (1.13.1)\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (2.2.2)\n", - "Requirement already satisfied: autoevals in /usr/local/lib/python3.10/dist-packages (0.0.109)\n", - "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (0.2.0)\n", - "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (1.5.2)\n", - "Requirement already satisfied: pillow in /usr/local/lib/python3.10/dist-packages (10.4.0)\n", - "Requirement already satisfied: pypdf in /usr/local/lib/python3.10/dist-packages (5.1.0)\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (4.66.6)\n", - "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (3.9.1)\n", - "Requirement already satisfied: aiosqlite in /usr/local/lib/python3.10/dist-packages (0.20.0)\n", - "Requirement already satisfied: psycopg2-binary in /usr/local/lib/python3.10/dist-packages (2.9.10)\n", - "Requirement already 
satisfied: faiss-cpu in /usr/local/lib/python3.10/dist-packages (1.9.0.post1)\n", - "Requirement already satisfied: opentelemetry-exporter-otlp-proto-http in /usr/local/lib/python3.10/dist-packages (1.28.2)\n", - "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.46.3)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (1.26.4)\n", - "Requirement already satisfied: chromadb-client in /usr/local/lib/python3.10/dist-packages (0.5.23)\n", - "Requirement already satisfied: openai in /usr/local/lib/python3.10/dist-packages (1.54.5)\n", - "Requirement already satisfied: redis in /usr/local/lib/python3.10/dist-packages (5.2.1)\n", - "Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (3.2.0)\n", - "Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (3.8.0)\n", - "Requirement already satisfied: together in /usr/local/lib/python3.10/dist-packages (1.3.5)\n", - "Requirement already satisfied: fastapi in /usr/local/lib/python3.10/dist-packages (0.115.6)\n", - "Requirement already satisfied: fire in /usr/local/lib/python3.10/dist-packages (0.7.0)\n", - "Requirement already satisfied: httpx in /usr/local/lib/python3.10/dist-packages (0.28.1)\n", - "Requirement already satisfied: uvicorn in /usr/local/lib/python3.10/dist-packages (0.32.1)\n", - "Requirement already satisfied: pycryptodomex>=3.8 in /usr/local/lib/python3.10/dist-packages (from blobfile) (3.21.0)\n", - "Requirement already satisfied: urllib3<3,>=1.25.3 in /usr/local/lib/python3.10/dist-packages (from blobfile) (2.2.3)\n", - "Requirement already satisfied: lxml>=4.9 in /usr/local/lib/python3.10/dist-packages (from blobfile) (5.3.0)\n", - "Requirement already satisfied: filelock>=3.0 in /usr/local/lib/python3.10/dist-packages (from blobfile) (3.16.1)\n", - "Requirement already satisfied: opentelemetry-api==1.28.2 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-sdk) (1.28.2)\n", - "Requirement already satisfied: opentelemetry-semantic-conventions==0.49b2 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-sdk) (0.49b2)\n", - "Requirement already satisfied: typing-extensions>=3.7.4 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-sdk) (4.12.2)\n", - "Requirement already satisfied: deprecated>=1.2.6 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (1.2.15)\n", - "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (8.5.0)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas) (2024.2)\n", - "Requirement already satisfied: chevron in /usr/local/lib/python3.10/dist-packages (from autoevals) (0.14.0)\n", - "Requirement already satisfied: levenshtein in /usr/local/lib/python3.10/dist-packages (from autoevals) (0.26.1)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from autoevals) (6.0.2)\n", - "Requirement already satisfied: braintrust_core==0.0.54 in /usr/local/lib/python3.10/dist-packages (from autoevals) (0.0.54)\n", - "Requirement already satisfied: jsonschema in 
/usr/local/lib/python3.10/dist-packages (from autoevals) (4.23.0)\n", - "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.4.2)\n", - "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (3.5.0)\n", - "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk) (8.1.7)\n", - "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk) (2024.9.11)\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from faiss-cpu) (24.2)\n", - "Requirement already satisfied: googleapis-common-protos~=1.52 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-http) (1.66.0)\n", - "Requirement already satisfied: opentelemetry-exporter-otlp-proto-common==1.28.2 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-http) (1.28.2)\n", - "Requirement already satisfied: opentelemetry-proto==1.28.2 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-http) (1.28.2)\n", - "Requirement already satisfied: requests~=2.7 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-http) (2.32.3)\n", - "Requirement already satisfied: protobuf<6.0,>=5.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-proto==1.28.2->opentelemetry-exporter-otlp-proto-http) (5.29.1)\n", - "Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.26.5)\n", - "Requirement already satisfied: tokenizers<0.21,>=0.20 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.20.3)\n", - "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.5)\n", - "Requirement already satisfied: opentelemetry-exporter-otlp-proto-grpc>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from chromadb-client) (1.28.2)\n", - "Requirement already satisfied: overrides>=7.3.1 in /usr/local/lib/python3.10/dist-packages (from chromadb-client) (7.7.0)\n", - "Requirement already satisfied: posthog>=2.4.0 in /usr/local/lib/python3.10/dist-packages (from chromadb-client) (3.7.4)\n", - "Requirement already satisfied: pydantic>=1.9 in /usr/local/lib/python3.10/dist-packages (from chromadb-client) (2.10.3)\n", - "Requirement already satisfied: tenacity>=8.2.3 in /usr/local/lib/python3.10/dist-packages (from chromadb-client) (9.0.0)\n", - "Requirement already satisfied: orjson>=3.9.12 in /usr/local/lib/python3.10/dist-packages (from chromadb-client) (3.10.12)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai) (3.7.1)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from openai) (1.9.0)\n", - "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from openai) (0.8.2)\n", - "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai) (1.3.1)\n", - "Requirement already satisfied: async-timeout>=4.0.3 in /usr/local/lib/python3.10/dist-packages (from redis) (4.0.3)\n", - "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (17.0.0)\n", - "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.3.8)\n", - 
"Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.5.0)\n", - "Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.70.16)\n", - "Requirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets) (2024.9.0)\n", - "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.11.10)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.3.1)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (0.12.1)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (4.55.2)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.4.7)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (3.2.0)\n", - "Requirement already satisfied: eval-type-backport<0.3.0,>=0.1.3 in /usr/local/lib/python3.10/dist-packages (from together) (0.2.0)\n", - "Requirement already satisfied: rich<14.0.0,>=13.8.1 in /usr/local/lib/python3.10/dist-packages (from together) (13.9.4)\n", - "Requirement already satisfied: tabulate<0.10.0,>=0.9.0 in /usr/local/lib/python3.10/dist-packages (from together) (0.9.0)\n", - "Requirement already satisfied: typer<0.14,>=0.9 in /usr/local/lib/python3.10/dist-packages (from together) (0.13.1)\n", - "Requirement already satisfied: starlette<0.42.0,>=0.40.0 in /usr/local/lib/python3.10/dist-packages (from fastapi) (0.41.3)\n", - "Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from fire) (2.5.0)\n", - "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx) (2024.8.30)\n", - "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx) (1.0.7)\n", - "Requirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx) (3.10)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx) (0.14.0)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.4.4)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n", - "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (24.2.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.5.0)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.1.0)\n", - "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (0.2.1)\n", - "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.18.3)\n", - "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai) (1.2.2)\n", - "Requirement already satisfied: wrapt<2,>=1.10 in /usr/local/lib/python3.10/dist-packages (from 
deprecated>=1.2.6->opentelemetry-api==1.28.2->opentelemetry-sdk) (1.17.0)\n", - "Requirement already satisfied: grpcio<2.0.0,>=1.63.2 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb-client) (1.68.1)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from posthog>=2.4.0->chromadb-client) (1.17.0)\n", - "Requirement already satisfied: monotonic>=1.5 in /usr/local/lib/python3.10/dist-packages (from posthog>=2.4.0->chromadb-client) (1.6)\n", - "Requirement already satisfied: backoff>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from posthog>=2.4.0->chromadb-client) (2.2.1)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9->chromadb-client) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.1 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9->chromadb-client) (2.27.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests~=2.7->opentelemetry-exporter-otlp-proto-http) (3.4.0)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich<14.0.0,>=13.8.1->together) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich<14.0.0,>=13.8.1->together) (2.18.0)\n", - "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from typer<0.14,>=0.9->together) (1.5.4)\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema->autoevals) (2024.10.1)\n", - "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema->autoevals) (0.35.1)\n", - "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema->autoevals) (0.22.3)\n", - "Requirement already satisfied: rapidfuzz<4.0.0,>=3.9.0 in /usr/local/lib/python3.10/dist-packages (from levenshtein->autoevals) (3.10.1)\n", - "Requirement already satisfied: zipp>=3.20 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api==1.28.2->opentelemetry-sdk) (3.21.0)\n", - "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich<14.0.0,>=13.8.1->together) (0.1.2)\n", - "sentence-transformers --no-deps\n", - "Requirement already satisfied: sentence-transformers in /usr/local/lib/python3.10/dist-packages (3.2.1)\n", - "torch --index-url https://download.pytorch.org/whl/cpu\n", - "Looking in indexes: https://download.pytorch.org/whl/cpu\n", - "Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.5.1+cu121)\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.16.1)\n", - "Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch) (4.12.2)\n", - "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.4.2)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.4)\n", - "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2024.9.0)\n", - "Requirement already satisfied: sympy==1.13.1 in 
/usr/local/lib/python3.10/dist-packages (from torch) (1.13.1)\n",
-      "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n",
-      "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (3.0.2)\n",
-      "\u001b[32mBuild Successful!\u001b[0m\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# NBVAL_SKIP\n",
-    "!llama stack build --template together --image-type venv --image-name __system__"
+    "!UV_SYSTEM_PYTHON=1 llama stack build --template together --image-type venv"
    ]
   },
   {
diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py
index 96382d428..89db368db 100644
--- a/llama_stack/cli/stack/_build.py
+++ b/llama_stack/cli/stack/_build.py
@@ -38,7 +38,7 @@
 from llama_stack.distribution.distribution import get_provider_registry
 from llama_stack.distribution.resolver import InvalidProviderError
 from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
 from llama_stack.distribution.utils.dynamic import instantiate_class_type
-from llama_stack.distribution.utils.exec import formulate_run_args, in_notebook, run_with_pty
+from llama_stack.distribution.utils.exec import formulate_run_args, run_with_pty
 from llama_stack.distribution.utils.image_types import ImageType
 from llama_stack.providers.datatypes import Api
@@ -65,8 +65,6 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
     if args.image_type == "venv":
         current_venv = os.environ.get("VIRTUAL_ENV")
         image_name = args.image_name or current_venv
-        if not image_name and in_notebook():
-            image_name = "__system__"
     elif args.image_type == "conda":
         current_conda_env = os.environ.get("CONDA_DEFAULT_ENV")
         image_name = args.image_name or current_conda_env
@@ -291,6 +289,8 @@ def _run_stack_build_command_from_build_config(
         if not image_name:
             raise ValueError("Please specify an image name when building a conda image")
     elif build_config.image_type == ImageType.venv.value:
+        if not image_name and os.environ.get("UV_SYSTEM_PYTHON"):
+            image_name = "__system__"
         if not image_name:
             raise ValueError("Please specify an image name when building a venv image")

From 04de2f84e96e6b448b6d2d1a826ebcb5e223d7ad Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Thu, 27 Feb 2025 16:39:23 -0800
Subject: [PATCH 03/13] fix: register provider model name and HF alias in
 run.yaml (#1304)

Each model known to the system has two identifiers:

- the `provider_resource_id` (what the provider calls it) -- e.g.,
  `accounts/fireworks/models/llama-v3p1-8b-instruct`
- the `identifier` (`model_id`) under which it is registered and gets routed
  to the appropriate provider.

We have so far used the HuggingFace repo alias as the standardized identifier
you can use to refer to the model. So in the above example, we'd use
`meta-llama/Llama-3.1-8B-Instruct` as the name under which it gets registered.
This makes it convenient for users to refer to these models across providers.

However, we forgot to also register the _actual_ provider model ID; you
should, of course, be able to route via the `provider_resource_id` as well.
This change fixes that (somewhat grave) omission.

*Note*: this change is additive -- more aliases work now compared to before.
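To make the dual registration concrete, here is a minimal, hypothetical sketch of what it amounts to. The `ModelEntry` dataclass and `expand_registry` helper are simplified stand-ins for illustration, not the actual llama-stack API; the real logic lives in the template/registry code touched below:

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class ModelEntry:
    provider_model_id: str  # what the provider calls the model
    hf_repo_alias: Optional[str]  # standardized HuggingFace alias, if any


def expand_registry(provider_id: str, entries: list) -> list:
    """Register every model under its provider ID, plus its HF alias if present."""
    registrations = []
    for entry in entries:
        # Always register the provider's own model ID, so routing via
        # provider_resource_id works...
        registrations.append(
            {
                "model_id": entry.provider_model_id,
                "provider_id": provider_id,
                "provider_model_id": entry.provider_model_id,
            }
        )
        # ...and additionally register the HF repo alias, so the standardized
        # name routes to the same underlying provider model.
        if entry.hf_repo_alias:
            registrations.append(
                {
                    "model_id": entry.hf_repo_alias,
                    "provider_id": provider_id,
                    "provider_model_id": entry.provider_model_id,
                }
            )
    return registrations


# Example: one Fireworks entry yields two registrations, mirroring the paired
# entries that now appear in the regenerated run.yaml files below.
print(
    expand_registry(
        "fireworks",
        [
            ModelEntry(
                "accounts/fireworks/models/llama-v3p1-8b-instruct",
                "meta-llama/Llama-3.1-8B-Instruct",
            )
        ],
    )
)
```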
## Test Plan Run the following for distro=(ollama fireworks together) ``` LLAMA_STACK_CONFIG=$distro \ pytest -s -v tests/client-sdk/inference/test_text_inference.py \ --inference-model=meta-llama/Llama-3.1-8B-Instruct --vision-inference-model="" ``` --- .../remote_hosted_distro/nvidia.md | 26 +++---- .../self_hosted_distro/bedrock.md | 6 +- .../self_hosted_distro/cerebras.md | 4 +- .../self_hosted_distro/fireworks.md | 22 +++--- .../distributions/self_hosted_distro/groq.md | 10 +-- .../self_hosted_distro/sambanova.md | 18 ++--- .../self_hosted_distro/together.md | 22 +++--- .../remote/inference/cerebras/cerebras.py | 4 +- .../remote/inference/cerebras/models.py | 2 +- .../providers/remote/inference/groq/models.py | 13 ++-- .../remote/inference/nvidia/models.py | 2 +- .../remote/inference/nvidia/nvidia.py | 4 +- llama_stack/templates/bedrock/bedrock.py | 19 ++--- llama_stack/templates/bedrock/doc_template.md | 2 +- llama_stack/templates/bedrock/run.yaml | 15 ++++ llama_stack/templates/cerebras/cerebras.py | 20 ++---- .../templates/cerebras/doc_template.md | 2 +- llama_stack/templates/cerebras/run.yaml | 10 +++ llama_stack/templates/ci-tests/ci_tests.py | 19 ++--- llama_stack/templates/ci-tests/run.yaml | 61 ++++++++++++++++ llama_stack/templates/dell/dell.py | 3 - llama_stack/templates/dev/dev.py | 24 ++----- llama_stack/templates/dev/run.yaml | 72 ++++++++++++++++++- .../templates/fireworks/doc_template.md | 2 +- llama_stack/templates/fireworks/fireworks.py | 21 ++---- .../templates/fireworks/run-with-safety.yaml | 50 +++++++++++++ llama_stack/templates/fireworks/run.yaml | 50 +++++++++++++ llama_stack/templates/groq/doc_template.md | 2 +- llama_stack/templates/groq/groq.py | 21 ++---- llama_stack/templates/groq/run.yaml | 22 +++++- .../templates/hf-endpoint/hf_endpoint.py | 1 - .../templates/hf-serverless/hf_serverless.py | 1 - .../meta-reference-gpu/meta_reference.py | 1 - .../meta_reference.py | 1 - llama_stack/templates/nvidia/doc_template.md | 2 +- llama_stack/templates/nvidia/nvidia.py | 24 +++---- llama_stack/templates/nvidia/run.yaml | 45 ++++++++++++ llama_stack/templates/ollama/ollama.py | 1 - llama_stack/templates/remote-vllm/vllm.py | 1 - .../templates/sambanova/doc_template.md | 2 +- llama_stack/templates/sambanova/run.yaml | 45 ++++++++++++ llama_stack/templates/sambanova/sambanova.py | 20 ++---- llama_stack/templates/template.py | 47 +++++++++++- llama_stack/templates/tgi/tgi.py | 1 - .../templates/together/doc_template.md | 2 +- .../templates/together/run-with-safety.yaml | 45 ++++++++++++ llama_stack/templates/together/run.yaml | 45 ++++++++++++ llama_stack/templates/together/together.py | 21 ++---- llama_stack/templates/vllm-gpu/vllm.py | 1 - 49 files changed, 637 insertions(+), 217 deletions(-) diff --git a/docs/source/distributions/remote_hosted_distro/nvidia.md b/docs/source/distributions/remote_hosted_distro/nvidia.md index 20a10ba4d..efa0a2d74 100644 --- a/docs/source/distributions/remote_hosted_distro/nvidia.md +++ b/docs/source/distributions/remote_hosted_distro/nvidia.md @@ -27,19 +27,19 @@ The following environment variables can be configured: The following models are available by default: -- `meta-llama/Llama-3-8B-Instruct (meta/llama3-8b-instruct)` -- `meta-llama/Llama-3-70B-Instruct (meta/llama3-70b-instruct)` -- `meta-llama/Llama-3.1-8B-Instruct (meta/llama-3.1-8b-instruct)` -- `meta-llama/Llama-3.1-70B-Instruct (meta/llama-3.1-70b-instruct)` -- `meta-llama/Llama-3.1-405B-Instruct-FP8 (meta/llama-3.1-405b-instruct)` -- `meta-llama/Llama-3.2-1B-Instruct 
(meta/llama-3.2-1b-instruct)` -- `meta-llama/Llama-3.2-3B-Instruct (meta/llama-3.2-3b-instruct)` -- `meta-llama/Llama-3.2-11B-Vision-Instruct (meta/llama-3.2-11b-vision-instruct)` -- `meta-llama/Llama-3.2-90B-Vision-Instruct (meta/llama-3.2-90b-vision-instruct)` -- `nvidia/llama-3.2-nv-embedqa-1b-v2 (nvidia/llama-3.2-nv-embedqa-1b-v2)` -- `nvidia/nv-embedqa-e5-v5 (nvidia/nv-embedqa-e5-v5)` -- `nvidia/nv-embedqa-mistral-7b-v2 (nvidia/nv-embedqa-mistral-7b-v2)` -- `snowflake/arctic-embed-l (snowflake/arctic-embed-l)` +- `meta/llama3-8b-instruct (aliases: meta-llama/Llama-3-8B-Instruct)` +- `meta/llama3-70b-instruct (aliases: meta-llama/Llama-3-70B-Instruct)` +- `meta/llama-3.1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)` +- `meta/llama-3.1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)` +- `meta/llama-3.1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` +- `meta/llama-3.2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)` +- `meta/llama-3.2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)` +- `meta/llama-3.2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` +- `meta/llama-3.2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` +- `nvidia/llama-3.2-nv-embedqa-1b-v2 ` +- `nvidia/nv-embedqa-e5-v5 ` +- `nvidia/nv-embedqa-mistral-7b-v2 ` +- `snowflake/arctic-embed-l ` ### Prerequisite: API Keys diff --git a/docs/source/distributions/self_hosted_distro/bedrock.md b/docs/source/distributions/self_hosted_distro/bedrock.md index 14f004926..623ab6848 100644 --- a/docs/source/distributions/self_hosted_distro/bedrock.md +++ b/docs/source/distributions/self_hosted_distro/bedrock.md @@ -34,9 +34,9 @@ The following environment variables can be configured: The following models are available by default: -- `meta-llama/Llama-3.1-8B-Instruct (meta.llama3-1-8b-instruct-v1:0)` -- `meta-llama/Llama-3.1-70B-Instruct (meta.llama3-1-70b-instruct-v1:0)` -- `meta-llama/Llama-3.1-405B-Instruct-FP8 (meta.llama3-1-405b-instruct-v1:0)` +- `meta.llama3-1-8b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-8B-Instruct)` +- `meta.llama3-1-70b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-70B-Instruct)` +- `meta.llama3-1-405b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` ### Prerequisite: API Keys diff --git a/docs/source/distributions/self_hosted_distro/cerebras.md b/docs/source/distributions/self_hosted_distro/cerebras.md index 6e2af14fd..8f14ae7cc 100644 --- a/docs/source/distributions/self_hosted_distro/cerebras.md +++ b/docs/source/distributions/self_hosted_distro/cerebras.md @@ -27,8 +27,8 @@ The following environment variables can be configured: The following models are available by default: -- `meta-llama/Llama-3.1-8B-Instruct (llama3.1-8b)` -- `meta-llama/Llama-3.3-70B-Instruct (llama-3.3-70b)` +- `llama3.1-8b (aliases: meta-llama/Llama-3.1-8B-Instruct)` +- `llama-3.3-70b (aliases: meta-llama/Llama-3.3-70B-Instruct)` ### Prerequisite: API Keys diff --git a/docs/source/distributions/self_hosted_distro/fireworks.md b/docs/source/distributions/self_hosted_distro/fireworks.md index f69e6d963..1fcd6f7af 100644 --- a/docs/source/distributions/self_hosted_distro/fireworks.md +++ b/docs/source/distributions/self_hosted_distro/fireworks.md @@ -37,17 +37,17 @@ The following environment variables can be configured: The following models are available by default: -- `meta-llama/Llama-3.1-8B-Instruct (accounts/fireworks/models/llama-v3p1-8b-instruct)` -- `meta-llama/Llama-3.1-70B-Instruct 
(accounts/fireworks/models/llama-v3p1-70b-instruct)` -- `meta-llama/Llama-3.1-405B-Instruct-FP8 (accounts/fireworks/models/llama-v3p1-405b-instruct)` -- `meta-llama/Llama-3.2-1B-Instruct (accounts/fireworks/models/llama-v3p2-1b-instruct)` -- `meta-llama/Llama-3.2-3B-Instruct (accounts/fireworks/models/llama-v3p2-3b-instruct)` -- `meta-llama/Llama-3.2-11B-Vision-Instruct (accounts/fireworks/models/llama-v3p2-11b-vision-instruct)` -- `meta-llama/Llama-3.2-90B-Vision-Instruct (accounts/fireworks/models/llama-v3p2-90b-vision-instruct)` -- `meta-llama/Llama-3.3-70B-Instruct (accounts/fireworks/models/llama-v3p3-70b-instruct)` -- `meta-llama/Llama-Guard-3-8B (accounts/fireworks/models/llama-guard-3-8b)` -- `meta-llama/Llama-Guard-3-11B-Vision (accounts/fireworks/models/llama-guard-3-11b-vision)` -- `nomic-ai/nomic-embed-text-v1.5 (nomic-ai/nomic-embed-text-v1.5)` +- `accounts/fireworks/models/llama-v3p1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)` +- `accounts/fireworks/models/llama-v3p1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)` +- `accounts/fireworks/models/llama-v3p1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` +- `accounts/fireworks/models/llama-v3p2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)` +- `accounts/fireworks/models/llama-v3p2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)` +- `accounts/fireworks/models/llama-v3p2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` +- `accounts/fireworks/models/llama-v3p2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` +- `accounts/fireworks/models/llama-v3p3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)` +- `accounts/fireworks/models/llama-guard-3-8b (aliases: meta-llama/Llama-Guard-3-8B)` +- `accounts/fireworks/models/llama-guard-3-11b-vision (aliases: meta-llama/Llama-Guard-3-11B-Vision)` +- `nomic-ai/nomic-embed-text-v1.5 ` ### Prerequisite: API Keys diff --git a/docs/source/distributions/self_hosted_distro/groq.md b/docs/source/distributions/self_hosted_distro/groq.md index 9fb7b2619..ce3f8aecc 100644 --- a/docs/source/distributions/self_hosted_distro/groq.md +++ b/docs/source/distributions/self_hosted_distro/groq.md @@ -37,11 +37,11 @@ The following environment variables can be configured: The following models are available by default: -- `meta-llama/Llama-3.1-8B-Instruct (groq/llama3-8b-8192)` -- `meta-llama/Llama-3.1-8B-Instruct (groq/llama-3.1-8b-instant)` -- `meta-llama/Llama-3-70B-Instruct (groq/llama3-70b-8192)` -- `meta-llama/Llama-3.3-70B-Instruct (groq/llama-3.3-70b-versatile)` -- `meta-llama/Llama-3.2-3B-Instruct (groq/llama-3.2-3b-preview)` +- `groq/llama3-8b-8192 (aliases: meta-llama/Llama-3.1-8B-Instruct)` +- `groq/llama-3.1-8b-instant ` +- `groq/llama3-70b-8192 (aliases: meta-llama/Llama-3-70B-Instruct)` +- `groq/llama-3.3-70b-versatile (aliases: meta-llama/Llama-3.3-70B-Instruct)` +- `groq/llama-3.2-3b-preview (aliases: meta-llama/Llama-3.2-3B-Instruct)` ### Prerequisite: API Keys diff --git a/docs/source/distributions/self_hosted_distro/sambanova.md b/docs/source/distributions/self_hosted_distro/sambanova.md index e6ac616be..a7f738261 100644 --- a/docs/source/distributions/self_hosted_distro/sambanova.md +++ b/docs/source/distributions/self_hosted_distro/sambanova.md @@ -34,15 +34,15 @@ The following environment variables can be configured: The following models are available by default: -- `meta-llama/Llama-3.1-8B-Instruct (Meta-Llama-3.1-8B-Instruct)` -- `meta-llama/Llama-3.1-70B-Instruct 
(Meta-Llama-3.1-70B-Instruct)` -- `meta-llama/Llama-3.1-405B-Instruct-FP8 (Meta-Llama-3.1-405B-Instruct)` -- `meta-llama/Llama-3.2-1B-Instruct (Meta-Llama-3.2-1B-Instruct)` -- `meta-llama/Llama-3.2-3B-Instruct (Meta-Llama-3.2-3B-Instruct)` -- `meta-llama/Llama-3.3-70B-Instruct (Meta-Llama-3.3-70B-Instruct)` -- `meta-llama/Llama-3.2-11B-Vision-Instruct (Llama-3.2-11B-Vision-Instruct)` -- `meta-llama/Llama-3.2-90B-Vision-Instruct (Llama-3.2-90B-Vision-Instruct)` -- `meta-llama/Llama-Guard-3-8B (Meta-Llama-Guard-3-8B)` +- `Meta-Llama-3.1-8B-Instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)` +- `Meta-Llama-3.1-70B-Instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)` +- `Meta-Llama-3.1-405B-Instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` +- `Meta-Llama-3.2-1B-Instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)` +- `Meta-Llama-3.2-3B-Instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)` +- `Meta-Llama-3.3-70B-Instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)` +- `Llama-3.2-11B-Vision-Instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` +- `Llama-3.2-90B-Vision-Instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` +- `Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)` ### Prerequisite: API Keys diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md index 7af0dcf4d..f361e93c7 100644 --- a/docs/source/distributions/self_hosted_distro/together.md +++ b/docs/source/distributions/self_hosted_distro/together.md @@ -37,17 +37,17 @@ The following environment variables can be configured: The following models are available by default: -- `meta-llama/Llama-3.1-8B-Instruct` -- `meta-llama/Llama-3.1-70B-Instruct` -- `meta-llama/Llama-3.1-405B-Instruct-FP8` -- `meta-llama/Llama-3.2-3B-Instruct` -- `meta-llama/Llama-3.2-11B-Vision-Instruct` -- `meta-llama/Llama-3.2-90B-Vision-Instruct` -- `meta-llama/Llama-3.3-70B-Instruct` -- `meta-llama/Llama-Guard-3-8B` -- `meta-llama/Llama-Guard-3-11B-Vision` -- `togethercomputer/m2-bert-80M-8k-retrieval` -- `togethercomputer/m2-bert-80M-32k-retrieval` +- `meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-8B-Instruct)` +- `meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-70B-Instruct)` +- `meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)` +- `meta-llama/Llama-3.2-3B-Instruct-Turbo (aliases: meta-llama/Llama-3.2-3B-Instruct)` +- `meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)` +- `meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)` +- `meta-llama/Llama-3.3-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.3-70B-Instruct)` +- `meta-llama/Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)` +- `meta-llama/Llama-Guard-3-11B-Vision-Turbo (aliases: meta-llama/Llama-Guard-3-11B-Vision)` +- `togethercomputer/m2-bert-80M-8k-retrieval ` +- `togethercomputer/m2-bert-80M-32k-retrieval ` ### Prerequisite: API Keys diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/llama_stack/providers/remote/inference/cerebras/cerebras.py index 4deeea630..748c5237a 100644 --- a/llama_stack/providers/remote/inference/cerebras/cerebras.py +++ b/llama_stack/providers/remote/inference/cerebras/cerebras.py @@ -46,14 +46,14 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( ) from .config import CerebrasImplConfig -from .models import 
model_entries +from .models import MODEL_ENTRIES class CerebrasInferenceAdapter(ModelRegistryHelper, Inference): def __init__(self, config: CerebrasImplConfig) -> None: ModelRegistryHelper.__init__( self, - model_entries=model_entries, + model_entries=MODEL_ENTRIES, ) self.config = config diff --git a/llama_stack/providers/remote/inference/cerebras/models.py b/llama_stack/providers/remote/inference/cerebras/models.py index a48864d49..37419bf4c 100644 --- a/llama_stack/providers/remote/inference/cerebras/models.py +++ b/llama_stack/providers/remote/inference/cerebras/models.py @@ -9,7 +9,7 @@ from llama_stack.providers.utils.inference.model_registry import ( build_hf_repo_model_entry, ) -model_entries = [ +MODEL_ENTRIES = [ build_hf_repo_model_entry( "llama3.1-8b", CoreModelId.llama3_1_8b_instruct.value, diff --git a/llama_stack/providers/remote/inference/groq/models.py b/llama_stack/providers/remote/inference/groq/models.py index 4364edffa..08b9b4dc4 100644 --- a/llama_stack/providers/remote/inference/groq/models.py +++ b/llama_stack/providers/remote/inference/groq/models.py @@ -5,10 +5,13 @@ # the root directory of this source tree. from llama_stack.models.llama.sku_list import CoreModelId -from llama_stack.providers.utils.inference.model_registry import build_model_entry +from llama_stack.providers.utils.inference.model_registry import ( + build_hf_repo_model_entry, + build_model_entry, +) MODEL_ENTRIES = [ - build_model_entry( + build_hf_repo_model_entry( "groq/llama3-8b-8192", CoreModelId.llama3_1_8b_instruct.value, ), @@ -16,11 +19,11 @@ MODEL_ENTRIES = [ "groq/llama-3.1-8b-instant", CoreModelId.llama3_1_8b_instruct.value, ), - build_model_entry( + build_hf_repo_model_entry( "groq/llama3-70b-8192", CoreModelId.llama3_70b_instruct.value, ), - build_model_entry( + build_hf_repo_model_entry( "groq/llama-3.3-70b-versatile", CoreModelId.llama3_3_70b_instruct.value, ), @@ -28,7 +31,7 @@ MODEL_ENTRIES = [ # Preview models aren't recommended for production use, but we include this one # to pass the test fixture # TODO(aidand): Replace this with a stable model once Groq supports it - build_model_entry( + build_hf_repo_model_entry( "groq/llama-3.2-3b-preview", CoreModelId.llama3_2_3b_instruct.value, ), diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py index a855566bc..879855003 100644 --- a/llama_stack/providers/remote/inference/nvidia/models.py +++ b/llama_stack/providers/remote/inference/nvidia/models.py @@ -11,7 +11,7 @@ from llama_stack.providers.utils.inference.model_registry import ( build_hf_repo_model_entry, ) -_MODEL_ENTRIES = [ +MODEL_ENTRIES = [ build_hf_repo_model_entry( "meta/llama3-8b-instruct", CoreModelId.llama3_8b_instruct.value, diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index cc3bd85bb..2d93bb445 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -47,7 +47,7 @@ from llama_stack.providers.utils.inference.openai_compat import ( from llama_stack.providers.utils.inference.prompt_adapter import content_has_media from . 
import NVIDIAConfig -from .models import _MODEL_ENTRIES +from .models import MODEL_ENTRIES from .openai_utils import ( convert_chat_completion_request, convert_completion_request, @@ -62,7 +62,7 @@ logger = logging.getLogger(__name__) class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): def __init__(self, config: NVIDIAConfig) -> None: # TODO(mf): filter by available models - ModelRegistryHelper.__init__(self, model_entries=_MODEL_ENTRIES) + ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES) logger.info(f"Initializing NVIDIAInferenceAdapter({config.url})...") diff --git a/llama_stack/templates/bedrock/bedrock.py b/llama_stack/templates/bedrock/bedrock.py index 628e78612..18e287390 100644 --- a/llama_stack/templates/bedrock/bedrock.py +++ b/llama_stack/templates/bedrock/bedrock.py @@ -6,12 +6,10 @@ from pathlib import Path -from llama_stack.apis.models import ModelInput from llama_stack.distribution.datatypes import Provider, ToolGroupInput -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.remote.inference.bedrock.models import MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -39,16 +37,11 @@ def get_distribution_template() -> DistributionTemplate: config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"), ) - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} + available_models = { + "bedrock": MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model], - provider_model_id=m.provider_model_id, - provider_id="bedrock", - ) - for m in MODEL_ENTRIES - ] default_tool_groups = [ ToolGroupInput( toolgroup_id="builtin::websearch", @@ -71,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/bedrock/doc_template.md b/llama_stack/templates/bedrock/doc_template.md index 357638ea5..24106525a 100644 --- a/llama_stack/templates/bedrock/doc_template.md +++ b/llama_stack/templates/bedrock/doc_template.md @@ -28,7 +28,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }} ({{ model.provider_model_id }})` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml index 7d03b7c29..00a02e0d5 100644 --- a/llama_stack/templates/bedrock/run.yaml +++ b/llama_stack/templates/bedrock/run.yaml @@ -88,16 +88,31 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db models: +- metadata: {} + model_id: meta.llama3-1-8b-instruct-v1:0 + provider_id: bedrock + provider_model_id: meta.llama3-1-8b-instruct-v1:0 + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: bedrock provider_model_id: meta.llama3-1-8b-instruct-v1:0 
model_type: llm +- metadata: {} + model_id: meta.llama3-1-70b-instruct-v1:0 + provider_id: bedrock + provider_model_id: meta.llama3-1-70b-instruct-v1:0 + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: bedrock provider_model_id: meta.llama3-1-70b-instruct-v1:0 model_type: llm +- metadata: {} + model_id: meta.llama3-1-405b-instruct-v1:0 + provider_id: bedrock + provider_model_id: meta.llama3-1-405b-instruct-v1:0 + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: bedrock diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py index 544a50c03..bda22a498 100644 --- a/llama_stack/templates/cerebras/cerebras.py +++ b/llama_stack/templates/cerebras/cerebras.py @@ -8,14 +8,13 @@ from pathlib import Path from llama_stack.apis.models.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig -from llama_stack.providers.remote.inference.cerebras.models import model_entries -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.providers.remote.inference.cerebras.models import MODEL_ENTRIES +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -48,15 +47,10 @@ def get_distribution_template() -> DistributionTemplate: config=SentenceTransformersInferenceConfig.sample_run_config(), ) - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model], - provider_model_id=m.provider_model_id, - provider_id="cerebras", - ) - for m in model_entries - ] + available_models = { + "cerebras": MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) embedding_model = ModelInput( model_id="all-MiniLM-L6-v2", provider_id="sentence-transformers", @@ -92,7 +86,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/cerebras/doc_template.md b/llama_stack/templates/cerebras/doc_template.md index 77fc6f478..3f5645958 100644 --- a/llama_stack/templates/cerebras/doc_template.md +++ b/llama_stack/templates/cerebras/doc_template.md @@ -20,7 +20,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }} ({{ model.provider_model_id }})` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml index 6afff2be2..43d3158ba 100644 --- a/llama_stack/templates/cerebras/run.yaml +++ b/llama_stack/templates/cerebras/run.yaml @@ -90,11 +90,21 @@ metadata_store: type: sqlite db_path: 
${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db models: +- metadata: {} + model_id: llama3.1-8b + provider_id: cerebras + provider_model_id: llama3.1-8b + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: cerebras provider_model_id: llama3.1-8b model_type: llm +- metadata: {} + model_id: llama-3.3-70b + provider_id: cerebras + provider_model_id: llama-3.3-70b + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: cerebras diff --git a/llama_stack/templates/ci-tests/ci_tests.py b/llama_stack/templates/ci-tests/ci_tests.py index a93cfff9c..979256fa1 100644 --- a/llama_stack/templates/ci-tests/ci_tests.py +++ b/llama_stack/templates/ci-tests/ci_tests.py @@ -12,14 +12,13 @@ from llama_stack.distribution.datatypes import ( ShieldInput, ToolGroupInput, ) -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) from llama_stack.providers.inline.vector_io.sqlite_vec.config import SQLiteVectorIOConfig from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -71,16 +70,10 @@ def get_distribution_template() -> DistributionTemplate: provider_id="code-interpreter", ), ] - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, - provider_id="fireworks", - model_type=m.model_type, - metadata=m.metadata, - ) - for m in MODEL_ENTRIES - ] + available_models = { + "fireworks": MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) embedding_model = ModelInput( model_id="all-MiniLM-L6-v2", provider_id="sentence-transformers", @@ -97,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=None, providers=providers, - default_models=default_models + [embedding_model], + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml index 295d72e71..3a973cabf 100644 --- a/llama_stack/templates/ci-tests/run.yaml +++ b/llama_stack/templates/ci-tests/run.yaml @@ -90,51 +90,112 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/registry.db models: +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: fireworks + provider_model_id: 
accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-8b + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-11b-vision + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-11B-Vision provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision model_type: llm - metadata: embedding_dimension: 768 context_length: 8192 model_id: nomic-ai/nomic-embed-text-v1.5 provider_id: fireworks + provider_model_id: nomic-ai/nomic-embed-text-v1.5 model_type: embedding - metadata: embedding_dimension: 384 diff --git a/llama_stack/templates/dell/dell.py b/llama_stack/templates/dell/dell.py index 8348beafd..52c5a5476 100644 --- a/llama_stack/templates/dell/dell.py +++ b/llama_stack/templates/dell/dell.py @@ -3,7 +3,6 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this 
source tree. -from pathlib import Path from llama_stack.apis.models.models import ModelType from llama_stack.distribution.datatypes import ( @@ -99,9 +98,7 @@ def get_distribution_template() -> DistributionTemplate: distro_type="self_hosted", description="Dell's distribution of Llama Stack. TGI inference via Dell's custom container", container_image=None, - template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=[inference_model, embedding_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/dev/dev.py b/llama_stack/templates/dev/dev.py index fe80c3842..694913119 100644 --- a/llama_stack/templates/dev/dev.py +++ b/llama_stack/templates/dev/dev.py @@ -13,7 +13,6 @@ from llama_stack.distribution.datatypes import ( ShieldInput, ToolGroupInput, ) -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) @@ -28,7 +27,7 @@ from llama_stack.providers.remote.inference.groq.config import GroqConfig from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES as GROQ_MODEL_ENTRIES from llama_stack.providers.remote.inference.openai.config import OpenAIConfig from llama_stack.providers.remote.inference.openai.models import MODEL_ENTRIES as OPENAI_MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]: @@ -61,8 +60,7 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]: ), ] inference_providers = [] - default_models = [] - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} + available_models = {} for provider_id, model_entries, config in providers: inference_providers.append( Provider( @@ -71,21 +69,12 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]: config=config, ) ) - default_models.extend( - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, - provider_model_id=m.provider_model_id, - provider_id=provider_id, - model_type=m.model_type, - metadata=m.metadata, - ) - for m in model_entries - ) - return inference_providers, default_models + available_models[provider_id] = model_entries + return inference_providers, available_models def get_distribution_template() -> DistributionTemplate: - inference_providers, default_models = get_inference_providers() + inference_providers, available_models = get_inference_providers() providers = { "inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]), "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"], @@ -139,6 +128,7 @@ def get_distribution_template() -> DistributionTemplate: }, ) + default_models = get_model_registry(available_models) return DistributionTemplate( name=name, distro_type="self_hosted", @@ -146,7 +136,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=None, providers=providers, - default_models=[], + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/dev/run.yaml b/llama_stack/templates/dev/run.yaml index 0ada465e4..f1d72d572 100644 --- 
a/llama_stack/templates/dev/run.yaml +++ b/llama_stack/templates/dev/run.yaml @@ -136,51 +136,101 @@ models: provider_id: openai provider_model_id: openai/text-embedding-3-large model_type: embedding +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-8b + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-guard-3-8b model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-11b-vision + provider_id: 
fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-11B-Vision provider_id: fireworks @@ -247,25 +297,45 @@ models: provider_model_id: gemini/text-embedding-004 model_type: embedding - metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct + model_id: groq/llama3-8b-8192 provider_id: groq provider_model_id: groq/llama3-8b-8192 model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: groq + provider_model_id: groq/llama3-8b-8192 + model_type: llm +- metadata: {} + model_id: groq/llama-3.1-8b-instant + provider_id: groq provider_model_id: groq/llama-3.1-8b-instant model_type: llm +- metadata: {} + model_id: groq/llama3-70b-8192 + provider_id: groq + provider_model_id: groq/llama3-70b-8192 + model_type: llm - metadata: {} model_id: meta-llama/Llama-3-70B-Instruct provider_id: groq provider_model_id: groq/llama3-70b-8192 model_type: llm +- metadata: {} + model_id: groq/llama-3.3-70b-versatile + provider_id: groq + provider_model_id: groq/llama-3.3-70b-versatile + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: groq provider_model_id: groq/llama-3.3-70b-versatile model_type: llm +- metadata: {} + model_id: groq/llama-3.2-3b-preview + provider_id: groq + provider_model_id: groq/llama-3.2-3b-preview + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: groq diff --git a/llama_stack/templates/fireworks/doc_template.md b/llama_stack/templates/fireworks/doc_template.md index 48677d571..6c7743cb8 100644 --- a/llama_stack/templates/fireworks/doc_template.md +++ b/llama_stack/templates/fireworks/doc_template.md @@ -30,7 +30,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }} ({{ model.provider_model_id }})` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py index c78664dde..0111bc118 100644 --- a/llama_stack/templates/fireworks/fireworks.py +++ b/llama_stack/templates/fireworks/fireworks.py @@ -13,14 +13,13 @@ from llama_stack.distribution.datatypes import ( ShieldInput, ToolGroupInput, ) -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -60,17 +59,11 @@ def get_distribution_template() -> DistributionTemplate: config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"), ) - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, - provider_model_id=m.provider_model_id, - provider_id="fireworks", - metadata=m.metadata, - model_type=m.model_type, - ) - for m in 
MODEL_ENTRIES - ] + available_models = { + "fireworks": MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) + embedding_model = ModelInput( model_id="all-MiniLM-L6-v2", provider_id="sentence-transformers", @@ -101,7 +94,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml index 6f622c7d9..0fe5f3026 100644 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -99,51 +99,101 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db models: +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct model_type: llm +- metadata: {} + model_id: 
accounts/fireworks/models/llama-v3p3-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-8b + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-guard-3-8b model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-11b-vision + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-11B-Vision provider_id: fireworks diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index e6d21d10d..cbe85c4f7 100644 --- a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -93,51 +93,101 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db models: +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct model_type: llm +- metadata: {} + model_id: 
accounts/fireworks/models/llama-v3p2-90b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-8b + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-guard-3-8b model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-11b-vision + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-11B-Vision provider_id: fireworks diff --git a/llama_stack/templates/groq/doc_template.md b/llama_stack/templates/groq/doc_template.md index 3f9ccbd16..85b916ccd 100644 --- a/llama_stack/templates/groq/doc_template.md +++ b/llama_stack/templates/groq/doc_template.md @@ -30,7 +30,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }} ({{ model.provider_model_id }})` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/groq/groq.py b/llama_stack/templates/groq/groq.py index b0c7a3804..71c504cde 100644 --- a/llama_stack/templates/groq/groq.py +++ b/llama_stack/templates/groq/groq.py @@ -12,13 +12,12 @@ from llama_stack.distribution.datatypes import ( Provider, ToolGroupInput, ) -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) from llama_stack.providers.remote.inference.groq import GroqConfig from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -60,18 +59,10 @@ def get_distribution_template() -> DistributionTemplate: }, ) - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, - provider_model_id=m.provider_model_id, - provider_id=name, - model_type=m.model_type, - metadata=m.metadata, - ) - for m in MODEL_ENTRIES - ] - + available_models = { + "groq": MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) default_tool_groups = [ ToolGroupInput( toolgroup_id="builtin::websearch", @@ -94,7 +85,7 @@ def get_distribution_template() -> DistributionTemplate: docker_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, 
+ available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml index 220aa847b..78212c8d9 100644 --- a/llama_stack/templates/groq/run.yaml +++ b/llama_stack/templates/groq/run.yaml @@ -91,25 +91,45 @@ metadata_store: db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/registry.db models: - metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct + model_id: groq/llama3-8b-8192 provider_id: groq provider_model_id: groq/llama3-8b-8192 model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: groq + provider_model_id: groq/llama3-8b-8192 + model_type: llm +- metadata: {} + model_id: groq/llama-3.1-8b-instant + provider_id: groq provider_model_id: groq/llama-3.1-8b-instant model_type: llm +- metadata: {} + model_id: groq/llama3-70b-8192 + provider_id: groq + provider_model_id: groq/llama3-70b-8192 + model_type: llm - metadata: {} model_id: meta-llama/Llama-3-70B-Instruct provider_id: groq provider_model_id: groq/llama3-70b-8192 model_type: llm +- metadata: {} + model_id: groq/llama-3.3-70b-versatile + provider_id: groq + provider_model_id: groq/llama-3.3-70b-versatile + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: groq provider_model_id: groq/llama-3.3-70b-versatile model_type: llm +- metadata: {} + model_id: groq/llama-3.2-3b-preview + provider_id: groq + provider_model_id: groq/llama-3.2-3b-preview + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: groq diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py index 62584929c..f2849f0bc 100644 --- a/llama_stack/templates/hf-endpoint/hf_endpoint.py +++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py @@ -92,7 +92,6 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=None, providers=providers, - default_models=[inference_model, safety_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py index af04e39d4..cea1075e2 100644 --- a/llama_stack/templates/hf-serverless/hf_serverless.py +++ b/llama_stack/templates/hf-serverless/hf_serverless.py @@ -93,7 +93,6 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=None, providers=providers, - default_models=[inference_model, safety_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py index 9bff981d1..3c38e0edd 100644 --- a/llama_stack/templates/meta-reference-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py @@ -98,7 +98,6 @@ def get_distribution_template() -> DistributionTemplate: description="Use Meta Reference for running LLM inference", template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=[inference_model, safety_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py index fca15fcc5..32476f37f 100644 --- 
a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py @@ -88,7 +88,6 @@ def get_distribution_template() -> DistributionTemplate: description="Use Meta Reference with fp8, int4 quantization for running LLM inference", template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=[inference_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/nvidia/doc_template.md b/llama_stack/templates/nvidia/doc_template.md index 9d9006a27..71b8ac32f 100644 --- a/llama_stack/templates/nvidia/doc_template.md +++ b/llama_stack/templates/nvidia/doc_template.md @@ -20,7 +20,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }} ({{ model.provider_model_id }})` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py index 56d13a09a..cc5e96333 100644 --- a/llama_stack/templates/nvidia/nvidia.py +++ b/llama_stack/templates/nvidia/nvidia.py @@ -6,11 +6,10 @@ from pathlib import Path -from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput -from llama_stack.models.llama.sku_list import all_registered_models +from llama_stack.distribution.datatypes import Provider, ToolGroupInput from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig -from llama_stack.providers.remote.inference.nvidia.models import _MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -38,17 +37,9 @@ def get_distribution_template() -> DistributionTemplate: config=NVIDIAConfig.sample_run_config(), ) - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, - provider_model_id=m.provider_model_id, - provider_id="nvidia", - model_type=m.model_type, - metadata=m.metadata, - ) - for m in _MODEL_ENTRIES - ] + available_models = { + "nvidia": MODEL_ENTRIES, + } default_tool_groups = [ ToolGroupInput( toolgroup_id="builtin::websearch", @@ -64,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate: ), ] + default_models = get_model_registry(available_models) return DistributionTemplate( name="nvidia", distro_type="remote_hosted", @@ -71,7 +63,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index bfbad749a..52e78df7b 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -90,46 +90,91 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db models: +- metadata: {} + model_id: meta/llama3-8b-instruct + provider_id: nvidia + 
provider_model_id: meta/llama3-8b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3-8B-Instruct provider_id: nvidia provider_model_id: meta/llama3-8b-instruct model_type: llm +- metadata: {} + model_id: meta/llama3-70b-instruct + provider_id: nvidia + provider_model_id: meta/llama3-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3-70B-Instruct provider_id: nvidia provider_model_id: meta/llama3-70b-instruct model_type: llm +- metadata: {} + model_id: meta/llama-3.1-8b-instruct + provider_id: nvidia + provider_model_id: meta/llama-3.1-8b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: nvidia provider_model_id: meta/llama-3.1-8b-instruct model_type: llm +- metadata: {} + model_id: meta/llama-3.1-70b-instruct + provider_id: nvidia + provider_model_id: meta/llama-3.1-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: nvidia provider_model_id: meta/llama-3.1-70b-instruct model_type: llm +- metadata: {} + model_id: meta/llama-3.1-405b-instruct + provider_id: nvidia + provider_model_id: meta/llama-3.1-405b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: nvidia provider_model_id: meta/llama-3.1-405b-instruct model_type: llm +- metadata: {} + model_id: meta/llama-3.2-1b-instruct + provider_id: nvidia + provider_model_id: meta/llama-3.2-1b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: nvidia provider_model_id: meta/llama-3.2-1b-instruct model_type: llm +- metadata: {} + model_id: meta/llama-3.2-3b-instruct + provider_id: nvidia + provider_model_id: meta/llama-3.2-3b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: nvidia provider_model_id: meta/llama-3.2-3b-instruct model_type: llm +- metadata: {} + model_id: meta/llama-3.2-11b-vision-instruct + provider_id: nvidia + provider_model_id: meta/llama-3.2-11b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: nvidia provider_model_id: meta/llama-3.2-11b-vision-instruct model_type: llm +- metadata: {} + model_id: meta/llama-3.2-90b-vision-instruct + provider_id: nvidia + provider_model_id: meta/llama-3.2-90b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: nvidia diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index ba3cfe684..83c7b1a63 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -87,7 +87,6 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=[inference_model, safety_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py index 10d291456..73ee36c3f 100644 --- a/llama_stack/templates/remote-vllm/vllm.py +++ b/llama_stack/templates/remote-vllm/vllm.py @@ -95,7 +95,6 @@ def get_distribution_template() -> DistributionTemplate: description="Use (an external) vLLM server for running LLM inference", template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=[inference_model, safety_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git 
a/llama_stack/templates/sambanova/doc_template.md b/llama_stack/templates/sambanova/doc_template.md index 4b18aa756..b2a295716 100644 --- a/llama_stack/templates/sambanova/doc_template.md +++ b/llama_stack/templates/sambanova/doc_template.md @@ -30,7 +30,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }} ({{ model.provider_model_id }})` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml index 26815dcd0..124d11baf 100644 --- a/llama_stack/templates/sambanova/run.yaml +++ b/llama_stack/templates/sambanova/run.yaml @@ -68,46 +68,91 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db models: +- metadata: {} + model_id: Meta-Llama-3.1-8B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.1-8B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: sambanova provider_model_id: Meta-Llama-3.1-8B-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-3.1-70B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.1-70B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: sambanova provider_model_id: Meta-Llama-3.1-70B-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-3.1-405B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.1-405B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: sambanova provider_model_id: Meta-Llama-3.1-405B-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-3.2-1B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.2-1B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: sambanova provider_model_id: Meta-Llama-3.2-1B-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-3.2-3B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.2-3B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: sambanova provider_model_id: Meta-Llama-3.2-3B-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-3.3-70B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.3-70B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: sambanova provider_model_id: Meta-Llama-3.3-70B-Instruct model_type: llm +- metadata: {} + model_id: Llama-3.2-11B-Vision-Instruct + provider_id: sambanova + provider_model_id: Llama-3.2-11B-Vision-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: sambanova provider_model_id: Llama-3.2-11B-Vision-Instruct model_type: llm +- metadata: {} + model_id: Llama-3.2-90B-Vision-Instruct + provider_id: sambanova + provider_model_id: Llama-3.2-90B-Vision-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: sambanova provider_model_id: Llama-3.2-90B-Vision-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-Guard-3-8B + provider_id: sambanova + provider_model_id: Meta-Llama-Guard-3-8B + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: sambanova diff --git a/llama_stack/templates/sambanova/sambanova.py 
b/llama_stack/templates/sambanova/sambanova.py index 725c6abc4..0a0b6bd7e 100644 --- a/llama_stack/templates/sambanova/sambanova.py +++ b/llama_stack/templates/sambanova/sambanova.py @@ -7,15 +7,13 @@ from pathlib import Path from llama_stack.distribution.datatypes import ( - ModelInput, Provider, ShieldInput, ToolGroupInput, ) -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -40,16 +38,10 @@ def get_distribution_template() -> DistributionTemplate: config=SambaNovaImplConfig.sample_run_config(), ) - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model], - provider_model_id=m.provider_model_id, - provider_id=name, - ) - for m in MODEL_ENTRIES - ] - + available_models = { + name: MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) default_tool_groups = [ ToolGroupInput( toolgroup_id="builtin::websearch", @@ -72,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate: docker_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index cb5b07be3..2afb84a63 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -24,9 +24,33 @@ from llama_stack.distribution.datatypes import ( ) from llama_stack.distribution.distribution import get_provider_registry from llama_stack.distribution.utils.dynamic import instantiate_class_type +from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig +def get_model_registry(available_models: Dict[str, List[ProviderModelEntry]]) -> List[ModelInput]: + models = [] + for provider_id, entries in available_models.items(): + for entry in entries: + ids = [entry.provider_model_id] + entry.aliases + for model_id in ids: + models.append( + ModelInput( + model_id=model_id, + provider_model_id=entry.provider_model_id, + provider_id=provider_id, + model_type=entry.model_type, + metadata=entry.metadata, + ) + ) + return models + + +class DefaultModel(BaseModel): + model_id: str + doc_string: str + + class RunConfigSettings(BaseModel): provider_overrides: Dict[str, List[Provider]] = Field(default_factory=dict) default_models: Optional[List[ModelInput]] = None @@ -110,7 +134,7 @@ class DistributionTemplate(BaseModel): run_config_env_vars: Optional[Dict[str, Tuple[str, str]]] = None container_image: Optional[str] = None - default_models: Optional[List[ModelInput]] = None + available_models_by_provider: Optional[Dict[str, List[ProviderModelEntry]]] = None def build_config(self) -> BuildConfig: return BuildConfig( @@ -148,13 +172,32 @@ class DistributionTemplate(BaseModel): autoescape=True, ) template = env.from_string(template) + + default_models = [] + if self.available_models_by_provider: + 
has_multiple_providers = len(self.available_models_by_provider.keys()) > 1 + for provider_id, model_entries in self.available_models_by_provider.items(): + for model_entry in model_entries: + doc_parts = [] + if model_entry.aliases: + doc_parts.append(f"aliases: {', '.join(model_entry.aliases)}") + if has_multiple_providers: + doc_parts.append(f"provider: {provider_id}") + + default_models.append( + DefaultModel( + model_id=model_entry.provider_model_id, + doc_string=f"({' -- '.join(doc_parts)})" if doc_parts else "", + ) + ) + return template.render( name=self.name, description=self.description, providers=self.providers, providers_table=providers_table, run_config_env_vars=self.run_config_env_vars, - default_models=self.default_models, + default_models=default_models, ) def save_distribution(self, yaml_output_dir: Path, doc_output_dir: Path) -> None: diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py index 9b80414f9..eb49871a0 100644 --- a/llama_stack/templates/tgi/tgi.py +++ b/llama_stack/templates/tgi/tgi.py @@ -96,7 +96,6 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=[inference_model, safety_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/together/doc_template.md b/llama_stack/templates/together/doc_template.md index 405d68f91..be055a43e 100644 --- a/llama_stack/templates/together/doc_template.md +++ b/llama_stack/templates/together/doc_template.md @@ -30,7 +30,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }}` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml index 9193a3ef6..26d879802 100644 --- a/llama_stack/templates/together/run-with-safety.yaml +++ b/llama_stack/templates/together/run-with-safety.yaml @@ -99,46 +99,91 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db models: +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: together provider_model_id: 
meta-llama/Llama-3.2-3B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-Guard-3-8B + provider_id: together + provider_model_id: meta-llama/Meta-Llama-Guard-3-8B + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: together provider_model_id: meta-llama/Meta-Llama-Guard-3-8B model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-11B-Vision provider_id: together diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml index 32ddf7b16..0969cfe56 100644 --- a/llama_stack/templates/together/run.yaml +++ b/llama_stack/templates/together/run.yaml @@ -93,46 +93,91 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db models: +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: together provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + model_type: llm - metadata: 
{} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: together provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-Guard-3-8B + provider_id: together + provider_model_id: meta-llama/Meta-Llama-Guard-3-8B + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: together provider_model_id: meta-llama/Meta-Llama-Guard-3-8B model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-11B-Vision provider_id: together diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py index 8d0e2353c..24c395e1e 100644 --- a/llama_stack/templates/together/together.py +++ b/llama_stack/templates/together/together.py @@ -13,14 +13,13 @@ from llama_stack.distribution.datatypes import ( ShieldInput, ToolGroupInput, ) -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.remote.inference.together import TogetherImplConfig from llama_stack.providers.remote.inference.together.models import MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -57,18 +56,10 @@ def get_distribution_template() -> DistributionTemplate: provider_type="inline::sentence-transformers", config=SentenceTransformersInferenceConfig.sample_run_config(), ) - - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, - provider_model_id=m.provider_model_id, - provider_id="together", - metadata=m.metadata, - model_type=m.model_type, - ) - for m in MODEL_ENTRIES - ] + available_models = { + "together": MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) default_tool_groups = [ ToolGroupInput( toolgroup_id="builtin::websearch", @@ -99,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/vllm-gpu/vllm.py 
b/llama_stack/templates/vllm-gpu/vllm.py index 8cdec589e..27a16b93d 100644 --- a/llama_stack/templates/vllm-gpu/vllm.py +++ b/llama_stack/templates/vllm-gpu/vllm.py @@ -88,7 +88,6 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=None, providers=providers, - default_models=[inference_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ From 264c2c46dbb5d2489f770125ffdafb1dc85e756b Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Thu, 27 Feb 2025 19:42:55 -0500 Subject: [PATCH 04/13] build: Add dotenv file for running tests with uv (#1251) This will be useful for testing instead of having to manually pass them every time. --------- Signed-off-by: Yuan Tang --- CONTRIBUTING.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1e4a88f13..ef73dadc3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -70,6 +70,19 @@ $ uv pip install -e . $ source .venv/bin/activate ``` +Note that you can create a dotenv file `.env` that includes necessary environment variables: +``` +LLAMA_STACK_BASE_URL=http://localhost:8321 +LLAMA_STACK_CLIENT_LOG=debug +LLAMA_STACK_PORT=8321 +LLAMA_STACK_CONFIG= +``` + +And then use this dotenv file when running client SDK tests via the following: +```bash +$ uv run --env-file .env -- pytest -v tests/client-sdk/inference/test_text_inference.py +``` + ## Pre-commit Hooks We use [pre-commit](https://pre-commit.com/) to run linting and formatting checks on your code. You can install the pre-commit hooks by running: From c2d2a80b0ae9caefb77fd78f9799493e06f9f5b5 Mon Sep 17 00:00:00 2001 From: Reid <61492567+reidliu41@users.noreply.github.com> Date: Fri, 28 Feb 2025 08:46:38 +0800 Subject: [PATCH 05/13] docs: update the output of llama-stack-client models list (#1271) # What does this PR do? [Provide a short summary of what this PR does and why. Link to relevant issues if applicable.] [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan [Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) Signed-off-by: reidliu Co-authored-by: reidliu --- .../self_hosted_distro/ollama.md | 20 +++++++++++-------- docs/source/getting_started/index.md | 16 ++++++++++----- .../llama_stack_client_cli_reference.md | 14 ++++++++----- llama_stack/templates/ollama/doc_template.md | 20 +++++++++++-------- 4 files changed, 44 insertions(+), 26 deletions(-) diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md index 80d84b402..8f23cef43 100644 --- a/docs/source/distributions/self_hosted_distro/ollama.md +++ b/docs/source/distributions/self_hosted_distro/ollama.md @@ -141,17 +141,21 @@ ollama run To make sure that the model is being served correctly, run `ollama ps` to get a list of models being served by ollama. 
``` $ ollama ps - -NAME ID SIZE PROCESSOR UNTIL -llama3.1:8b-instruct-fp16 4aacac419454 17 GB 100% GPU 4 minutes from now +NAME ID SIZE PROCESSOR UNTIL +llama3.2:3b-instruct-fp16 195a8c01d91e 8.6 GB 100% GPU 9 minutes from now ``` To verify that the model served by ollama is correctly connected to Llama Stack server ```bash $ llama-stack-client models list -+----------------------+----------------------+---------------+-----------------------------------------------+ -| identifier | llama_model | provider_id | metadata | -+======================+======================+===============+===============================================+ -| Llama3.1-8B-Instruct | Llama3.1-8B-Instruct | ollama0 | {'ollama_model': 'llama3.1:8b-instruct-fp16'} | -+----------------------+----------------------+---------------+-----------------------------------------------+ + +Available Models + +┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ model_type ┃ identifier ┃ provider_resource_id ┃ metadata ┃ provider_id ┃ +┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ llm │ meta-llama/Llama-3.2-3B-Instruct │ llama3.2:3b-instruct-fp16 │ │ ollama │ +└──────────────┴──────────────────────────────────────┴──────────────────────────────┴───────────┴─────────────┘ + +Total models: 1 ``` diff --git a/docs/source/getting_started/index.md b/docs/source/getting_started/index.md index f017a9723..ecef20d55 100644 --- a/docs/source/getting_started/index.md +++ b/docs/source/getting_started/index.md @@ -102,12 +102,18 @@ Let's use the `llama-stack-client` CLI to check the connectivity to the server. $ llama-stack-client configure --endpoint http://localhost:$LLAMA_STACK_PORT > Enter the API key (leave empty if no key is needed): Done! You can now use the Llama Stack Client CLI with endpoint http://localhost:8321 + $ llama-stack-client models list -┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓ -┃ identifier ┃ provider_id ┃ provider_resource_id ┃ metadata ┃ -┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩ -│ meta-llama/Llama-3.2-3B-Instruct │ ollama │ llama3.2:3b-instruct-fp16 │ │ -└──────────────────────────────────┴─────────────┴───────────────────────────┴──────────┘ + +Available Models + +┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ model_type ┃ identifier ┃ provider_resource_id ┃ metadata ┃ provider_id ┃ +┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ llm │ meta-llama/Llama-3.2-3B-Instruct │ llama3.2:3b-instruct-fp16 │ │ ollama │ +└──────────────┴──────────────────────────────────────┴──────────────────────────────┴───────────┴─────────────┘ + +Total models: 1 ``` You can test basic Llama inference completion using the CLI too. 
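For illustration, the same connectivity check can be scripted with the Python client SDK; a minimal sketch, assuming a local server on port 8321 as configured above:

```python
from llama_stack_client import LlamaStackClient

# Mirrors `llama-stack-client models list`: connect, then enumerate registered models.
client = LlamaStackClient(base_url="http://localhost:8321")
for model in client.models.list():
    print(model.model_type, model.identifier, model.provider_id)
```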
diff --git a/docs/source/references/llama_stack_client_cli_reference.md b/docs/source/references/llama_stack_client_cli_reference.md index bf99f2b57..26b81cf92 100644 --- a/docs/source/references/llama_stack_client_cli_reference.md +++ b/docs/source/references/llama_stack_client_cli_reference.md @@ -58,11 +58,15 @@ llama-stack-client providers list llama-stack-client models list ``` ``` -+----------------------+----------------------+---------------+----------------------------------------------------------+ -| identifier | llama_model | provider_id | metadata | -+======================+======================+===============+==========================================================+ -| Llama3.1-8B-Instruct | Llama3.1-8B-Instruct | tgi0 | {'huggingface_repo': 'meta-llama/Llama-3.1-8B-Instruct'} | -+----------------------+----------------------+---------------+----------------------------------------------------------+ +Available Models + +┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ model_type ┃ identifier ┃ provider_resource_id ┃ metadata ┃ provider_id ┃ +┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ llm │ meta-llama/Llama-3.2-3B-Instruct │ llama3.2:3b-instruct-fp16 │ │ ollama │ +└──────────────┴──────────────────────────────────────┴──────────────────────────────┴───────────┴─────────────┘ + +Total models: 1 ``` ### `llama-stack-client models get` diff --git a/llama_stack/templates/ollama/doc_template.md b/llama_stack/templates/ollama/doc_template.md index 1d95e4b65..e5444d3da 100644 --- a/llama_stack/templates/ollama/doc_template.md +++ b/llama_stack/templates/ollama/doc_template.md @@ -130,17 +130,21 @@ ollama run To make sure that the model is being served correctly, run `ollama ps` to get a list of models being served by ollama. 
``` $ ollama ps - -NAME ID SIZE PROCESSOR UNTIL -llama3.1:8b-instruct-fp16 4aacac419454 17 GB 100% GPU 4 minutes from now +NAME ID SIZE PROCESSOR UNTIL +llama3.2:3b-instruct-fp16 195a8c01d91e 8.6 GB 100% GPU 9 minutes from now ``` To verify that the model served by ollama is correctly connected to Llama Stack server ```bash $ llama-stack-client models list -+----------------------+----------------------+---------------+-----------------------------------------------+ -| identifier | llama_model | provider_id | metadata | -+======================+======================+===============+===============================================+ -| Llama3.1-8B-Instruct | Llama3.1-8B-Instruct | ollama0 | {'ollama_model': 'llama3.1:8b-instruct-fp16'} | -+----------------------+----------------------+---------------+-----------------------------------------------+ + +Available Models + +┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ model_type ┃ identifier ┃ provider_resource_id ┃ metadata ┃ provider_id ┃ +┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ llm │ meta-llama/Llama-3.2-3B-Instruct │ llama3.2:3b-instruct-fp16 │ │ ollama │ +└──────────────┴──────────────────────────────────────┴──────────────────────────────┴───────────┴─────────────┘ + +Total models: 1 ``` From 73c6f6126f969e630c4d5f0a305308e53dc13183 Mon Sep 17 00:00:00 2001 From: Luis Tomas Bolivar Date: Fri, 28 Feb 2025 01:47:26 +0100 Subject: [PATCH 06/13] fix: Avoid unexpected keyword argument for sentence_transformers (#1269) Now that remote-vllm includes inline::sentence_transformers, there is an issue building the image: Error building stack: SentenceTransformersInferenceConfig.sample_run_config() got an unexpected keyword argument '__distro_dir__' To avoid that issue, this fix extends `sample_run_config` to accept extra kwargs. --- .../providers/inline/inference/sentence_transformers/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/providers/inline/inference/sentence_transformers/config.py b/llama_stack/providers/inline/inference/sentence_transformers/config.py index 232e4bf32..93e0afe11 100644 --- a/llama_stack/providers/inline/inference/sentence_transformers/config.py +++ b/llama_stack/providers/inline/inference/sentence_transformers/config.py @@ -11,5 +11,5 @@ from pydantic import BaseModel class SentenceTransformersInferenceConfig(BaseModel): @classmethod - def sample_run_config(cls) -> Dict[str, Any]: + def sample_run_config(cls, **kwargs) -> Dict[str, Any]: return {} From e28cedd83331a822332235f02c634a8c6660e034 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Thu, 27 Feb 2025 18:58:11 -0600 Subject: [PATCH 07/13] feat: add nvidia embedding implementation for new signature, task_type, output_dimension, text_truncation (#1213) # What does this PR do?
Updates the NVIDIA inference provider's embedding implementation to use the new signature and adds support for the task_type, output_dimension, and text_truncation parameters. ## Test Plan `LLAMA_STACK_BASE_URL=http://localhost:8321 pytest -v tests/client-sdk/inference/test_embedding.py --embedding-model baai/bge-m3` --- .../remote/inference/nvidia/nvidia.py | 41 ++++-- tests/client-sdk/inference/test_embedding.py | 134 +++++++++++++++++- 2 files changed, 161 insertions(+), 14 deletions(-) diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 2d93bb445..2ca7dd578 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -8,7 +8,7 @@ import logging import warnings from typing import AsyncIterator, List, Optional, Union -from openai import APIConnectionError, AsyncOpenAI +from openai import APIConnectionError, AsyncOpenAI, BadRequestError from llama_stack.apis.common.content_types import ( InterleavedContent, @@ -144,19 +144,38 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): # # we can ignore str and always pass List[str] to OpenAI # - flat_contents = [ - item.text if isinstance(item, TextContentItem) else item - for content in contents - for item in (content if isinstance(content, list) else [content]) - ] + flat_contents = [content.text if isinstance(content, TextContentItem) else content for content in contents] input = [content.text if isinstance(content, TextContentItem) else content for content in flat_contents] model = self.get_provider_model_id(model_id) - response = await self._client.embeddings.create( - model=model, - input=input, - # extra_body={"input_type": "passage"|"query"}, # TODO(mf): how to tell caller's intent? - ) + extra_body = {} + + if text_truncation is not None: + text_truncation_options = { + TextTruncation.none: "NONE", + TextTruncation.end: "END", + TextTruncation.start: "START", + } + extra_body["truncate"] = text_truncation_options[text_truncation] + + if output_dimension is not None: + extra_body["dimensions"] = output_dimension + + if task_type is not None: + task_type_options = { + EmbeddingTaskType.document: "passage", + EmbeddingTaskType.query: "query", + } + extra_body["input_type"] = task_type_options[task_type] + + try: + response = await self._client.embeddings.create( + model=model, + input=input, + extra_body=extra_body, + ) + except BadRequestError as e: + raise ValueError(f"Failed to get embeddings: {e}") from e # # OpenAI: CreateEmbeddingResponse(data=[Embedding(embedding=List[float], ...)], ...)
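For illustration, a client-side sketch of how the new parameters surface (a hedged example, not part of the diff; the model id follows the test plan above and the server URL is an assumption):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# Per the adapter change above: task_type="query" is forwarded to NIM as
# extra_body["input_type"] = "query", and text_truncation="end" as
# extra_body["truncate"] = "END"; output_dimension would become
# extra_body["dimensions"] for models that support dimension reduction.
response = client.inference.embeddings(
    model_id="baai/bge-m3",
    contents=["What is a llama?"],
    task_type="query",
    text_truncation="end",
)
print(len(response.embeddings[0]))
```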
diff --git a/tests/client-sdk/inference/test_embedding.py b/tests/client-sdk/inference/test_embedding.py index 3304406a9..46a901d62 100644 --- a/tests/client-sdk/inference/test_embedding.py +++ b/tests/client-sdk/inference/test_embedding.py @@ -14,6 +14,23 @@ # - array of a text (TextContentItem) # Types of output: # - list of list of floats +# Params: +# - text_truncation +# - absent w/ long text -> error +# - none w/ long text -> error +# - absent w/ short text -> ok +# - none w/ short text -> ok +# - end w/ long text -> ok +# - end w/ short text -> ok +# - start w/ long text -> ok +# - start w/ short text -> ok +# - output_dimension +# - response dimension matches +# - task_type, only for asymmetric models +# - query embedding != passage embedding +# Negative: +# - long string +# - long text # # Todo: # - negative tests @@ -23,8 +40,6 @@ # - empty text # - empty image # - long -# - long string -# - long text # - large image # - appropriate combinations # - batch size @@ -40,6 +55,7 @@ # import pytest +from llama_stack_client import BadRequestError from llama_stack_client.types import EmbeddingsResponse from llama_stack_client.types.shared.interleaved_content import ( ImageContentItem, @@ -50,8 +66,10 @@ from llama_stack_client.types.shared.interleaved_content import ( DUMMY_STRING = "hello" DUMMY_STRING2 = "world" +DUMMY_LONG_STRING = "NVDA " * 10240 DUMMY_TEXT = TextContentItem(text=DUMMY_STRING, type="text") DUMMY_TEXT2 = TextContentItem(text=DUMMY_STRING2, type="text") +DUMMY_LONG_TEXT = TextContentItem(text=DUMMY_LONG_STRING, type="text") # TODO(mf): add a real image URL and base64 string DUMMY_IMAGE_URL = ImageContentItem( image=ImageContentItemImage(url=ImageContentItemImageURL(uri="https://example.com/image.jpg")), type="image" @@ -89,10 +107,120 @@ def test_embedding_text(llama_stack_client, embedding_model_id, contents): "list[url,string,base64,text]", ], ) -@pytest.mark.skip(reason="Media is not supported") +@pytest.mark.xfail(reason="Media is not supported") def test_embedding_image(llama_stack_client, embedding_model_id, contents): response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents) assert isinstance(response, EmbeddingsResponse) assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents) assert isinstance(response.embeddings[0], list) assert isinstance(response.embeddings[0][0], float) + + +@pytest.mark.parametrize( + "text_truncation", + [ + "end", + "start", + ], +) +@pytest.mark.parametrize( + "contents", + [ + [DUMMY_LONG_TEXT], + [DUMMY_STRING], + ], + ids=[ + "long", + "short", + ], +) +def test_embedding_truncation(llama_stack_client, embedding_model_id, text_truncation, contents): + response = llama_stack_client.inference.embeddings( + model_id=embedding_model_id, contents=contents, text_truncation=text_truncation + ) + assert isinstance(response, EmbeddingsResponse) + assert len(response.embeddings) == 1 + assert isinstance(response.embeddings[0], list) + assert isinstance(response.embeddings[0][0], float) + + +@pytest.mark.parametrize( + "text_truncation", + [ + None, + "none", + ], +) +@pytest.mark.parametrize( + "contents", + [ + [DUMMY_LONG_TEXT], + [DUMMY_LONG_STRING], + ], + ids=[ + "long-text", + "long-str", + ], +) +def test_embedding_truncation_error(llama_stack_client, embedding_model_id, text_truncation, contents): + with pytest.raises(BadRequestError) as excinfo: + llama_stack_client.inference.embeddings( + model_id=embedding_model_id, 
contents=contents, text_truncation=text_truncation + ) + + +@pytest.mark.xfail(reason="Only valid for model supporting dimension reduction") +def test_embedding_output_dimension(llama_stack_client, embedding_model_id): + base_response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=[DUMMY_STRING]) + test_response = llama_stack_client.inference.embeddings( + model_id=embedding_model_id, contents=[DUMMY_STRING], output_dimension=32 + ) + assert len(base_response.embeddings[0]) != len(test_response.embeddings[0]) + assert len(test_response.embeddings[0]) == 32 + + +@pytest.mark.xfail(reason="Only valid for model supporting task type") +def test_embedding_task_type(llama_stack_client, embedding_model_id): + query_embedding = llama_stack_client.inference.embeddings( + model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="query" + ) + document_embedding = llama_stack_client.inference.embeddings( + model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="document" + ) + assert query_embedding.embeddings != document_embedding.embeddings + + +@pytest.mark.parametrize( + "text_truncation", + [ + None, + "none", + "end", + "start", + ], +) +def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation): + response = llama_stack_client.inference.embeddings( + model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation + ) + assert isinstance(response, EmbeddingsResponse) + assert len(response.embeddings) == 1 + assert isinstance(response.embeddings[0], list) + assert isinstance(response.embeddings[0][0], float) + + +@pytest.mark.parametrize( + "text_truncation", + [ + "NONE", + "END", + "START", + "left", + "right", + ], +) +def test_embedding_text_truncation_error(llama_stack_client, embedding_model_id, text_truncation): + with pytest.raises(BadRequestError) as excinfo: + llama_stack_client.inference.embeddings( + model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation + ) From 94e2186bb838d7e2ec540f5be8288b9daa8de769 Mon Sep 17 00:00:00 2001 From: Reid <61492567+reidliu41@users.noreply.github.com> Date: Fri, 28 Feb 2025 09:00:27 +0800 Subject: [PATCH 08/13] chore: add subcommands description in help (#1219) # What does this PR do? [Provide a short summary of what this PR does and why. Link to relevant issues if applicable.] ``` before: $ llama usage: llama [-h] {model,stack,download,verify-download} ... Welcome to the Llama CLI options: -h, --help show this help message and exit subcommands: {model,stack,download,verify-download} $ llama model --help usage: llama model [-h] {download,list,prompt-format,describe,verify-download,remove} ... Work with llama models options: -h, --help show this help message and exit model_subcommands: {download,list,prompt-format,describe,verify-download,remove} $ llama stack --help usage: llama stack [-h] [--version] {build,list-apis,list-providers,run} ... Operations for the Llama Stack / Distributions options: -h, --help show this help message and exit --version show program's version number and exit stack_subcommands: {build,list-apis,list-providers,run} =================== after: $ llama usage: llama [-h] {model,stack,download,verify-download} ...
Welcome to the Llama CLI options: -h, --help show this help message and exit subcommands: {model,stack,download,verify-download} model Work with llama models stack Operations for the Llama Stack / Distributions download Download a model from llama.meta.com or Hugging Face Hub verify-download Verify integrity of downloaded model files $ llama model --help usage: llama model [-h] {download,list,prompt-format,describe,verify-download,remove} ... Work with llama models options: -h, --help show this help message and exit model_subcommands: {download,list,prompt-format,describe,verify-download,remove} download Download a model from llama.meta.com or Hugging Face Hub list Show available llama models prompt-format Show llama model message formats describe Show details about a llama model verify-download Verify the downloaded checkpoints' checksums for models downloaded from Meta remove Remove the downloaded llama model $ llama stack --help usage: llama stack [-h] [--version] {build,list-apis,list-providers,run} ... Operations for the Llama Stack / Distributions options: -h, --help show this help message and exit --version show program's version number and exit stack_subcommands: {build,list-apis,list-providers,run} build Build a Llama stack container list-apis List APIs part of the Llama Stack implementation list-providers Show available Llama Stack Providers for an API run Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution. ``` [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan [Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) --------- Signed-off-by: reidliu Co-authored-by: reidliu --- llama_stack/cli/llama.py | 4 ++++ llama_stack/cli/model/model.py | 4 ++++ llama_stack/cli/stack/stack.py | 4 ++++ llama_stack/cli/stack/utils.py | 14 ++++++++++++++ 4 files changed, 26 insertions(+) create mode 100644 llama_stack/cli/stack/utils.py diff --git a/llama_stack/cli/llama.py b/llama_stack/cli/llama.py index f0466facd..fb9eae236 100644 --- a/llama_stack/cli/llama.py +++ b/llama_stack/cli/llama.py @@ -9,6 +9,7 @@ import argparse from .download import Download from .model import ModelParser from .stack import StackParser +from .utils import print_subcommand_description from .verify_download import VerifyDownload @@ -20,6 +21,7 @@ class LlamaCLIParser: prog="llama", description="Welcome to the Llama CLI", add_help=True, + formatter_class=argparse.RawTextHelpFormatter, ) # Default command is to print help @@ -33,6 +35,8 @@ class LlamaCLIParser: Download.create(subparsers) VerifyDownload.create(subparsers) + print_subcommand_description(self.parser, subparsers) + def parse_args(self) -> argparse.Namespace: return self.parser.parse_args() diff --git a/llama_stack/cli/model/model.py b/llama_stack/cli/model/model.py index 2f4065b83..ec1fc8cf1 100644 --- a/llama_stack/cli/model/model.py +++ b/llama_stack/cli/model/model.py @@ -13,6 +13,7 @@ from llama_stack.cli.model.prompt_format import ModelPromptFormat from llama_stack.cli.model.remove import ModelRemove from llama_stack.cli.model.verify_download import ModelVerifyDownload from llama_stack.cli.subcommand import Subcommand +from llama_stack.cli.utils import print_subcommand_description class ModelParser(Subcommand): @@ -24,6 +25,7 @@ class ModelParser(Subcommand): "model", prog="llama model", 
description="Work with llama models", + formatter_class=argparse.RawTextHelpFormatter, ) self.parser.set_defaults(func=lambda args: self.parser.print_help()) @@ -37,3 +39,5 @@ class ModelParser(Subcommand): ModelDescribe.create(subparsers) ModelVerifyDownload.create(subparsers) ModelRemove.create(subparsers) + + print_subcommand_description(self.parser, subparsers) diff --git a/llama_stack/cli/stack/stack.py b/llama_stack/cli/stack/stack.py index 431f7b98e..7b6215ef4 100644 --- a/llama_stack/cli/stack/stack.py +++ b/llama_stack/cli/stack/stack.py @@ -8,6 +8,7 @@ import argparse from importlib.metadata import version from llama_stack.cli.subcommand import Subcommand +from llama_stack.cli.utils import print_subcommand_description from .build import StackBuild from .list_apis import StackListApis @@ -22,6 +23,7 @@ class StackParser(Subcommand): "stack", prog="llama stack", description="Operations for the Llama Stack / Distributions", + formatter_class=argparse.RawTextHelpFormatter, ) self.parser.add_argument( @@ -39,3 +41,5 @@ class StackParser(Subcommand): StackListApis.create(subparsers) StackListProviders.create(subparsers) StackRun.create(subparsers) + + print_subcommand_description(self.parser, subparsers) diff --git a/llama_stack/cli/stack/utils.py b/llama_stack/cli/stack/utils.py new file mode 100644 index 000000000..1e83a5cc8 --- /dev/null +++ b/llama_stack/cli/stack/utils.py @@ -0,0 +1,14 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + + +def print_subcommand_description(parser, subparsers): + """Print descriptions of subcommands.""" + description_text = "" + for name, subcommand in subparsers.choices.items(): + description = subcommand.description + description_text += f" {name:<21} {description}\n" + parser.epilog = description_text From 2f7683bc5fc33192fe34533d47d47328ff522fee Mon Sep 17 00:00:00 2001 From: Hardik Shah Date: Thu, 27 Feb 2025 17:31:53 -0800 Subject: [PATCH 09/13] fix: Structured outputs for recursive models (#1311) Handle recursive nature in the structured response_formats. Update test to include 1 nested model. 
``` LLAMA_STACK_CONFIG=dev pytest -s -v tests/client-sdk/inference/test_text_inference.py --inference-model "openai/gpt-4o-mini" -k test_text_chat_completion_structured_output ``` --------- Co-authored-by: Ashwin Bharambe --- .../test_cases/inference/chat_completion.json | 3 +- .../utils/inference/litellm_openai_mixin.py | 35 +++++++++++++++++++ .../inference/test_text_inference.py | 10 ++++-- 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/llama_stack/providers/tests/test_cases/inference/chat_completion.json b/llama_stack/providers/tests/test_cases/inference/chat_completion.json index 50f6b1c15..dcc767e4e 100644 --- a/llama_stack/providers/tests/test_cases/inference/chat_completion.json +++ b/llama_stack/providers/tests/test_cases/inference/chat_completion.json @@ -111,7 +111,8 @@ "first_name": "Michael", "last_name": "Jordan", "year_of_birth": 1963, - "num_seasons_in_nba": 15 + "num_seasons_in_nba": 15, + "year_for_draft": 1984 } } }, diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py index ecb6961da..ddf7f193f 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -126,6 +126,37 @@ class LiteLLMOpenAIMixin( ): yield chunk + def _add_additional_properties_recursive(self, schema): + """ + Recursively add additionalProperties: False to all object schemas + """ + if isinstance(schema, dict): + if schema.get("type") == "object": + schema["additionalProperties"] = False + + # Add required field with all property keys if properties exist + if "properties" in schema and schema["properties"]: + schema["required"] = list(schema["properties"].keys()) + + if "properties" in schema: + for prop_schema in schema["properties"].values(): + self._add_additional_properties_recursive(prop_schema) + + for key in ["anyOf", "allOf", "oneOf"]: + if key in schema: + for sub_schema in schema[key]: + self._add_additional_properties_recursive(sub_schema) + + if "not" in schema: + self._add_additional_properties_recursive(schema["not"]) + + # Handle $defs/$ref + if "$defs" in schema: + for def_schema in schema["$defs"].values(): + self._add_additional_properties_recursive(def_schema) + + return schema + async def _get_params(self, request: ChatCompletionRequest) -> dict: input_dict = {} @@ -140,6 +171,10 @@ class LiteLLMOpenAIMixin( name = fmt["title"] del fmt["title"] fmt["additionalProperties"] = False + + # Apply additionalProperties: False recursively to all objects + fmt = self._add_additional_properties_recursive(fmt) + input_dict["response_format"] = { "type": "json_schema", "json_schema": { diff --git a/tests/client-sdk/inference/test_text_inference.py b/tests/client-sdk/inference/test_text_inference.py index 577d995ad..7850d2d57 100644 --- a/tests/client-sdk/inference/test_text_inference.py +++ b/tests/client-sdk/inference/test_text_inference.py @@ -4,6 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+ import pytest from pydantic import BaseModel @@ -342,11 +343,15 @@ def test_text_chat_completion_with_tool_choice_none(client_with_models, text_mod ], ) def test_text_chat_completion_structured_output(client_with_models, text_model_id, test_case): + class NBAStats(BaseModel): + year_for_draft: int + num_seasons_in_nba: int + class AnswerFormat(BaseModel): first_name: str last_name: str year_of_birth: int - num_seasons_in_nba: int + nba_stats: NBAStats tc = TestCase(test_case) @@ -364,7 +369,8 @@ def test_text_chat_completion_structured_output(client_with_models, text_model_i assert answer.first_name == expected["first_name"] assert answer.last_name == expected["last_name"] assert answer.year_of_birth == expected["year_of_birth"] - assert answer.num_seasons_in_nba == expected["num_seasons_in_nba"] + assert answer.nba_stats.num_seasons_in_nba == expected["num_seasons_in_nba"] + assert answer.nba_stats.year_for_draft == expected["year_for_draft"] @pytest.mark.parametrize("streaming", [True, False]) From 076d2f349da3d7b227f4fae4e32c0e69d58f4e1d Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Thu, 27 Feb 2025 18:00:27 -0800 Subject: [PATCH 10/13] fix: litellm tool call parsing event type to in_progress (#1312) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? - Test with script: https://gist.github.com/yanxi0830/64699f3604766ac2319421b750c5bf9c - Agent with tool calls does not get correctly parsed with LiteLLM provider b/c we skip processing `ChatCompletionResponseEventType.complete`. - However, LiteLLM spits out event_type="complete" with ToolCallDelta https://github.com/meta-llama/llama-stack/blob/2f7683bc5fc33192fe34533d47d47328ff522fee/llama_stack/providers/inline/agents/meta_reference/agent_instance.py#L570-L577 - Llama Model ``` ChatCompletionResponseStreamChunk( │ event=Event( │ │ delta=ToolCallDelta( │ │ │ parse_status='succeeded', │ │ │ tool_call=ToolCall( │ │ │ │ arguments={'kind': 'pod', 'namespace': 'openshift-lightspeed'}, │ │ │ │ call_id='call_tIjWTUdsQXhQ2XHC5ke4EQY5', │ │ │ │ tool_name='get_object_namespace_list' │ │ │ ), │ │ │ type='tool_call' │ │ ), │ │ event_type='progress', │ │ logprobs=None, │ │ stop_reason='end_of_turn' │ ), │ metrics=None ) ChatCompletionResponseStreamChunk( │ event=Event( │ │ delta=TextDelta(text='', type='text'), │ │ event_type='complete', │ │ logprobs=None, │ │ stop_reason='end_of_turn' │ ), │ metrics=None ) ``` - LiteLLM model ``` ChatCompletionResponseStreamChunk( │ event=Event( │ │ delta=ToolCallDelta( │ │ │ parse_status='succeeded', │ │ │ tool_call=ToolCall( │ │ │ │ arguments={'kind': 'pod', 'namespace': 'openshift-lightspeed'}, │ │ │ │ call_id='call_tIjWTUdsQXhQ2XHC5ke4EQY5', │ │ │ │ tool_name='get_object_namespace_list' │ │ │ ), │ │ │ type='tool_call' │ │ ), │ │ event_type='complete', │ │ logprobs=None, │ │ stop_reason='end_of_turn' │ ), │ metrics=None ) ChatCompletionResponseStreamChunk( │ event=Event( │ │ delta=TextDelta(text='', type='text'), │ │ event_type='complete', │ │ logprobs=None, │ │ stop_reason='end_of_turn' │ ), │ metrics=None ) ``` [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan - Test with script: https://gist.github.com/yanxi0830/64699f3604766ac2319421b750c5bf9c [//]: # (## Documentation) --- .../providers/utils/inference/openai_compat.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/llama_stack/providers/utils/inference/openai_compat.py 
b/llama_stack/providers/utils/inference/openai_compat.py index 1309e72a6..eaf5ad2e1 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -27,7 +27,9 @@ from openai.types.chat import ( from openai.types.chat import ( ChatCompletionMessageParam as OpenAIChatCompletionMessage, ) -from openai.types.chat import ChatCompletionMessageToolCall +from openai.types.chat import ( + ChatCompletionMessageToolCall, +) from openai.types.chat import ( ChatCompletionMessageToolCallParam as OpenAIChatCompletionMessageToolCall, ) @@ -199,7 +201,9 @@ def convert_openai_completion_logprobs_stream(text: str, logprobs: Optional[Unio return None -def process_completion_response(response: OpenAICompatCompletionResponse) -> CompletionResponse: +def process_completion_response( + response: OpenAICompatCompletionResponse, +) -> CompletionResponse: choice = response.choices[0] # drop suffix if present and return stop reason as end of turn if choice.text.endswith("<|eot_id|>"): @@ -492,7 +496,9 @@ class UnparseableToolCall(BaseModel): arguments: str = "" -async def convert_message_to_openai_dict_new(message: Message | Dict) -> OpenAIChatCompletionMessage: +async def convert_message_to_openai_dict_new( + message: Message | Dict, +) -> OpenAIChatCompletionMessage: """ Convert a Message to an OpenAI API-compatible dictionary. """ @@ -942,7 +948,7 @@ async def convert_openai_chat_completion_stream( ) yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.complete, + event_type=ChatCompletionResponseEventType.progress, delta=ToolCallDelta( tool_call=tool_call, parse_status=ToolCallParseStatus.succeeded, From 356727418366bac0db482847abb3dfb9135c407e Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Thu, 27 Feb 2025 21:24:01 -0500 Subject: [PATCH 11/13] fix: Incorrect import path for print_subcommand_description() (#1313) # What does this PR do? This fixes release build failure: https://github.com/meta-llama/llama-stack-ops/actions/runs/13579787331/job/37963565001 ``` + llama model prompt-format -m Llama3.2-11B-Vision-Instruct Traceback (most recent call last): File "/tmp/tmp.PXMDlmD0x5/.venv/bin/llama", line 4, in from llama_stack.cli.llama import main File "/tmp/tmp.PXMDlmD0x5/.venv/lib/python3.10/site-packages/llama_stack/cli/llama.py", line 10, in from .model import ModelParser File "/tmp/tmp.PXMDlmD0x5/.venv/lib/python3.10/site-packages/llama_stack/cli/model/__init__.py", line 7, in from .model import ModelParser # noqa File "/tmp/tmp.PXMDlmD0x5/.venv/lib/python3.10/site-packages/llama_stack/cli/model/model.py", line 16, in from llama_stack.cli.utils import print_subcommand_description ModuleNotFoundError: No module named 'llama_stack.cli.utils' ``` ## Test Plan [Describe the tests you ran to verify your changes with result summaries. 
*Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) Signed-off-by: Yuan Tang --- llama_stack/cli/model/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/cli/model/model.py b/llama_stack/cli/model/model.py index ec1fc8cf1..808029945 100644 --- a/llama_stack/cli/model/model.py +++ b/llama_stack/cli/model/model.py @@ -12,8 +12,8 @@ from llama_stack.cli.model.list import ModelList from llama_stack.cli.model.prompt_format import ModelPromptFormat from llama_stack.cli.model.remove import ModelRemove from llama_stack.cli.model.verify_download import ModelVerifyDownload +from llama_stack.cli.stack.utils import print_subcommand_description from llama_stack.cli.subcommand import Subcommand -from llama_stack.cli.utils import print_subcommand_description class ModelParser(Subcommand): From f4df3a76d99d929fa1e463ac8b40af2f7b3b6d53 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Thu, 27 Feb 2025 21:35:49 -0500 Subject: [PATCH 12/13] fix: Incorrect import path for print_subcommand_description() (#1314) # What does this PR do? Missed this one additional import in https://github.com/meta-llama/llama-stack/pull/1313 ## Test Plan [Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) Signed-off-by: Yuan Tang --- llama_stack/cli/stack/stack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/cli/stack/stack.py b/llama_stack/cli/stack/stack.py index 7b6215ef4..ccf1a5ffc 100644 --- a/llama_stack/cli/stack/stack.py +++ b/llama_stack/cli/stack/stack.py @@ -7,8 +7,8 @@ import argparse from importlib.metadata import version +from llama_stack.cli.stack.utils import print_subcommand_description from llama_stack.cli.subcommand import Subcommand -from llama_stack.cli.utils import print_subcommand_description from .build import StackBuild from .list_apis import StackListApis From a9f5c5bfcaaec9a3debf6e82b19aaf152ceb60fe Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Thu, 27 Feb 2025 21:50:41 -0500 Subject: [PATCH 13/13] fix: Incorrect import path for print_subcommand_description() (#1315) # What does this PR do? [Provide a short summary of what this PR does and why. Link to relevant issues if applicable.] [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan [Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) Signed-off-by: Yuan Tang --- llama_stack/cli/llama.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/cli/llama.py b/llama_stack/cli/llama.py index fb9eae236..8ff580029 100644 --- a/llama_stack/cli/llama.py +++ b/llama_stack/cli/llama.py @@ -9,7 +9,7 @@ import argparse from .download import Download from .model import ModelParser from .stack import StackParser -from .utils import print_subcommand_description +from .stack.utils import print_subcommand_description from .verify_download import VerifyDownload
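Net effect of patches 11-13: the helper has one canonical home, and every caller resolves it from there. A minimal sketch of the resulting imports, with paths taken from the hunks above:

```python
# model.py and stack.py import the helper absolutely:
from llama_stack.cli.stack.utils import print_subcommand_description

# llama.py, living inside llama_stack/cli/, uses the equivalent relative form:
# from .stack.utils import print_subcommand_description
```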