diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 1e4a88f13..ef73dadc3 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -70,6 +70,19 @@ $ uv pip install -e .
 $ source .venv/bin/activate
 ```
 
+Note that you can create a dotenv file `.env` with any environment variables the tests need:
+```
+LLAMA_STACK_BASE_URL=http://localhost:8321
+LLAMA_STACK_CLIENT_LOG=debug
+LLAMA_STACK_PORT=8321
+LLAMA_STACK_CONFIG=
+```
+
+Then pass this dotenv file when running the client SDK tests:
+```bash
+$ uv run --env-file .env -- pytest -v tests/client-sdk/inference/test_text_inference.py
+```
+
 ## Pre-commit Hooks
 
 We use [pre-commit](https://pre-commit.com/) to run linting and formatting checks on your code. You can install the pre-commit hooks by running:
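A test run this way sees those variables in `os.environ`. A minimal sketch of a client-side check that relies on them (the fallback URL and the exact `models.list()` shape are assumptions based on the `llama-stack-client` package, not part of this diff):

```python
# Minimal sketch of a connectivity check driven by the .env values that
# `uv run --env-file .env` injects. The fallback URL is an assumption.
import os

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url=os.environ.get("LLAMA_STACK_BASE_URL", "http://localhost:8321"))
print([model.identifier for model in client.models.list()])
```
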
diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb
index d4975b7a8..8ae6fed24 100644
--- a/docs/getting_started.ipynb
+++ b/docs/getting_started.ipynb
@@ -84,10 +84,8 @@
    "outputs": [],
    "source": [
     "# NBVAL_SKIP\n",
-    "\n",
     "!apt-get install -y bubblewrap\n",
-    "!pip install uv\n",
-    "!uv pip install llama-stack --system"
+    "!pip install -U llama-stack"
    ]
   },
   {
@@ -126,7 +124,7 @@
    "source": [
     "# NBVAL_SKIP\n",
     "# This will build all the dependencies you will need\n",
-    "!llama stack build --template together --image-type venv --image-name __system__"
+    "!UV_SYSTEM_PYTHON=1 llama stack build --template together --image-type venv"
    ]
   },
   {
@@ -4328,7 +4326,7 @@
    "provenance": []
   },
   "kernelspec": {
-   "display_name": "master",
+   "display_name": "toolchain",
    "language": "python",
    "name": "python3"
   },
diff --git a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
index 4cfccd44a..174cbcce6 100644
--- a/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
+++ b/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
@@ -45,65 +45,7 @@
    "id": "O9pGVlPIjpix",
    "outputId": "e1fbe723-ae31-4630-eb80-4c4f6476d56f"
   },
-  "outputs": [
-   {
-    "name": "stdout",
-    "output_type": "stream",
-    "text": [
-     [... ~50 removed lines of pip "Requirement already satisfied: ..." output elided ...]
-    ]
-   }
-  ],
+  "outputs": [],
   "source": [
    "# NBVAL_SKIP\n",
    "!pip install -U llama-stack"
   ]
  },
 {
@@ -120,198 +62,10 @@
    "id": "JQpLUSNjlGAM",
    "outputId": "2f7fec97-5511-4cae-d51e-6d262fbca19c"
   },
-  "outputs": [
-   {
-    "name": "stdout",
-    "output_type": "stream",
-    "text": [
-     [... ~190 removed lines of `llama stack build` dependency-install output (pip requirements, sentence-transformers, torch CPU wheel), ending with "Build Successful!", elided ...]
-    ]
-   }
-  ],
+  "outputs": [],
   "source": [
    "# NBVAL_SKIP\n",
-   "!llama stack build --template together --image-type venv --image-name __system__"
+   "!UV_SYSTEM_PYTHON=1 llama stack build --template together --image-type venv"
   ]
  },
 {
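The `UV_SYSTEM_PYTHON=1` form of the build command works because of the `llama_stack/cli/stack/_build.py` change later in this diff: a venv build with no explicit image name now falls back to the special `__system__` environment when that variable is set. A condensed sketch of that branch (with `image_name` standing in for the `--image-name` CLI argument):

```python
import os

# Condensed from _run_stack_build_command_from_build_config below; image_name
# stands in for the --image-name CLI argument (None when not passed).
image_name = None
if not image_name and os.environ.get("UV_SYSTEM_PYTHON"):
    image_name = "__system__"  # build into the system Python environment
if not image_name:
    raise ValueError("Please specify an image name when building a venv image")
```
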
diff --git a/docs/source/distributions/remote_hosted_distro/nvidia.md b/docs/source/distributions/remote_hosted_distro/nvidia.md
index 20a10ba4d..efa0a2d74 100644
--- a/docs/source/distributions/remote_hosted_distro/nvidia.md
+++ b/docs/source/distributions/remote_hosted_distro/nvidia.md
@@ -27,19 +27,19 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3-8B-Instruct (meta/llama3-8b-instruct)`
-- `meta-llama/Llama-3-70B-Instruct (meta/llama3-70b-instruct)`
-- `meta-llama/Llama-3.1-8B-Instruct (meta/llama-3.1-8b-instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (meta/llama-3.1-70b-instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (meta/llama-3.1-405b-instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (meta/llama-3.2-1b-instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (meta/llama-3.2-3b-instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (meta/llama-3.2-11b-vision-instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (meta/llama-3.2-90b-vision-instruct)`
-- `nvidia/llama-3.2-nv-embedqa-1b-v2 (nvidia/llama-3.2-nv-embedqa-1b-v2)`
-- `nvidia/nv-embedqa-e5-v5 (nvidia/nv-embedqa-e5-v5)`
-- `nvidia/nv-embedqa-mistral-7b-v2 (nvidia/nv-embedqa-mistral-7b-v2)`
-- `snowflake/arctic-embed-l (snowflake/arctic-embed-l)`
+- `meta/llama3-8b-instruct (aliases: meta-llama/Llama-3-8B-Instruct)`
+- `meta/llama3-70b-instruct (aliases: meta-llama/Llama-3-70B-Instruct)`
+- `meta/llama-3.1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta/llama-3.1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta/llama-3.1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `meta/llama-3.2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `meta/llama-3.2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `meta/llama-3.2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `meta/llama-3.2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `nvidia/llama-3.2-nv-embedqa-1b-v2`
+- `nvidia/nv-embedqa-e5-v5`
+- `nvidia/nv-embedqa-mistral-7b-v2`
+- `snowflake/arctic-embed-l`
 
 ### Prerequisite: API Keys
diff --git a/docs/source/distributions/self_hosted_distro/bedrock.md b/docs/source/distributions/self_hosted_distro/bedrock.md
index 14f004926..623ab6848 100644
--- a/docs/source/distributions/self_hosted_distro/bedrock.md
+++ b/docs/source/distributions/self_hosted_distro/bedrock.md
@@ -34,9 +34,9 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct (meta.llama3-1-8b-instruct-v1:0)`
-- `meta-llama/Llama-3.1-70B-Instruct (meta.llama3-1-70b-instruct-v1:0)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (meta.llama3-1-405b-instruct-v1:0)`
+- `meta.llama3-1-8b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta.llama3-1-70b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta.llama3-1-405b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
 
 ### Prerequisite: API Keys
diff --git a/docs/source/distributions/self_hosted_distro/cerebras.md b/docs/source/distributions/self_hosted_distro/cerebras.md
index 6e2af14fd..8f14ae7cc 100644
--- a/docs/source/distributions/self_hosted_distro/cerebras.md
+++ b/docs/source/distributions/self_hosted_distro/cerebras.md
@@ -27,8 +27,8 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct (llama3.1-8b)`
-- `meta-llama/Llama-3.3-70B-Instruct (llama-3.3-70b)`
+- `llama3.1-8b (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `llama-3.3-70b (aliases: meta-llama/Llama-3.3-70B-Instruct)`
 
 ### Prerequisite: API Keys
diff --git a/docs/source/distributions/self_hosted_distro/fireworks.md b/docs/source/distributions/self_hosted_distro/fireworks.md
index f69e6d963..1fcd6f7af 100644
--- a/docs/source/distributions/self_hosted_distro/fireworks.md
+++ b/docs/source/distributions/self_hosted_distro/fireworks.md
@@ -37,17 +37,17 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct (accounts/fireworks/models/llama-v3p1-8b-instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (accounts/fireworks/models/llama-v3p1-70b-instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (accounts/fireworks/models/llama-v3p1-405b-instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (accounts/fireworks/models/llama-v3p2-1b-instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (accounts/fireworks/models/llama-v3p2-3b-instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (accounts/fireworks/models/llama-v3p2-11b-vision-instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (accounts/fireworks/models/llama-v3p2-90b-vision-instruct)`
-- `meta-llama/Llama-3.3-70B-Instruct (accounts/fireworks/models/llama-v3p3-70b-instruct)`
-- `meta-llama/Llama-Guard-3-8B (accounts/fireworks/models/llama-guard-3-8b)`
-- `meta-llama/Llama-Guard-3-11B-Vision (accounts/fireworks/models/llama-guard-3-11b-vision)`
-- `nomic-ai/nomic-embed-text-v1.5 (nomic-ai/nomic-embed-text-v1.5)`
+- `accounts/fireworks/models/llama-v3p1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `accounts/fireworks/models/llama-v3p1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `accounts/fireworks/models/llama-v3p1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `accounts/fireworks/models/llama-v3p2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `accounts/fireworks/models/llama-v3p3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `accounts/fireworks/models/llama-guard-3-8b (aliases: meta-llama/Llama-Guard-3-8B)`
+- `accounts/fireworks/models/llama-guard-3-11b-vision (aliases: meta-llama/Llama-Guard-3-11B-Vision)`
+- `nomic-ai/nomic-embed-text-v1.5`
 
 ### Prerequisite: API Keys
diff --git a/docs/source/distributions/self_hosted_distro/groq.md b/docs/source/distributions/self_hosted_distro/groq.md
index 9fb7b2619..ce3f8aecc 100644
--- a/docs/source/distributions/self_hosted_distro/groq.md
+++ b/docs/source/distributions/self_hosted_distro/groq.md
@@ -37,11 +37,11 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct (groq/llama3-8b-8192)`
-- `meta-llama/Llama-3.1-8B-Instruct (groq/llama-3.1-8b-instant)`
-- `meta-llama/Llama-3-70B-Instruct (groq/llama3-70b-8192)`
-- `meta-llama/Llama-3.3-70B-Instruct (groq/llama-3.3-70b-versatile)`
-- `meta-llama/Llama-3.2-3B-Instruct (groq/llama-3.2-3b-preview)`
+- `groq/llama3-8b-8192 (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `groq/llama-3.1-8b-instant`
+- `groq/llama3-70b-8192 (aliases: meta-llama/Llama-3-70B-Instruct)`
+- `groq/llama-3.3-70b-versatile (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `groq/llama-3.2-3b-preview (aliases: meta-llama/Llama-3.2-3B-Instruct)`
 
 ### Prerequisite: API Keys
diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md
index 80d84b402..8f23cef43 100644
--- a/docs/source/distributions/self_hosted_distro/ollama.md
+++ b/docs/source/distributions/self_hosted_distro/ollama.md
@@ -141,17 +141,21 @@ ollama run
 To make sure that the model is being served correctly, run `ollama ps` to get a list of models being served by ollama.
 ```
 $ ollama ps
-
-NAME                         ID              SIZE     PROCESSOR    UNTIL
-llama3.1:8b-instruct-fp16    4aacac419454    17 GB    100% GPU     4 minutes from now
+NAME                         ID              SIZE      PROCESSOR    UNTIL
+llama3.2:3b-instruct-fp16    195a8c01d91e    8.6 GB    100% GPU     9 minutes from now
 ```
 
 To verify that the model served by ollama is correctly connected to Llama Stack server
 ```bash
 $ llama-stack-client models list
-+----------------------+----------------------+---------------+-----------------------------------------------+
-| identifier           | llama_model          | provider_id   | metadata                                      |
-+======================+======================+===============+===============================================+
-| Llama3.1-8B-Instruct | Llama3.1-8B-Instruct | ollama0       | {'ollama_model': 'llama3.1:8b-instruct-fp16'} |
-+----------------------+----------------------+---------------+-----------------------------------------------+
+
+Available Models
+
+┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ model_type ┃ identifier                       ┃ provider_resource_id      ┃ metadata ┃ provider_id ┃
+┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ llm        │ meta-llama/Llama-3.2-3B-Instruct │ llama3.2:3b-instruct-fp16 │          │ ollama      │
+└────────────┴──────────────────────────────────┴───────────────────────────┴──────────┴─────────────┘
+
+Total models: 1
 ```
diff --git a/docs/source/distributions/self_hosted_distro/sambanova.md b/docs/source/distributions/self_hosted_distro/sambanova.md
index e6ac616be..a7f738261 100644
--- a/docs/source/distributions/self_hosted_distro/sambanova.md
+++ b/docs/source/distributions/self_hosted_distro/sambanova.md
@@ -34,15 +34,15 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct (Meta-Llama-3.1-8B-Instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (Meta-Llama-3.1-70B-Instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (Meta-Llama-3.1-405B-Instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (Meta-Llama-3.2-1B-Instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (Meta-Llama-3.2-3B-Instruct)`
-- `meta-llama/Llama-3.3-70B-Instruct (Meta-Llama-3.3-70B-Instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (Llama-3.2-11B-Vision-Instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (Llama-3.2-90B-Vision-Instruct)`
-- `meta-llama/Llama-Guard-3-8B (Meta-Llama-Guard-3-8B)`
+- `Meta-Llama-3.1-8B-Instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `Meta-Llama-3.1-70B-Instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `Meta-Llama-3.1-405B-Instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `Meta-Llama-3.2-1B-Instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `Meta-Llama-3.2-3B-Instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `Meta-Llama-3.3-70B-Instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `Llama-3.2-11B-Vision-Instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `Llama-3.2-90B-Vision-Instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)`
 
 ### Prerequisite: API Keys
diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md
index 7af0dcf4d..f361e93c7 100644
--- a/docs/source/distributions/self_hosted_distro/together.md
+++ b/docs/source/distributions/self_hosted_distro/together.md
@@ -37,17 +37,17 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct`
-- `meta-llama/Llama-3.1-70B-Instruct`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8`
-- `meta-llama/Llama-3.2-3B-Instruct`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct`
-- `meta-llama/Llama-3.3-70B-Instruct`
-- `meta-llama/Llama-Guard-3-8B`
-- `meta-llama/Llama-Guard-3-11B-Vision`
-- `togethercomputer/m2-bert-80M-8k-retrieval`
-- `togethercomputer/m2-bert-80M-32k-retrieval`
+- `meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `meta-llama/Llama-3.2-3B-Instruct-Turbo (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `meta-llama/Llama-3.3-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `meta-llama/Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)`
+- `meta-llama/Llama-Guard-3-11B-Vision-Turbo (aliases: meta-llama/Llama-Guard-3-11B-Vision)`
+- `togethercomputer/m2-bert-80M-8k-retrieval`
+- `togethercomputer/m2-bert-80M-32k-retrieval`
 
 ### Prerequisite: API Keys
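The provider docs above flip each entry so the provider's native model ID is the primary identifier and the Meta HF-repo name is kept as an alias. Either name should resolve to the same model through the client; a hedged sketch against a running together distribution (the endpoint, model choice, and response shape are assumptions, not part of this diff):

```python
# Hedged sketch: the provider identifier and its alias should both resolve
# to the same underlying model (together distribution assumed running).
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")
for model_id in ("meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", "meta-llama/Llama-3.1-8B-Instruct"):
    response = client.inference.chat_completion(
        model_id=model_id,
        messages=[{"role": "user", "content": "Say hi in one word."}],
    )
    print(model_id, "->", response.completion_message.content)
```
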
diff --git a/docs/source/getting_started/index.md b/docs/source/getting_started/index.md
index f017a9723..ecef20d55 100644
--- a/docs/source/getting_started/index.md
+++ b/docs/source/getting_started/index.md
@@ -102,12 +102,18 @@ Let's use the `llama-stack-client` CLI to check the connectivity to the server.
 $ llama-stack-client configure --endpoint http://localhost:$LLAMA_STACK_PORT
 > Enter the API key (leave empty if no key is needed):
 Done! You can now use the Llama Stack Client CLI with endpoint http://localhost:8321
+
 $ llama-stack-client models list
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓
-┃ identifier                       ┃ provider_id ┃ provider_resource_id      ┃ metadata ┃
-┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩
-│ meta-llama/Llama-3.2-3B-Instruct │ ollama      │ llama3.2:3b-instruct-fp16 │          │
-└──────────────────────────────────┴─────────────┴───────────────────────────┴──────────┘
+
+Available Models
+
+┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ model_type ┃ identifier                       ┃ provider_resource_id      ┃ metadata ┃ provider_id ┃
+┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ llm        │ meta-llama/Llama-3.2-3B-Instruct │ llama3.2:3b-instruct-fp16 │          │ ollama      │
+└────────────┴──────────────────────────────────┴───────────────────────────┴──────────┴─────────────┘
+
+Total models: 1
 ```
 
 You can test basic Llama inference completion using the CLI too.
diff --git a/docs/source/references/llama_stack_client_cli_reference.md b/docs/source/references/llama_stack_client_cli_reference.md
index bf99f2b57..26b81cf92 100644
--- a/docs/source/references/llama_stack_client_cli_reference.md
+++ b/docs/source/references/llama_stack_client_cli_reference.md
@@ -58,11 +58,15 @@ llama-stack-client providers list
 llama-stack-client models list
 ```
 ```
-+----------------------+----------------------+---------------+----------------------------------------------------------+
-| identifier           | llama_model          | provider_id   | metadata                                                 |
-+======================+======================+===============+==========================================================+
-| Llama3.1-8B-Instruct | Llama3.1-8B-Instruct | tgi0          | {'huggingface_repo': 'meta-llama/Llama-3.1-8B-Instruct'} |
-+----------------------+----------------------+---------------+----------------------------------------------------------+
+Available Models
+
+┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ model_type ┃ identifier                       ┃ provider_resource_id      ┃ metadata ┃ provider_id ┃
+┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ llm        │ meta-llama/Llama-3.2-3B-Instruct │ llama3.2:3b-instruct-fp16 │          │ ollama      │
+└────────────┴──────────────────────────────────┴───────────────────────────┴──────────┴─────────────┘
+
+Total models: 1
 ```
 
 ### `llama-stack-client models get`
diff --git a/llama_stack/cli/llama.py b/llama_stack/cli/llama.py
index f0466facd..8ff580029 100644
--- a/llama_stack/cli/llama.py
+++ b/llama_stack/cli/llama.py
@@ -9,6 +9,7 @@ import argparse
 from .download import Download
 from .model import ModelParser
 from .stack import StackParser
+from .stack.utils import print_subcommand_description
 from .verify_download import VerifyDownload
 
@@ -20,6 +21,7 @@ class LlamaCLIParser:
             prog="llama",
             description="Welcome to the Llama CLI",
             add_help=True,
+            formatter_class=argparse.RawTextHelpFormatter,
         )
 
         # Default command is to print help
@@ -33,6 +35,8 @@ class LlamaCLIParser:
         Download.create(subparsers)
         VerifyDownload.create(subparsers)
 
+        print_subcommand_description(self.parser, subparsers)
+
     def parse_args(self) -> argparse.Namespace:
         return self.parser.parse_args()
diff --git a/llama_stack/cli/model/model.py b/llama_stack/cli/model/model.py
index 2f4065b83..808029945 100644
--- a/llama_stack/cli/model/model.py
+++ b/llama_stack/cli/model/model.py
@@ -12,6 +12,7 @@ from llama_stack.cli.model.list import ModelList
 from llama_stack.cli.model.prompt_format import ModelPromptFormat
 from llama_stack.cli.model.remove import ModelRemove
 from llama_stack.cli.model.verify_download import ModelVerifyDownload
+from llama_stack.cli.stack.utils import print_subcommand_description
 from llama_stack.cli.subcommand import Subcommand
 
@@ -24,6 +25,7 @@ class ModelParser(Subcommand):
             "model",
             prog="llama model",
             description="Work with llama models",
+            formatter_class=argparse.RawTextHelpFormatter,
         )
 
         self.parser.set_defaults(func=lambda args: self.parser.print_help())
@@ -37,3 +39,5 @@ class ModelParser(Subcommand):
         ModelDescribe.create(subparsers)
         ModelVerifyDownload.create(subparsers)
         ModelRemove.create(subparsers)
+
+        print_subcommand_description(self.parser, subparsers)
diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py
index 96382d428..89db368db 100644
--- a/llama_stack/cli/stack/_build.py
+++ b/llama_stack/cli/stack/_build.py
@@ -38,7 +38,7 @@ from llama_stack.distribution.distribution import get_provider_registry
 from llama_stack.distribution.resolver import InvalidProviderError
 from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
 from llama_stack.distribution.utils.dynamic import instantiate_class_type
-from llama_stack.distribution.utils.exec import formulate_run_args, in_notebook, run_with_pty
+from llama_stack.distribution.utils.exec import formulate_run_args, run_with_pty
 from llama_stack.distribution.utils.image_types import ImageType
 from llama_stack.providers.datatypes import Api
 
@@ -65,8 +65,6 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
     if args.image_type == "venv":
         current_venv = os.environ.get("VIRTUAL_ENV")
         image_name = args.image_name or current_venv
-        if not image_name and in_notebook():
-            image_name = "__system__"
     elif args.image_type == "conda":
         current_conda_env = os.environ.get("CONDA_DEFAULT_ENV")
         image_name = args.image_name or current_conda_env
@@ -291,6 +289,8 @@ def _run_stack_build_command_from_build_config(
         if not image_name:
             raise ValueError("Please specify an image name when building a conda image")
     elif build_config.image_type == ImageType.venv.value:
+        if not image_name and os.environ.get("UV_SYSTEM_PYTHON"):
+            image_name = "__system__"
         if not image_name:
             raise ValueError("Please specify an image name when building a venv image")
diff --git a/llama_stack/cli/stack/stack.py b/llama_stack/cli/stack/stack.py
index 431f7b98e..ccf1a5ffc 100644
--- a/llama_stack/cli/stack/stack.py
+++ b/llama_stack/cli/stack/stack.py
@@ -7,6 +7,7 @@ import argparse
 from importlib.metadata import version
 
+from llama_stack.cli.stack.utils import print_subcommand_description
 from llama_stack.cli.subcommand import Subcommand
 
 from .build import StackBuild
@@ -22,6 +23,7 @@ class StackParser(Subcommand):
             "stack",
             prog="llama stack",
             description="Operations for the Llama Stack / Distributions",
+            formatter_class=argparse.RawTextHelpFormatter,
         )
 
         self.parser.add_argument(
@@ -39,3 +41,5 @@ class StackParser(Subcommand):
         StackListApis.create(subparsers)
         StackListProviders.create(subparsers)
         StackRun.create(subparsers)
+
+        print_subcommand_description(self.parser, subparsers)
diff --git a/llama_stack/cli/stack/utils.py b/llama_stack/cli/stack/utils.py
new file mode 100644
index 000000000..1e83a5cc8
--- /dev/null
+++ b/llama_stack/cli/stack/utils.py
@@ -0,0 +1,14 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+def print_subcommand_description(parser, subparsers):
+    """Print descriptions of subcommands."""
+    description_text = ""
+    for name, subcommand in subparsers.choices.items():
+        description = subcommand.description
+        description_text += f"  {name:<21} {description}\n"
+    parser.epilog = description_text
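The new helper composes an epilog that lists every subcommand with its description; combined with the `RawTextHelpFormatter` changes above, the newlines survive into `--help` output. A small self-contained sketch (the demo parser and the two subcommands mirror, but are not, the real CLI wiring):

```python
# Sketch: how print_subcommand_description fills the parser epilog. Requires
# llama-stack from this branch; the demo subcommands are placeholders.
import argparse

from llama_stack.cli.stack.utils import print_subcommand_description

parser = argparse.ArgumentParser(prog="demo", formatter_class=argparse.RawTextHelpFormatter)
subparsers = parser.add_subparsers(title="subcommands")
subparsers.add_parser("model", description="Work with llama models")
subparsers.add_parser("stack", description="Operations for the Llama Stack / Distributions")
print_subcommand_description(parser, subparsers)
parser.print_help()  # epilog now lists "model" and "stack" with their descriptions
```
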
diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
index 3847b4cba..842523486 100644
--- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
+++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py
@@ -1059,9 +1059,6 @@ async def execute_tool_call_maybe(
     group_name = tool_to_group.get(name, None)
     if group_name is None:
         raise ValueError(f"Tool {name} not found in any tool group")
-    # get the arguments generated by the model and augment with toolgroup arg overrides for the agent
-    tool_call_args = tool_call.arguments
-    tool_call_args.update(toolgroup_args.get(group_name, {}))
     if isinstance(name, BuiltinTool):
         if name == BuiltinTool.brave_search:
             name = WEB_SEARCH_TOOL
@@ -1070,10 +1067,12 @@ async def execute_tool_call_maybe(
 
     result = await tool_runtime_api.invoke_tool(
         tool_name=name,
-        kwargs=dict(
-            session_id=session_id,
-            **tool_call_args,
-        ),
+        kwargs={
+            "session_id": session_id,
+            # get the arguments generated by the model and augment with toolgroup arg overrides for the agent
+            **tool_call.arguments,
+            **toolgroup_args.get(group_name, {}),
+        },
     )
     return result
diff --git a/llama_stack/providers/inline/inference/sentence_transformers/config.py b/llama_stack/providers/inline/inference/sentence_transformers/config.py
index 232e4bf32..93e0afe11 100644
--- a/llama_stack/providers/inline/inference/sentence_transformers/config.py
+++ b/llama_stack/providers/inline/inference/sentence_transformers/config.py
@@ -11,5 +11,5 @@ from pydantic import BaseModel
 
 class SentenceTransformersInferenceConfig(BaseModel):
     @classmethod
-    def sample_run_config(cls) -> Dict[str, Any]:
+    def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
         return {}
diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/llama_stack/providers/remote/inference/cerebras/cerebras.py
index 4deeea630..748c5237a 100644
--- a/llama_stack/providers/remote/inference/cerebras/cerebras.py
+++ b/llama_stack/providers/remote/inference/cerebras/cerebras.py
@@ -46,14 +46,14 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )
 
 from .config import CerebrasImplConfig
-from .models import model_entries
+from .models import MODEL_ENTRIES
 
 
 class CerebrasInferenceAdapter(ModelRegistryHelper, Inference):
     def __init__(self, config: CerebrasImplConfig) -> None:
         ModelRegistryHelper.__init__(
             self,
-            model_entries=model_entries,
+            model_entries=MODEL_ENTRIES,
         )
 
         self.config = config
diff --git a/llama_stack/providers/remote/inference/cerebras/models.py b/llama_stack/providers/remote/inference/cerebras/models.py
index a48864d49..37419bf4c 100644
--- a/llama_stack/providers/remote/inference/cerebras/models.py
+++ b/llama_stack/providers/remote/inference/cerebras/models.py
@@ -9,7 +9,7 @@ from llama_stack.providers.utils.inference.model_registry import (
     build_hf_repo_model_entry,
 )
 
-model_entries = [
+MODEL_ENTRIES = [
     build_hf_repo_model_entry(
         "llama3.1-8b",
         CoreModelId.llama3_1_8b_instruct.value,
diff --git a/llama_stack/providers/remote/inference/groq/models.py b/llama_stack/providers/remote/inference/groq/models.py
index 4364edffa..08b9b4dc4 100644
--- a/llama_stack/providers/remote/inference/groq/models.py
+++ b/llama_stack/providers/remote/inference/groq/models.py
@@ -5,10 +5,13 @@
 # the root directory of this source tree.
 
 from llama_stack.models.llama.sku_list import CoreModelId
-from llama_stack.providers.utils.inference.model_registry import build_model_entry
+from llama_stack.providers.utils.inference.model_registry import (
+    build_hf_repo_model_entry,
+    build_model_entry,
+)
 
 MODEL_ENTRIES = [
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama3-8b-8192",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
@@ -16,11 +19,11 @@ MODEL_ENTRIES = [
         "groq/llama-3.1-8b-instant",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama3-70b-8192",
         CoreModelId.llama3_70b_instruct.value,
     ),
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama-3.3-70b-versatile",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
@@ -28,7 +31,7 @@ MODEL_ENTRIES = [
     # Preview models aren't recommended for production use, but we include this one
     # to pass the test fixture
     # TODO(aidand): Replace this with a stable model once Groq supports it
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama-3.2-3b-preview",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
diff --git a/llama_stack/providers/remote/inference/nvidia/models.py b/llama_stack/providers/remote/inference/nvidia/models.py
index a855566bc..879855003 100644
--- a/llama_stack/providers/remote/inference/nvidia/models.py
+++ b/llama_stack/providers/remote/inference/nvidia/models.py
@@ -11,7 +11,7 @@ from llama_stack.providers.utils.inference.model_registry import (
     build_hf_repo_model_entry,
 )
 
-_MODEL_ENTRIES = [
+MODEL_ENTRIES = [
     build_hf_repo_model_entry(
         "meta/llama3-8b-instruct",
         CoreModelId.llama3_8b_instruct.value,
import NVIDIAConfig -from .models import _MODEL_ENTRIES +from .models import MODEL_ENTRIES from .openai_utils import ( convert_chat_completion_request, convert_completion_request, @@ -62,7 +62,7 @@ logger = logging.getLogger(__name__) class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): def __init__(self, config: NVIDIAConfig) -> None: # TODO(mf): filter by available models - ModelRegistryHelper.__init__(self, model_entries=_MODEL_ENTRIES) + ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES) logger.info(f"Initializing NVIDIAInferenceAdapter({config.url})...") @@ -144,19 +144,38 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): # # we can ignore str and always pass List[str] to OpenAI # - flat_contents = [ - item.text if isinstance(item, TextContentItem) else item - for content in contents - for item in (content if isinstance(content, list) else [content]) - ] + flat_contents = [content.text if isinstance(content, TextContentItem) else content for content in contents] input = [content.text if isinstance(content, TextContentItem) else content for content in flat_contents] model = self.get_provider_model_id(model_id) - response = await self._client.embeddings.create( - model=model, - input=input, - # extra_body={"input_type": "passage"|"query"}, # TODO(mf): how to tell caller's intent? - ) + extra_body = {} + + if text_truncation is not None: + text_truncation_options = { + TextTruncation.none: "NONE", + TextTruncation.end: "END", + TextTruncation.start: "START", + } + extra_body["truncate"] = text_truncation_options[text_truncation] + + if output_dimension is not None: + extra_body["dimensions"] = output_dimension + + if task_type is not None: + task_type_options = { + EmbeddingTaskType.document: "passage", + EmbeddingTaskType.query: "query", + } + extra_body["input_type"] = task_type_options[task_type] + + try: + response = await self._client.embeddings.create( + model=model, + input=input, + extra_body=extra_body, + ) + except BadRequestError as e: + raise ValueError(f"Failed to get embeddings: {e}") from e # # OpenAI: CreateEmbeddingResponse(data=[Embedding(embedding=List[float], ...)], ...) 
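The hunk above wires Llama Stack's new embedding options into NIM-specific `extra_body` fields on the OpenAI-compatible endpoint, and converts `BadRequestError` into a `ValueError` so callers see a uniform failure mode. Below is a minimal, self-contained sketch of that mapping; the `Enum` stand-ins mirror the `TextTruncation` / `EmbeddingTaskType` types from `llama_stack.apis.inference`, which the real adapter imports directly.

```python
# Sketch of the extra_body translation added to NVIDIAInferenceAdapter.embeddings.
# The enums here are stand-ins for the llama_stack.apis.inference types.
from enum import Enum
from typing import Optional


class TextTruncation(Enum):
    none = "none"
    start = "start"
    end = "end"


class EmbeddingTaskType(Enum):
    query = "query"
    document = "document"


def build_extra_body(
    text_truncation: Optional[TextTruncation] = None,
    output_dimension: Optional[int] = None,
    task_type: Optional[EmbeddingTaskType] = None,
) -> dict:
    """Translate Llama Stack embedding options into NIM's OpenAI-compat extras."""
    extra_body: dict = {}
    if text_truncation is not None:
        extra_body["truncate"] = {
            TextTruncation.none: "NONE",
            TextTruncation.end: "END",
            TextTruncation.start: "START",
        }[text_truncation]
    if output_dimension is not None:
        extra_body["dimensions"] = output_dimension
    if task_type is not None:
        extra_body["input_type"] = {
            EmbeddingTaskType.document: "passage",
            EmbeddingTaskType.query: "query",
        }[task_type]
    return extra_body


assert build_extra_body(TextTruncation.end, 384, EmbeddingTaskType.query) == {
    "truncate": "END",
    "dimensions": 384,
    "input_type": "query",
}
```

The adapter passes the resulting dict as `extra_body=` to `self._client.embeddings.create(...)`, which resolves the earlier `TODO(mf)` about conveying the caller's intent (`"passage"` vs `"query"`).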
diff --git a/llama_stack/providers/tests/test_cases/inference/chat_completion.json b/llama_stack/providers/tests/test_cases/inference/chat_completion.json index 50f6b1c15..dcc767e4e 100644 --- a/llama_stack/providers/tests/test_cases/inference/chat_completion.json +++ b/llama_stack/providers/tests/test_cases/inference/chat_completion.json @@ -111,7 +111,8 @@ "first_name": "Michael", "last_name": "Jordan", "year_of_birth": 1963, - "num_seasons_in_nba": 15 + "num_seasons_in_nba": 15, + "year_for_draft": 1984 } } }, diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py index ecb6961da..ddf7f193f 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -126,6 +126,37 @@ class LiteLLMOpenAIMixin( ): yield chunk + def _add_additional_properties_recursive(self, schema): + """ + Recursively add additionalProperties: False to all object schemas + """ + if isinstance(schema, dict): + if schema.get("type") == "object": + schema["additionalProperties"] = False + + # Add required field with all property keys if properties exist + if "properties" in schema and schema["properties"]: + schema["required"] = list(schema["properties"].keys()) + + if "properties" in schema: + for prop_schema in schema["properties"].values(): + self._add_additional_properties_recursive(prop_schema) + + for key in ["anyOf", "allOf", "oneOf"]: + if key in schema: + for sub_schema in schema[key]: + self._add_additional_properties_recursive(sub_schema) + + if "not" in schema: + self._add_additional_properties_recursive(schema["not"]) + + # Handle $defs/$ref + if "$defs" in schema: + for def_schema in schema["$defs"].values(): + self._add_additional_properties_recursive(def_schema) + + return schema + async def _get_params(self, request: ChatCompletionRequest) -> dict: input_dict = {} @@ -140,6 +171,10 @@ class LiteLLMOpenAIMixin( name = fmt["title"] del fmt["title"] fmt["additionalProperties"] = False + + # Apply additionalProperties: False recursively to all objects + fmt = self._add_additional_properties_recursive(fmt) + input_dict["response_format"] = { "type": "json_schema", "json_schema": { diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index 1309e72a6..eaf5ad2e1 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -27,7 +27,9 @@ from openai.types.chat import ( from openai.types.chat import ( ChatCompletionMessageParam as OpenAIChatCompletionMessage, ) -from openai.types.chat import ChatCompletionMessageToolCall +from openai.types.chat import ( + ChatCompletionMessageToolCall, +) from openai.types.chat import ( ChatCompletionMessageToolCallParam as OpenAIChatCompletionMessageToolCall, ) @@ -199,7 +201,9 @@ def convert_openai_completion_logprobs_stream(text: str, logprobs: Optional[Unio return None -def process_completion_response(response: OpenAICompatCompletionResponse) -> CompletionResponse: +def process_completion_response( + response: OpenAICompatCompletionResponse, +) -> CompletionResponse: choice = response.choices[0] # drop suffix if present and return stop reason as end of turn if choice.text.endswith("<|eot_id|>"): @@ -492,7 +496,9 @@ class UnparseableToolCall(BaseModel): arguments: str = "" -async def convert_message_to_openai_dict_new(message: Message | Dict) -> OpenAIChatCompletionMessage: 
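For context on the `_add_additional_properties_recursive` helper added to `LiteLLMOpenAIMixin` above: OpenAI-style structured outputs in strict mode require that every object schema set `additionalProperties: false` and list all of its properties in `required`, at every nesting level, so the previous top-level-only assignment was insufficient. A standalone copy of the helper (minus `self`), exercised on a toy nested schema:

```python
# Standalone copy of _add_additional_properties_recursive, for illustration.
def add_additional_properties_recursive(schema):
    """Recursively add additionalProperties: False to all object schemas."""
    if isinstance(schema, dict):
        if schema.get("type") == "object":
            schema["additionalProperties"] = False
            # Strict mode also wants every property listed as required.
            if "properties" in schema and schema["properties"]:
                schema["required"] = list(schema["properties"].keys())

        if "properties" in schema:
            for prop_schema in schema["properties"].values():
                add_additional_properties_recursive(prop_schema)

        for key in ["anyOf", "allOf", "oneOf"]:
            if key in schema:
                for sub_schema in schema[key]:
                    add_additional_properties_recursive(sub_schema)

        if "not" in schema:
            add_additional_properties_recursive(schema["not"])

        if "$defs" in schema:
            for def_schema in schema["$defs"].values():
                add_additional_properties_recursive(def_schema)

    return schema


schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "address": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
        },
    },
}
out = add_additional_properties_recursive(schema)
assert out["additionalProperties"] is False
assert out["required"] == ["name", "address"]
assert out["properties"]["address"]["additionalProperties"] is False
assert out["properties"]["address"]["required"] == ["city"]
```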
+async def convert_message_to_openai_dict_new( + message: Message | Dict, +) -> OpenAIChatCompletionMessage: """ Convert a Message to an OpenAI API-compatible dictionary. """ @@ -942,7 +948,7 @@ async def convert_openai_chat_completion_stream( ) yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.complete, + event_type=ChatCompletionResponseEventType.progress, delta=ToolCallDelta( tool_call=tool_call, parse_status=ToolCallParseStatus.succeeded, diff --git a/llama_stack/templates/bedrock/bedrock.py b/llama_stack/templates/bedrock/bedrock.py index 628e78612..18e287390 100644 --- a/llama_stack/templates/bedrock/bedrock.py +++ b/llama_stack/templates/bedrock/bedrock.py @@ -6,12 +6,10 @@ from pathlib import Path -from llama_stack.apis.models import ModelInput from llama_stack.distribution.datatypes import Provider, ToolGroupInput -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.remote.inference.bedrock.models import MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -39,16 +37,11 @@ def get_distribution_template() -> DistributionTemplate: config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"), ) - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} + available_models = { + "bedrock": MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model], - provider_model_id=m.provider_model_id, - provider_id="bedrock", - ) - for m in MODEL_ENTRIES - ] default_tool_groups = [ ToolGroupInput( toolgroup_id="builtin::websearch", @@ -71,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/bedrock/doc_template.md b/llama_stack/templates/bedrock/doc_template.md index 357638ea5..24106525a 100644 --- a/llama_stack/templates/bedrock/doc_template.md +++ b/llama_stack/templates/bedrock/doc_template.md @@ -28,7 +28,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }} ({{ model.provider_model_id }})` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml index 7d03b7c29..00a02e0d5 100644 --- a/llama_stack/templates/bedrock/run.yaml +++ b/llama_stack/templates/bedrock/run.yaml @@ -88,16 +88,31 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db models: +- metadata: {} + model_id: meta.llama3-1-8b-instruct-v1:0 + provider_id: bedrock + provider_model_id: meta.llama3-1-8b-instruct-v1:0 + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: bedrock provider_model_id: meta.llama3-1-8b-instruct-v1:0 model_type: llm +- metadata: {} + model_id: 
meta.llama3-1-70b-instruct-v1:0 + provider_id: bedrock + provider_model_id: meta.llama3-1-70b-instruct-v1:0 + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: bedrock provider_model_id: meta.llama3-1-70b-instruct-v1:0 model_type: llm +- metadata: {} + model_id: meta.llama3-1-405b-instruct-v1:0 + provider_id: bedrock + provider_model_id: meta.llama3-1-405b-instruct-v1:0 + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: bedrock diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py index 544a50c03..bda22a498 100644 --- a/llama_stack/templates/cerebras/cerebras.py +++ b/llama_stack/templates/cerebras/cerebras.py @@ -8,14 +8,13 @@ from pathlib import Path from llama_stack.apis.models.models import ModelType from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig -from llama_stack.providers.remote.inference.cerebras.models import model_entries -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.providers.remote.inference.cerebras.models import MODEL_ENTRIES +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -48,15 +47,10 @@ def get_distribution_template() -> DistributionTemplate: config=SentenceTransformersInferenceConfig.sample_run_config(), ) - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model], - provider_model_id=m.provider_model_id, - provider_id="cerebras", - ) - for m in model_entries - ] + available_models = { + "cerebras": MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) embedding_model = ModelInput( model_id="all-MiniLM-L6-v2", provider_id="sentence-transformers", @@ -92,7 +86,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/cerebras/doc_template.md b/llama_stack/templates/cerebras/doc_template.md index 77fc6f478..3f5645958 100644 --- a/llama_stack/templates/cerebras/doc_template.md +++ b/llama_stack/templates/cerebras/doc_template.md @@ -20,7 +20,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }} ({{ model.provider_model_id }})` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml index 6afff2be2..43d3158ba 100644 --- a/llama_stack/templates/cerebras/run.yaml +++ b/llama_stack/templates/cerebras/run.yaml @@ -90,11 +90,21 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db models: +- metadata: {} + model_id: 
llama3.1-8b + provider_id: cerebras + provider_model_id: llama3.1-8b + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: cerebras provider_model_id: llama3.1-8b model_type: llm +- metadata: {} + model_id: llama-3.3-70b + provider_id: cerebras + provider_model_id: llama-3.3-70b + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: cerebras diff --git a/llama_stack/templates/ci-tests/ci_tests.py b/llama_stack/templates/ci-tests/ci_tests.py index a93cfff9c..979256fa1 100644 --- a/llama_stack/templates/ci-tests/ci_tests.py +++ b/llama_stack/templates/ci-tests/ci_tests.py @@ -12,14 +12,13 @@ from llama_stack.distribution.datatypes import ( ShieldInput, ToolGroupInput, ) -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) from llama_stack.providers.inline.vector_io.sqlite_vec.config import SQLiteVectorIOConfig from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -71,16 +70,10 @@ def get_distribution_template() -> DistributionTemplate: provider_id="code-interpreter", ), ] - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, - provider_id="fireworks", - model_type=m.model_type, - metadata=m.metadata, - ) - for m in MODEL_ENTRIES - ] + available_models = { + "fireworks": MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) embedding_model = ModelInput( model_id="all-MiniLM-L6-v2", provider_id="sentence-transformers", @@ -97,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=None, providers=providers, - default_models=default_models + [embedding_model], + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml index 295d72e71..3a973cabf 100644 --- a/llama_stack/templates/ci-tests/run.yaml +++ b/llama_stack/templates/ci-tests/run.yaml @@ -90,51 +90,112 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/registry.db models: +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + 
provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-8b + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-11b-vision + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-11B-Vision provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision model_type: llm - metadata: embedding_dimension: 768 context_length: 8192 model_id: nomic-ai/nomic-embed-text-v1.5 provider_id: fireworks + provider_model_id: nomic-ai/nomic-embed-text-v1.5 model_type: embedding - metadata: embedding_dimension: 384 diff --git a/llama_stack/templates/dell/dell.py b/llama_stack/templates/dell/dell.py index 8348beafd..52c5a5476 100644 --- a/llama_stack/templates/dell/dell.py +++ b/llama_stack/templates/dell/dell.py @@ -3,7 +3,6 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from pathlib import Path from llama_stack.apis.models.models import ModelType from llama_stack.distribution.datatypes import ( @@ -99,9 +98,7 @@ def get_distribution_template() -> DistributionTemplate: distro_type="self_hosted", description="Dell's distribution of Llama Stack. TGI inference via Dell's custom container", container_image=None, - template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=[inference_model, embedding_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/dev/dev.py b/llama_stack/templates/dev/dev.py index fe80c3842..694913119 100644 --- a/llama_stack/templates/dev/dev.py +++ b/llama_stack/templates/dev/dev.py @@ -13,7 +13,6 @@ from llama_stack.distribution.datatypes import ( ShieldInput, ToolGroupInput, ) -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) @@ -28,7 +27,7 @@ from llama_stack.providers.remote.inference.groq.config import GroqConfig from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES as GROQ_MODEL_ENTRIES from llama_stack.providers.remote.inference.openai.config import OpenAIConfig from llama_stack.providers.remote.inference.openai.models import MODEL_ENTRIES as OPENAI_MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]: @@ -61,8 +60,7 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]: ), ] inference_providers = [] - default_models = [] - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} + available_models = {} for provider_id, model_entries, config in providers: inference_providers.append( Provider( @@ -71,21 +69,12 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]: config=config, ) ) - default_models.extend( - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, - provider_model_id=m.provider_model_id, - provider_id=provider_id, - model_type=m.model_type, - metadata=m.metadata, - ) - for m in model_entries - ) - return inference_providers, default_models + available_models[provider_id] = model_entries + return inference_providers, available_models def get_distribution_template() -> DistributionTemplate: - inference_providers, default_models = get_inference_providers() + inference_providers, available_models = get_inference_providers() providers = { "inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]), "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"], @@ -139,6 +128,7 @@ def get_distribution_template() -> DistributionTemplate: }, ) + default_models = get_model_registry(available_models) return DistributionTemplate( name=name, distro_type="self_hosted", @@ -146,7 +136,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=None, providers=providers, - default_models=[], + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/dev/run.yaml b/llama_stack/templates/dev/run.yaml index 0ada465e4..f1d72d572 100644 --- 
a/llama_stack/templates/dev/run.yaml +++ b/llama_stack/templates/dev/run.yaml @@ -136,51 +136,101 @@ models: provider_id: openai provider_model_id: openai/text-embedding-3-large model_type: embedding +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-8b + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-guard-3-8b model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-11b-vision + provider_id: 
fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-11B-Vision provider_id: fireworks @@ -247,25 +297,45 @@ models: provider_model_id: gemini/text-embedding-004 model_type: embedding - metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct + model_id: groq/llama3-8b-8192 provider_id: groq provider_model_id: groq/llama3-8b-8192 model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: groq + provider_model_id: groq/llama3-8b-8192 + model_type: llm +- metadata: {} + model_id: groq/llama-3.1-8b-instant + provider_id: groq provider_model_id: groq/llama-3.1-8b-instant model_type: llm +- metadata: {} + model_id: groq/llama3-70b-8192 + provider_id: groq + provider_model_id: groq/llama3-70b-8192 + model_type: llm - metadata: {} model_id: meta-llama/Llama-3-70B-Instruct provider_id: groq provider_model_id: groq/llama3-70b-8192 model_type: llm +- metadata: {} + model_id: groq/llama-3.3-70b-versatile + provider_id: groq + provider_model_id: groq/llama-3.3-70b-versatile + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: groq provider_model_id: groq/llama-3.3-70b-versatile model_type: llm +- metadata: {} + model_id: groq/llama-3.2-3b-preview + provider_id: groq + provider_model_id: groq/llama-3.2-3b-preview + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: groq diff --git a/llama_stack/templates/fireworks/doc_template.md b/llama_stack/templates/fireworks/doc_template.md index 48677d571..6c7743cb8 100644 --- a/llama_stack/templates/fireworks/doc_template.md +++ b/llama_stack/templates/fireworks/doc_template.md @@ -30,7 +30,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }} ({{ model.provider_model_id }})` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py index c78664dde..0111bc118 100644 --- a/llama_stack/templates/fireworks/fireworks.py +++ b/llama_stack/templates/fireworks/fireworks.py @@ -13,14 +13,13 @@ from llama_stack.distribution.datatypes import ( ShieldInput, ToolGroupInput, ) -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -60,17 +59,11 @@ def get_distribution_template() -> DistributionTemplate: config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"), ) - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, - provider_model_id=m.provider_model_id, - provider_id="fireworks", - metadata=m.metadata, - model_type=m.model_type, - ) - for m in 
MODEL_ENTRIES - ] + available_models = { + "fireworks": MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) + embedding_model = ModelInput( model_id="all-MiniLM-L6-v2", provider_id="sentence-transformers", @@ -101,7 +94,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml index 6f622c7d9..0fe5f3026 100644 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -99,51 +99,101 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db models: +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct model_type: llm +- metadata: {} + model_id: 
accounts/fireworks/models/llama-v3p3-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-8b + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-guard-3-8b model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-11b-vision + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-11B-Vision provider_id: fireworks diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index e6d21d10d..cbe85c4f7 100644 --- a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -93,51 +93,101 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db models: +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct model_type: llm +- metadata: {} + model_id: 
accounts/fireworks/models/llama-v3p2-90b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-8b + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: fireworks provider_model_id: accounts/fireworks/models/llama-guard-3-8b model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-11b-vision + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-11B-Vision provider_id: fireworks diff --git a/llama_stack/templates/groq/doc_template.md b/llama_stack/templates/groq/doc_template.md index 3f9ccbd16..85b916ccd 100644 --- a/llama_stack/templates/groq/doc_template.md +++ b/llama_stack/templates/groq/doc_template.md @@ -30,7 +30,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }} ({{ model.provider_model_id }})` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/groq/groq.py b/llama_stack/templates/groq/groq.py index b0c7a3804..71c504cde 100644 --- a/llama_stack/templates/groq/groq.py +++ b/llama_stack/templates/groq/groq.py @@ -12,13 +12,12 @@ from llama_stack.distribution.datatypes import ( Provider, ToolGroupInput, ) -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.inline.inference.sentence_transformers import ( SentenceTransformersInferenceConfig, ) from llama_stack.providers.remote.inference.groq import GroqConfig from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -60,18 +59,10 @@ def get_distribution_template() -> DistributionTemplate: }, ) - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, - provider_model_id=m.provider_model_id, - provider_id=name, - model_type=m.model_type, - metadata=m.metadata, - ) - for m in MODEL_ENTRIES - ] - + available_models = { + "groq": MODEL_ENTRIES, + } + default_models = get_model_registry(available_models) default_tool_groups = [ ToolGroupInput( toolgroup_id="builtin::websearch", @@ -94,7 +85,7 @@ def get_distribution_template() -> DistributionTemplate: docker_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, 
+ available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml index 220aa847b..78212c8d9 100644 --- a/llama_stack/templates/groq/run.yaml +++ b/llama_stack/templates/groq/run.yaml @@ -91,25 +91,45 @@ metadata_store: db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/registry.db models: - metadata: {} - model_id: meta-llama/Llama-3.1-8B-Instruct + model_id: groq/llama3-8b-8192 provider_id: groq provider_model_id: groq/llama3-8b-8192 model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: groq + provider_model_id: groq/llama3-8b-8192 + model_type: llm +- metadata: {} + model_id: groq/llama-3.1-8b-instant + provider_id: groq provider_model_id: groq/llama-3.1-8b-instant model_type: llm +- metadata: {} + model_id: groq/llama3-70b-8192 + provider_id: groq + provider_model_id: groq/llama3-70b-8192 + model_type: llm - metadata: {} model_id: meta-llama/Llama-3-70B-Instruct provider_id: groq provider_model_id: groq/llama3-70b-8192 model_type: llm +- metadata: {} + model_id: groq/llama-3.3-70b-versatile + provider_id: groq + provider_model_id: groq/llama-3.3-70b-versatile + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: groq provider_model_id: groq/llama-3.3-70b-versatile model_type: llm +- metadata: {} + model_id: groq/llama-3.2-3b-preview + provider_id: groq + provider_model_id: groq/llama-3.2-3b-preview + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: groq diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py index 62584929c..f2849f0bc 100644 --- a/llama_stack/templates/hf-endpoint/hf_endpoint.py +++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py @@ -92,7 +92,6 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=None, providers=providers, - default_models=[inference_model, safety_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py index af04e39d4..cea1075e2 100644 --- a/llama_stack/templates/hf-serverless/hf_serverless.py +++ b/llama_stack/templates/hf-serverless/hf_serverless.py @@ -93,7 +93,6 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=None, providers=providers, - default_models=[inference_model, safety_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py index 9bff981d1..3c38e0edd 100644 --- a/llama_stack/templates/meta-reference-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py @@ -98,7 +98,6 @@ def get_distribution_template() -> DistributionTemplate: description="Use Meta Reference for running LLM inference", template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=[inference_model, safety_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py index fca15fcc5..32476f37f 100644 --- 
a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py +++ b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py @@ -88,7 +88,6 @@ def get_distribution_template() -> DistributionTemplate: description="Use Meta Reference with fp8, int4 quantization for running LLM inference", template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=[inference_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/nvidia/doc_template.md b/llama_stack/templates/nvidia/doc_template.md index 9d9006a27..71b8ac32f 100644 --- a/llama_stack/templates/nvidia/doc_template.md +++ b/llama_stack/templates/nvidia/doc_template.md @@ -20,7 +20,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }} ({{ model.provider_model_id }})` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/nvidia/nvidia.py b/llama_stack/templates/nvidia/nvidia.py index 56d13a09a..cc5e96333 100644 --- a/llama_stack/templates/nvidia/nvidia.py +++ b/llama_stack/templates/nvidia/nvidia.py @@ -6,11 +6,10 @@ from pathlib import Path -from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput -from llama_stack.models.llama.sku_list import all_registered_models +from llama_stack.distribution.datatypes import Provider, ToolGroupInput from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig -from llama_stack.providers.remote.inference.nvidia.models import _MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -38,17 +37,9 @@ def get_distribution_template() -> DistributionTemplate: config=NVIDIAConfig.sample_run_config(), ) - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id, - provider_model_id=m.provider_model_id, - provider_id="nvidia", - model_type=m.model_type, - metadata=m.metadata, - ) - for m in _MODEL_ENTRIES - ] + available_models = { + "nvidia": MODEL_ENTRIES, + } default_tool_groups = [ ToolGroupInput( toolgroup_id="builtin::websearch", @@ -64,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate: ), ] + default_models = get_model_registry(available_models) return DistributionTemplate( name="nvidia", distro_type="remote_hosted", @@ -71,7 +63,7 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=default_models, + available_models_by_provider=available_models, run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/nvidia/run.yaml b/llama_stack/templates/nvidia/run.yaml index bfbad749a..52e78df7b 100644 --- a/llama_stack/templates/nvidia/run.yaml +++ b/llama_stack/templates/nvidia/run.yaml @@ -90,46 +90,91 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db models: +- metadata: {} + model_id: meta/llama3-8b-instruct + provider_id: nvidia + 
provider_model_id: meta/llama3-8b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3-8B-Instruct provider_id: nvidia provider_model_id: meta/llama3-8b-instruct model_type: llm +- metadata: {} + model_id: meta/llama3-70b-instruct + provider_id: nvidia + provider_model_id: meta/llama3-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3-70B-Instruct provider_id: nvidia provider_model_id: meta/llama3-70b-instruct model_type: llm +- metadata: {} + model_id: meta/llama-3.1-8b-instruct + provider_id: nvidia + provider_model_id: meta/llama-3.1-8b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-8B-Instruct provider_id: nvidia provider_model_id: meta/llama-3.1-8b-instruct model_type: llm +- metadata: {} + model_id: meta/llama-3.1-70b-instruct + provider_id: nvidia + provider_model_id: meta/llama-3.1-70b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: nvidia provider_model_id: meta/llama-3.1-70b-instruct model_type: llm +- metadata: {} + model_id: meta/llama-3.1-405b-instruct + provider_id: nvidia + provider_model_id: meta/llama-3.1-405b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: nvidia provider_model_id: meta/llama-3.1-405b-instruct model_type: llm +- metadata: {} + model_id: meta/llama-3.2-1b-instruct + provider_id: nvidia + provider_model_id: meta/llama-3.2-1b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: nvidia provider_model_id: meta/llama-3.2-1b-instruct model_type: llm +- metadata: {} + model_id: meta/llama-3.2-3b-instruct + provider_id: nvidia + provider_model_id: meta/llama-3.2-3b-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: nvidia provider_model_id: meta/llama-3.2-3b-instruct model_type: llm +- metadata: {} + model_id: meta/llama-3.2-11b-vision-instruct + provider_id: nvidia + provider_model_id: meta/llama-3.2-11b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: nvidia provider_model_id: meta/llama-3.2-11b-vision-instruct model_type: llm +- metadata: {} + model_id: meta/llama-3.2-90b-vision-instruct + provider_id: nvidia + provider_model_id: meta/llama-3.2-90b-vision-instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: nvidia diff --git a/llama_stack/templates/ollama/doc_template.md b/llama_stack/templates/ollama/doc_template.md index 1d95e4b65..e5444d3da 100644 --- a/llama_stack/templates/ollama/doc_template.md +++ b/llama_stack/templates/ollama/doc_template.md @@ -130,17 +130,21 @@ ollama run To make sure that the model is being served correctly, run `ollama ps` to get a list of models being served by ollama. 
``` $ ollama ps - -NAME ID SIZE PROCESSOR UNTIL -llama3.1:8b-instruct-fp16 4aacac419454 17 GB 100% GPU 4 minutes from now +NAME ID SIZE PROCESSOR UNTIL +llama3.2:3b-instruct-fp16 195a8c01d91e 8.6 GB 100% GPU 9 minutes from now ``` To verify that the model served by ollama is correctly connected to Llama Stack server ```bash $ llama-stack-client models list -+----------------------+----------------------+---------------+-----------------------------------------------+ -| identifier | llama_model | provider_id | metadata | -+======================+======================+===============+===============================================+ -| Llama3.1-8B-Instruct | Llama3.1-8B-Instruct | ollama0 | {'ollama_model': 'llama3.1:8b-instruct-fp16'} | -+----------------------+----------------------+---------------+-----------------------------------------------+ + +Available Models + +┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┓ +┃ model_type ┃ identifier ┃ provider_resource_id ┃ metadata ┃ provider_id ┃ +┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━┩ +│ llm │ meta-llama/Llama-3.2-3B-Instruct │ llama3.2:3b-instruct-fp16 │ │ ollama │ +└──────────────┴──────────────────────────────────────┴──────────────────────────────┴───────────┴─────────────┘ + +Total models: 1 ``` diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index ba3cfe684..83c7b1a63 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -87,7 +87,6 @@ def get_distribution_template() -> DistributionTemplate: container_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=[inference_model, safety_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py index 10d291456..73ee36c3f 100644 --- a/llama_stack/templates/remote-vllm/vllm.py +++ b/llama_stack/templates/remote-vllm/vllm.py @@ -95,7 +95,6 @@ def get_distribution_template() -> DistributionTemplate: description="Use (an external) vLLM server for running LLM inference", template_path=Path(__file__).parent / "doc_template.md", providers=providers, - default_models=[inference_model, safety_model], run_configs={ "run.yaml": RunConfigSettings( provider_overrides={ diff --git a/llama_stack/templates/sambanova/doc_template.md b/llama_stack/templates/sambanova/doc_template.md index 4b18aa756..b2a295716 100644 --- a/llama_stack/templates/sambanova/doc_template.md +++ b/llama_stack/templates/sambanova/doc_template.md @@ -30,7 +30,7 @@ The following environment variables can be configured: The following models are available by default: {% for model in default_models %} -- `{{ model.model_id }} ({{ model.provider_model_id }})` +- `{{ model.model_id }} {{ model.doc_string }}` {% endfor %} {% endif %} diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml index 26815dcd0..124d11baf 100644 --- a/llama_stack/templates/sambanova/run.yaml +++ b/llama_stack/templates/sambanova/run.yaml @@ -68,46 +68,91 @@ metadata_store: type: sqlite db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db models: +- metadata: {} + model_id: Meta-Llama-3.1-8B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.1-8B-Instruct + model_type: llm - metadata: {} model_id: 
meta-llama/Llama-3.1-8B-Instruct provider_id: sambanova provider_model_id: Meta-Llama-3.1-8B-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-3.1-70B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.1-70B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-70B-Instruct provider_id: sambanova provider_model_id: Meta-Llama-3.1-70B-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-3.1-405B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.1-405B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 provider_id: sambanova provider_model_id: Meta-Llama-3.1-405B-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-3.2-1B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.2-1B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-1B-Instruct provider_id: sambanova provider_model_id: Meta-Llama-3.2-1B-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-3.2-3B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.2-3B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-3B-Instruct provider_id: sambanova provider_model_id: Meta-Llama-3.2-3B-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-3.3-70B-Instruct + provider_id: sambanova + provider_model_id: Meta-Llama-3.3-70B-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.3-70B-Instruct provider_id: sambanova provider_model_id: Meta-Llama-3.3-70B-Instruct model_type: llm +- metadata: {} + model_id: Llama-3.2-11B-Vision-Instruct + provider_id: sambanova + provider_model_id: Llama-3.2-11B-Vision-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-11B-Vision-Instruct provider_id: sambanova provider_model_id: Llama-3.2-11B-Vision-Instruct model_type: llm +- metadata: {} + model_id: Llama-3.2-90B-Vision-Instruct + provider_id: sambanova + provider_model_id: Llama-3.2-90B-Vision-Instruct + model_type: llm - metadata: {} model_id: meta-llama/Llama-3.2-90B-Vision-Instruct provider_id: sambanova provider_model_id: Llama-3.2-90B-Vision-Instruct model_type: llm +- metadata: {} + model_id: Meta-Llama-Guard-3-8B + provider_id: sambanova + provider_model_id: Meta-Llama-Guard-3-8B + model_type: llm - metadata: {} model_id: meta-llama/Llama-Guard-3-8B provider_id: sambanova diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py index 725c6abc4..0a0b6bd7e 100644 --- a/llama_stack/templates/sambanova/sambanova.py +++ b/llama_stack/templates/sambanova/sambanova.py @@ -7,15 +7,13 @@ from pathlib import Path from llama_stack.distribution.datatypes import ( - ModelInput, Provider, ShieldInput, ToolGroupInput, ) -from llama_stack.models.llama.sku_list import all_registered_models from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES -from llama_stack.templates.template import DistributionTemplate, RunConfigSettings +from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry def get_distribution_template() -> DistributionTemplate: @@ -40,16 +38,10 @@ def get_distribution_template() -> DistributionTemplate: config=SambaNovaImplConfig.sample_run_config(), ) - core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()} - default_models = [ - ModelInput( - 
diff --git a/llama_stack/templates/sambanova/sambanova.py b/llama_stack/templates/sambanova/sambanova.py
index 725c6abc4..0a0b6bd7e 100644
--- a/llama_stack/templates/sambanova/sambanova.py
+++ b/llama_stack/templates/sambanova/sambanova.py
@@ -7,15 +7,13 @@
 from pathlib import Path
 
 from llama_stack.distribution.datatypes import (
-    ModelInput,
     Provider,
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
 from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -40,16 +38,10 @@ def get_distribution_template() -> DistributionTemplate:
         config=SambaNovaImplConfig.sample_run_config(),
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model],
-            provider_model_id=m.provider_model_id,
-            provider_id=name,
-        )
-        for m in MODEL_ENTRIES
-    ]
-
+    available_models = {
+        name: MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -72,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
         docker_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py
index cb5b07be3..2afb84a63 100644
--- a/llama_stack/templates/template.py
+++ b/llama_stack/templates/template.py
@@ -24,9 +24,33 @@ from llama_stack.distribution.datatypes import (
 )
 from llama_stack.distribution.distribution import get_provider_registry
 from llama_stack.distribution.utils.dynamic import instantiate_class_type
+from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
 from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
 
 
+def get_model_registry(available_models: Dict[str, List[ProviderModelEntry]]) -> List[ModelInput]:
+    models = []
+    for provider_id, entries in available_models.items():
+        for entry in entries:
+            ids = [entry.provider_model_id] + entry.aliases
+            for model_id in ids:
+                models.append(
+                    ModelInput(
+                        model_id=model_id,
+                        provider_model_id=entry.provider_model_id,
+                        provider_id=provider_id,
+                        model_type=entry.model_type,
+                        metadata=entry.metadata,
+                    )
+                )
+    return models
+
+
+class DefaultModel(BaseModel):
+    model_id: str
+    doc_string: str
+
+
 class RunConfigSettings(BaseModel):
     provider_overrides: Dict[str, List[Provider]] = Field(default_factory=dict)
     default_models: Optional[List[ModelInput]] = None
@@ -110,7 +134,7 @@ class DistributionTemplate(BaseModel):
     run_config_env_vars: Optional[Dict[str, Tuple[str, str]]] = None
     container_image: Optional[str] = None
 
-    default_models: Optional[List[ModelInput]] = None
+    available_models_by_provider: Optional[Dict[str, List[ProviderModelEntry]]] = None
 
     def build_config(self) -> BuildConfig:
         return BuildConfig(
@@ -148,13 +172,32 @@ class DistributionTemplate(BaseModel):
             autoescape=True,
         )
         template = env.from_string(template)
+
+        default_models = []
+        if self.available_models_by_provider:
+            has_multiple_providers = len(self.available_models_by_provider.keys()) > 1
+            for provider_id, model_entries in self.available_models_by_provider.items():
+                for model_entry in model_entries:
+                    doc_parts = []
+                    if model_entry.aliases:
+                        doc_parts.append(f"aliases: {', '.join(model_entry.aliases)}")
+                    if has_multiple_providers:
+                        doc_parts.append(f"provider: {provider_id}")
+
+                    default_models.append(
+                        DefaultModel(
+                            model_id=model_entry.provider_model_id,
+                            doc_string=f"({' -- '.join(doc_parts)})" if doc_parts else "",
+                        )
+                    )
+
         return template.render(
             name=self.name,
             description=self.description,
             providers=self.providers,
             providers_table=providers_table,
             run_config_env_vars=self.run_config_env_vars,
-            default_models=self.default_models,
+            default_models=default_models,
         )
 
     def save_distribution(self, yaml_output_dir: Path, doc_output_dir: Path) -> None:
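To make the refactor above concrete: a small sketch of how `get_model_registry` expands one provider entry into multiple registrations. The `ProviderModelEntry` construction is illustrative; the field names follow the diff above, and any fields not listed are assumed to have defaults:

```python
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.templates.template import get_model_registry

# One provider entry with an HF-style alias...
entries = {
    "sambanova": [
        ProviderModelEntry(
            provider_model_id="Meta-Llama-3.1-8B-Instruct",
            aliases=["meta-llama/Llama-3.1-8B-Instruct"],
        )
    ]
}

# ...expands into two ModelInput registrations that share a provider_model_id,
# which is exactly the duplication visible in the generated run.yaml files.
for model in get_model_registry(entries):
    print(model.model_id, "->", model.provider_model_id)
```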
diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py
index 9b80414f9..eb49871a0 100644
--- a/llama_stack/templates/tgi/tgi.py
+++ b/llama_stack/templates/tgi/tgi.py
@@ -96,7 +96,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/together/doc_template.md b/llama_stack/templates/together/doc_template.md
index 405d68f91..be055a43e 100644
--- a/llama_stack/templates/together/doc_template.md
+++ b/llama_stack/templates/together/doc_template.md
@@ -30,7 +30,7 @@ The following environment variables can be configured:
 The following models are available by default:
 
 {% for model in default_models %}
-- `{{ model.model_id }}`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}
diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml
index 9193a3ef6..26d879802 100644
--- a/llama_stack/templates/together/run-with-safety.yaml
+++ b/llama_stack/templates/together/run-with-safety.yaml
@@ -99,46 +99,91 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
 models:
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-Guard-3-8B
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: together
diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml
index 32ddf7b16..0969cfe56 100644
--- a/llama_stack/templates/together/run.yaml
+++ b/llama_stack/templates/together/run.yaml
@@ -93,46 +93,91 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
 models:
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-Guard-3-8B
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: together
diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py
index 8d0e2353c..24c395e1e 100644
--- a/llama_stack/templates/together/together.py
+++ b/llama_stack/templates/together/together.py
@@ -13,14 +13,13 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.together import TogetherImplConfig
 from llama_stack.providers.remote.inference.together.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -57,18 +56,10 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="inline::sentence-transformers",
         config=SentenceTransformersInferenceConfig.sample_run_config(),
     )
-
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_model_id=m.provider_model_id,
-            provider_id="together",
-            metadata=m.metadata,
-            model_type=m.model_type,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "together": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -99,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
diff --git a/llama_stack/templates/vllm-gpu/vllm.py b/llama_stack/templates/vllm-gpu/vllm.py
index 8cdec589e..27a16b93d 100644
--- a/llama_stack/templates/vllm-gpu/vllm.py
+++ b/llama_stack/templates/vllm-gpu/vllm.py
@@ -88,7 +88,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=[inference_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
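The embedding tests below exercise three new request parameters (`text_truncation`, `output_dimension`, `task_type`). A condensed sketch of the truncation contract they encode, assuming a local server and a hypothetical embedding model ID:

```python
from llama_stack_client import BadRequestError, LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")
long_text = "NVDA " * 10240  # deliberately past any reasonable context window

# With truncation enabled, over-long input is cut at the chosen end and succeeds.
ok = client.inference.embeddings(
    model_id="all-MiniLM-L6-v2",  # hypothetical embedding model id
    contents=[long_text],
    text_truncation="end",
)
print(len(ok.embeddings[0]))

# With truncation disabled (None or "none"), the server should reject the input.
try:
    client.inference.embeddings(
        model_id="all-MiniLM-L6-v2",
        contents=[long_text],
        text_truncation="none",
    )
except BadRequestError:
    print("over-long input rejected, as the tests below expect")
```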
diff --git a/tests/client-sdk/inference/test_embedding.py b/tests/client-sdk/inference/test_embedding.py
index 3304406a9..46a901d62 100644
--- a/tests/client-sdk/inference/test_embedding.py
+++ b/tests/client-sdk/inference/test_embedding.py
@@ -14,6 +14,23 @@
 #  - array of a text (TextContentItem)
 # Types of output:
 #  - list of list of floats
+# Params:
+#  - text_truncation
+#    - absent w/ long text -> error
+#    - none w/ long text -> error
+#    - absent w/ short text -> ok
+#    - none w/ short text -> ok
+#    - end w/ long text -> ok
+#    - end w/ short text -> ok
+#    - start w/ long text -> ok
+#    - start w/ short text -> ok
+#  - output_dimension
+#    - response dimension matches
+#  - task_type, only for asymmetric models
+#    - query embedding != passage embedding
+# Negative:
+#  - long string
+#  - long text
 #
 # Todo:
 #  - negative tests
@@ -23,8 +40,6 @@
 #    - empty text
 #    - empty image
 #    - long
-#    - long string
-#    - long text
 #    - large image
 #    - appropriate combinations
 #    - batch size
@@ -40,6 +55,7 @@
 #
 
 import pytest
+from llama_stack_client import BadRequestError
 from llama_stack_client.types import EmbeddingsResponse
 from llama_stack_client.types.shared.interleaved_content import (
     ImageContentItem,
@@ -50,8 +66,10 @@ from llama_stack_client.types.shared.interleaved_content import (
 
 DUMMY_STRING = "hello"
 DUMMY_STRING2 = "world"
+DUMMY_LONG_STRING = "NVDA " * 10240
 DUMMY_TEXT = TextContentItem(text=DUMMY_STRING, type="text")
 DUMMY_TEXT2 = TextContentItem(text=DUMMY_STRING2, type="text")
+DUMMY_LONG_TEXT = TextContentItem(text=DUMMY_LONG_STRING, type="text")
 # TODO(mf): add a real image URL and base64 string
 DUMMY_IMAGE_URL = ImageContentItem(
     image=ImageContentItemImage(url=ImageContentItemImageURL(uri="https://example.com/image.jpg")), type="image"
@@ -89,10 +107,120 @@ def test_embedding_text(llama_stack_client, embedding_model_id, contents):
         "list[url,string,base64,text]",
     ],
 )
-@pytest.mark.skip(reason="Media is not supported")
+@pytest.mark.xfail(reason="Media is not supported")
 def test_embedding_image(llama_stack_client, embedding_model_id, contents):
     response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents)
     assert isinstance(response, EmbeddingsResponse)
     assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
     assert isinstance(response.embeddings[0], list)
     assert isinstance(response.embeddings[0][0], float)
+
+
+@pytest.mark.parametrize(
+    "text_truncation",
+    [
+        "end",
+        "start",
+    ],
+)
+@pytest.mark.parametrize(
+    "contents",
+    [
+        [DUMMY_LONG_TEXT],
+        [DUMMY_STRING],
+    ],
+    ids=[
+        "long",
+        "short",
+    ],
+)
+def test_embedding_truncation(llama_stack_client, embedding_model_id, text_truncation, contents):
+    response = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=contents, text_truncation=text_truncation
+    )
+    assert isinstance(response, EmbeddingsResponse)
+    assert len(response.embeddings) == 1
+    assert isinstance(response.embeddings[0], list)
+    assert isinstance(response.embeddings[0][0], float)
+
+
+@pytest.mark.parametrize(
+    "text_truncation",
+    [
+        None,
+        "none",
+    ],
+)
+@pytest.mark.parametrize(
+    "contents",
+    [
+        [DUMMY_LONG_TEXT],
+        [DUMMY_LONG_STRING],
+    ],
+    ids=[
+        "long-text",
+        "long-str",
+    ],
+)
+def test_embedding_truncation_error(llama_stack_client, embedding_model_id, text_truncation, contents):
+    with pytest.raises(BadRequestError):
+        llama_stack_client.inference.embeddings(
+            model_id=embedding_model_id, contents=contents, text_truncation=text_truncation
+        )
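+
+
+# NOTE: with truncation disabled (text_truncation=None or "none"), providers are
+# expected to reject over-long inputs with a 400 (BadRequestError) rather than
+# silently truncating them; with "end" or "start" the same inputs must succeed.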
+
+
+@pytest.mark.xfail(reason="Only valid for model supporting dimension reduction")
+def test_embedding_output_dimension(llama_stack_client, embedding_model_id):
+    base_response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=[DUMMY_STRING])
+    test_response = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=[DUMMY_STRING], output_dimension=32
+    )
+    assert len(base_response.embeddings[0]) != len(test_response.embeddings[0])
+    assert len(test_response.embeddings[0]) == 32
+
+
+@pytest.mark.xfail(reason="Only valid for model supporting task type")
+def test_embedding_task_type(llama_stack_client, embedding_model_id):
+    query_embedding = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="query"
+    )
+    document_embedding = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="document"
+    )
+    assert query_embedding.embeddings != document_embedding.embeddings
+
+
+@pytest.mark.parametrize(
+    "text_truncation",
+    [
+        None,
+        "none",
+        "end",
+        "start",
+    ],
+)
+def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation):
+    response = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation
+    )
+    assert isinstance(response, EmbeddingsResponse)
+    assert len(response.embeddings) == 1
+    assert isinstance(response.embeddings[0], list)
+    assert isinstance(response.embeddings[0][0], float)
+
+
+@pytest.mark.parametrize(
+    "text_truncation",
+    [
+        "NONE",
+        "END",
+        "START",
+        "left",
+        "right",
+    ],
+)
+def test_embedding_text_truncation_error(llama_stack_client, embedding_model_id, text_truncation):
+    with pytest.raises(BadRequestError):
+        llama_stack_client.inference.embeddings(
+            model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation
+        )
diff --git a/tests/client-sdk/inference/test_text_inference.py b/tests/client-sdk/inference/test_text_inference.py
index 577d995ad..7850d2d57 100644
--- a/tests/client-sdk/inference/test_text_inference.py
+++ b/tests/client-sdk/inference/test_text_inference.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+
 import pytest
 from pydantic import BaseModel
 
@@ -342,11 +343,15 @@ def test_text_chat_completion_with_tool_choice_none(client_with_models, text_mod
     ],
 )
 def test_text_chat_completion_structured_output(client_with_models, text_model_id, test_case):
+    class NBAStats(BaseModel):
+        year_for_draft: int
+        num_seasons_in_nba: int
+
     class AnswerFormat(BaseModel):
         first_name: str
         last_name: str
         year_of_birth: int
-        num_seasons_in_nba: int
+        nba_stats: NBAStats
 
     tc = TestCase(test_case)
 
@@ -364,7 +369,8 @@ def test_text_chat_completion_structured_output(client_with_models, text_model_i
     assert answer.first_name == expected["first_name"]
     assert answer.last_name == expected["last_name"]
     assert answer.year_of_birth == expected["year_of_birth"]
-    assert answer.num_seasons_in_nba == expected["num_seasons_in_nba"]
+    assert answer.nba_stats.num_seasons_in_nba == expected["num_seasons_in_nba"]
+    assert answer.nba_stats.year_for_draft == expected["year_for_draft"]
 
 
 @pytest.mark.parametrize("streaming", [True, False])
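For reference, a minimal sketch of the call shape the structured-output test above drives, with the nested schema passed as a JSON-schema response format. The base URL and model ID are placeholders, and this assumes a running stack with a model that supports guided decoding:

```python
from llama_stack_client import LlamaStackClient
from pydantic import BaseModel


class NBAStats(BaseModel):
    year_for_draft: int
    num_seasons_in_nba: int


class AnswerFormat(BaseModel):
    first_name: str
    last_name: str
    year_of_birth: int
    nba_stats: NBAStats


client = LlamaStackClient(base_url="http://localhost:8321")
response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
    messages=[{"role": "user", "content": "Please give me information about Michael Jordan."}],
    response_format={
        "type": "json_schema",
        "json_schema": AnswerFormat.model_json_schema(),
    },
)
# The returned content should parse (and validate) against the nested schema.
answer = AnswerFormat.model_validate_json(response.completion_message.content)
print(answer.nba_stats.year_for_draft)
```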