Merge branch 'main' into max_infer_iters

commit e157f0ac89
Xi Yan, 2025-02-27 19:07:12 -08:00
68 changed files with 952 additions and 529 deletions


@@ -70,6 +70,19 @@ $ uv pip install -e .
 $ source .venv/bin/activate
 ```
+Note that you can create a dotenv file `.env` with the necessary environment variables:
+```
+LLAMA_STACK_BASE_URL=http://localhost:8321
+LLAMA_STACK_CLIENT_LOG=debug
+LLAMA_STACK_PORT=8321
+LLAMA_STACK_CONFIG=
+```
+Then use this dotenv file when running the client SDK tests:
+```bash
+$ uv run --env-file .env -- pytest -v tests/client-sdk/inference/test_text_inference.py
+```
 ## Pre-commit Hooks
 We use [pre-commit](https://pre-commit.com/) to run linting and formatting checks on your code. You can install the pre-commit hooks by running:


@@ -84,10 +84,8 @@
    "outputs": [],
    "source": [
     "# NBVAL_SKIP\n",
-    "\n",
     "!apt-get install -y bubblewrap\n",
-    "!pip install uv\n",
-    "!uv pip install llama-stack --system"
+    "!pip install -U llama-stack"
    ]
   },
   {
@@ -126,7 +124,7 @@
    "source": [
     "# NBVAL_SKIP\n",
     "# This will build all the dependencies you will need\n",
-    "!llama stack build --template together --image-type venv --image-name __system__"
+    "!UV_SYSTEM_PYTHON=1 llama stack build --template together --image-type venv"
    ]
   },
   {
@@ -4328,7 +4326,7 @@
    "provenance": []
   },
   "kernelspec": {
-   "display_name": "master",
+   "display_name": "toolchain",
    "language": "python",
    "name": "python3"
   },


@@ -45,65 +45,7 @@
    "id": "O9pGVlPIjpix",
    "outputId": "e1fbe723-ae31-4630-eb80-4c4f6476d56f"
   },
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: llama-stack in /usr/local/lib/python3.10/dist-packages (0.0.61)\n",
"Requirement already satisfied: blobfile in /usr/local/lib/python3.10/dist-packages (from llama-stack) (3.0.0)\n",
"Requirement already satisfied: fire in /usr/local/lib/python3.10/dist-packages (from llama-stack) (0.7.0)\n",
"Requirement already satisfied: httpx in /usr/local/lib/python3.10/dist-packages (from llama-stack) (0.28.1)\n",
"Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from llama-stack) (0.26.5)\n",
"Requirement already satisfied: llama-models>=0.0.61 in /usr/local/lib/python3.10/dist-packages (from llama-stack) (0.0.61)\n",
"Requirement already satisfied: llama-stack-client>=0.0.61 in /usr/local/lib/python3.10/dist-packages (from llama-stack) (0.0.61)\n",
"Requirement already satisfied: prompt-toolkit in /usr/local/lib/python3.10/dist-packages (from llama-stack) (3.0.48)\n",
"Requirement already satisfied: python-dotenv in /usr/local/lib/python3.10/dist-packages (from llama-stack) (1.0.1)\n",
"Requirement already satisfied: pydantic>=2 in /usr/local/lib/python3.10/dist-packages (from llama-stack) (2.10.3)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from llama-stack) (2.32.3)\n",
"Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from llama-stack) (13.9.4)\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from llama-stack) (75.1.0)\n",
"Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from llama-stack) (2.5.0)\n",
"Requirement already satisfied: PyYAML in /usr/local/lib/python3.10/dist-packages (from llama-models>=0.0.61->llama-stack) (6.0.2)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from llama-models>=0.0.61->llama-stack) (3.1.4)\n",
"Requirement already satisfied: tiktoken in /usr/local/lib/python3.10/dist-packages (from llama-models>=0.0.61->llama-stack) (0.8.0)\n",
"Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from llama-models>=0.0.61->llama-stack) (10.4.0)\n",
"Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (3.7.1)\n",
"Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (8.1.7)\n",
"Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (1.9.0)\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (2.2.2)\n",
"Requirement already satisfied: pyaml in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (24.12.1)\n",
"Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (1.3.1)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (4.66.6)\n",
"Requirement already satisfied: typing-extensions<5,>=4.7 in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (4.12.2)\n",
"Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx->llama-stack) (2024.8.30)\n",
"Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx->llama-stack) (1.0.7)\n",
"Requirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx->llama-stack) (3.10)\n",
"Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx->llama-stack) (0.14.0)\n",
"Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2->llama-stack) (0.7.0)\n",
"Requirement already satisfied: pydantic-core==2.27.1 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2->llama-stack) (2.27.1)\n",
"Requirement already satisfied: pycryptodomex>=3.8 in /usr/local/lib/python3.10/dist-packages (from blobfile->llama-stack) (3.21.0)\n",
"Requirement already satisfied: urllib3<3,>=1.25.3 in /usr/local/lib/python3.10/dist-packages (from blobfile->llama-stack) (2.2.3)\n",
"Requirement already satisfied: lxml>=4.9 in /usr/local/lib/python3.10/dist-packages (from blobfile->llama-stack) (5.3.0)\n",
"Requirement already satisfied: filelock>=3.0 in /usr/local/lib/python3.10/dist-packages (from blobfile->llama-stack) (3.16.1)\n",
"Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->llama-stack) (2024.9.0)\n",
"Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->llama-stack) (24.2)\n",
"Requirement already satisfied: wcwidth in /usr/local/lib/python3.10/dist-packages (from prompt-toolkit->llama-stack) (0.2.13)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->llama-stack) (3.4.0)\n",
"Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->llama-stack) (3.0.0)\n",
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->llama-stack) (2.18.0)\n",
"Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->llama-stack-client>=0.0.61->llama-stack) (1.2.2)\n",
"Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich->llama-stack) (0.1.2)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->llama-models>=0.0.61->llama-stack) (3.0.2)\n",
"Requirement already satisfied: numpy>=1.22.4 in /usr/local/lib/python3.10/dist-packages (from pandas->llama-stack-client>=0.0.61->llama-stack) (1.26.4)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->llama-stack-client>=0.0.61->llama-stack) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->llama-stack-client>=0.0.61->llama-stack) (2024.2)\n",
"Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->llama-stack-client>=0.0.61->llama-stack) (2024.2)\n",
"Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken->llama-models>=0.0.61->llama-stack) (2024.9.11)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client>=0.0.61->llama-stack) (1.17.0)\n"
]
}
],
"source": [ "source": [
"# NBVAL_SKIP\n", "# NBVAL_SKIP\n",
"!pip install -U llama-stack" "!pip install -U llama-stack"
@@ -120,198 +62,10 @@
    "id": "JQpLUSNjlGAM",
    "outputId": "2f7fec97-5511-4cae-d51e-6d262fbca19c"
   },
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: llama-stack in /usr/local/lib/python3.10/dist-packages (0.0.61)\r\n",
"Requirement already satisfied: blobfile in /usr/local/lib/python3.10/dist-packages (from llama-stack) (3.0.0)\r\n",
"Requirement already satisfied: fire in /usr/local/lib/python3.10/dist-packages (from llama-stack) (0.7.0)\r\n",
"Requirement already satisfied: httpx in /usr/local/lib/python3.10/dist-packages (from llama-stack) (0.28.1)\r\n",
"Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from llama-stack) (0.26.5)\r\n",
"Requirement already satisfied: llama-models>=0.0.61 in /usr/local/lib/python3.10/dist-packages (from llama-stack) (0.0.61)\r\n",
"Requirement already satisfied: llama-stack-client>=0.0.61 in /usr/local/lib/python3.10/dist-packages (from llama-stack) (0.0.61)\r\n",
"Requirement already satisfied: prompt-toolkit in /usr/local/lib/python3.10/dist-packages (from llama-stack) (3.0.48)\r\n",
"Requirement already satisfied: python-dotenv in /usr/local/lib/python3.10/dist-packages (from llama-stack) (1.0.1)\r\n",
"Requirement already satisfied: pydantic>=2 in /usr/local/lib/python3.10/dist-packages (from llama-stack) (2.10.3)\r\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from llama-stack) (2.32.3)\r\n",
"Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from llama-stack) (13.9.4)\r\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from llama-stack) (75.1.0)\r\n",
"Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from llama-stack) (2.5.0)\r\n",
"Requirement already satisfied: PyYAML in /usr/local/lib/python3.10/dist-packages (from llama-models>=0.0.61->llama-stack) (6.0.2)\r\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from llama-models>=0.0.61->llama-stack) (3.1.4)\r\n",
"Requirement already satisfied: tiktoken in /usr/local/lib/python3.10/dist-packages (from llama-models>=0.0.61->llama-stack) (0.8.0)\r\n",
"Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from llama-models>=0.0.61->llama-stack) (10.4.0)\r\n",
"Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (3.7.1)\r\n",
"Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (8.1.7)\r\n",
"Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (1.9.0)\r\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (2.2.2)\r\n",
"Requirement already satisfied: pyaml in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (24.12.1)\r\n",
"Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (1.3.1)\r\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (4.66.6)\r\n",
"Requirement already satisfied: typing-extensions<5,>=4.7 in /usr/local/lib/python3.10/dist-packages (from llama-stack-client>=0.0.61->llama-stack) (4.12.2)\r\n",
"Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx->llama-stack) (2024.8.30)\r\n",
"Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx->llama-stack) (1.0.7)\r\n",
"Requirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx->llama-stack) (3.10)\r\n",
"Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx->llama-stack) (0.14.0)\r\n",
"Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2->llama-stack) (0.7.0)\r\n",
"Requirement already satisfied: pydantic-core==2.27.1 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2->llama-stack) (2.27.1)\r\n",
"Requirement already satisfied: pycryptodomex>=3.8 in /usr/local/lib/python3.10/dist-packages (from blobfile->llama-stack) (3.21.0)\r\n",
"Requirement already satisfied: urllib3<3,>=1.25.3 in /usr/local/lib/python3.10/dist-packages (from blobfile->llama-stack) (2.2.3)\r\n",
"Requirement already satisfied: lxml>=4.9 in /usr/local/lib/python3.10/dist-packages (from blobfile->llama-stack) (5.3.0)\r\n",
"Requirement already satisfied: filelock>=3.0 in /usr/local/lib/python3.10/dist-packages (from blobfile->llama-stack) (3.16.1)\r\n",
"Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->llama-stack) (2024.9.0)\r\n",
"Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->llama-stack) (24.2)\r\n",
"Requirement already satisfied: wcwidth in /usr/local/lib/python3.10/dist-packages (from prompt-toolkit->llama-stack) (0.2.13)\r\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->llama-stack) (3.4.0)\r\n",
"Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->llama-stack) (3.0.0)\r\n",
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->llama-stack) (2.18.0)\r\n",
"Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->llama-stack-client>=0.0.61->llama-stack) (1.2.2)\n",
"Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich->llama-stack) (0.1.2)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->llama-models>=0.0.61->llama-stack) (3.0.2)\n",
"Requirement already satisfied: numpy>=1.22.4 in /usr/local/lib/python3.10/dist-packages (from pandas->llama-stack-client>=0.0.61->llama-stack) (1.26.4)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->llama-stack-client>=0.0.61->llama-stack) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->llama-stack-client>=0.0.61->llama-stack) (2024.2)\n",
"Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->llama-stack-client>=0.0.61->llama-stack) (2024.2)\n",
"Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken->llama-models>=0.0.61->llama-stack) (2024.9.11)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client>=0.0.61->llama-stack) (1.17.0)\n",
"Installing pip dependencies\n",
"Requirement already satisfied: blobfile in /usr/local/lib/python3.10/dist-packages (3.0.0)\n",
"Requirement already satisfied: chardet in /usr/local/lib/python3.10/dist-packages (5.2.0)\n",
"Requirement already satisfied: opentelemetry-sdk in /usr/local/lib/python3.10/dist-packages (1.28.2)\n",
"Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (1.13.1)\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (2.2.2)\n",
"Requirement already satisfied: autoevals in /usr/local/lib/python3.10/dist-packages (0.0.109)\n",
"Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (0.2.0)\n",
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (1.5.2)\n",
"Requirement already satisfied: pillow in /usr/local/lib/python3.10/dist-packages (10.4.0)\n",
"Requirement already satisfied: pypdf in /usr/local/lib/python3.10/dist-packages (5.1.0)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (4.66.6)\n",
"Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (3.9.1)\n",
"Requirement already satisfied: aiosqlite in /usr/local/lib/python3.10/dist-packages (0.20.0)\n",
"Requirement already satisfied: psycopg2-binary in /usr/local/lib/python3.10/dist-packages (2.9.10)\n",
"Requirement already satisfied: faiss-cpu in /usr/local/lib/python3.10/dist-packages (1.9.0.post1)\n",
"Requirement already satisfied: opentelemetry-exporter-otlp-proto-http in /usr/local/lib/python3.10/dist-packages (1.28.2)\n",
"Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.46.3)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (1.26.4)\n",
"Requirement already satisfied: chromadb-client in /usr/local/lib/python3.10/dist-packages (0.5.23)\n",
"Requirement already satisfied: openai in /usr/local/lib/python3.10/dist-packages (1.54.5)\n",
"Requirement already satisfied: redis in /usr/local/lib/python3.10/dist-packages (5.2.1)\n",
"Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (3.2.0)\n",
"Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (3.8.0)\n",
"Requirement already satisfied: together in /usr/local/lib/python3.10/dist-packages (1.3.5)\n",
"Requirement already satisfied: fastapi in /usr/local/lib/python3.10/dist-packages (0.115.6)\n",
"Requirement already satisfied: fire in /usr/local/lib/python3.10/dist-packages (0.7.0)\n",
"Requirement already satisfied: httpx in /usr/local/lib/python3.10/dist-packages (0.28.1)\n",
"Requirement already satisfied: uvicorn in /usr/local/lib/python3.10/dist-packages (0.32.1)\n",
"Requirement already satisfied: pycryptodomex>=3.8 in /usr/local/lib/python3.10/dist-packages (from blobfile) (3.21.0)\n",
"Requirement already satisfied: urllib3<3,>=1.25.3 in /usr/local/lib/python3.10/dist-packages (from blobfile) (2.2.3)\n",
"Requirement already satisfied: lxml>=4.9 in /usr/local/lib/python3.10/dist-packages (from blobfile) (5.3.0)\n",
"Requirement already satisfied: filelock>=3.0 in /usr/local/lib/python3.10/dist-packages (from blobfile) (3.16.1)\n",
"Requirement already satisfied: opentelemetry-api==1.28.2 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-sdk) (1.28.2)\n",
"Requirement already satisfied: opentelemetry-semantic-conventions==0.49b2 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-sdk) (0.49b2)\n",
"Requirement already satisfied: typing-extensions>=3.7.4 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-sdk) (4.12.2)\n",
"Requirement already satisfied: deprecated>=1.2.6 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (1.2.15)\n",
"Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (8.5.0)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2024.2)\n",
"Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas) (2024.2)\n",
"Requirement already satisfied: chevron in /usr/local/lib/python3.10/dist-packages (from autoevals) (0.14.0)\n",
"Requirement already satisfied: levenshtein in /usr/local/lib/python3.10/dist-packages (from autoevals) (0.26.1)\n",
"Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from autoevals) (6.0.2)\n",
"Requirement already satisfied: braintrust_core==0.0.54 in /usr/local/lib/python3.10/dist-packages (from autoevals) (0.0.54)\n",
"Requirement already satisfied: jsonschema in /usr/local/lib/python3.10/dist-packages (from autoevals) (4.23.0)\n",
"Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.4.2)\n",
"Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (3.5.0)\n",
"Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk) (8.1.7)\n",
"Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk) (2024.9.11)\n",
"Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from faiss-cpu) (24.2)\n",
"Requirement already satisfied: googleapis-common-protos~=1.52 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-http) (1.66.0)\n",
"Requirement already satisfied: opentelemetry-exporter-otlp-proto-common==1.28.2 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-http) (1.28.2)\n",
"Requirement already satisfied: opentelemetry-proto==1.28.2 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-http) (1.28.2)\n",
"Requirement already satisfied: requests~=2.7 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-http) (2.32.3)\n",
"Requirement already satisfied: protobuf<6.0,>=5.0 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-proto==1.28.2->opentelemetry-exporter-otlp-proto-http) (5.29.1)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.26.5)\n",
"Requirement already satisfied: tokenizers<0.21,>=0.20 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.20.3)\n",
"Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.5)\n",
"Requirement already satisfied: opentelemetry-exporter-otlp-proto-grpc>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from chromadb-client) (1.28.2)\n",
"Requirement already satisfied: overrides>=7.3.1 in /usr/local/lib/python3.10/dist-packages (from chromadb-client) (7.7.0)\n",
"Requirement already satisfied: posthog>=2.4.0 in /usr/local/lib/python3.10/dist-packages (from chromadb-client) (3.7.4)\n",
"Requirement already satisfied: pydantic>=1.9 in /usr/local/lib/python3.10/dist-packages (from chromadb-client) (2.10.3)\n",
"Requirement already satisfied: tenacity>=8.2.3 in /usr/local/lib/python3.10/dist-packages (from chromadb-client) (9.0.0)\n",
"Requirement already satisfied: orjson>=3.9.12 in /usr/local/lib/python3.10/dist-packages (from chromadb-client) (3.10.12)\n",
"Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai) (3.7.1)\n",
"Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from openai) (1.9.0)\n",
"Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from openai) (0.8.2)\n",
"Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai) (1.3.1)\n",
"Requirement already satisfied: async-timeout>=4.0.3 in /usr/local/lib/python3.10/dist-packages (from redis) (4.0.3)\n",
"Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (17.0.0)\n",
"Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.3.8)\n",
"Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.5.0)\n",
"Requirement already satisfied: multiprocess<0.70.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.70.16)\n",
"Requirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets) (2024.9.0)\n",
"Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.11.10)\n",
"Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.3.1)\n",
"Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (0.12.1)\n",
"Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (4.55.2)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (1.4.7)\n",
"Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib) (3.2.0)\n",
"Requirement already satisfied: eval-type-backport<0.3.0,>=0.1.3 in /usr/local/lib/python3.10/dist-packages (from together) (0.2.0)\n",
"Requirement already satisfied: rich<14.0.0,>=13.8.1 in /usr/local/lib/python3.10/dist-packages (from together) (13.9.4)\n",
"Requirement already satisfied: tabulate<0.10.0,>=0.9.0 in /usr/local/lib/python3.10/dist-packages (from together) (0.9.0)\n",
"Requirement already satisfied: typer<0.14,>=0.9 in /usr/local/lib/python3.10/dist-packages (from together) (0.13.1)\n",
"Requirement already satisfied: starlette<0.42.0,>=0.40.0 in /usr/local/lib/python3.10/dist-packages (from fastapi) (0.41.3)\n",
"Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from fire) (2.5.0)\n",
"Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx) (2024.8.30)\n",
"Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx) (1.0.7)\n",
"Requirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx) (3.10)\n",
"Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx) (0.14.0)\n",
"Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.4.4)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n",
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (24.2.0)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.5.0)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.1.0)\n",
"Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (0.2.1)\n",
"Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.18.3)\n",
"Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai) (1.2.2)\n",
"Requirement already satisfied: wrapt<2,>=1.10 in /usr/local/lib/python3.10/dist-packages (from deprecated>=1.2.6->opentelemetry-api==1.28.2->opentelemetry-sdk) (1.17.0)\n",
"Requirement already satisfied: grpcio<2.0.0,>=1.63.2 in /usr/local/lib/python3.10/dist-packages (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb-client) (1.68.1)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from posthog>=2.4.0->chromadb-client) (1.17.0)\n",
"Requirement already satisfied: monotonic>=1.5 in /usr/local/lib/python3.10/dist-packages (from posthog>=2.4.0->chromadb-client) (1.6)\n",
"Requirement already satisfied: backoff>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from posthog>=2.4.0->chromadb-client) (2.2.1)\n",
"Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9->chromadb-client) (0.7.0)\n",
"Requirement already satisfied: pydantic-core==2.27.1 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.9->chromadb-client) (2.27.1)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests~=2.7->opentelemetry-exporter-otlp-proto-http) (3.4.0)\n",
"Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich<14.0.0,>=13.8.1->together) (3.0.0)\n",
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich<14.0.0,>=13.8.1->together) (2.18.0)\n",
"Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from typer<0.14,>=0.9->together) (1.5.4)\n",
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema->autoevals) (2024.10.1)\n",
"Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema->autoevals) (0.35.1)\n",
"Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema->autoevals) (0.22.3)\n",
"Requirement already satisfied: rapidfuzz<4.0.0,>=3.9.0 in /usr/local/lib/python3.10/dist-packages (from levenshtein->autoevals) (3.10.1)\n",
"Requirement already satisfied: zipp>=3.20 in /usr/local/lib/python3.10/dist-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api==1.28.2->opentelemetry-sdk) (3.21.0)\n",
"Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich<14.0.0,>=13.8.1->together) (0.1.2)\n",
"sentence-transformers --no-deps\n",
"Requirement already satisfied: sentence-transformers in /usr/local/lib/python3.10/dist-packages (3.2.1)\n",
"torch --index-url https://download.pytorch.org/whl/cpu\n",
"Looking in indexes: https://download.pytorch.org/whl/cpu\n",
"Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.5.1+cu121)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.16.1)\n",
"Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch) (4.12.2)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.4.2)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.4)\n",
"Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2024.9.0)\n",
"Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch) (1.13.1)\n",
"Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (3.0.2)\n",
"\u001b[32mBuild Successful!\u001b[0m\n"
]
}
],
"source": [ "source": [
"# NBVAL_SKIP\n", "# NBVAL_SKIP\n",
"!llama stack build --template together --image-type venv --image-name __system__" "!UV_SYSTEM_PYTHON=1 llama stack build --template together --image-type venv"
] ]
}, },
{ {


@@ -27,19 +27,19 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3-8B-Instruct (meta/llama3-8b-instruct)`
-- `meta-llama/Llama-3-70B-Instruct (meta/llama3-70b-instruct)`
-- `meta-llama/Llama-3.1-8B-Instruct (meta/llama-3.1-8b-instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (meta/llama-3.1-70b-instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (meta/llama-3.1-405b-instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (meta/llama-3.2-1b-instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (meta/llama-3.2-3b-instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (meta/llama-3.2-11b-vision-instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (meta/llama-3.2-90b-vision-instruct)`
-- `nvidia/llama-3.2-nv-embedqa-1b-v2 (nvidia/llama-3.2-nv-embedqa-1b-v2)`
-- `nvidia/nv-embedqa-e5-v5 (nvidia/nv-embedqa-e5-v5)`
-- `nvidia/nv-embedqa-mistral-7b-v2 (nvidia/nv-embedqa-mistral-7b-v2)`
-- `snowflake/arctic-embed-l (snowflake/arctic-embed-l)`
+- `meta/llama3-8b-instruct (aliases: meta-llama/Llama-3-8B-Instruct)`
+- `meta/llama3-70b-instruct (aliases: meta-llama/Llama-3-70B-Instruct)`
+- `meta/llama-3.1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta/llama-3.1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta/llama-3.1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `meta/llama-3.2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `meta/llama-3.2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `meta/llama-3.2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `meta/llama-3.2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `nvidia/llama-3.2-nv-embedqa-1b-v2`
+- `nvidia/nv-embedqa-e5-v5`
+- `nvidia/nv-embedqa-mistral-7b-v2`
+- `snowflake/arctic-embed-l`
 ### Prerequisite: API Keys


@@ -34,9 +34,9 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3.1-8B-Instruct (meta.llama3-1-8b-instruct-v1:0)`
-- `meta-llama/Llama-3.1-70B-Instruct (meta.llama3-1-70b-instruct-v1:0)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (meta.llama3-1-405b-instruct-v1:0)`
+- `meta.llama3-1-8b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta.llama3-1-70b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta.llama3-1-405b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
 ### Prerequisite: API Keys


@@ -27,8 +27,8 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3.1-8B-Instruct (llama3.1-8b)`
-- `meta-llama/Llama-3.3-70B-Instruct (llama-3.3-70b)`
+- `llama3.1-8b (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `llama-3.3-70b (aliases: meta-llama/Llama-3.3-70B-Instruct)`
 ### Prerequisite: API Keys


@@ -37,17 +37,17 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3.1-8B-Instruct (accounts/fireworks/models/llama-v3p1-8b-instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (accounts/fireworks/models/llama-v3p1-70b-instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (accounts/fireworks/models/llama-v3p1-405b-instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (accounts/fireworks/models/llama-v3p2-1b-instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (accounts/fireworks/models/llama-v3p2-3b-instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (accounts/fireworks/models/llama-v3p2-11b-vision-instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (accounts/fireworks/models/llama-v3p2-90b-vision-instruct)`
-- `meta-llama/Llama-3.3-70B-Instruct (accounts/fireworks/models/llama-v3p3-70b-instruct)`
-- `meta-llama/Llama-Guard-3-8B (accounts/fireworks/models/llama-guard-3-8b)`
-- `meta-llama/Llama-Guard-3-11B-Vision (accounts/fireworks/models/llama-guard-3-11b-vision)`
-- `nomic-ai/nomic-embed-text-v1.5 (nomic-ai/nomic-embed-text-v1.5)`
+- `accounts/fireworks/models/llama-v3p1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `accounts/fireworks/models/llama-v3p1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `accounts/fireworks/models/llama-v3p1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `accounts/fireworks/models/llama-v3p2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `accounts/fireworks/models/llama-v3p3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `accounts/fireworks/models/llama-guard-3-8b (aliases: meta-llama/Llama-Guard-3-8B)`
+- `accounts/fireworks/models/llama-guard-3-11b-vision (aliases: meta-llama/Llama-Guard-3-11B-Vision)`
+- `nomic-ai/nomic-embed-text-v1.5`
 ### Prerequisite: API Keys


@@ -37,11 +37,11 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3.1-8B-Instruct (groq/llama3-8b-8192)`
-- `meta-llama/Llama-3.1-8B-Instruct (groq/llama-3.1-8b-instant)`
-- `meta-llama/Llama-3-70B-Instruct (groq/llama3-70b-8192)`
-- `meta-llama/Llama-3.3-70B-Instruct (groq/llama-3.3-70b-versatile)`
-- `meta-llama/Llama-3.2-3B-Instruct (groq/llama-3.2-3b-preview)`
+- `groq/llama3-8b-8192 (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `groq/llama-3.1-8b-instant`
+- `groq/llama3-70b-8192 (aliases: meta-llama/Llama-3-70B-Instruct)`
+- `groq/llama-3.3-70b-versatile (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `groq/llama-3.2-3b-preview (aliases: meta-llama/Llama-3.2-3B-Instruct)`
 ### Prerequisite: API Keys


@@ -141,17 +141,21 @@ ollama run <model_name>
 To make sure that the model is being served correctly, run `ollama ps` to get a list of models being served by ollama.
 ```
 $ ollama ps
-NAME                         ID              SIZE     PROCESSOR    UNTIL
-llama3.1:8b-instruct-fp16    4aacac419454    17 GB    100% GPU     4 minutes from now
+NAME                         ID              SIZE      PROCESSOR    UNTIL
+llama3.2:3b-instruct-fp16    195a8c01d91e    8.6 GB    100% GPU     9 minutes from now
 ```
 To verify that the model served by ollama is correctly connected to the Llama Stack server, run:
 ```bash
 $ llama-stack-client models list
-+----------------------+----------------------+---------------+-----------------------------------------------+
-| identifier           | llama_model          | provider_id   | metadata                                      |
-+======================+======================+===============+===============================================+
-| Llama3.1-8B-Instruct | Llama3.1-8B-Instruct | ollama0       | {'ollama_model': 'llama3.1:8b-instruct-fp16'} |
-+----------------------+----------------------+---------------+-----------------------------------------------+
+
+Available Models
+
+┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ model_type   ┃ identifier                           ┃ provider_resource_id         ┃ metadata  ┃ provider_id ┃
+┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ llm          │ meta-llama/Llama-3.2-3B-Instruct     │ llama3.2:3b-instruct-fp16    │           │ ollama      │
+└──────────────┴──────────────────────────────────────┴──────────────────────────────┴───────────┴─────────────┘
+
+Total models: 1
 ```


@@ -34,15 +34,15 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3.1-8B-Instruct (Meta-Llama-3.1-8B-Instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (Meta-Llama-3.1-70B-Instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (Meta-Llama-3.1-405B-Instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (Meta-Llama-3.2-1B-Instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (Meta-Llama-3.2-3B-Instruct)`
-- `meta-llama/Llama-3.3-70B-Instruct (Meta-Llama-3.3-70B-Instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (Llama-3.2-11B-Vision-Instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (Llama-3.2-90B-Vision-Instruct)`
-- `meta-llama/Llama-Guard-3-8B (Meta-Llama-Guard-3-8B)`
+- `Meta-Llama-3.1-8B-Instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `Meta-Llama-3.1-70B-Instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `Meta-Llama-3.1-405B-Instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `Meta-Llama-3.2-1B-Instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `Meta-Llama-3.2-3B-Instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `Meta-Llama-3.3-70B-Instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `Llama-3.2-11B-Vision-Instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `Llama-3.2-90B-Vision-Instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)`
 ### Prerequisite: API Keys


@@ -37,17 +37,17 @@ The following environment variables can be configured:
 The following models are available by default:
-- `meta-llama/Llama-3.1-8B-Instruct`
-- `meta-llama/Llama-3.1-70B-Instruct`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8`
-- `meta-llama/Llama-3.2-3B-Instruct`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct`
-- `meta-llama/Llama-3.3-70B-Instruct`
-- `meta-llama/Llama-Guard-3-8B`
-- `meta-llama/Llama-Guard-3-11B-Vision`
-- `togethercomputer/m2-bert-80M-8k-retrieval`
-- `togethercomputer/m2-bert-80M-32k-retrieval`
+- `meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `meta-llama/Llama-3.2-3B-Instruct-Turbo (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `meta-llama/Llama-3.3-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `meta-llama/Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)`
+- `meta-llama/Llama-Guard-3-11B-Vision-Turbo (aliases: meta-llama/Llama-Guard-3-11B-Vision)`
+- `togethercomputer/m2-bert-80M-8k-retrieval`
+- `togethercomputer/m2-bert-80M-32k-retrieval`
 ### Prerequisite: API Keys


@@ -102,12 +102,18 @@ Let's use the `llama-stack-client` CLI to check the connectivity to the server.
 $ llama-stack-client configure --endpoint http://localhost:$LLAMA_STACK_PORT
 > Enter the API key (leave empty if no key is needed):
 Done! You can now use the Llama Stack Client CLI with endpoint http://localhost:8321
 $ llama-stack-client models list
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓
-┃ identifier                       ┃ provider_id ┃ provider_resource_id      ┃ metadata ┃
-┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩
-│ meta-llama/Llama-3.2-3B-Instruct │ ollama      │ llama3.2:3b-instruct-fp16 │          │
-└──────────────────────────────────┴─────────────┴───────────────────────────┴──────────┘
+
+Available Models
+
+┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ model_type   ┃ identifier                           ┃ provider_resource_id         ┃ metadata  ┃ provider_id ┃
+┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ llm          │ meta-llama/Llama-3.2-3B-Instruct     │ llama3.2:3b-instruct-fp16    │           │ ollama      │
+└──────────────┴──────────────────────────────────────┴──────────────────────────────┴───────────┴─────────────┘
+
+Total models: 1
 ```
 You can test basic Llama inference completion using the CLI too.
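For example, a quick smoke test (the prompt is illustrative; any short message works):
```bash
$ llama-stack-client inference chat-completion --message "hello, what model are you?"
```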


@@ -58,11 +58,15 @@ llama-stack-client providers list
 llama-stack-client models list
 ```
 ```
-+----------------------+----------------------+---------------+----------------------------------------------------------+
-| identifier           | llama_model          | provider_id   | metadata                                                 |
-+======================+======================+===============+==========================================================+
-| Llama3.1-8B-Instruct | Llama3.1-8B-Instruct | tgi0          | {'huggingface_repo': 'meta-llama/Llama-3.1-8B-Instruct'} |
-+----------------------+----------------------+---------------+----------------------------------------------------------+
+Available Models
+
+┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ model_type   ┃ identifier                           ┃ provider_resource_id         ┃ metadata  ┃ provider_id ┃
+┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ llm          │ meta-llama/Llama-3.2-3B-Instruct     │ llama3.2:3b-instruct-fp16    │           │ ollama      │
+└──────────────┴──────────────────────────────────────┴──────────────────────────────┴───────────┴─────────────┘
+
+Total models: 1
 ```
 ### `llama-stack-client models get`


@@ -9,6 +9,7 @@ import argparse
 from .download import Download
 from .model import ModelParser
 from .stack import StackParser
+from .stack.utils import print_subcommand_description
 from .verify_download import VerifyDownload
@@ -20,6 +21,7 @@ class LlamaCLIParser:
             prog="llama",
             description="Welcome to the Llama CLI",
             add_help=True,
+            formatter_class=argparse.RawTextHelpFormatter,
         )

         # Default command is to print help
@@ -33,6 +35,8 @@ class LlamaCLIParser:
         Download.create(subparsers)
         VerifyDownload.create(subparsers)

+        print_subcommand_description(self.parser, subparsers)
+
     def parse_args(self) -> argparse.Namespace:
         return self.parser.parse_args()


@@ -12,6 +12,7 @@ from llama_stack.cli.model.list import ModelList
 from llama_stack.cli.model.prompt_format import ModelPromptFormat
 from llama_stack.cli.model.remove import ModelRemove
 from llama_stack.cli.model.verify_download import ModelVerifyDownload
+from llama_stack.cli.stack.utils import print_subcommand_description
 from llama_stack.cli.subcommand import Subcommand
@@ -24,6 +25,7 @@ class ModelParser(Subcommand):
             "model",
             prog="llama model",
             description="Work with llama models",
+            formatter_class=argparse.RawTextHelpFormatter,
         )

         self.parser.set_defaults(func=lambda args: self.parser.print_help())
@@ -37,3 +39,5 @@ class ModelParser(Subcommand):
         ModelDescribe.create(subparsers)
         ModelVerifyDownload.create(subparsers)
         ModelRemove.create(subparsers)
+
+        print_subcommand_description(self.parser, subparsers)


@@ -38,7 +38,7 @@ from llama_stack.distribution.distribution import get_provider_registry
 from llama_stack.distribution.resolver import InvalidProviderError
 from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
 from llama_stack.distribution.utils.dynamic import instantiate_class_type
-from llama_stack.distribution.utils.exec import formulate_run_args, in_notebook, run_with_pty
+from llama_stack.distribution.utils.exec import formulate_run_args, run_with_pty
 from llama_stack.distribution.utils.image_types import ImageType
 from llama_stack.providers.datatypes import Api
@@ -65,8 +65,6 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
     if args.image_type == "venv":
         current_venv = os.environ.get("VIRTUAL_ENV")
         image_name = args.image_name or current_venv
-        if not image_name and in_notebook():
-            image_name = "__system__"
     elif args.image_type == "conda":
         current_conda_env = os.environ.get("CONDA_DEFAULT_ENV")
         image_name = args.image_name or current_conda_env
@@ -291,6 +289,8 @@ def _run_stack_build_command_from_build_config(
         if not image_name:
             raise ValueError("Please specify an image name when building a conda image")
     elif build_config.image_type == ImageType.venv.value:
+        if not image_name and os.environ.get("UV_SYSTEM_PYTHON"):
+            image_name = "__system__"
         if not image_name:
             raise ValueError("Please specify an image name when building a venv image")
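In other words, the implicit notebook auto-detection is replaced by an explicit opt-in: set `UV_SYSTEM_PYTHON` to build into the system (uv-managed) Python, otherwise pass `--image-name`. The updated notebook cells above use exactly this form:
```bash
# Opt in to installing into the system Python instead of a named venv
UV_SYSTEM_PYTHON=1 llama stack build --template together --image-type venv
```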


@@ -7,6 +7,7 @@
 import argparse
 from importlib.metadata import version

+from llama_stack.cli.stack.utils import print_subcommand_description
 from llama_stack.cli.subcommand import Subcommand

 from .build import StackBuild
@@ -22,6 +23,7 @@ class StackParser(Subcommand):
             "stack",
             prog="llama stack",
             description="Operations for the Llama Stack / Distributions",
+            formatter_class=argparse.RawTextHelpFormatter,
         )

         self.parser.add_argument(
@@ -39,3 +41,5 @@ class StackParser(Subcommand):
         StackListApis.create(subparsers)
         StackListProviders.create(subparsers)
         StackRun.create(subparsers)
+
+        print_subcommand_description(self.parser, subparsers)


@@ -0,0 +1,14 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+def print_subcommand_description(parser, subparsers):
+    """Print descriptions of subcommands."""
+    description_text = ""
+    for name, subcommand in subparsers.choices.items():
+        description = subcommand.description
+        description_text += f"  {name:<21} {description}\n"
+    parser.epilog = description_text
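A standalone sketch of how this helper is wired up by the parsers above; the subcommand names and descriptions are taken from this diff, and `RawTextHelpFormatter` is what keeps the epilog's manual line breaks intact:
```python
import argparse


def print_subcommand_description(parser, subparsers):
    """Build an epilog listing each subcommand with its description."""
    description_text = ""
    for name, subcommand in subparsers.choices.items():
        description_text += f"  {name:<21} {subcommand.description}\n"
    parser.epilog = description_text


parser = argparse.ArgumentParser(
    prog="llama",
    description="Welcome to the Llama CLI",
    formatter_class=argparse.RawTextHelpFormatter,  # preserve the epilog's line breaks
)
subparsers = parser.add_subparsers(title="subcommands")
subparsers.add_parser("model", description="Work with llama models")
subparsers.add_parser("stack", description="Operations for the Llama Stack / Distributions")

print_subcommand_description(parser, subparsers)
parser.print_help()  # help output now ends with one "name  description" line per subcommand
```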


@@ -1059,9 +1059,6 @@ async def execute_tool_call_maybe(
     group_name = tool_to_group.get(name, None)
     if group_name is None:
         raise ValueError(f"Tool {name} not found in any tool group")
-    # get the arguments generated by the model and augment with toolgroup arg overrides for the agent
-    tool_call_args = tool_call.arguments
-    tool_call_args.update(toolgroup_args.get(group_name, {}))
     if isinstance(name, BuiltinTool):
         if name == BuiltinTool.brave_search:
             name = WEB_SEARCH_TOOL
@@ -1070,10 +1067,12 @@ async def execute_tool_call_maybe(
     result = await tool_runtime_api.invoke_tool(
         tool_name=name,
-        kwargs=dict(
-            session_id=session_id,
-            **tool_call_args,
-        ),
+        kwargs={
+            "session_id": session_id,
+            # get the arguments generated by the model and augment with toolgroup arg overrides for the agent
+            **tool_call.arguments,
+            **toolgroup_args.get(group_name, {}),
+        },
     )
     return result
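The behavioral nuance of this rewrite, in a minimal sketch with hypothetical values: the toolgroup overrides are unpacked after the model-generated arguments, so they win on key conflicts, and `tool_call.arguments` is no longer mutated in place by an `update()` call:
```python
# Hypothetical example values, not taken from the diff:
tool_call_arguments = {"query": "weather in SF", "top_k": 3}  # generated by the model
toolgroup_args = {"builtin::web_search": {"top_k": 10}}       # agent-level overrides
group_name = "builtin::web_search"

kwargs = {
    "session_id": "session-123",
    **tool_call_arguments,                 # model args first
    **toolgroup_args.get(group_name, {}),  # overrides last, so they take precedence
}

assert kwargs["top_k"] == 10  # the override wins
assert tool_call_arguments == {"query": "weather in SF", "top_k": 3}  # left untouched
```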


@@ -11,5 +11,5 @@ from pydantic import BaseModel

 class SentenceTransformersInferenceConfig(BaseModel):
     @classmethod
-    def sample_run_config(cls) -> Dict[str, Any]:
+    def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
         return {}
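A hedged note on why the signature gains `**kwargs`: distribution templates render every provider's `sample_run_config` with template variables, so even configs that ignore them must accept arbitrary keyword arguments. A sketch using the class above; the variable name is an assumption:
```python
# Assumed call-site shape; __distro_dir__ is an assumed template variable name.
config = SentenceTransformersInferenceConfig.sample_run_config(
    __distro_dir__="~/.llama/distributions/together",
)
assert config == {}  # this provider needs no configuration
```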


@@ -46,14 +46,14 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )

 from .config import CerebrasImplConfig
-from .models import model_entries
+from .models import MODEL_ENTRIES


 class CerebrasInferenceAdapter(ModelRegistryHelper, Inference):
     def __init__(self, config: CerebrasImplConfig) -> None:
         ModelRegistryHelper.__init__(
             self,
-            model_entries=model_entries,
+            model_entries=MODEL_ENTRIES,
         )
         self.config = config


@@ -9,7 +9,7 @@ from llama_stack.providers.utils.inference.model_registry import (
     build_hf_repo_model_entry,
 )

-model_entries = [
+MODEL_ENTRIES = [
     build_hf_repo_model_entry(
         "llama3.1-8b",
         CoreModelId.llama3_1_8b_instruct.value,


@@ -5,10 +5,13 @@
 # the root directory of this source tree.

 from llama_stack.models.llama.sku_list import CoreModelId
-from llama_stack.providers.utils.inference.model_registry import build_model_entry
+from llama_stack.providers.utils.inference.model_registry import (
+    build_hf_repo_model_entry,
+    build_model_entry,
+)

 MODEL_ENTRIES = [
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama3-8b-8192",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
@@ -16,11 +19,11 @@ MODEL_ENTRIES = [
         "groq/llama-3.1-8b-instant",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama3-70b-8192",
         CoreModelId.llama3_70b_instruct.value,
     ),
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama-3.3-70b-versatile",
         CoreModelId.llama3_3_70b_instruct.value,
     ),
@@ -28,7 +31,7 @@ MODEL_ENTRIES = [
     # Preview models aren't recommended for production use, but we include this one
     # to pass the test fixture
     # TODO(aidand): Replace this with a stable model once Groq supports it
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama-3.2-3b-preview",
         CoreModelId.llama3_2_3b_instruct.value,
     ),


@@ -11,7 +11,7 @@ from llama_stack.providers.utils.inference.model_registry import (
     build_hf_repo_model_entry,
 )

-_MODEL_ENTRIES = [
+MODEL_ENTRIES = [
     build_hf_repo_model_entry(
         "meta/llama3-8b-instruct",
         CoreModelId.llama3_8b_instruct.value,


@@ -8,7 +8,7 @@ import logging
 import warnings
 from typing import AsyncIterator, List, Optional, Union

-from openai import APIConnectionError, AsyncOpenAI
+from openai import APIConnectionError, AsyncOpenAI, BadRequestError

 from llama_stack.apis.common.content_types import (
     InterleavedContent,
@@ -47,7 +47,7 @@ from llama_stack.providers.utils.inference.openai_compat import (
 from llama_stack.providers.utils.inference.prompt_adapter import content_has_media

 from . import NVIDIAConfig
-from .models import _MODEL_ENTRIES
+from .models import MODEL_ENTRIES
 from .openai_utils import (
     convert_chat_completion_request,
     convert_completion_request,
@@ -62,7 +62,7 @@ logger = logging.getLogger(__name__)
 class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
     def __init__(self, config: NVIDIAConfig) -> None:
         # TODO(mf): filter by available models
-        ModelRegistryHelper.__init__(self, model_entries=_MODEL_ENTRIES)
+        ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)

         logger.info(f"Initializing NVIDIAInferenceAdapter({config.url})...")
@@ -144,19 +144,38 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
         #
         # we can ignore str and always pass List[str] to OpenAI
         #
-        flat_contents = [
-            item.text if isinstance(item, TextContentItem) else item
-            for content in contents
-            for item in (content if isinstance(content, list) else [content])
-        ]
+        flat_contents = [content.text if isinstance(content, TextContentItem) else content for content in contents]
         input = [content.text if isinstance(content, TextContentItem) else content for content in flat_contents]
         model = self.get_provider_model_id(model_id)

-        response = await self._client.embeddings.create(
-            model=model,
-            input=input,
-            # extra_body={"input_type": "passage"|"query"},  # TODO(mf): how to tell caller's intent?
-        )
+        extra_body = {}
+
+        if text_truncation is not None:
+            text_truncation_options = {
+                TextTruncation.none: "NONE",
+                TextTruncation.end: "END",
+                TextTruncation.start: "START",
+            }
+            extra_body["truncate"] = text_truncation_options[text_truncation]
+
+        if output_dimension is not None:
+            extra_body["dimensions"] = output_dimension
+
+        if task_type is not None:
+            task_type_options = {
+                EmbeddingTaskType.document: "passage",
+                EmbeddingTaskType.query: "query",
+            }
+            extra_body["input_type"] = task_type_options[task_type]
+
+        try:
+            response = await self._client.embeddings.create(
+                model=model,
+                input=input,
+                extra_body=extra_body,
+            )
+        except BadRequestError as e:
+            raise ValueError(f"Failed to get embeddings: {e}") from e

         #
         # OpenAI: CreateEmbeddingResponse(data=[Embedding(embedding=List[float], ...)], ...)
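
Because the NVIDIA endpoint speaks the OpenAI wire format, the adapter passes the NIM-specific knobs (`truncate`, `dimensions`, `input_type`) through the `openai` client's `extra_body` parameter, which merges extra keys into the request JSON. A standalone sketch of the same call, assuming a reachable endpoint (the base URL and model name below are placeholders):

```python
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="none")  # placeholder endpoint
    response = await client.embeddings.create(
        model="nvidia/nv-embedqa-e5-v5",  # placeholder model name
        input=["What is the capital of France?"],
        # extra_body keys are merged into the request payload verbatim;
        # "truncate" and "input_type" are NIM extensions, not OpenAI schema fields.
        extra_body={"truncate": "END", "input_type": "query"},
    )
    print(len(response.data[0].embedding))


asyncio.run(main())
```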


@@ -111,7 +111,8 @@
                 "first_name": "Michael",
                 "last_name": "Jordan",
                 "year_of_birth": 1963,
-                "num_seasons_in_nba": 15
+                "num_seasons_in_nba": 15,
+                "year_for_draft": 1984
             }
         }
     },


@@ -126,6 +126,37 @@ class LiteLLMOpenAIMixin(
         ):
             yield chunk

+    def _add_additional_properties_recursive(self, schema):
+        """
+        Recursively add additionalProperties: False to all object schemas
+        """
+        if isinstance(schema, dict):
+            if schema.get("type") == "object":
+                schema["additionalProperties"] = False
+
+                # Add required field with all property keys if properties exist
+                if "properties" in schema and schema["properties"]:
+                    schema["required"] = list(schema["properties"].keys())
+
+            if "properties" in schema:
+                for prop_schema in schema["properties"].values():
+                    self._add_additional_properties_recursive(prop_schema)
+
+            for key in ["anyOf", "allOf", "oneOf"]:
+                if key in schema:
+                    for sub_schema in schema[key]:
+                        self._add_additional_properties_recursive(sub_schema)
+
+            if "not" in schema:
+                self._add_additional_properties_recursive(schema["not"])
+
+            # Handle $defs/$ref
+            if "$defs" in schema:
+                for def_schema in schema["$defs"].values():
+                    self._add_additional_properties_recursive(def_schema)
+
+        return schema
+
     async def _get_params(self, request: ChatCompletionRequest) -> dict:
         input_dict = {}

@@ -140,6 +171,10 @@ class LiteLLMOpenAIMixin(
             name = fmt["title"]
             del fmt["title"]
             fmt["additionalProperties"] = False
+
+            # Apply additionalProperties: False recursively to all objects
+            fmt = self._add_additional_properties_recursive(fmt)
+
             input_dict["response_format"] = {
                 "type": "json_schema",
                 "json_schema": {


@@ -27,7 +27,9 @@ from openai.types.chat import (
 from openai.types.chat import (
     ChatCompletionMessageParam as OpenAIChatCompletionMessage,
 )
-from openai.types.chat import ChatCompletionMessageToolCall
+from openai.types.chat import (
+    ChatCompletionMessageToolCall,
+)
 from openai.types.chat import (
     ChatCompletionMessageToolCallParam as OpenAIChatCompletionMessageToolCall,
 )
@@ -199,7 +201,9 @@ def convert_openai_completion_logprobs_stream(text: str, logprobs: Optional[Unio
     return None

-def process_completion_response(response: OpenAICompatCompletionResponse) -> CompletionResponse:
+def process_completion_response(
+    response: OpenAICompatCompletionResponse,
+) -> CompletionResponse:
     choice = response.choices[0]
     # drop suffix <eot_id> if present and return stop reason as end of turn
     if choice.text.endswith("<|eot_id|>"):
@@ -492,7 +496,9 @@ class UnparseableToolCall(BaseModel):
     arguments: str = ""

-async def convert_message_to_openai_dict_new(message: Message | Dict) -> OpenAIChatCompletionMessage:
+async def convert_message_to_openai_dict_new(
+    message: Message | Dict,
+) -> OpenAIChatCompletionMessage:
     """
     Convert a Message to an OpenAI API-compatible dictionary.
     """
@@ -942,7 +948,7 @@ async def convert_openai_chat_completion_stream(
             )
             yield ChatCompletionResponseStreamChunk(
                 event=ChatCompletionResponseEvent(
-                    event_type=ChatCompletionResponseEventType.complete,
+                    event_type=ChatCompletionResponseEventType.progress,
                     delta=ToolCallDelta(
                         tool_call=tool_call,
                         parse_status=ToolCallParseStatus.succeeded,
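
The one-word change in the last hunk is easy to miss: a response stream is expected to carry exactly one `complete` event, at the very end, so a successfully parsed tool call emitted mid-stream has to be labeled `progress`. A consumer that treats `complete` as terminal, like this deliberately simplified sketch, would otherwise cut the turn short at the tool-call chunk:

```python
# Simplified stand-in for a stream consumer; the real chunks are pydantic models.
def consume(events):
    collected = []
    for event_type, payload in events:
        if event_type == "complete":
            break  # treated as end-of-turn by downstream consumers
        collected.append(payload)
    return collected


# With the bug, the parsed tool call arrived as "complete" and ended the turn early:
buggy = [("progress", "text"), ("complete", "tool_call"), ("complete", None)]
fixed = [("progress", "text"), ("progress", "tool_call"), ("complete", None)]

assert consume(buggy) == ["text"]               # tool call lost
assert consume(fixed) == ["text", "tool_call"]  # tool call delivered
```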


@@ -6,12 +6,10 @@

 from pathlib import Path

-from llama_stack.apis.models import ModelInput
 from llama_stack.distribution.datatypes import Provider, ToolGroupInput
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.bedrock.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry

 def get_distribution_template() -> DistributionTemplate:
@@ -39,16 +37,11 @@ def get_distribution_template() -> DistributionTemplate:
         config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"),
     )

-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model],
-            provider_model_id=m.provider_model_id,
-            provider_id="bedrock",
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "bedrock": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)

     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -71,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
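
Every distribution template in this commit repeats the same refactor: the hand-rolled `core_model_to_hf_repo` loop is replaced by one `get_model_registry(available_models)` call on a provider-id-to-entries dict. The helper's implementation is not part of this diff; judging from the inlined code it replaces and the regenerated run.yaml files below (each model now appears under both its provider-native id and its HF alias), it plausibly behaves like this sketch, where `ProviderModelEntry` is an abbreviated stand-in type:

```python
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional


@dataclass
class ProviderModelEntry:
    # Abbreviated stand-in for the real provider model entry type.
    provider_model_id: str
    llama_model: Optional[str] = None  # treated here as the HF repo alias directly
    model_type: str = "llm"
    metadata: Dict[str, Any] = field(default_factory=dict)


def get_model_registry(available_models: Dict[str, List[ProviderModelEntry]]) -> List[Dict[str, Any]]:
    """Sketch only: emit one registration per addressable id, per provider."""
    models = []
    for provider_id, entries in available_models.items():
        for entry in entries:
            ids = [entry.provider_model_id]
            if entry.llama_model:
                ids.append(entry.llama_model)  # e.g. "meta-llama/Llama-3.1-8B-Instruct"
            for model_id in ids:
                models.append(
                    {
                        "model_id": model_id,
                        "provider_model_id": entry.provider_model_id,
                        "provider_id": provider_id,
                        "model_type": entry.model_type,
                        "metadata": entry.metadata,
                    }
                )
    return models
```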

View file

@@ -28,7 +28,7 @@ The following environment variables can be configured:
 The following models are available by default:

 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}


@@ -88,16 +88,31 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db
 models:
+- metadata: {}
+  model_id: meta.llama3-1-8b-instruct-v1:0
+  provider_id: bedrock
+  provider_model_id: meta.llama3-1-8b-instruct-v1:0
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: bedrock
   provider_model_id: meta.llama3-1-8b-instruct-v1:0
   model_type: llm
+- metadata: {}
+  model_id: meta.llama3-1-70b-instruct-v1:0
+  provider_id: bedrock
+  provider_model_id: meta.llama3-1-70b-instruct-v1:0
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: bedrock
   provider_model_id: meta.llama3-1-70b-instruct-v1:0
   model_type: llm
+- metadata: {}
+  model_id: meta.llama3-1-405b-instruct-v1:0
+  provider_id: bedrock
+  provider_model_id: meta.llama3-1-405b-instruct-v1:0
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: bedrock


@@ -8,14 +8,13 @@ from pathlib import Path

 from llama_stack.apis.models.models import ModelType
 from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
-from llama_stack.providers.remote.inference.cerebras.models import model_entries
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.providers.remote.inference.cerebras.models import MODEL_ENTRIES
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry

 def get_distribution_template() -> DistributionTemplate:
@@ -48,15 +47,10 @@ def get_distribution_template() -> DistributionTemplate:
         config=SentenceTransformersInferenceConfig.sample_run_config(),
     )

-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model],
-            provider_model_id=m.provider_model_id,
-            provider_id="cerebras",
-        )
-        for m in model_entries
-    ]
+    available_models = {
+        "cerebras": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)

     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
         provider_id="sentence-transformers",
@@ -92,7 +86,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -20,7 +20,7 @@ The following environment variables can be configured:
 The following models are available by default:

 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}


@@ -90,11 +90,21 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db
 models:
+- metadata: {}
+  model_id: llama3.1-8b
+  provider_id: cerebras
+  provider_model_id: llama3.1-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: cerebras
   provider_model_id: llama3.1-8b
   model_type: llm
+- metadata: {}
+  model_id: llama-3.3-70b
+  provider_id: cerebras
+  provider_model_id: llama-3.3-70b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: cerebras


@@ -12,14 +12,13 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.sqlite_vec.config import SQLiteVectorIOConfig
 from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
 from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry

 def get_distribution_template() -> DistributionTemplate:
@@ -71,16 +70,10 @@ def get_distribution_template() -> DistributionTemplate:
             provider_id="code-interpreter",
         ),
     ]
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_id="fireworks",
-            model_type=m.model_type,
-            metadata=m.metadata,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "fireworks": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
         provider_id="sentence-transformers",
@@ -97,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=default_models + [embedding_model],
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -90,51 +90,112 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/registry.db
 models:
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  model_type: llm
 - metadata:
     embedding_dimension: 768
     context_length: 8192
   model_id: nomic-ai/nomic-embed-text-v1.5
   provider_id: fireworks
+  provider_model_id: nomic-ai/nomic-embed-text-v1.5
   model_type: embedding
 - metadata:
     embedding_dimension: 384


@@ -3,7 +3,6 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from pathlib import Path

 from llama_stack.apis.models.models import ModelType
 from llama_stack.distribution.datatypes import (
@@ -99,9 +98,7 @@ def get_distribution_template() -> DistributionTemplate:
         distro_type="self_hosted",
         description="Dell's distribution of Llama Stack. TGI inference via Dell's custom container",
         container_image=None,
-        template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, embedding_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -13,7 +13,6 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
@@ -28,7 +27,7 @@ from llama_stack.providers.remote.inference.groq.config import GroqConfig
 from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES as GROQ_MODEL_ENTRIES
 from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
 from llama_stack.providers.remote.inference.openai.models import MODEL_ENTRIES as OPENAI_MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry

 def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
@@ -61,8 +60,7 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
         ),
     ]
     inference_providers = []
-    default_models = []
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
+    available_models = {}
     for provider_id, model_entries, config in providers:
         inference_providers.append(
             Provider(
@@ -71,21 +69,12 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
                 config=config,
             )
         )
-        default_models.extend(
-            ModelInput(
-                model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-                provider_model_id=m.provider_model_id,
-                provider_id=provider_id,
-                model_type=m.model_type,
-                metadata=m.metadata,
-            )
-            for m in model_entries
-        )
-    return inference_providers, default_models
+        available_models[provider_id] = model_entries
+    return inference_providers, available_models

 def get_distribution_template() -> DistributionTemplate:
-    inference_providers, default_models = get_inference_providers()
+    inference_providers, available_models = get_inference_providers()
     providers = {
         "inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
         "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],
@@ -139,6 +128,7 @@ def get_distribution_template() -> DistributionTemplate:
         },
     )

+    default_models = get_model_registry(available_models)
     return DistributionTemplate(
         name=name,
         distro_type="self_hosted",
@@ -146,7 +136,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=[],
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
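
After this refactor, `get_inference_providers` returns the raw model entries grouped by provider id and defers registry expansion to `get_model_registry` at template-build time, so each provider and its catalog stay defined in one place. A condensed, runnable view of the new data flow, with stub entries standing in for the real configs:

```python
# Stub data standing in for the (provider_id, model_entries, config) tuples above.
providers = [
    ("fireworks", ["accounts/fireworks/models/llama-v3p1-8b-instruct"], None),
    ("groq", ["groq/llama3-8b-8192"], None),
]

available_models = {}
for provider_id, model_entries, _config in providers:
    available_models[provider_id] = model_entries

# get_model_registry (see the sketch earlier) would expand this dict into the
# flat default_models list that ends up in each run.yaml.
print(available_models)
# {'fireworks': ['accounts/fireworks/models/llama-v3p1-8b-instruct'], 'groq': ['groq/llama3-8b-8192']}
```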


@@ -136,51 +136,101 @@ models:
   provider_id: openai
   provider_model_id: openai/text-embedding-3-large
   model_type: embedding
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks
@@ -247,25 +297,45 @@ models:
   provider_model_id: gemini/text-embedding-004
   model_type: embedding
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-8B-Instruct
+  model_id: groq/llama3-8b-8192
   provider_id: groq
   provider_model_id: groq/llama3-8b-8192
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: groq
+  provider_model_id: groq/llama3-8b-8192
+  model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.1-8b-instant
+  provider_id: groq
   provider_model_id: groq/llama-3.1-8b-instant
   model_type: llm
+- metadata: {}
+  model_id: groq/llama3-70b-8192
+  provider_id: groq
+  provider_model_id: groq/llama3-70b-8192
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3-70B-Instruct
   provider_id: groq
   provider_model_id: groq/llama3-70b-8192
   model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.3-70b-versatile
+  provider_id: groq
+  provider_model_id: groq/llama-3.3-70b-versatile
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: groq
   provider_model_id: groq/llama-3.3-70b-versatile
   model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.2-3b-preview
+  provider_id: groq
+  provider_model_id: groq/llama-3.2-3b-preview
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: groq


@@ -30,7 +30,7 @@ The following environment variables can be configured:
 The following models are available by default:

 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}


@@ -13,14 +13,13 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
 from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry

 def get_distribution_template() -> DistributionTemplate:
@@ -60,17 +59,11 @@ def get_distribution_template() -> DistributionTemplate:
         config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"),
     )

-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_model_id=m.provider_model_id,
-            provider_id="fireworks",
-            metadata=m.metadata,
-            model_type=m.model_type,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "fireworks": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)

     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
         provider_id="sentence-transformers",
@@ -101,7 +94,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -99,51 +99,101 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
 models:
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks


@@ -93,51 +93,101 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
 models:
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks


@@ -30,7 +30,7 @@ The following environment variables can be configured:
 The following models are available by default:

 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}


@@ -12,13 +12,12 @@ from llama_stack.distribution.datatypes import (
     Provider,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.remote.inference.groq import GroqConfig
 from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry

 def get_distribution_template() -> DistributionTemplate:
@@ -60,18 +59,10 @@ def get_distribution_template() -> DistributionTemplate:
         },
     )

-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_model_id=m.provider_model_id,
-            provider_id=name,
-            model_type=m.model_type,
-            metadata=m.metadata,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "groq": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)

     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -94,7 +85,7 @@ def get_distribution_template() -> DistributionTemplate:
         docker_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -91,25 +91,45 @@ metadata_store:
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/registry.db
 models:
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-8B-Instruct
+  model_id: groq/llama3-8b-8192
   provider_id: groq
   provider_model_id: groq/llama3-8b-8192
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: groq
+  provider_model_id: groq/llama3-8b-8192
+  model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.1-8b-instant
+  provider_id: groq
   provider_model_id: groq/llama-3.1-8b-instant
   model_type: llm
+- metadata: {}
+  model_id: groq/llama3-70b-8192
+  provider_id: groq
+  provider_model_id: groq/llama3-70b-8192
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3-70B-Instruct
   provider_id: groq
   provider_model_id: groq/llama3-70b-8192
   model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.3-70b-versatile
+  provider_id: groq
+  provider_model_id: groq/llama-3.3-70b-versatile
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: groq
   provider_model_id: groq/llama-3.3-70b-versatile
   model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.2-3b-preview
+  provider_id: groq
+  provider_model_id: groq/llama-3.2-3b-preview
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: groq


@@ -92,7 +92,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -93,7 +93,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -98,7 +98,6 @@ def get_distribution_template() -> DistributionTemplate:
         description="Use Meta Reference for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -88,7 +88,6 @@ def get_distribution_template() -> DistributionTemplate:
         description="Use Meta Reference with fp8, int4 quantization for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -20,7 +20,7 @@ The following environment variables can be configured:
 The following models are available by default:

 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}


@@ -6,11 +6,10 @@
 from pathlib import Path

-from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
-from llama_stack.models.llama.sku_list import all_registered_models
+from llama_stack.distribution.datatypes import Provider, ToolGroupInput
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
-from llama_stack.providers.remote.inference.nvidia.models import _MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry


 def get_distribution_template() -> DistributionTemplate:
@@ -38,17 +37,9 @@ def get_distribution_template() -> DistributionTemplate:
         config=NVIDIAConfig.sample_run_config(),
     )

-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_model_id=m.provider_model_id,
-            provider_id="nvidia",
-            model_type=m.model_type,
-            metadata=m.metadata,
-        )
-        for m in _MODEL_ENTRIES
-    ]
+    available_models = {
+        "nvidia": MODEL_ENTRIES,
+    }
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -64,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
         ),
     ]

+    default_models = get_model_registry(available_models)
     return DistributionTemplate(
         name="nvidia",
         distro_type="remote_hosted",
@@ -71,7 +63,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
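The run.yaml churn below follows mechanically from this refactor: `get_model_registry` (added in `template.py` further down) expands each `ProviderModelEntry` into one `ModelInput` per identifier, the provider-native ID plus every alias. A minimal sketch of that expansion, assuming the entry's remaining fields (`model_type`, `metadata`) take their defaults when omitted:

```python
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.templates.template import get_model_registry

# One entry, two registered IDs: the provider-native ID plus its HF-style alias.
available_models = {
    "nvidia": [
        ProviderModelEntry(
            provider_model_id="meta/llama-3.1-8b-instruct",
            aliases=["meta-llama/Llama-3.1-8B-Instruct"],
        ),
    ],
}

for model in get_model_registry(available_models):
    # prints:
    #   meta/llama-3.1-8b-instruct -> meta/llama-3.1-8b-instruct
    #   meta-llama/Llama-3.1-8B-Instruct -> meta/llama-3.1-8b-instruct
    print(model.model_id, "->", model.provider_model_id)
```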


@@ -90,46 +90,91 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
 models:
+- metadata: {}
+  model_id: meta/llama3-8b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama3-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3-8B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama3-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama3-70b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3-70B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.1-8b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.1-70b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.1-405b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: nvidia
   provider_model_id: meta/llama-3.1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.2-1b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.2-3b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.2-11b-vision-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.2-90b-vision-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: nvidia


@@ -130,17 +130,21 @@ ollama run <model_name>
 To make sure that the model is being served correctly, run `ollama ps` to get a list of models being served by ollama.
 ```
 $ ollama ps
-NAME                         ID              SIZE     PROCESSOR    UNTIL
-llama3.1:8b-instruct-fp16    4aacac419454    17 GB    100% GPU     4 minutes from now
+NAME                         ID              SIZE      PROCESSOR    UNTIL
+llama3.2:3b-instruct-fp16    195a8c01d91e    8.6 GB    100% GPU     9 minutes from now
 ```
 To verify that the model served by ollama is correctly connected to Llama Stack server
 ```bash
 $ llama-stack-client models list
-+----------------------+----------------------+---------------+-----------------------------------------------+
-| identifier           | llama_model          | provider_id   | metadata                                      |
-+======================+======================+===============+===============================================+
-| Llama3.1-8B-Instruct | Llama3.1-8B-Instruct | ollama0       | {'ollama_model': 'llama3.1:8b-instruct-fp16'} |
-+----------------------+----------------------+---------------+-----------------------------------------------+
+
+Available Models
+
+┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ model_type ┃ identifier                       ┃ provider_resource_id      ┃ metadata ┃ provider_id ┃
+┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ llm        │ meta-llama/Llama-3.2-3B-Instruct │ llama3.2:3b-instruct-fp16 │          │ ollama      │
+└────────────┴──────────────────────────────────┴───────────────────────────┴──────────┴─────────────┘
+
+Total models: 1
 ```
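The same verification can be scripted from Python instead of the CLI; a sketch, assuming the server and model registration from the steps above:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")
for model in client.models.list():
    # mirrors the columns of the CLI table above
    print(model.model_type, model.identifier, model.provider_resource_id, model.provider_id)
```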


@@ -87,7 +87,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -95,7 +95,6 @@ def get_distribution_template() -> DistributionTemplate:
         description="Use (an external) vLLM server for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -30,7 +30,7 @@ The following environment variables can be configured:
 The following models are available by default:
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}


@@ -68,46 +68,91 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
 models:
+- metadata: {}
+  model_id: Meta-Llama-3.1-8B-Instruct
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-3.1-8B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: sambanova
   provider_model_id: Meta-Llama-3.1-8B-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Meta-Llama-3.1-70B-Instruct
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-3.1-70B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: sambanova
   provider_model_id: Meta-Llama-3.1-70B-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Meta-Llama-3.1-405B-Instruct
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-3.1-405B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: sambanova
   provider_model_id: Meta-Llama-3.1-405B-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Meta-Llama-3.2-1B-Instruct
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-3.2-1B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: sambanova
   provider_model_id: Meta-Llama-3.2-1B-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Meta-Llama-3.2-3B-Instruct
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-3.2-3B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: sambanova
   provider_model_id: Meta-Llama-3.2-3B-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Meta-Llama-3.3-70B-Instruct
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-3.3-70B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: sambanova
   provider_model_id: Meta-Llama-3.3-70B-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Llama-3.2-11B-Vision-Instruct
+  provider_id: sambanova
+  provider_model_id: Llama-3.2-11B-Vision-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: sambanova
   provider_model_id: Llama-3.2-11B-Vision-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Llama-3.2-90B-Vision-Instruct
+  provider_id: sambanova
+  provider_model_id: Llama-3.2-90B-Vision-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: sambanova
   provider_model_id: Llama-3.2-90B-Vision-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Meta-Llama-Guard-3-8B
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-Guard-3-8B
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: sambanova


@@ -7,15 +7,13 @@
 from pathlib import Path

 from llama_stack.distribution.datatypes import (
-    ModelInput,
     Provider,
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
 from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry


 def get_distribution_template() -> DistributionTemplate:
@@ -40,16 +38,10 @@ def get_distribution_template() -> DistributionTemplate:
         config=SambaNovaImplConfig.sample_run_config(),
     )

-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model],
-            provider_model_id=m.provider_model_id,
-            provider_id=name,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        name: MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -72,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
         docker_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -24,9 +24,33 @@ from llama_stack.distribution.datatypes import (
 )
 from llama_stack.distribution.distribution import get_provider_registry
 from llama_stack.distribution.utils.dynamic import instantiate_class_type
+from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
 from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig


+def get_model_registry(available_models: Dict[str, List[ProviderModelEntry]]) -> List[ModelInput]:
+    models = []
+    for provider_id, entries in available_models.items():
+        for entry in entries:
+            ids = [entry.provider_model_id] + entry.aliases
+            for model_id in ids:
+                models.append(
+                    ModelInput(
+                        model_id=model_id,
+                        provider_model_id=entry.provider_model_id,
+                        provider_id=provider_id,
+                        model_type=entry.model_type,
+                        metadata=entry.metadata,
+                    )
+                )
+    return models
+
+
+class DefaultModel(BaseModel):
+    model_id: str
+    doc_string: str
+
+
 class RunConfigSettings(BaseModel):
     provider_overrides: Dict[str, List[Provider]] = Field(default_factory=dict)
     default_models: Optional[List[ModelInput]] = None
@@ -110,7 +134,7 @@ class DistributionTemplate(BaseModel):
     run_config_env_vars: Optional[Dict[str, Tuple[str, str]]] = None
     container_image: Optional[str] = None

-    default_models: Optional[List[ModelInput]] = None
+    available_models_by_provider: Optional[Dict[str, List[ProviderModelEntry]]] = None

     def build_config(self) -> BuildConfig:
         return BuildConfig(
@@ -148,13 +172,32 @@ class DistributionTemplate(BaseModel):
             autoescape=True,
         )
         template = env.from_string(template)

+        default_models = []
+        if self.available_models_by_provider:
+            has_multiple_providers = len(self.available_models_by_provider.keys()) > 1
+            for provider_id, model_entries in self.available_models_by_provider.items():
+                for model_entry in model_entries:
+                    doc_parts = []
+                    if model_entry.aliases:
+                        doc_parts.append(f"aliases: {', '.join(model_entry.aliases)}")
+                    if has_multiple_providers:
+                        doc_parts.append(f"provider: {provider_id}")
+
+                    default_models.append(
+                        DefaultModel(
+                            model_id=model_entry.provider_model_id,
+                            doc_string=f"({' -- '.join(doc_parts)})" if doc_parts else "",
+                        )
+                    )
+
         return template.render(
             name=self.name,
             description=self.description,
             providers=self.providers,
             providers_table=providers_table,
             run_config_env_vars=self.run_config_env_vars,
-            default_models=self.default_models,
+            default_models=default_models,
         )

     def save_distribution(self, yaml_output_dir: Path, doc_output_dir: Path) -> None:
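For the generated docs, the template no longer prints `(provider_model_id)` next to each model; it renders the precomputed `doc_string` instead. A small illustration of the loop above, with hypothetical values for a multi-provider template:

```python
# Illustration of the doc_string logic above (the alias and provider are hypothetical).
doc_parts = []
aliases = ["meta-llama/Llama-3.1-8B-Instruct"]
has_multiple_providers = True

if aliases:
    doc_parts.append(f"aliases: {', '.join(aliases)}")
if has_multiple_providers:
    doc_parts.append("provider: nvidia")

doc_string = f"({' -- '.join(doc_parts)})" if doc_parts else ""
# doc_template.md then renders the bullet:
# - `meta/llama-3.1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct -- provider: nvidia)`
print(doc_string)
```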


@@ -96,7 +96,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -30,7 +30,7 @@ The following environment variables can be configured:
 The following models are available by default:
 {% for model in default_models %}
-- `{{ model.model_id }}`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}


@@ -99,46 +99,91 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
 models:
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-Guard-3-8B
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: together


@@ -93,46 +93,91 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
 models:
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-Guard-3-8B
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: together


@@ -13,14 +13,13 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.together import TogetherImplConfig
 from llama_stack.providers.remote.inference.together.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry


 def get_distribution_template() -> DistributionTemplate:
@@ -57,18 +56,10 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="inline::sentence-transformers",
         config=SentenceTransformersInferenceConfig.sample_run_config(),
     )

-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_model_id=m.provider_model_id,
-            provider_id="together",
-            metadata=m.metadata,
-            model_type=m.model_type,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "together": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -99,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -88,7 +88,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=[inference_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={


@@ -14,6 +14,23 @@
 #  - array of a text (TextContentItem)
 # Types of output:
 #  - list of list of floats
+# Params:
+#  - text_truncation
+#    - absent w/ long text -> error
+#    - none w/ long text -> error
+#    - absent w/ short text -> ok
+#    - none w/ short text -> ok
+#    - end w/ long text -> ok
+#    - end w/ short text -> ok
+#    - start w/ long text -> ok
+#    - start w/ short text -> ok
+#  - output_dimension
+#    - response dimension matches
+#  - task_type, only for asymmetric models
+#    - query embedding != passage embedding
+# Negative:
+#  - long string
+#  - long text
 #
 # Todo:
 #  - negative tests
@@ -23,8 +40,6 @@
 #    - empty text
 #    - empty image
 #  - long
-#    - long string
-#    - long text
 #    - large image
 #  - appropriate combinations
 #  - batch size
@@ -40,6 +55,7 @@
 #

 import pytest
+from llama_stack_client import BadRequestError
 from llama_stack_client.types import EmbeddingsResponse
 from llama_stack_client.types.shared.interleaved_content import (
     ImageContentItem,
@@ -50,8 +66,10 @@ from llama_stack_client.types.shared.interleaved_content import (

 DUMMY_STRING = "hello"
 DUMMY_STRING2 = "world"
+DUMMY_LONG_STRING = "NVDA " * 10240
 DUMMY_TEXT = TextContentItem(text=DUMMY_STRING, type="text")
 DUMMY_TEXT2 = TextContentItem(text=DUMMY_STRING2, type="text")
+DUMMY_LONG_TEXT = TextContentItem(text=DUMMY_LONG_STRING, type="text")
 # TODO(mf): add a real image URL and base64 string
 DUMMY_IMAGE_URL = ImageContentItem(
     image=ImageContentItemImage(url=ImageContentItemImageURL(uri="https://example.com/image.jpg")), type="image"
@@ -89,10 +107,120 @@ def test_embedding_text(llama_stack_client, embedding_model_id, contents):
         "list[url,string,base64,text]",
     ],
 )
-@pytest.mark.skip(reason="Media is not supported")
+@pytest.mark.xfail(reason="Media is not supported")
 def test_embedding_image(llama_stack_client, embedding_model_id, contents):
     response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents)
     assert isinstance(response, EmbeddingsResponse)
     assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
     assert isinstance(response.embeddings[0], list)
     assert isinstance(response.embeddings[0][0], float)
+
+
+@pytest.mark.parametrize(
+    "text_truncation",
+    [
+        "end",
+        "start",
+    ],
+)
+@pytest.mark.parametrize(
+    "contents",
+    [
+        [DUMMY_LONG_TEXT],
+        [DUMMY_STRING],
+    ],
+    ids=[
+        "long",
+        "short",
+    ],
+)
+def test_embedding_truncation(llama_stack_client, embedding_model_id, text_truncation, contents):
+    response = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=contents, text_truncation=text_truncation
+    )
+    assert isinstance(response, EmbeddingsResponse)
+    assert len(response.embeddings) == 1
+    assert isinstance(response.embeddings[0], list)
+    assert isinstance(response.embeddings[0][0], float)
+
+
+@pytest.mark.parametrize(
+    "text_truncation",
+    [
+        None,
+        "none",
+    ],
+)
+@pytest.mark.parametrize(
+    "contents",
+    [
+        [DUMMY_LONG_TEXT],
+        [DUMMY_LONG_STRING],
+    ],
+    ids=[
+        "long-text",
+        "long-str",
+    ],
+)
+def test_embedding_truncation_error(llama_stack_client, embedding_model_id, text_truncation, contents):
+    with pytest.raises(BadRequestError) as excinfo:
+        llama_stack_client.inference.embeddings(
+            model_id=embedding_model_id, contents=[DUMMY_LONG_TEXT], text_truncation=text_truncation
+        )
+
+
+@pytest.mark.xfail(reason="Only valid for model supporting dimension reduction")
+def test_embedding_output_dimension(llama_stack_client, embedding_model_id):
+    base_response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=[DUMMY_STRING])
+    test_response = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=[DUMMY_STRING], output_dimension=32
+    )
+    assert len(base_response.embeddings[0]) != len(test_response.embeddings[0])
+    assert len(test_response.embeddings[0]) == 32
+
+
+@pytest.mark.xfail(reason="Only valid for model supporting task type")
+def test_embedding_task_type(llama_stack_client, embedding_model_id):
+    query_embedding = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="query"
+    )
+    document_embedding = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="document"
+    )
+    assert query_embedding.embeddings != document_embedding.embeddings
+
+
+@pytest.mark.parametrize(
+    "text_truncation",
+    [
+        None,
+        "none",
+        "end",
+        "start",
+    ],
+)
+def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation):
+    response = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation
+    )
+    assert isinstance(response, EmbeddingsResponse)
+    assert len(response.embeddings) == 1
+    assert isinstance(response.embeddings[0], list)
+    assert isinstance(response.embeddings[0][0], float)
+
+
+@pytest.mark.parametrize(
+    "text_truncation",
+    [
+        "NONE",
+        "END",
+        "START",
+        "left",
+        "right",
+    ],
+)
+def test_embedding_text_truncation_error(llama_stack_client, embedding_model_id, text_truncation):
+    with pytest.raises(BadRequestError) as excinfo:
+        llama_stack_client.inference.embeddings(
+            model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation
+        )
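Taken together, these tests pin down the truncation contract: absent or "none" rejects over-long input with a `BadRequestError`, while "start" and "end" truncate and succeed. A hedged usage sketch of that contract; the base URL and model id are placeholders, not taken from this diff:

```python
from llama_stack_client import BadRequestError, LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder server
model_id = "all-MiniLM-L6-v2"  # placeholder embedding model id
long_text = "NVDA " * 10240  # far beyond any embedding context window

try:
    # without opting into truncation, over-long input is rejected
    client.inference.embeddings(model_id=model_id, contents=[long_text])
except BadRequestError:
    pass

# opting into truncation makes the same input succeed
response = client.inference.embeddings(
    model_id=model_id, contents=[long_text], text_truncation="end"
)
assert len(response.embeddings) == 1
```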


@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 import pytest
 from pydantic import BaseModel
@@ -342,11 +343,15 @@ def test_text_chat_completion_with_tool_choice_none(client_with_models, text_mod
     ],
 )
 def test_text_chat_completion_structured_output(client_with_models, text_model_id, test_case):
+    class NBAStats(BaseModel):
+        year_for_draft: int
+        num_seasons_in_nba: int
+
     class AnswerFormat(BaseModel):
         first_name: str
         last_name: str
         year_of_birth: int
-        num_seasons_in_nba: int
+        nba_stats: NBAStats

     tc = TestCase(test_case)
@@ -364,7 +369,8 @@ def test_text_chat_completion_structured_output(client_with_models, text_model_i
     assert answer.first_name == expected["first_name"]
     assert answer.last_name == expected["last_name"]
     assert answer.year_of_birth == expected["year_of_birth"]
-    assert answer.num_seasons_in_nba == expected["num_seasons_in_nba"]
+    assert answer.nba_stats.num_seasons_in_nba == expected["num_seasons_in_nba"]
+    assert answer.nba_stats.year_for_draft == expected["year_for_draft"]


 @pytest.mark.parametrize("streaming", [True, False])