Mirror of https://github.com/meta-llama/llama-stack.git

commit e157f0ac89: Merge branch 'main' into max_infer_iters

68 changed files with 952 additions and 529 deletions
@@ -70,6 +70,19 @@ $ uv pip install -e .
 $ source .venv/bin/activate
 ```
 
+Note that you can create a dotenv file `.env` that includes necessary environment variables:
+```
+LLAMA_STACK_BASE_URL=http://localhost:8321
+LLAMA_STACK_CLIENT_LOG=debug
+LLAMA_STACK_PORT=8321
+LLAMA_STACK_CONFIG=
+```
+
+And then use this dotenv file when running client SDK tests via the following:
+```bash
+$ uv run --env-file .env -- pytest -v tests/client-sdk/inference/test_text_inference.py
+```
+
 ## Pre-commit Hooks
 
 We use [pre-commit](https://pre-commit.com/) to run linting and formatting checks on your code. You can install the pre-commit hooks by running:
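
A note on the hunk above: `uv run --env-file .env` loads each `KEY=value` pair from the dotenv file into the environment of the spawned pytest process, so tests read the values with ordinary `os.environ` lookups. A minimal sketch of the consuming side (the helper name `base_url_from_env` is illustrative, not from the repo):

```python
import os

def base_url_from_env() -> str:
    # LLAMA_STACK_BASE_URL and LLAMA_STACK_PORT come from the .env file above.
    port = os.environ.get("LLAMA_STACK_PORT", "8321")
    return os.environ.get("LLAMA_STACK_BASE_URL", f"http://localhost:{port}")

print(base_url_from_env())  # -> http://localhost:8321 with the sample .env
```
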
@@ -84,10 +84,8 @@
     "outputs": [],
     "source": [
      "# NBVAL_SKIP\n",
-     "\n",
      "!apt-get install -y bubblewrap\n",
-     "!pip install uv\n",
-     "!uv pip install llama-stack --system"
+     "!pip install -U llama-stack"
     ]
    },
    {
@@ -126,7 +124,7 @@
     "source": [
      "# NBVAL_SKIP\n",
      "# This will build all the dependencies you will need\n",
-     "!llama stack build --template together --image-type venv --image-name __system__"
+     "!UV_SYSTEM_PYTHON=1 llama stack build --template together --image-type venv"
     ]
    },
    {
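
This notebook change pairs with the `llama stack build` changes further down in this diff: instead of passing `--image-name __system__`, the cell now sets `UV_SYSTEM_PYTHON=1`, which the updated build path (see the `_run_stack_build_command_from_build_config` hunk below) maps to the reserved `__system__` image name. The equivalent two-step form in a POSIX shell:

```bash
export UV_SYSTEM_PYTHON=1
llama stack build --template together --image-type venv
```
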
@@ -4328,7 +4326,7 @@
    "provenance": []
   },
   "kernelspec": {
-   "display_name": "master",
+   "display_name": "toolchain",
    "language": "python",
    "name": "python3"
   },
@@ -45,65 +45,7 @@
    "id": "O9pGVlPIjpix",
    "outputId": "e1fbe723-ae31-4630-eb80-4c4f6476d56f"
   },
-  "outputs": [
-   {
-    "name": "stdout",
-    "output_type": "stream",
-    "text": [
-     "Requirement already satisfied: llama-stack in /usr/local/lib/python3.10/dist-packages (0.0.61)\n",
-     … (roughly 45 further "Requirement already satisfied: …" lines of captured pip output, all deleted by this commit) …
-    ]
-   }
-  ],
+  "outputs": [],
   "source": [
    "# NBVAL_SKIP\n",
    "!pip install -U llama-stack"
@@ -120,198 +62,10 @@
    "id": "JQpLUSNjlGAM",
    "outputId": "2f7fec97-5511-4cae-d51e-6d262fbca19c"
   },
-  "outputs": [
-   {
-    "name": "stdout",
-    "output_type": "stream",
-    "text": [
-     "Requirement already satisfied: llama-stack in /usr/local/lib/python3.10/dist-packages (0.0.61)\r\n",
-     … (roughly 50 further "Requirement already satisfied: …" lines for llama-stack's own dependencies) …
-     "Installing pip dependencies\n",
-     … (roughly 130 further "Requirement already satisfied: …" lines for the distribution's provider dependencies) …
-     "sentence-transformers --no-deps\n",
-     "Requirement already satisfied: sentence-transformers in /usr/local/lib/python3.10/dist-packages (3.2.1)\n",
-     "torch --index-url https://download.pytorch.org/whl/cpu\n",
-     "Looking in indexes: https://download.pytorch.org/whl/cpu\n",
-     … (torch and its dependency lines elided) …
-     "\u001b[32mBuild Successful!\u001b[0m\n"
-    ]
-   }
-  ],
+  "outputs": [],
   "source": [
    "# NBVAL_SKIP\n",
-   "!llama stack build --template together --image-type venv --image-name __system__"
+   "!UV_SYSTEM_PYTHON=1 llama stack build --template together --image-type venv"
   ]
  },
 {
@@ -27,19 +27,19 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3-8B-Instruct (meta/llama3-8b-instruct)`
-- `meta-llama/Llama-3-70B-Instruct (meta/llama3-70b-instruct)`
-- `meta-llama/Llama-3.1-8B-Instruct (meta/llama-3.1-8b-instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (meta/llama-3.1-70b-instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (meta/llama-3.1-405b-instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (meta/llama-3.2-1b-instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (meta/llama-3.2-3b-instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (meta/llama-3.2-11b-vision-instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (meta/llama-3.2-90b-vision-instruct)`
-- `nvidia/llama-3.2-nv-embedqa-1b-v2 (nvidia/llama-3.2-nv-embedqa-1b-v2)`
-- `nvidia/nv-embedqa-e5-v5 (nvidia/nv-embedqa-e5-v5)`
-- `nvidia/nv-embedqa-mistral-7b-v2 (nvidia/nv-embedqa-mistral-7b-v2)`
-- `snowflake/arctic-embed-l (snowflake/arctic-embed-l)`
+- `meta/llama3-8b-instruct (aliases: meta-llama/Llama-3-8B-Instruct)`
+- `meta/llama3-70b-instruct (aliases: meta-llama/Llama-3-70B-Instruct)`
+- `meta/llama-3.1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta/llama-3.1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta/llama-3.1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `meta/llama-3.2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `meta/llama-3.2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `meta/llama-3.2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `meta/llama-3.2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `nvidia/llama-3.2-nv-embedqa-1b-v2 `
+- `nvidia/nv-embedqa-e5-v5 `
+- `nvidia/nv-embedqa-mistral-7b-v2 `
+- `snowflake/arctic-embed-l `
 
 
 ### Prerequisite: API Keys
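
The same pattern repeats across the provider docs below: the provider-native model id becomes the primary identifier and the `meta-llama/...` name is retained as an alias, so either string should resolve to the same model. A hedged illustration using the CLI documented later in this diff (lookup-by-alias behavior is assumed, not shown in this commit):

```bash
$ llama-stack-client models get meta/llama3-8b-instruct
```
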
@@ -34,9 +34,9 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct (meta.llama3-1-8b-instruct-v1:0)`
-- `meta-llama/Llama-3.1-70B-Instruct (meta.llama3-1-70b-instruct-v1:0)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (meta.llama3-1-405b-instruct-v1:0)`
+- `meta.llama3-1-8b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta.llama3-1-70b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta.llama3-1-405b-instruct-v1:0 (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
 
 
 ### Prerequisite: API Keys
@@ -27,8 +27,8 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct (llama3.1-8b)`
-- `meta-llama/Llama-3.3-70B-Instruct (llama-3.3-70b)`
+- `llama3.1-8b (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `llama-3.3-70b (aliases: meta-llama/Llama-3.3-70B-Instruct)`
 
 
 ### Prerequisite: API Keys
@@ -37,17 +37,17 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct (accounts/fireworks/models/llama-v3p1-8b-instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (accounts/fireworks/models/llama-v3p1-70b-instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (accounts/fireworks/models/llama-v3p1-405b-instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (accounts/fireworks/models/llama-v3p2-1b-instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (accounts/fireworks/models/llama-v3p2-3b-instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (accounts/fireworks/models/llama-v3p2-11b-vision-instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (accounts/fireworks/models/llama-v3p2-90b-vision-instruct)`
-- `meta-llama/Llama-3.3-70B-Instruct (accounts/fireworks/models/llama-v3p3-70b-instruct)`
-- `meta-llama/Llama-Guard-3-8B (accounts/fireworks/models/llama-guard-3-8b)`
-- `meta-llama/Llama-Guard-3-11B-Vision (accounts/fireworks/models/llama-guard-3-11b-vision)`
-- `nomic-ai/nomic-embed-text-v1.5 (nomic-ai/nomic-embed-text-v1.5)`
+- `accounts/fireworks/models/llama-v3p1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `accounts/fireworks/models/llama-v3p1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `accounts/fireworks/models/llama-v3p1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `accounts/fireworks/models/llama-v3p2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `accounts/fireworks/models/llama-v3p2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `accounts/fireworks/models/llama-v3p3-70b-instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `accounts/fireworks/models/llama-guard-3-8b (aliases: meta-llama/Llama-Guard-3-8B)`
+- `accounts/fireworks/models/llama-guard-3-11b-vision (aliases: meta-llama/Llama-Guard-3-11B-Vision)`
+- `nomic-ai/nomic-embed-text-v1.5 `
 
 
 ### Prerequisite: API Keys
@@ -37,11 +37,11 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct (groq/llama3-8b-8192)`
-- `meta-llama/Llama-3.1-8B-Instruct (groq/llama-3.1-8b-instant)`
-- `meta-llama/Llama-3-70B-Instruct (groq/llama3-70b-8192)`
-- `meta-llama/Llama-3.3-70B-Instruct (groq/llama-3.3-70b-versatile)`
-- `meta-llama/Llama-3.2-3B-Instruct (groq/llama-3.2-3b-preview)`
+- `groq/llama3-8b-8192 (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `groq/llama-3.1-8b-instant `
+- `groq/llama3-70b-8192 (aliases: meta-llama/Llama-3-70B-Instruct)`
+- `groq/llama-3.3-70b-versatile (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `groq/llama-3.2-3b-preview (aliases: meta-llama/Llama-3.2-3B-Instruct)`
 
 
 ### Prerequisite: API Keys
@@ -141,17 +141,21 @@ ollama run <model_name>
 To make sure that the model is being served correctly, run `ollama ps` to get a list of models being served by ollama.
 ```
 $ ollama ps
-NAME                         ID              SIZE      PROCESSOR    UNTIL
-llama3.1:8b-instruct-fp16    4aacac419454    17 GB     100% GPU     4 minutes from now
+NAME                         ID              SIZE      PROCESSOR    UNTIL
+llama3.2:3b-instruct-fp16    195a8c01d91e    8.6 GB    100% GPU     9 minutes from now
 ```
 
 To verify that the model served by ollama is correctly connected to Llama Stack server
 ```bash
 $ llama-stack-client models list
-+----------------------+----------------------+---------------+-----------------------------------------------+
-| identifier           | llama_model          | provider_id   | metadata                                      |
-+======================+======================+===============+===============================================+
-| Llama3.1-8B-Instruct | Llama3.1-8B-Instruct | ollama0       | {'ollama_model': 'llama3.1:8b-instruct-fp16'} |
-+----------------------+----------------------+---------------+-----------------------------------------------+
+
+Available Models
+
+┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ model_type   ┃ identifier                           ┃ provider_resource_id         ┃ metadata  ┃ provider_id ┃
+┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ llm          │ meta-llama/Llama-3.2-3B-Instruct     │ llama3.2:3b-instruct-fp16    │           │ ollama      │
+└──────────────┴──────────────────────────────────────┴──────────────────────────────┴───────────┴─────────────┘
+
+Total models: 1
 ```
@@ -34,15 +34,15 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct (Meta-Llama-3.1-8B-Instruct)`
-- `meta-llama/Llama-3.1-70B-Instruct (Meta-Llama-3.1-70B-Instruct)`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8 (Meta-Llama-3.1-405B-Instruct)`
-- `meta-llama/Llama-3.2-1B-Instruct (Meta-Llama-3.2-1B-Instruct)`
-- `meta-llama/Llama-3.2-3B-Instruct (Meta-Llama-3.2-3B-Instruct)`
-- `meta-llama/Llama-3.3-70B-Instruct (Meta-Llama-3.3-70B-Instruct)`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct (Llama-3.2-11B-Vision-Instruct)`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct (Llama-3.2-90B-Vision-Instruct)`
-- `meta-llama/Llama-Guard-3-8B (Meta-Llama-Guard-3-8B)`
+- `Meta-Llama-3.1-8B-Instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `Meta-Llama-3.1-70B-Instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `Meta-Llama-3.1-405B-Instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `Meta-Llama-3.2-1B-Instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
+- `Meta-Llama-3.2-3B-Instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `Meta-Llama-3.3-70B-Instruct (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `Llama-3.2-11B-Vision-Instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `Llama-3.2-90B-Vision-Instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)`
 
 
 ### Prerequisite: API Keys
@@ -37,17 +37,17 @@ The following environment variables can be configured:
 
 The following models are available by default:
 
-- `meta-llama/Llama-3.1-8B-Instruct`
-- `meta-llama/Llama-3.1-70B-Instruct`
-- `meta-llama/Llama-3.1-405B-Instruct-FP8`
-- `meta-llama/Llama-3.2-3B-Instruct`
-- `meta-llama/Llama-3.2-11B-Vision-Instruct`
-- `meta-llama/Llama-3.2-90B-Vision-Instruct`
-- `meta-llama/Llama-3.3-70B-Instruct`
-- `meta-llama/Llama-Guard-3-8B`
-- `meta-llama/Llama-Guard-3-11B-Vision`
-- `togethercomputer/m2-bert-80M-8k-retrieval`
-- `togethercomputer/m2-bert-80M-32k-retrieval`
+- `meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-8B-Instruct)`
+- `meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-70B-Instruct)`
+- `meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
+- `meta-llama/Llama-3.2-3B-Instruct-Turbo (aliases: meta-llama/Llama-3.2-3B-Instruct)`
+- `meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
+- `meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
+- `meta-llama/Llama-3.3-70B-Instruct-Turbo (aliases: meta-llama/Llama-3.3-70B-Instruct)`
+- `meta-llama/Meta-Llama-Guard-3-8B (aliases: meta-llama/Llama-Guard-3-8B)`
+- `meta-llama/Llama-Guard-3-11B-Vision-Turbo (aliases: meta-llama/Llama-Guard-3-11B-Vision)`
+- `togethercomputer/m2-bert-80M-8k-retrieval `
+- `togethercomputer/m2-bert-80M-32k-retrieval `
 
 
 ### Prerequisite: API Keys
@@ -102,12 +102,18 @@ Let's use the `llama-stack-client` CLI to check the connectivity to the server.
 $ llama-stack-client configure --endpoint http://localhost:$LLAMA_STACK_PORT
 > Enter the API key (leave empty if no key is needed):
 Done! You can now use the Llama Stack Client CLI with endpoint http://localhost:8321
 
 $ llama-stack-client models list
-┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓
-┃ identifier                       ┃ provider_id ┃ provider_resource_id      ┃ metadata ┃
-┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩
-│ meta-llama/Llama-3.2-3B-Instruct │ ollama      │ llama3.2:3b-instruct-fp16 │          │
-└──────────────────────────────────┴─────────────┴───────────────────────────┴──────────┘
+
+Available Models
+
+┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ model_type   ┃ identifier                           ┃ provider_resource_id         ┃ metadata  ┃ provider_id ┃
+┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ llm          │ meta-llama/Llama-3.2-3B-Instruct     │ llama3.2:3b-instruct-fp16    │           │ ollama      │
+└──────────────┴──────────────────────────────────────┴──────────────────────────────┴───────────┴─────────────┘
+
+Total models: 1
 ```
 
 You can test basic Llama inference completion using the CLI too.
@@ -58,11 +58,15 @@ llama-stack-client providers list
 llama-stack-client models list
 ```
 ```
-+----------------------+----------------------+---------------+----------------------------------------------------------+
-| identifier           | llama_model          | provider_id   | metadata                                                 |
-+======================+======================+===============+==========================================================+
-| Llama3.1-8B-Instruct | Llama3.1-8B-Instruct | tgi0          | {'huggingface_repo': 'meta-llama/Llama-3.1-8B-Instruct'} |
-+----------------------+----------------------+---------------+----------------------------------------------------------+
+Available Models
+
+┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ model_type   ┃ identifier                           ┃ provider_resource_id         ┃ metadata  ┃ provider_id ┃
+┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ llm          │ meta-llama/Llama-3.2-3B-Instruct     │ llama3.2:3b-instruct-fp16    │           │ ollama      │
+└──────────────┴──────────────────────────────────────┴──────────────────────────────┴───────────┴─────────────┘
+
+Total models: 1
 ```
 
 ### `llama-stack-client models get`
@@ -9,6 +9,7 @@ import argparse
 from .download import Download
 from .model import ModelParser
 from .stack import StackParser
+from .stack.utils import print_subcommand_description
 from .verify_download import VerifyDownload
 
 
|
@ -20,6 +21,7 @@ class LlamaCLIParser:
|
||||||
prog="llama",
|
prog="llama",
|
||||||
description="Welcome to the Llama CLI",
|
description="Welcome to the Llama CLI",
|
||||||
add_help=True,
|
add_help=True,
|
||||||
|
formatter_class=argparse.RawTextHelpFormatter,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Default command is to print help
|
# Default command is to print help
|
||||||
|
@@ -33,6 +35,8 @@ class LlamaCLIParser:
         Download.create(subparsers)
         VerifyDownload.create(subparsers)
 
+        print_subcommand_description(self.parser, subparsers)
+
     def parse_args(self) -> argparse.Namespace:
         return self.parser.parse_args()
 
@@ -12,6 +12,7 @@ from llama_stack.cli.model.list import ModelList
 from llama_stack.cli.model.prompt_format import ModelPromptFormat
 from llama_stack.cli.model.remove import ModelRemove
 from llama_stack.cli.model.verify_download import ModelVerifyDownload
+from llama_stack.cli.stack.utils import print_subcommand_description
 from llama_stack.cli.subcommand import Subcommand
 
 
@@ -24,6 +25,7 @@ class ModelParser(Subcommand):
             "model",
             prog="llama model",
             description="Work with llama models",
+            formatter_class=argparse.RawTextHelpFormatter,
         )
 
         self.parser.set_defaults(func=lambda args: self.parser.print_help())
@@ -37,3 +39,5 @@ class ModelParser(Subcommand):
         ModelDescribe.create(subparsers)
         ModelVerifyDownload.create(subparsers)
         ModelRemove.create(subparsers)
+
+        print_subcommand_description(self.parser, subparsers)
|
||||||
from llama_stack.distribution.resolver import InvalidProviderError
|
from llama_stack.distribution.resolver import InvalidProviderError
|
||||||
from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
|
from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR
|
||||||
from llama_stack.distribution.utils.dynamic import instantiate_class_type
|
from llama_stack.distribution.utils.dynamic import instantiate_class_type
|
||||||
from llama_stack.distribution.utils.exec import formulate_run_args, in_notebook, run_with_pty
|
from llama_stack.distribution.utils.exec import formulate_run_args, run_with_pty
|
||||||
from llama_stack.distribution.utils.image_types import ImageType
|
from llama_stack.distribution.utils.image_types import ImageType
|
||||||
from llama_stack.providers.datatypes import Api
|
from llama_stack.providers.datatypes import Api
|
||||||
|
|
||||||
|
@@ -65,8 +65,6 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
     if args.image_type == "venv":
         current_venv = os.environ.get("VIRTUAL_ENV")
         image_name = args.image_name or current_venv
-        if not image_name and in_notebook():
-            image_name = "__system__"
     elif args.image_type == "conda":
         current_conda_env = os.environ.get("CONDA_DEFAULT_ENV")
         image_name = args.image_name or current_conda_env
@ -291,6 +289,8 @@ def _run_stack_build_command_from_build_config(
|
||||||
if not image_name:
|
if not image_name:
|
||||||
raise ValueError("Please specify an image name when building a conda image")
|
raise ValueError("Please specify an image name when building a conda image")
|
||||||
elif build_config.image_type == ImageType.venv.value:
|
elif build_config.image_type == ImageType.venv.value:
|
||||||
|
if not image_name and os.environ.get("UV_SYSTEM_PYTHON"):
|
||||||
|
image_name = "__system__"
|
||||||
if not image_name:
|
if not image_name:
|
||||||
raise ValueError("Please specify an image name when building a venv image")
|
raise ValueError("Please specify an image name when building a venv image")
|
||||||
|
|
||||||
|
|
|
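Taken together, the two hunks above move the `__system__` fallback from notebook detection to an explicit `UV_SYSTEM_PYTHON` opt-in. A minimal sketch of the resulting image-name resolution order, collapsing both functions into one for illustration (the function name is ours, not the repo's):

```python
import os


def resolve_venv_image_name(cli_image_name: str | None) -> str:
    # 1. An explicit --image-name always wins.
    # 2. Otherwise fall back to the currently activated virtualenv.
    # 3. Otherwise, only when UV_SYSTEM_PYTHON is set, target the system Python.
    image_name = cli_image_name or os.environ.get("VIRTUAL_ENV")
    if not image_name and os.environ.get("UV_SYSTEM_PYTHON"):
        image_name = "__system__"
    if not image_name:
        raise ValueError("Please specify an image name when building a venv image")
    return image_name
```
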
@@ -7,6 +7,7 @@
 import argparse
 from importlib.metadata import version

+from llama_stack.cli.stack.utils import print_subcommand_description
 from llama_stack.cli.subcommand import Subcommand

 from .build import StackBuild

@@ -22,6 +23,7 @@ class StackParser(Subcommand):
             "stack",
             prog="llama stack",
             description="Operations for the Llama Stack / Distributions",
+            formatter_class=argparse.RawTextHelpFormatter,
         )

         self.parser.add_argument(

@@ -39,3 +41,5 @@ class StackParser(Subcommand):
         StackListApis.create(subparsers)
         StackListProviders.create(subparsers)
         StackRun.create(subparsers)
+
+        print_subcommand_description(self.parser, subparsers)

llama_stack/cli/stack/utils.py (new file, 14 lines)
@@ -0,0 +1,14 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+def print_subcommand_description(parser, subparsers):
+    """Print descriptions of subcommands."""
+    description_text = ""
+    for name, subcommand in subparsers.choices.items():
+        description = subcommand.description
+        description_text += f"  {name:<21} {description}\n"
+    parser.epilog = description_text

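For context, a sketch of how the new helper is wired up by the `ModelParser`/`StackParser` hunks above (subcommand names and descriptions here are illustrative):

```python
import argparse

from llama_stack.cli.stack.utils import print_subcommand_description

parser = argparse.ArgumentParser(
    prog="llama stack",
    # RawTextHelpFormatter preserves the newlines and column alignment that
    # print_subcommand_description builds into the epilog.
    formatter_class=argparse.RawTextHelpFormatter,
)
subparsers = parser.add_subparsers(title="stack_subcommands")
subparsers.add_parser("build", description="Build a Llama Stack distribution")
subparsers.add_parser("run", description="Run a Llama Stack distribution")

# Same call the parsers above make after registering their subcommands.
print_subcommand_description(parser, subparsers)
parser.print_help()
```
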
@@ -1059,9 +1059,6 @@ async def execute_tool_call_maybe(
     group_name = tool_to_group.get(name, None)
     if group_name is None:
         raise ValueError(f"Tool {name} not found in any tool group")
-    # get the arguments generated by the model and augment with toolgroup arg overrides for the agent
-    tool_call_args = tool_call.arguments
-    tool_call_args.update(toolgroup_args.get(group_name, {}))
     if isinstance(name, BuiltinTool):
         if name == BuiltinTool.brave_search:
             name = WEB_SEARCH_TOOL

@@ -1070,10 +1067,12 @@ async def execute_tool_call_maybe(

     result = await tool_runtime_api.invoke_tool(
         tool_name=name,
-        kwargs=dict(
-            session_id=session_id,
-            **tool_call_args,
-        ),
+        kwargs={
+            "session_id": session_id,
+            # get the arguments generated by the model and augment with toolgroup arg overrides for the agent
+            **tool_call.arguments,
+            **toolgroup_args.get(group_name, {}),
+        },
     )
     return result

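The rewritten `invoke_tool` call also changes merge semantics: in a dict literal, later unpacked mappings win on key collisions, so per-agent toolgroup overrides now take precedence over the model-generated arguments, and `tool_call.arguments` is no longer mutated in place. A small illustration of that ordering:

```python
tool_call_arguments = {"query": "llama 3 release date", "top_k": 3}  # from the model
toolgroup_args = {"top_k": 10}  # agent-level override

kwargs = {
    "session_id": "session-123",
    **tool_call_arguments,
    **toolgroup_args,  # later unpacking wins: top_k becomes 10
}
assert kwargs["top_k"] == 10
assert tool_call_arguments["top_k"] == 3  # the original dict is untouched
```
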
@@ -11,5 +11,5 @@ from pydantic import BaseModel

 class SentenceTransformersInferenceConfig(BaseModel):
     @classmethod
-    def sample_run_config(cls) -> Dict[str, Any]:
+    def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
         return {}

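Accepting `**kwargs` makes `sample_run_config` uniformly callable across provider config classes, even when a provider's sample config ignores the extra keywords. A hedged sketch of the calling pattern this enables (the `__distro_dir__` keyword is borrowed from how other configs in this diff are invoked):

```python
# Template code can now pass the same keywords to every provider config
# without special-casing providers whose sample config ignores them.
for config_cls in (SentenceTransformersInferenceConfig, FaissVectorIOConfig):
    sample = config_cls.sample_run_config(__distro_dir__="distributions/demo")
```
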
@@ -46,14 +46,14 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
 )

 from .config import CerebrasImplConfig
-from .models import model_entries
+from .models import MODEL_ENTRIES


 class CerebrasInferenceAdapter(ModelRegistryHelper, Inference):
     def __init__(self, config: CerebrasImplConfig) -> None:
         ModelRegistryHelper.__init__(
             self,
-            model_entries=model_entries,
+            model_entries=MODEL_ENTRIES,
         )
         self.config = config

@@ -9,7 +9,7 @@ from llama_stack.providers.utils.inference.model_registry import (
     build_hf_repo_model_entry,
 )

-model_entries = [
+MODEL_ENTRIES = [
     build_hf_repo_model_entry(
         "llama3.1-8b",
         CoreModelId.llama3_1_8b_instruct.value,

@@ -5,10 +5,13 @@
 # the root directory of this source tree.

 from llama_stack.models.llama.sku_list import CoreModelId
-from llama_stack.providers.utils.inference.model_registry import build_model_entry
+from llama_stack.providers.utils.inference.model_registry import (
+    build_hf_repo_model_entry,
+    build_model_entry,
+)

 MODEL_ENTRIES = [
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama3-8b-8192",
         CoreModelId.llama3_1_8b_instruct.value,
     ),

@@ -16,11 +19,11 @@ MODEL_ENTRIES = [
         "groq/llama-3.1-8b-instant",
         CoreModelId.llama3_1_8b_instruct.value,
     ),
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama3-70b-8192",
         CoreModelId.llama3_70b_instruct.value,
     ),
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama-3.3-70b-versatile",
         CoreModelId.llama3_3_70b_instruct.value,
     ),

@@ -28,7 +31,7 @@ MODEL_ENTRIES = [
     # Preview models aren't recommended for production use, but we include this one
     # to pass the test fixture
     # TODO(aidand): Replace this with a stable model once Groq supports it
-    build_model_entry(
+    build_hf_repo_model_entry(
         "groq/llama-3.2-3b-preview",
         CoreModelId.llama3_2_3b_instruct.value,
     ),

@@ -11,7 +11,7 @@ from llama_stack.providers.utils.inference.model_registry import (
     build_hf_repo_model_entry,
 )

-_MODEL_ENTRIES = [
+MODEL_ENTRIES = [
     build_hf_repo_model_entry(
         "meta/llama3-8b-instruct",
         CoreModelId.llama3_8b_instruct.value,

@@ -8,7 +8,7 @@ import logging
 import warnings
 from typing import AsyncIterator, List, Optional, Union

-from openai import APIConnectionError, AsyncOpenAI
+from openai import APIConnectionError, AsyncOpenAI, BadRequestError

 from llama_stack.apis.common.content_types import (
     InterleavedContent,

@@ -47,7 +47,7 @@ from llama_stack.providers.utils.inference.openai_compat import (
 from llama_stack.providers.utils.inference.prompt_adapter import content_has_media

 from . import NVIDIAConfig
-from .models import _MODEL_ENTRIES
+from .models import MODEL_ENTRIES
 from .openai_utils import (
     convert_chat_completion_request,
     convert_completion_request,

@@ -62,7 +62,7 @@ logger = logging.getLogger(__name__)
 class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
     def __init__(self, config: NVIDIAConfig) -> None:
         # TODO(mf): filter by available models
-        ModelRegistryHelper.__init__(self, model_entries=_MODEL_ENTRIES)
+        ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES)

         logger.info(f"Initializing NVIDIAInferenceAdapter({config.url})...")

@@ -144,19 +144,38 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
         #
         # we can ignore str and always pass List[str] to OpenAI
         #
-        flat_contents = [
-            item.text if isinstance(item, TextContentItem) else item
-            for content in contents
-            for item in (content if isinstance(content, list) else [content])
-        ]
+        flat_contents = [content.text if isinstance(content, TextContentItem) else content for content in contents]
         input = [content.text if isinstance(content, TextContentItem) else content for content in flat_contents]
         model = self.get_provider_model_id(model_id)

-        response = await self._client.embeddings.create(
-            model=model,
-            input=input,
-            # extra_body={"input_type": "passage"|"query"},  # TODO(mf): how to tell caller's intent?
-        )
+        extra_body = {}
+
+        if text_truncation is not None:
+            text_truncation_options = {
+                TextTruncation.none: "NONE",
+                TextTruncation.end: "END",
+                TextTruncation.start: "START",
+            }
+            extra_body["truncate"] = text_truncation_options[text_truncation]
+
+        if output_dimension is not None:
+            extra_body["dimensions"] = output_dimension
+
+        if task_type is not None:
+            task_type_options = {
+                EmbeddingTaskType.document: "passage",
+                EmbeddingTaskType.query: "query",
+            }
+            extra_body["input_type"] = task_type_options[task_type]
+
+        try:
+            response = await self._client.embeddings.create(
+                model=model,
+                input=input,
+                extra_body=extra_body,
+            )
+        except BadRequestError as e:
+            raise ValueError(f"Failed to get embeddings: {e}") from e

         #
         # OpenAI: CreateEmbeddingResponse(data=[Embedding(embedding=List[float], ...)], ...)

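From the caller's side, the new parameters flow to the NIM endpoint through `extra_body`. A usage sketch, under the assumption that `TextTruncation` and `EmbeddingTaskType` come from the inference API (the model id is illustrative):

```python
from llama_stack.apis.inference import EmbeddingTaskType, TextTruncation

response = await adapter.embeddings(
    model_id="nvidia/nv-embedqa-e5-v5",  # illustrative
    contents=["What is the capital of France?"],
    text_truncation=TextTruncation.end,   # -> extra_body["truncate"] = "END"
    output_dimension=384,                 # -> extra_body["dimensions"] = 384
    task_type=EmbeddingTaskType.query,    # -> extra_body["input_type"] = "query"
)
```
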
@@ -111,7 +111,8 @@
       "first_name": "Michael",
       "last_name": "Jordan",
       "year_of_birth": 1963,
-      "num_seasons_in_nba": 15
+      "num_seasons_in_nba": 15,
+      "year_for_draft": 1984
     }
   }
 },

@@ -126,6 +126,37 @@ class LiteLLMOpenAIMixin(
         ):
             yield chunk

+    def _add_additional_properties_recursive(self, schema):
+        """
+        Recursively add additionalProperties: False to all object schemas
+        """
+        if isinstance(schema, dict):
+            if schema.get("type") == "object":
+                schema["additionalProperties"] = False
+
+                # Add required field with all property keys if properties exist
+                if "properties" in schema and schema["properties"]:
+                    schema["required"] = list(schema["properties"].keys())
+
+            if "properties" in schema:
+                for prop_schema in schema["properties"].values():
+                    self._add_additional_properties_recursive(prop_schema)
+
+            for key in ["anyOf", "allOf", "oneOf"]:
+                if key in schema:
+                    for sub_schema in schema[key]:
+                        self._add_additional_properties_recursive(sub_schema)
+
+            if "not" in schema:
+                self._add_additional_properties_recursive(schema["not"])
+
+            # Handle $defs/$ref
+            if "$defs" in schema:
+                for def_schema in schema["$defs"].values():
+                    self._add_additional_properties_recursive(def_schema)
+
+        return schema
+
     async def _get_params(self, request: ChatCompletionRequest) -> dict:
         input_dict = {}

@@ -140,6 +171,10 @@ class LiteLLMOpenAIMixin(
             name = fmt["title"]
             del fmt["title"]
             fmt["additionalProperties"] = False
+
+            # Apply additionalProperties: False recursively to all objects
+            fmt = self._add_additional_properties_recursive(fmt)
+
             input_dict["response_format"] = {
                 "type": "json_schema",
                 "json_schema": {

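To see what the recursion adds over the single top-level `additionalProperties` assignment that was there before, here is its effect on a nested schema (`mixin` stands in for any `LiteLLMOpenAIMixin` instance; the schema content is illustrative):

```python
schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "address": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
        },
    },
}
mixin._add_additional_properties_recursive(schema)

# Both the outer object and the nested "address" object are now closed,
# and each object gains a "required" list covering all of its properties:
assert schema["additionalProperties"] is False
assert schema["required"] == ["name", "address"]
assert schema["properties"]["address"]["additionalProperties"] is False
assert schema["properties"]["address"]["required"] == ["city"]
```
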
@@ -27,7 +27,9 @@ from openai.types.chat import (
 from openai.types.chat import (
     ChatCompletionMessageParam as OpenAIChatCompletionMessage,
 )
-from openai.types.chat import ChatCompletionMessageToolCall
+from openai.types.chat import (
+    ChatCompletionMessageToolCall,
+)
 from openai.types.chat import (
     ChatCompletionMessageToolCallParam as OpenAIChatCompletionMessageToolCall,
 )

@@ -199,7 +201,9 @@ def convert_openai_completion_logprobs_stream(text: str, logprobs: Optional[Unio
     return None


-def process_completion_response(response: OpenAICompatCompletionResponse) -> CompletionResponse:
+def process_completion_response(
+    response: OpenAICompatCompletionResponse,
+) -> CompletionResponse:
     choice = response.choices[0]
     # drop suffix <eot_id> if present and return stop reason as end of turn
     if choice.text.endswith("<|eot_id|>"):

@@ -492,7 +496,9 @@ class UnparseableToolCall(BaseModel):
     arguments: str = ""


-async def convert_message_to_openai_dict_new(message: Message | Dict) -> OpenAIChatCompletionMessage:
+async def convert_message_to_openai_dict_new(
+    message: Message | Dict,
+) -> OpenAIChatCompletionMessage:
     """
     Convert a Message to an OpenAI API-compatible dictionary.
     """

@@ -942,7 +948,7 @@ async def convert_openai_chat_completion_stream(
         )
         yield ChatCompletionResponseStreamChunk(
             event=ChatCompletionResponseEvent(
-                event_type=ChatCompletionResponseEventType.complete,
+                event_type=ChatCompletionResponseEventType.progress,
                 delta=ToolCallDelta(
                     tool_call=tool_call,
                     parse_status=ToolCallParseStatus.succeeded,

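The last hunk above is behavioral rather than cosmetic: a tool call that finishes parsing mid-stream is now emitted as a `progress` event, so `complete` is reserved for the stream's final event. A consumer sketch written against that assumption (`handle_delta` is a placeholder):

```python
async for chunk in converted_stream:  # output of convert_openai_chat_completion_stream(...)
    event = chunk.event
    if event.event_type == ChatCompletionResponseEventType.progress:
        handle_delta(event.delta)  # now includes fully parsed tool calls
    elif event.event_type == ChatCompletionResponseEventType.complete:
        break  # emitted once, at the true end of the stream
```
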
@@ -6,12 +6,10 @@

 from pathlib import Path

-from llama_stack.apis.models import ModelInput
 from llama_stack.distribution.datatypes import Provider, ToolGroupInput
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.bedrock.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry


 def get_distribution_template() -> DistributionTemplate:

@@ -39,16 +37,11 @@ def get_distribution_template() -> DistributionTemplate:
         config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"),
     )

-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model],
-            provider_model_id=m.provider_model_id,
-            provider_id="bedrock",
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "bedrock": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",

@@ -71,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={

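Every template rewrite in this commit funnels through the new `get_model_registry` helper. Judging from the run.yaml hunks that follow, it expands each provider's entries into `ModelInput`s under both the provider-native id and any HF-repo alias. A non-authoritative sketch of that behavior (the entry fields are assumptions inferred from the `build_hf_repo_model_entry` usage above):

```python
from typing import Dict, List

from llama_stack.distribution.datatypes import ModelInput


def get_model_registry_sketch(available_models: Dict[str, list]) -> List[ModelInput]:
    models = []
    for provider_id, entries in available_models.items():
        for entry in entries:
            # One ModelInput per id the entry answers to: the provider-native
            # id itself plus any aliases (e.g. the meta-llama/... repo name).
            # This matches the doubled entries in the run.yaml hunks below.
            for model_id in [entry.provider_model_id, *getattr(entry, "aliases", [])]:
                models.append(
                    ModelInput(
                        model_id=model_id,
                        provider_model_id=entry.provider_model_id,
                        provider_id=provider_id,
                        model_type=entry.model_type,
                        metadata=entry.metadata,
                    )
                )
    return models
```
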
@@ -28,7 +28,7 @@ The following environment variables can be configured:
 The following models are available by default:

 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}

@@ -88,16 +88,31 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/bedrock}/registry.db
 models:
+- metadata: {}
+  model_id: meta.llama3-1-8b-instruct-v1:0
+  provider_id: bedrock
+  provider_model_id: meta.llama3-1-8b-instruct-v1:0
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: bedrock
   provider_model_id: meta.llama3-1-8b-instruct-v1:0
   model_type: llm
+- metadata: {}
+  model_id: meta.llama3-1-70b-instruct-v1:0
+  provider_id: bedrock
+  provider_model_id: meta.llama3-1-70b-instruct-v1:0
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: bedrock
   provider_model_id: meta.llama3-1-70b-instruct-v1:0
   model_type: llm
+- metadata: {}
+  model_id: meta.llama3-1-405b-instruct-v1:0
+  provider_id: bedrock
+  provider_model_id: meta.llama3-1-405b-instruct-v1:0
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: bedrock

@@ -8,14 +8,13 @@ from pathlib import Path

 from llama_stack.apis.models.models import ModelType
 from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.cerebras import CerebrasImplConfig
-from llama_stack.providers.remote.inference.cerebras.models import model_entries
+from llama_stack.providers.remote.inference.cerebras.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry


 def get_distribution_template() -> DistributionTemplate:

@@ -48,15 +47,10 @@ def get_distribution_template() -> DistributionTemplate:
         config=SentenceTransformersInferenceConfig.sample_run_config(),
     )

-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model],
-            provider_model_id=m.provider_model_id,
-            provider_id="cerebras",
-        )
-        for m in model_entries
-    ]
+    available_models = {
+        "cerebras": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
         provider_id="sentence-transformers",

@@ -92,7 +86,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={

@@ -20,7 +20,7 @@ The following environment variables can be configured:
 The following models are available by default:

 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}

@@ -90,11 +90,21 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/cerebras}/registry.db
 models:
+- metadata: {}
+  model_id: llama3.1-8b
+  provider_id: cerebras
+  provider_model_id: llama3.1-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: cerebras
   provider_model_id: llama3.1-8b
   model_type: llm
+- metadata: {}
+  model_id: llama-3.3-70b
+  provider_id: cerebras
+  provider_model_id: llama-3.3-70b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: cerebras

@@ -12,14 +12,13 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.sqlite_vec.config import SQLiteVectorIOConfig
 from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
 from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry


 def get_distribution_template() -> DistributionTemplate:

@@ -71,16 +70,10 @@ def get_distribution_template() -> DistributionTemplate:
             provider_id="code-interpreter",
         ),
     ]
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_id="fireworks",
-            model_type=m.model_type,
-            metadata=m.metadata,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "fireworks": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
         provider_id="sentence-transformers",

@@ -97,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=default_models + [embedding_model],
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={

@@ -90,51 +90,112 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ci-tests}/registry.db
 models:
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
   model_type: llm
 - metadata:
     embedding_dimension: 768
     context_length: 8192
   model_id: nomic-ai/nomic-embed-text-v1.5
   provider_id: fireworks
+  provider_model_id: nomic-ai/nomic-embed-text-v1.5
   model_type: embedding
 - metadata:
     embedding_dimension: 384

@@ -3,7 +3,6 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from pathlib import Path

 from llama_stack.apis.models.models import ModelType
 from llama_stack.distribution.datatypes import (

@@ -99,9 +98,7 @@ def get_distribution_template() -> DistributionTemplate:
         distro_type="self_hosted",
         description="Dell's distribution of Llama Stack. TGI inference via Dell's custom container",
         container_image=None,
-        template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, embedding_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={

@@ -13,7 +13,6 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )

@@ -28,7 +27,7 @@ from llama_stack.providers.remote.inference.groq.config import GroqConfig
 from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES as GROQ_MODEL_ENTRIES
 from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
 from llama_stack.providers.remote.inference.openai.models import MODEL_ENTRIES as OPENAI_MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry


 def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:

@@ -61,8 +60,7 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
         ),
     ]
     inference_providers = []
-    default_models = []
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
+    available_models = {}
     for provider_id, model_entries, config in providers:
         inference_providers.append(
             Provider(

@@ -71,21 +69,12 @@ def get_inference_providers() -> Tuple[List[Provider], List[ModelInput]]:
                 config=config,
             )
         )
-        default_models.extend(
-            ModelInput(
-                model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-                provider_model_id=m.provider_model_id,
-                provider_id=provider_id,
-                model_type=m.model_type,
-                metadata=m.metadata,
-            )
-            for m in model_entries
-        )
-    return inference_providers, default_models
+        available_models[provider_id] = model_entries
+    return inference_providers, available_models


 def get_distribution_template() -> DistributionTemplate:
-    inference_providers, default_models = get_inference_providers()
+    inference_providers, available_models = get_inference_providers()
     providers = {
         "inference": ([p.provider_type for p in inference_providers] + ["inline::sentence-transformers"]),
         "vector_io": ["inline::sqlite-vec", "remote::chromadb", "remote::pgvector"],

@@ -139,6 +128,7 @@ def get_distribution_template() -> DistributionTemplate:
         },
     )

+    default_models = get_model_registry(available_models)
     return DistributionTemplate(
         name=name,
         distro_type="self_hosted",

@@ -146,7 +136,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=[],
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={

@@ -136,51 +136,101 @@ models:
   provider_id: openai
   provider_model_id: openai/text-embedding-3-large
   model_type: embedding
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks

@@ -247,25 +297,45 @@ models:
   provider_model_id: gemini/text-embedding-004
   model_type: embedding
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-8B-Instruct
+  model_id: groq/llama3-8b-8192
   provider_id: groq
   provider_model_id: groq/llama3-8b-8192
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: groq
+  provider_model_id: groq/llama3-8b-8192
+  model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.1-8b-instant
+  provider_id: groq
   provider_model_id: groq/llama-3.1-8b-instant
   model_type: llm
+- metadata: {}
+  model_id: groq/llama3-70b-8192
+  provider_id: groq
+  provider_model_id: groq/llama3-70b-8192
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3-70B-Instruct
   provider_id: groq
   provider_model_id: groq/llama3-70b-8192
   model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.3-70b-versatile
+  provider_id: groq
+  provider_model_id: groq/llama-3.3-70b-versatile
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: groq
   provider_model_id: groq/llama-3.3-70b-versatile
   model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.2-3b-preview
+  provider_id: groq
+  provider_model_id: groq/llama-3.2-3b-preview
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: groq

@@ -30,7 +30,7 @@ The following environment variables can be configured:
 The following models are available by default:

 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}

@@ -13,14 +13,13 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.fireworks.config import FireworksImplConfig
 from llama_stack.providers.remote.inference.fireworks.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry


 def get_distribution_template() -> DistributionTemplate:

@@ -60,17 +59,11 @@ def get_distribution_template() -> DistributionTemplate:
         config=FaissVectorIOConfig.sample_run_config(f"distributions/{name}"),
     )

-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_model_id=m.provider_model_id,
-            provider_id="fireworks",
-            metadata=m.metadata,
-            model_type=m.model_type,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "fireworks": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
     embedding_model = ModelInput(
         model_id="all-MiniLM-L6-v2",
         provider_id="sentence-transformers",

@@ -101,7 +94,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={

@@ -99,51 +99,101 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
 models:
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-8b
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-8b
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: fireworks
   provider_model_id: accounts/fireworks/models/llama-guard-3-8b
   model_type: llm
+- metadata: {}
+  model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  provider_id: fireworks
+  provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: fireworks

@ -93,51 +93,101 @@ metadata_store:
|
||||||
type: sqlite
|
type: sqlite
|
||||||
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
|
db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db
|
||||||
models:
|
models:
|
||||||
|
- metadata: {}
|
||||||
|
model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
|
||||||
|
provider_id: fireworks
|
||||||
|
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
|
||||||
|
model_type: llm
|
||||||
- metadata: {}
|
- metadata: {}
|
||||||
model_id: meta-llama/Llama-3.1-8B-Instruct
|
model_id: meta-llama/Llama-3.1-8B-Instruct
|
||||||
provider_id: fireworks
|
provider_id: fireworks
|
||||||
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
|
provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct
|
||||||
model_type: llm
|
model_type: llm
|
||||||
|
- metadata: {}
|
||||||
|
model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
|
||||||
|
provider_id: fireworks
|
||||||
|
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
|
||||||
|
model_type: llm
|
||||||
- metadata: {}
|
- metadata: {}
|
||||||
model_id: meta-llama/Llama-3.1-70B-Instruct
|
model_id: meta-llama/Llama-3.1-70B-Instruct
|
||||||
provider_id: fireworks
|
provider_id: fireworks
|
||||||
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
|
provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct
|
||||||
model_type: llm
|
model_type: llm
|
||||||
|
- metadata: {}
|
||||||
|
model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
|
||||||
|
provider_id: fireworks
|
||||||
|
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
|
||||||
|
model_type: llm
|
||||||
- metadata: {}
|
- metadata: {}
|
||||||
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
|
model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
|
||||||
provider_id: fireworks
|
provider_id: fireworks
|
||||||
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
|
provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct
|
||||||
model_type: llm
|
model_type: llm
|
||||||
|
- metadata: {}
|
||||||
|
model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
|
||||||
|
provider_id: fireworks
|
||||||
|
provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
|
||||||
|
model_type: llm
|
||||||
- metadata: {}
|
- metadata: {}
|
||||||
model_id: meta-llama/Llama-3.2-1B-Instruct
|
model_id: meta-llama/Llama-3.2-1B-Instruct
|
||||||
provider_id: fireworks
|
provider_id: fireworks
|
||||||
provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
|
provider_model_id: accounts/fireworks/models/llama-v3p2-1b-instruct
|
||||||
model_type: llm
|
model_type: llm
|
||||||
|
- metadata: {}
|
||||||
|
model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
|
||||||
|
provider_id: fireworks
|
||||||
|
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
|
||||||
|
model_type: llm
|
||||||
- metadata: {}
|
- metadata: {}
|
||||||
model_id: meta-llama/Llama-3.2-3B-Instruct
|
model_id: meta-llama/Llama-3.2-3B-Instruct
|
||||||
provider_id: fireworks
|
provider_id: fireworks
|
||||||
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
|
provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct
|
||||||
model_type: llm
|
model_type: llm
|
||||||
|
- metadata: {}
|
||||||
|
model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
|
||||||
|
provider_id: fireworks
|
||||||
|
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
|
||||||
|
model_type: llm
|
||||||
- metadata: {}
|
- metadata: {}
|
||||||
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
|
model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
|
||||||
provider_id: fireworks
|
provider_id: fireworks
|
||||||
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
|
provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct
|
||||||
model_type: llm
|
model_type: llm
|
||||||
|
- metadata: {}
|
||||||
|
model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
|
||||||
|
provider_id: fireworks
|
||||||
|
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
|
||||||
|
model_type: llm
|
||||||
- metadata: {}
|
- metadata: {}
|
||||||
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
|
model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
|
||||||
provider_id: fireworks
|
provider_id: fireworks
|
||||||
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
|
provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct
|
||||||
model_type: llm
|
model_type: llm
|
||||||
|
- metadata: {}
|
||||||
|
model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
|
||||||
|
provider_id: fireworks
|
||||||
|
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
|
||||||
|
model_type: llm
|
||||||
- metadata: {}
|
- metadata: {}
|
||||||
model_id: meta-llama/Llama-3.3-70B-Instruct
|
model_id: meta-llama/Llama-3.3-70B-Instruct
|
||||||
provider_id: fireworks
|
provider_id: fireworks
|
||||||
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
|
provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct
|
||||||
model_type: llm
|
model_type: llm
|
||||||
|
- metadata: {}
|
||||||
|
model_id: accounts/fireworks/models/llama-guard-3-8b
|
||||||
|
provider_id: fireworks
|
||||||
|
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
|
||||||
|
model_type: llm
|
||||||
- metadata: {}
|
- metadata: {}
|
||||||
model_id: meta-llama/Llama-Guard-3-8B
|
model_id: meta-llama/Llama-Guard-3-8B
|
||||||
provider_id: fireworks
|
provider_id: fireworks
|
||||||
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
|
provider_model_id: accounts/fireworks/models/llama-guard-3-8b
|
||||||
model_type: llm
|
model_type: llm
|
||||||
|
- metadata: {}
|
||||||
|
model_id: accounts/fireworks/models/llama-guard-3-11b-vision
|
||||||
|
provider_id: fireworks
|
||||||
|
provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision
|
||||||
|
model_type: llm
|
||||||
- metadata: {}
|
- metadata: {}
|
||||||
model_id: meta-llama/Llama-Guard-3-11B-Vision
|
model_id: meta-llama/Llama-Guard-3-11B-Vision
|
||||||
provider_id: fireworks
|
provider_id: fireworks
|
||||||
|
|
|
@@ -30,7 +30,7 @@ The following environment variables can be configured:
 The following models are available by default:
 
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}
 
@@ -12,13 +12,12 @@ from llama_stack.distribution.datatypes import (
     Provider,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.remote.inference.groq import GroqConfig
 from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -60,18 +59,10 @@ def get_distribution_template() -> DistributionTemplate:
         },
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_model_id=m.provider_model_id,
-            provider_id=name,
-            model_type=m.model_type,
-            metadata=m.metadata,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "groq": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
 
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
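Each template now hands its provider's `MODEL_ENTRIES` to a shared `get_model_registry()` helper (defined later in this diff, in `llama_stack/templates/template.py`) instead of building `ModelInput`s by hand. A minimal illustration of the new pattern, with made-up output for clarity:

```python
# A sketch of the refactored registration flow; the printed pairs are
# illustrative, not captured output.
from llama_stack.providers.remote.inference.groq.models import MODEL_ENTRIES
from llama_stack.templates.template import get_model_registry

# One dict per template, keyed by provider_id.
available_models = {"groq": MODEL_ENTRIES}

# get_model_registry() flattens every entry into one ModelInput per id:
# the provider-native id plus each alias, all pointing at the same
# provider_model_id (the run.yaml hunks below show the resulting pairs).
default_models = get_model_registry(available_models)
for model in default_models:
    print(model.model_id, "->", model.provider_model_id)
```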
@@ -94,7 +85,7 @@ def get_distribution_template() -> DistributionTemplate:
         docker_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
@@ -91,25 +91,45 @@ metadata_store:
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/groq}/registry.db
 models:
 - metadata: {}
-  model_id: meta-llama/Llama-3.1-8B-Instruct
+  model_id: groq/llama3-8b-8192
   provider_id: groq
   provider_model_id: groq/llama3-8b-8192
   model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: groq
+  provider_model_id: groq/llama3-8b-8192
+  model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.1-8b-instant
+  provider_id: groq
   provider_model_id: groq/llama-3.1-8b-instant
   model_type: llm
+- metadata: {}
+  model_id: groq/llama3-70b-8192
+  provider_id: groq
+  provider_model_id: groq/llama3-70b-8192
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3-70B-Instruct
   provider_id: groq
   provider_model_id: groq/llama3-70b-8192
   model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.3-70b-versatile
+  provider_id: groq
+  provider_model_id: groq/llama-3.3-70b-versatile
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: groq
   provider_model_id: groq/llama-3.3-70b-versatile
   model_type: llm
+- metadata: {}
+  model_id: groq/llama-3.2-3b-preview
+  provider_id: groq
+  provider_model_id: groq/llama-3.2-3b-preview
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: groq
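With the dual registration above, a client can address the same served model by either its provider-native id or its huggingface-style alias. A sketch of what that looks like against a locally running stack (the base URL and prompt are illustrative):

```python
# A minimal sketch, assuming a groq-backed stack is running on port 8321
# with the registry shown above.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# Both ids resolve to the same Groq-hosted model.
for model_id in ("groq/llama3-8b-8192", "meta-llama/Llama-3.1-8B-Instruct"):
    response = client.inference.chat_completion(
        model_id=model_id,
        messages=[{"role": "user", "content": "Hello!"}],
    )
```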
@@ -92,7 +92,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
@@ -93,7 +93,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
@@ -98,7 +98,6 @@ def get_distribution_template() -> DistributionTemplate:
         description="Use Meta Reference for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
@@ -88,7 +88,6 @@ def get_distribution_template() -> DistributionTemplate:
         description="Use Meta Reference with fp8, int4 quantization for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
@@ -20,7 +20,7 @@ The following environment variables can be configured:
 The following models are available by default:
 
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}
 
@@ -6,11 +6,10 @@
 from pathlib import Path
 
-from llama_stack.distribution.datatypes import ModelInput, Provider, ToolGroupInput
+from llama_stack.distribution.datatypes import Provider, ToolGroupInput
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.remote.inference.nvidia import NVIDIAConfig
-from llama_stack.providers.remote.inference.nvidia.models import _MODEL_ENTRIES
+from llama_stack.providers.remote.inference.nvidia.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -38,17 +37,9 @@ def get_distribution_template() -> DistributionTemplate:
         config=NVIDIAConfig.sample_run_config(),
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_model_id=m.provider_model_id,
-            provider_id="nvidia",
-            model_type=m.model_type,
-            metadata=m.metadata,
-        )
-        for m in _MODEL_ENTRIES
-    ]
+    available_models = {
+        "nvidia": MODEL_ENTRIES,
+    }
 
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -64,6 +55,7 @@ def get_distribution_template() -> DistributionTemplate:
         ),
     ]
 
+    default_models = get_model_registry(available_models)
     return DistributionTemplate(
         name="nvidia",
         distro_type="remote_hosted",
|
@ -71,7 +63,7 @@ def get_distribution_template() -> DistributionTemplate:
|
||||||
container_image=None,
|
container_image=None,
|
||||||
template_path=Path(__file__).parent / "doc_template.md",
|
template_path=Path(__file__).parent / "doc_template.md",
|
||||||
providers=providers,
|
providers=providers,
|
||||||
default_models=default_models,
|
available_models_by_provider=available_models,
|
||||||
run_configs={
|
run_configs={
|
||||||
"run.yaml": RunConfigSettings(
|
"run.yaml": RunConfigSettings(
|
||||||
provider_overrides={
|
provider_overrides={
|
||||||
|
|
|
@@ -90,46 +90,91 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/nvidia}/registry.db
 models:
+- metadata: {}
+  model_id: meta/llama3-8b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama3-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3-8B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama3-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama3-70b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama3-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3-70B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama3-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.1-8b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.1-8b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.1-8b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.1-70b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.1-70b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.1-70b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.1-405b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.1-405b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: nvidia
   provider_model_id: meta/llama-3.1-405b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.2-1b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.2-1b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.2-1b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.2-3b-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.2-3b-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.2-3b-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.2-11b-vision-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.2-11b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: nvidia
   provider_model_id: meta/llama-3.2-11b-vision-instruct
   model_type: llm
+- metadata: {}
+  model_id: meta/llama-3.2-90b-vision-instruct
+  provider_id: nvidia
+  provider_model_id: meta/llama-3.2-90b-vision-instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: nvidia
@@ -130,17 +130,21 @@ ollama run <model_name>
 To make sure that the model is being served correctly, run `ollama ps` to get a list of models being served by ollama.
 ```
 $ ollama ps
-NAME                         ID              SIZE     PROCESSOR    UNTIL
-llama3.1:8b-instruct-fp16    4aacac419454    17 GB    100% GPU     4 minutes from now
+NAME                         ID              SIZE      PROCESSOR    UNTIL
+llama3.2:3b-instruct-fp16    195a8c01d91e    8.6 GB    100% GPU     9 minutes from now
 ```
 
 To verify that the model served by ollama is correctly connected to Llama Stack server
 ```bash
 $ llama-stack-client models list
-+----------------------+----------------------+---------------+-----------------------------------------------+
-| identifier           | llama_model          | provider_id   | metadata                                      |
-+======================+======================+===============+===============================================+
-| Llama3.1-8B-Instruct | Llama3.1-8B-Instruct | ollama0       | {'ollama_model': 'llama3.1:8b-instruct-fp16'} |
-+----------------------+----------------------+---------------+-----------------------------------------------+
+
+Available Models
+
+┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━┓
+┃ model_type ┃ identifier                       ┃ provider_resource_id      ┃ metadata ┃ provider_id ┃
+┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━┩
+│ llm        │ meta-llama/Llama-3.2-3B-Instruct │ llama3.2:3b-instruct-fp16 │          │ ollama      │
+└────────────┴──────────────────────────────────┴───────────────────────────┴──────────┴─────────────┘
+
+Total models: 1
+
 ```
@@ -87,7 +87,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
@@ -95,7 +95,6 @@ def get_distribution_template() -> DistributionTemplate:
         description="Use (an external) vLLM server for running LLM inference",
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
@@ -30,7 +30,7 @@ The following environment variables can be configured:
 The following models are available by default:
 
 {% for model in default_models %}
-- `{{ model.model_id }} ({{ model.provider_model_id }})`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}
 
@@ -68,46 +68,91 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/sambanova}/registry.db
 models:
+- metadata: {}
+  model_id: Meta-Llama-3.1-8B-Instruct
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-3.1-8B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: sambanova
   provider_model_id: Meta-Llama-3.1-8B-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Meta-Llama-3.1-70B-Instruct
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-3.1-70B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: sambanova
   provider_model_id: Meta-Llama-3.1-70B-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Meta-Llama-3.1-405B-Instruct
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-3.1-405B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: sambanova
   provider_model_id: Meta-Llama-3.1-405B-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Meta-Llama-3.2-1B-Instruct
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-3.2-1B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-1B-Instruct
   provider_id: sambanova
   provider_model_id: Meta-Llama-3.2-1B-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Meta-Llama-3.2-3B-Instruct
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-3.2-3B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: sambanova
   provider_model_id: Meta-Llama-3.2-3B-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Meta-Llama-3.3-70B-Instruct
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-3.3-70B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: sambanova
   provider_model_id: Meta-Llama-3.3-70B-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Llama-3.2-11B-Vision-Instruct
+  provider_id: sambanova
+  provider_model_id: Llama-3.2-11B-Vision-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: sambanova
   provider_model_id: Llama-3.2-11B-Vision-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Llama-3.2-90B-Vision-Instruct
+  provider_id: sambanova
+  provider_model_id: Llama-3.2-90B-Vision-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: sambanova
   provider_model_id: Llama-3.2-90B-Vision-Instruct
   model_type: llm
+- metadata: {}
+  model_id: Meta-Llama-Guard-3-8B
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-Guard-3-8B
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: sambanova
@@ -7,15 +7,13 @@
 from pathlib import Path
 
 from llama_stack.distribution.datatypes import (
-    ModelInput,
     Provider,
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.remote.inference.sambanova import SambaNovaImplConfig
 from llama_stack.providers.remote.inference.sambanova.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -40,16 +38,10 @@ def get_distribution_template() -> DistributionTemplate:
         config=SambaNovaImplConfig.sample_run_config(),
     )
 
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model],
-            provider_model_id=m.provider_model_id,
-            provider_id=name,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        name: MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
 
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -72,7 +64,7 @@ def get_distribution_template() -> DistributionTemplate:
         docker_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
@@ -24,9 +24,33 @@ from llama_stack.distribution.datatypes import (
 )
 from llama_stack.distribution.distribution import get_provider_registry
 from llama_stack.distribution.utils.dynamic import instantiate_class_type
+from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
 from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
 
 
+def get_model_registry(available_models: Dict[str, List[ProviderModelEntry]]) -> List[ModelInput]:
+    models = []
+    for provider_id, entries in available_models.items():
+        for entry in entries:
+            ids = [entry.provider_model_id] + entry.aliases
+            for model_id in ids:
+                models.append(
+                    ModelInput(
+                        model_id=model_id,
+                        provider_model_id=entry.provider_model_id,
+                        provider_id=provider_id,
+                        model_type=entry.model_type,
+                        metadata=entry.metadata,
+                    )
+                )
+    return models
+
+
+class DefaultModel(BaseModel):
+    model_id: str
+    doc_string: str
+
+
 class RunConfigSettings(BaseModel):
     provider_overrides: Dict[str, List[Provider]] = Field(default_factory=dict)
     default_models: Optional[List[ModelInput]] = None
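A quick sketch of the new helper's behavior. The `ProviderModelEntry` constructor arguments below are assumptions for illustration; only the fields the helper reads above (`provider_model_id`, `aliases`, `model_type`, `metadata`) are taken from this diff:

```python
# A minimal sketch, assuming ProviderModelEntry can be constructed with
# just these two fields and sensible defaults for the rest.
from llama_stack.providers.utils.inference.model_registry import ProviderModelEntry
from llama_stack.templates.template import get_model_registry

entry = ProviderModelEntry(
    provider_model_id="accounts/fireworks/models/llama-v3p1-8b-instruct",
    aliases=["meta-llama/Llama-3.1-8B-Instruct"],
)
models = get_model_registry({"fireworks": [entry]})

# Two ModelInputs come back, one per id, both resolving to the same
# provider model; this is exactly the id/alias pairing the run.yaml
# hunks in this diff materialize.
assert [m.model_id for m in models] == [
    "accounts/fireworks/models/llama-v3p1-8b-instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
]
```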
@@ -110,7 +134,7 @@ class DistributionTemplate(BaseModel):
     run_config_env_vars: Optional[Dict[str, Tuple[str, str]]] = None
     container_image: Optional[str] = None
 
-    default_models: Optional[List[ModelInput]] = None
+    available_models_by_provider: Optional[Dict[str, List[ProviderModelEntry]]] = None
 
     def build_config(self) -> BuildConfig:
         return BuildConfig(
@@ -148,13 +172,32 @@ class DistributionTemplate(BaseModel):
             autoescape=True,
         )
         template = env.from_string(template)
 
+        default_models = []
+        if self.available_models_by_provider:
+            has_multiple_providers = len(self.available_models_by_provider.keys()) > 1
+            for provider_id, model_entries in self.available_models_by_provider.items():
+                for model_entry in model_entries:
+                    doc_parts = []
+                    if model_entry.aliases:
+                        doc_parts.append(f"aliases: {', '.join(model_entry.aliases)}")
+                    if has_multiple_providers:
+                        doc_parts.append(f"provider: {provider_id}")
+
+                    default_models.append(
+                        DefaultModel(
+                            model_id=model_entry.provider_model_id,
+                            doc_string=f"({' -- '.join(doc_parts)})" if doc_parts else "",
+                        )
+                    )
+
         return template.render(
             name=self.name,
             description=self.description,
             providers=self.providers,
             providers_table=providers_table,
             run_config_env_vars=self.run_config_env_vars,
-            default_models=self.default_models,
+            default_models=default_models,
         )
 
     def save_distribution(self, yaml_output_dir: Path, doc_output_dir: Path) -> None:
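This is what feeds the `{{ model.doc_string }}` placeholder introduced in the doc templates earlier in this diff. A sketch of the resulting string, assuming one Fireworks entry with a single alias (values illustrative):

```python
# Reproduces the doc_string construction above for one hypothetical entry.
doc_parts = ["aliases: meta-llama/Llama-3.1-8B-Instruct"]
doc_string = f"({' -- '.join(doc_parts)})" if doc_parts else ""
print(doc_string)  # -> (aliases: meta-llama/Llama-3.1-8B-Instruct)

# The doc template's loop would then render a line such as:
# - `accounts/fireworks/models/llama-v3p1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
```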
@@ -96,7 +96,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=[inference_model, safety_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
@@ -30,7 +30,7 @@ The following environment variables can be configured:
 The following models are available by default:
 
 {% for model in default_models %}
-- `{{ model.model_id }}`
+- `{{ model.model_id }} {{ model.doc_string }}`
 {% endfor %}
 {% endif %}
 
@@ -99,46 +99,91 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
 models:
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-Guard-3-8B
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: together
@@ -93,46 +93,91 @@ metadata_store:
   type: sqlite
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/together}/registry.db
 models:
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-8B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-70B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.1-405B-Instruct-FP8
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-3B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-90B-Vision-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: together
   provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Meta-Llama-Guard-3-8B
+  provider_id: together
+  provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-8B
   provider_id: together
   provider_model_id: meta-llama/Meta-Llama-Guard-3-8B
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
+  provider_id: together
+  provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-Guard-3-11B-Vision
   provider_id: together
@@ -13,14 +13,13 @@ from llama_stack.distribution.datatypes import (
     ShieldInput,
     ToolGroupInput,
 )
-from llama_stack.models.llama.sku_list import all_registered_models
 from llama_stack.providers.inline.inference.sentence_transformers import (
     SentenceTransformersInferenceConfig,
 )
 from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
 from llama_stack.providers.remote.inference.together import TogetherImplConfig
 from llama_stack.providers.remote.inference.together.models import MODEL_ENTRIES
-from llama_stack.templates.template import DistributionTemplate, RunConfigSettings
+from llama_stack.templates.template import DistributionTemplate, RunConfigSettings, get_model_registry
 
 
 def get_distribution_template() -> DistributionTemplate:
@@ -57,18 +56,10 @@ def get_distribution_template() -> DistributionTemplate:
         provider_type="inline::sentence-transformers",
         config=SentenceTransformersInferenceConfig.sample_run_config(),
     )
-
-    core_model_to_hf_repo = {m.descriptor(): m.huggingface_repo for m in all_registered_models()}
-    default_models = [
-        ModelInput(
-            model_id=core_model_to_hf_repo[m.llama_model] if m.llama_model else m.provider_model_id,
-            provider_model_id=m.provider_model_id,
-            provider_id="together",
-            metadata=m.metadata,
-            model_type=m.model_type,
-        )
-        for m in MODEL_ENTRIES
-    ]
+    available_models = {
+        "together": MODEL_ENTRIES,
+    }
+    default_models = get_model_registry(available_models)
 
     default_tool_groups = [
         ToolGroupInput(
             toolgroup_id="builtin::websearch",
@@ -99,7 +90,7 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=Path(__file__).parent / "doc_template.md",
         providers=providers,
-        default_models=default_models,
+        available_models_by_provider=available_models,
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
@@ -88,7 +88,6 @@ def get_distribution_template() -> DistributionTemplate:
         container_image=None,
         template_path=None,
         providers=providers,
-        default_models=[inference_model],
         run_configs={
             "run.yaml": RunConfigSettings(
                 provider_overrides={
```diff
@@ -14,6 +14,23 @@
 #  - array of a text (TextContentItem)
 # Types of output:
 #  - list of list of floats
+# Params:
+#  - text_truncation
+#    - absent w/ long text -> error
+#    - none w/ long text -> error
+#    - absent w/ short text -> ok
+#    - none w/ short text -> ok
+#    - end w/ long text -> ok
+#    - end w/ short text -> ok
+#    - start w/ long text -> ok
+#    - start w/ short text -> ok
+#  - output_dimension
+#    - response dimension matches
+#  - task_type, only for asymmetric models
+#    - query embedding != passage embedding
+# Negative:
+#  - long string
+#  - long text
 #
 # Todo:
 #  - negative tests
```
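Read as a decision table, the `text_truncation` cases added in the comment block above collapse to one rule: input that exceeds the model's limit is rejected unless truncation is explicitly enabled from one end. A toy distillation of that rule follows; the function name and token accounting are illustrative assumptions, not server code:

```python
# Hypothetical distillation of the truncation matrix above.
def should_reject(input_tokens: int, model_limit: int, text_truncation: str | None) -> bool:
    mode = text_truncation or "none"  # an absent param behaves like "none"
    if input_tokens <= model_limit:
        return False  # short input is always ok
    return mode not in ("start", "end")  # long input needs explicit truncation


assert should_reject(10_000, 8_192, None) is True    # absent w/ long text -> error
assert should_reject(10_000, 8_192, "end") is False  # end w/ long text -> ok
assert should_reject(100, 8_192, "none") is False    # none w/ short text -> ok
```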
```diff
@@ -23,8 +40,6 @@
 #  - empty text
 #  - empty image
 #  - long
-#    - long string
-#    - long text
 #    - large image
 #  - appropriate combinations
 #  - batch size
@@ -40,6 +55,7 @@
 #
 
 import pytest
+from llama_stack_client import BadRequestError
 from llama_stack_client.types import EmbeddingsResponse
 from llama_stack_client.types.shared.interleaved_content import (
     ImageContentItem,
@@ -50,8 +66,10 @@ from llama_stack_client.types.shared.interleaved_content import (
 
 DUMMY_STRING = "hello"
 DUMMY_STRING2 = "world"
+DUMMY_LONG_STRING = "NVDA " * 10240
 DUMMY_TEXT = TextContentItem(text=DUMMY_STRING, type="text")
 DUMMY_TEXT2 = TextContentItem(text=DUMMY_STRING2, type="text")
+DUMMY_LONG_TEXT = TextContentItem(text=DUMMY_LONG_STRING, type="text")
 # TODO(mf): add a real image URL and base64 string
 DUMMY_IMAGE_URL = ImageContentItem(
     image=ImageContentItemImage(url=ImageContentItemImageURL(uri="https://example.com/image.jpg")), type="image"
```
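The new long-input fixtures are sized to overflow any realistic embedding context window; a quick check of the arithmetic (the quoted window sizes are typical values, not taken from this diff):

```python
# "NVDA " is 5 characters, so the fixture is 51,200 characters; at roughly one
# token per repetition it comfortably exceeds the 512-8,192 token context
# windows of common embedding models.
s = "NVDA " * 10240
assert len(s) == 51_200
```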
```diff
@@ -89,10 +107,120 @@ def test_embedding_text(llama_stack_client, embedding_model_id, contents):
         "list[url,string,base64,text]",
     ],
 )
-@pytest.mark.skip(reason="Media is not supported")
+@pytest.mark.xfail(reason="Media is not supported")
 def test_embedding_image(llama_stack_client, embedding_model_id, contents):
     response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents)
     assert isinstance(response, EmbeddingsResponse)
     assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
     assert isinstance(response.embeddings[0], list)
     assert isinstance(response.embeddings[0][0], float)
+
+
+@pytest.mark.parametrize(
+    "text_truncation",
+    [
+        "end",
+        "start",
+    ],
+)
+@pytest.mark.parametrize(
+    "contents",
+    [
+        [DUMMY_LONG_TEXT],
+        [DUMMY_STRING],
+    ],
+    ids=[
+        "long",
+        "short",
+    ],
+)
+def test_embedding_truncation(llama_stack_client, embedding_model_id, text_truncation, contents):
+    response = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=contents, text_truncation=text_truncation
+    )
+    assert isinstance(response, EmbeddingsResponse)
+    assert len(response.embeddings) == 1
+    assert isinstance(response.embeddings[0], list)
+    assert isinstance(response.embeddings[0][0], float)
+
+
+@pytest.mark.parametrize(
+    "text_truncation",
+    [
+        None,
+        "none",
+    ],
+)
+@pytest.mark.parametrize(
+    "contents",
+    [
+        [DUMMY_LONG_TEXT],
+        [DUMMY_LONG_STRING],
+    ],
+    ids=[
+        "long-text",
+        "long-str",
+    ],
+)
+def test_embedding_truncation_error(llama_stack_client, embedding_model_id, text_truncation, contents):
+    with pytest.raises(BadRequestError) as excinfo:
+        llama_stack_client.inference.embeddings(
+            model_id=embedding_model_id, contents=[DUMMY_LONG_TEXT], text_truncation=text_truncation
+        )
+
+
+@pytest.mark.xfail(reason="Only valid for model supporting dimension reduction")
+def test_embedding_output_dimension(llama_stack_client, embedding_model_id):
+    base_response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=[DUMMY_STRING])
+    test_response = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=[DUMMY_STRING], output_dimension=32
+    )
+    assert len(base_response.embeddings[0]) != len(test_response.embeddings[0])
+    assert len(test_response.embeddings[0]) == 32
+
+
+@pytest.mark.xfail(reason="Only valid for model supporting task type")
+def test_embedding_task_type(llama_stack_client, embedding_model_id):
+    query_embedding = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="query"
+    )
+    document_embedding = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="document"
+    )
+    assert query_embedding.embeddings != document_embedding.embeddings
+
+
+@pytest.mark.parametrize(
+    "text_truncation",
+    [
+        None,
+        "none",
+        "end",
+        "start",
+    ],
+)
+def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation):
+    response = llama_stack_client.inference.embeddings(
+        model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation
+    )
+    assert isinstance(response, EmbeddingsResponse)
+    assert len(response.embeddings) == 1
+    assert isinstance(response.embeddings[0], list)
+    assert isinstance(response.embeddings[0][0], float)
+
+
+@pytest.mark.parametrize(
+    "text_truncation",
+    [
+        "NONE",
+        "END",
+        "START",
+        "left",
+        "right",
+    ],
+)
+def test_embedding_text_truncation_error(llama_stack_client, embedding_model_id, text_truncation):
+    with pytest.raises(BadRequestError) as excinfo:
+        llama_stack_client.inference.embeddings(
+            model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation
+        )
```
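Outside of pytest, the same truncation knob can be exercised directly with the client SDK. A minimal sketch, assuming a stack running on localhost:8321 and a registered sentence-transformers embedding model (the model id below is an assumption; substitute one registered on your stack):

```python
from llama_stack_client import BadRequestError, LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# Truncation enabled: the server clips the over-long input and returns a vector.
response = client.inference.embeddings(
    model_id="all-MiniLM-L6-v2",  # assumed model id
    contents=["NVDA " * 10240],
    text_truncation="end",
)
print(len(response.embeddings[0]))  # embedding dimension

# Truncation disabled: the same input should be rejected with a 400.
try:
    client.inference.embeddings(
        model_id="all-MiniLM-L6-v2",
        contents=["NVDA " * 10240],
        text_truncation="none",
    )
except BadRequestError as e:
    print("rejected as expected:", e)
```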
```diff
@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+
 import pytest
 from pydantic import BaseModel
 
@@ -342,11 +343,15 @@ def test_text_chat_completion_with_tool_choice_none(client_with_models, text_model_id):
     ],
 )
 def test_text_chat_completion_structured_output(client_with_models, text_model_id, test_case):
+    class NBAStats(BaseModel):
+        year_for_draft: int
+        num_seasons_in_nba: int
+
     class AnswerFormat(BaseModel):
         first_name: str
         last_name: str
         year_of_birth: int
-        num_seasons_in_nba: int
+        nba_stats: NBAStats
 
     tc = TestCase(test_case)
 
@@ -364,7 +369,8 @@ def test_text_chat_completion_structured_output(client_with_models, text_model_id, test_case):
     assert answer.first_name == expected["first_name"]
     assert answer.last_name == expected["last_name"]
     assert answer.year_of_birth == expected["year_of_birth"]
-    assert answer.num_seasons_in_nba == expected["num_seasons_in_nba"]
+    assert answer.nba_stats.num_seasons_in_nba == expected["num_seasons_in_nba"]
+    assert answer.nba_stats.year_for_draft == expected["year_for_draft"]
 
 
 @pytest.mark.parametrize("streaming", [True, False])
```
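The structured-output change above nests the season and draft fields under an `nba_stats` object, so the test now validates one level of nesting in the JSON the model returns. A standalone sketch of that round-trip with plain Pydantic (the sample values are illustrative, not taken from the test case file):

```python
from pydantic import BaseModel


class NBAStats(BaseModel):
    year_for_draft: int
    num_seasons_in_nba: int


class AnswerFormat(BaseModel):
    first_name: str
    last_name: str
    year_of_birth: int
    nba_stats: NBAStats


# Parse a model reply shaped like the schema; nested objects validate too.
answer = AnswerFormat.model_validate_json(
    '{"first_name": "Michael", "last_name": "Jordan", "year_of_birth": 1963,'
    ' "nba_stats": {"year_for_draft": 1984, "num_seasons_in_nba": 15}}'
)
assert answer.nba_stats.year_for_draft == 1984
```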